From f7dc83ccb4f82974f0c5d2647c65f674369acac7 Mon Sep 17 00:00:00 2001 From: Philip Fackler Date: Mon, 25 Sep 2023 11:04:30 -0400 Subject: [PATCH 1/3] Refactored everything needed for test_RotatedSPOsT --- src/Particle/CMakeLists.txt | 4 + src/Particle/InitMolecularSystemT.cpp | 314 +++ src/Particle/InitMolecularSystemT.h | 79 + src/Particle/LongRange/StructFactT.cpp | 2 +- src/Particle/LongRange/StructFactT.h | 9 +- src/Particle/MCWalkerConfigurationT.cpp | 313 +++ src/Particle/MCWalkerConfigurationT.h | 244 ++ src/Particle/ParticleIO/LatticeIO.cpp | 210 ++ src/Particle/ParticleIO/LatticeIO.h | 13 + src/Particle/ParticleIO/XMLParticleIO.cpp | 398 +++ src/Particle/ParticleIO/XMLParticleIO.h | 36 + src/Particle/ParticleSetPoolT.cpp | 278 ++ src/Particle/ParticleSetPoolT.h | 155 ++ src/Particle/ParticleSetT.BC.cpp | 194 ++ src/Particle/ParticleSetT.cpp | 212 ++ src/Particle/ParticleSetT.h | 12 +- src/Particle/ParticleSetTraits.h | 1 + src/Particle/ReptileT.h | 350 +++ src/Particle/SampleStackT.cpp | 81 + src/Particle/SampleStackT.h | 84 + src/Particle/SimulationCellT.h | 5 +- .../BsplineFactory/BsplineReaderBase.h | 1 + .../BsplineFactory/BsplineReaderBaseT.cpp | 259 ++ .../BsplineFactory/BsplineReaderBaseT.h | 228 ++ .../BsplineFactory/BsplineSetT.h | 10 +- .../HybridRepCenterOrbitalsT.cpp | 23 + .../BsplineFactory/HybridRepCenterOrbitalsT.h | 819 ++++++ .../BsplineFactory/HybridRepCplxT.h | 292 ++ .../BsplineFactory/HybridRepRealT.h | 303 ++ .../BsplineFactory/HybridRepSetReader.h | 1 + .../BsplineFactory/HybridRepSetReaderT.h | 492 ++++ .../BsplineFactory/SplineC2COMPTargetT.cpp | 2438 +++++++++-------- .../BsplineFactory/SplineC2COMPTargetT.h | 615 +++-- .../BsplineFactory/SplineC2CT.cpp | 1479 +++++----- .../BsplineFactory/SplineC2CT.h | 429 +-- ...TOMPTarget.cpp => SplineC2ROMPTargetT.cpp} | 126 +- ...eC2RTOMPTarget.h => SplineC2ROMPTargetT.h} | 83 +- .../BsplineFactory/SplineC2RT.cpp | 2333 ++++++++-------- .../BsplineFactory/SplineC2RT.h | 399 +-- 
.../BsplineFactory/SplineR2RT.cpp | 79 +- .../BsplineFactory/SplineR2RT.h | 41 +- .../BsplineFactory/SplineSetReader.h | 4 + .../BsplineFactory/SplineSetReaderT.h | 322 +++ .../BsplineFactory/createBsplineReaderT.cpp | 331 +++ .../BsplineFactory/createBsplineReaderT.h | 59 + src/QMCWaveFunctions/CMakeLists.txt | 29 +- src/QMCWaveFunctions/EinsplineSetBuilderT.cpp | 1815 ++++++++++++ src/QMCWaveFunctions/EinsplineSetBuilderT.h | 334 +++ src/QMCWaveFunctions/OrbitalSetTraits.h | 1 + src/QMCWaveFunctions/SPOSetT.h | 1 + src/QMCWaveFunctions/SpinorSetT.cpp | 2 +- src/QMCWaveFunctions/SpinorSetT.h | 3 +- src/QMCWaveFunctions/tests/CMakeLists.txt | 1 + .../tests/test_RotatedSPOsT.cpp | 1024 +++++++ src/mpi/mpi_datatype.h | 2 + 55 files changed, 13639 insertions(+), 3733 deletions(-) create mode 100644 src/Particle/InitMolecularSystemT.cpp create mode 100644 src/Particle/InitMolecularSystemT.h create mode 100644 src/Particle/MCWalkerConfigurationT.cpp create mode 100644 src/Particle/MCWalkerConfigurationT.h create mode 100644 src/Particle/ParticleSetPoolT.cpp create mode 100644 src/Particle/ParticleSetPoolT.h create mode 100644 src/Particle/ParticleSetT.BC.cpp create mode 100644 src/Particle/ReptileT.h create mode 100644 src/Particle/SampleStackT.cpp create mode 100644 src/Particle/SampleStackT.h create mode 100644 src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.cpp create mode 100644 src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h create mode 100644 src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.cpp create mode 100644 src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h create mode 100644 src/QMCWaveFunctions/BsplineFactory/HybridRepCplxT.h create mode 100644 src/QMCWaveFunctions/BsplineFactory/HybridRepRealT.h create mode 100644 src/QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h rename src/QMCWaveFunctions/BsplineFactory/{SplineC2RTOMPTarget.cpp => SplineC2ROMPTargetT.cpp} (96%) rename 
src/QMCWaveFunctions/BsplineFactory/{SplineC2RTOMPTarget.h => SplineC2ROMPTargetT.h} (82%) create mode 100644 src/QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h create mode 100644 src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.cpp create mode 100644 src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h create mode 100644 src/QMCWaveFunctions/EinsplineSetBuilderT.cpp create mode 100644 src/QMCWaveFunctions/EinsplineSetBuilderT.h create mode 100644 src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp diff --git a/src/Particle/CMakeLists.txt b/src/Particle/CMakeLists.txt index b6517626c1..9dc57daf01 100644 --- a/src/Particle/CMakeLists.txt +++ b/src/Particle/CMakeLists.txt @@ -14,9 +14,11 @@ #################################### set(PARTICLE InitMolecularSystem.cpp + InitMolecularSystemT.cpp SimulationCell.cpp SimulationCellT.cpp ParticleSetPool.cpp + ParticleSetPoolT.cpp ParticleSet.cpp ParticleSetT.cpp PSdispatcher.cpp @@ -28,9 +30,11 @@ set(PARTICLE MCCoords.cpp MCCoordsT.cpp MCWalkerConfiguration.cpp + MCWalkerConfigurationT.cpp WalkerConfigurations.cpp SpeciesSet.cpp SampleStack.cpp + SampleStackT.cpp createDistanceTableAA.cpp createDistanceTableAB.cpp createDistanceTableT.cpp diff --git a/src/Particle/InitMolecularSystemT.cpp b/src/Particle/InitMolecularSystemT.cpp new file mode 100644 index 0000000000..a4559fc288 --- /dev/null +++ b/src/Particle/InitMolecularSystemT.cpp @@ -0,0 +1,314 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. +// +// File developed by: Jordan E. 
Vincent, University of Illinois at
+//                    Urbana-Champaign
+//                    Luke Shulenburger, lshulen@sandia.gov, Sandia National
+//                    Laboratories Jeremy McMinnis, jmcminis@gmail.com,
+//                    University of Illinois at Urbana-Champaign Jeongnim Kim,
+//                    jeongnim.kim@gmail.com, University of Illinois at
+//                    Urbana-Champaign Miguel Morales, moralessilva2@llnl.gov,
+//                    Lawrence Livermore National Laboratory Mark Dewing,
+//                    markdewing@gmail.com, University of Illinois at
+//                    Urbana-Champaign Mark A. Berrill, berrillma@ornl.gov, Oak
+//                    Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "InitMolecularSystemT.h"
+
+#include "OhmmsData/AttributeSet.h"
+#include "Particle/DistanceTableT.h"
+#include "Particle/ParticleSetPoolT.h"
+#include "ParticleBase/RandomSeqGeneratorGlobal.h"
+
+namespace qmcplusplus
+{
+template
+InitMolecularSystemT::InitMolecularSystemT(
+    ParticleSetPoolT& pset, const char* aname) :
+    OhmmsElementBase(aname),
+    ptclPool(pset)
+{
+}
+
+template
+bool
+InitMolecularSystemT::put(xmlNodePtr cur) // reads target/source/use_volume, then places the electrons
+{
+    std::string target("e"), source("i"), volume("no");
+    OhmmsAttributeSet hAttrib;
+    hAttrib.add(target, "target");
+    hAttrib.add(source, "source");
+    hAttrib.add(volume, "use_volume");
+    hAttrib.put(cur);
+    ParticleSetT* els = ptclPool.getParticleSet(target);
+    if (els == 0) {
+        ERRORMSG("No target particle " << target << " exists.")
+        return false;
+    }
+    ParticleSetT* ions = ptclPool.getParticleSet(source);
+    if (ions == 0) {
+        ERRORMSG("No source particle " << source << " exists.")
+        return false;
+    }
+
+    app_log() << "" << std::endl;
+
+    if (volume == "yes")
+        initWithVolume(ions, els);
+    else
+        initMolecule(ions, els);
+
+    makeUniformRandom(els->spins); // spins are drawn uniformly and scaled to [0, 2*pi)
+    els->spins *= 2 * M_PI;
+
+    app_log() << "" << std::endl;
+    app_log().flush();
+
+    return true;
+}
+
+template
+void
+InitMolecularSystemT::initAtom(ParticleSetT* ions, ParticleSetT* els)
+{
+    // 3N-dimensional Gaussian centred on the first ion, width 0.5 * sqrt(N)
+    typename ParticleSetT::ParticlePos chi(els->getTotalNum());
+    makeGaussRandom(chi);
+    RealType q = std::sqrt(static_cast(els->getTotalNum())) * 0.5;
+    int nel(els->getTotalNum()), items(0);
+    while (nel) {
+        els->R[items] = ions->R[0] + q * chi[items];
+        --nel;
+        ++items;
+    }
+}
+
+template
+struct LoneElectronT // an unpaired electron: owning ion index and placement radius
+{
+    using RealType = TReal;
+    int ID;
+    RealType BondLength;
+    inline LoneElectronT(int id, RealType bl) : ID(id), BondLength(bl)
+    {
+    }
+};
+
+template
+void
+InitMolecularSystemT::initMolecule(
+    ParticleSetT* ions, ParticleSetT* els)
+{
+    if (ions->getTotalNum() == 1)
+        return initAtom(ions, els);
+
+    const int d_ii_ID = ions->addTable(*ions);
+    ions->update();
+    const typename ParticleSetT::ParticleIndex& grID(ions->GroupID);
+    SpeciesSet& Species(ions->getSpeciesSet());
+    int Centers = ions->getTotalNum();
+    std::vector Qtot(Centers), Qcore(Centers), Qval(Centers, 0);
+    // use charge as the core electrons first
+    int icharge = Species.addAttribute("charge");
+    // Assign default core charge
+    for (int iat = 0; iat < Centers; iat++)
+        Qtot[iat] = static_cast(Species(icharge, grID[iat]));
+    // cutoff radius (Bohr); this is an arbitrary choice
+    RealType cutoff = 4.0;
+    typename ParticleSetT::ParticlePos chi(els->getTotalNum());
+    // makeGaussRandom(chi);
+    makeSphereRandom(chi);
+    // the upper limit of the electron index with spin up
+    const int numUp = els->last(0);
+    // the upper limit of the electron index with spin down. Pay attention to
+    // the no spin down electron case.
+    const int numDown = els->last(els->groups() > 1 ? 1 : 0) - els->first(0);
+    // consumer counter of random numbers chi
+    int random_number_counter = 0;
+    int nup_tot = 0, ndown_tot = numUp;
+    std::vector> loneQ;
+    RealType rmin = cutoff;
+    typename ParticleSetT::SingleParticlePos cm;
+
+    const auto& dist = ions->getDistTableAA(d_ii_ID).getDistances();
+    // Step 1. Distribute even Q[iat] of atomic center iat. If Q[iat] is odd,
+    // put Q[iat]-1 and save the lone electron.
+    for (size_t iat = 0; iat < Centers; iat++) {
+        cm += ions->R[iat];
+        for (size_t jat = iat + 1; jat < Centers; ++jat) {
+            rmin = std::min(rmin, dist[jat][iat]);
+        }
+        // use 40% of the minimum bond
+        RealType sep = rmin * 0.4;
+        int v2 = Qtot[iat] / 2;
+        if (Qtot[iat] > v2 * 2) {
+            loneQ.push_back(LoneElectronT(iat, sep));
+        }
+        for (int k = 0; k < v2; k++) {
+            // initialize electron positions in pairs
+            if (nup_tot < numUp)
+                els->R[nup_tot++] =
+                    ions->R[iat] + sep * chi[random_number_counter++];
+            if (ndown_tot < numDown)
+                els->R[ndown_tot++] =
+                    ions->R[iat] + sep * chi[random_number_counter++];
+        }
+    }
+
+    // Step 2. Distribute the electrons left alone
+    // mmorales: changed order of spin assignment to help with spin
+    // imbalances in molecules at large distances.
+    // Not guaranteed to work, but should help in most cases
+    // as long as atoms in molecules are defined sequentially
+    typename std::vector>::iterator it(loneQ.begin());
+    typename std::vector>::iterator it_end(loneQ.end());
+    while (it != it_end && nup_tot != numUp && ndown_tot != numDown) {
+        if (nup_tot < numUp) {
+            els->R[nup_tot++] = ions->R[(*it).ID] +
+                (*it).BondLength * chi[random_number_counter++];
+            ++it;
+        }
+        if (ndown_tot < numDown && it != it_end) {
+            els->R[ndown_tot++] = ions->R[(*it).ID] +
+                (*it).BondLength * chi[random_number_counter++];
+            ++it;
+        }
+    }
+
+    // Step 3. Handle more than neutral electrons
+    // extra electrons around the geometric center
+    RealType cnorm = 1.0 / static_cast(Centers);
+    RealType sep = rmin * 2;
+    cm = cnorm * cm;
+    if (nup_tot < numUp)
+        while (nup_tot < numUp)
+            els->R[nup_tot++] = cm + sep * chi[random_number_counter++];
+    if (ndown_tot < numDown)
+        while (ndown_tot < numDown)
+            els->R[ndown_tot++] = cm + sep * chi[random_number_counter++];
+
+    // safety check. all the random numbers should have been consumed once and
+    // only once.
+    if (random_number_counter != chi.size())
+        throw std::runtime_error("initMolecule unexpected random number "
+                                 "consumption. Please report a bug!");
+
+    // put all the electrons in a unit box
+    if (els->getLattice().SuperCellEnum != SUPERCELL_OPEN) {
+        els->R.setUnit(PosUnit::Cartesian);
+        els->applyBC(els->R);
+        els->update(false);
+    }
+}
+
+/// helper function: component-wise minimum of two 3-vectors (need to move up)
+template
+inline TinyVector
+lower_bound(const TinyVector& a, const TinyVector& b)
+{
+    return TinyVector(
+        std::min(a[0], b[0]), std::min(a[1], b[1]), std::min(a[2], b[2]));
+}
+
+/// helper function: component-wise maximum of two 3-vectors (need to move up)
+template
+inline TinyVector
+upper_bound(const TinyVector& a, const TinyVector& b)
+{
+    return TinyVector(
+        std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2]));
+}
+
+template
+void
+InitMolecularSystemT::initWithVolume(
+    ParticleSetT* ions, ParticleSetT* els)
+{
+    TinyVector start(1.0);
+    TinyVector end(0.0);
+
+    typename ParticleSetT::ParticlePos Ru(ions->getTotalNum());
+    Ru.setUnit(PosUnit::Lattice);
+    ions->applyBC(ions->R, Ru);
+
+    for (int iat = 0; iat < Ru.size(); iat++) {
+        start = lower_bound(Ru[iat], start);
+        end = upper_bound(Ru[iat], end);
+    }
+
+    TinyVector shift;
+    Tensor newbox(ions->getLattice().R);
+
+    RealType buffer = 2.0; // buffer 2 bohr
+    for (int idim = 0; idim < OHMMS_DIM; ++idim) {
+        // if(ions->getLattice().BoxBConds[idim])
+        //{
+        //   start[idim]=0.0;
+        //   end[idim]=1.0;
+        //   shift[idim]=0.0;
+        // }
+        // else
+        {
+            RealType buffer_r = buffer * ions->getLattice().OneOverLength[idim];
+            start[idim] = std::max((RealType)0.0, (start[idim] - buffer_r));
+            end[idim] = std::min((RealType)1.0, (end[idim] + buffer_r));
+            shift[idim] = start[idim] * ions->getLattice().Length[idim];
+            if (std::abs(end[idim] - start[idim]) <
+                buffer) { // handle singular case; NOTE(review): extent is in lattice units, buffer in bohr - confirm
+                start[idim] = std::max(0.0, start[idim] - buffer_r / 2.0);
+                end[idim] = std::min(1.0, end[idim] + buffer_r / 2.0);
+            }
+
+            newbox(idim, idim) =
+                (end[idim] - start[idim]) * ions->getLattice().Length[idim];
+        }
+    }
+
+    typename ParticleSetT::ParticleLayout slattice(ions->getLattice());
+    slattice.set(newbox);
+
+    app_log() << " InitMolecularSystem::initWithVolume " << std::endl;
+    app_log() << " Effective Lattice shifted by " << shift << std::endl;
+    app_log() << newbox << std::endl;
+
+    Ru.resize(els->getTotalNum());
+    makeUniformRandom(Ru);
+    for (int iat = 0; iat < Ru.size(); ++iat)
+        els->R[iat] = slattice.toCart(Ru[iat]) + shift;
+    els->R.setUnit(PosUnit::Cartesian);
+}
+
+template
+bool
+InitMolecularSystemT::put(std::istream& is)
+{
+    return true;
+}
+
+template
+bool
+InitMolecularSystemT::get(std::ostream& os) const
+{
+    return true;
+}
+
+template
+void
+InitMolecularSystemT::reset()
+{
+}
+
+template class InitMolecularSystemT;
+template class InitMolecularSystemT;
+template class InitMolecularSystemT>;
+template class InitMolecularSystemT>;
+
+} // namespace qmcplusplus
diff --git a/src/Particle/InitMolecularSystemT.h b/src/Particle/InitMolecularSystemT.h
new file mode 100644
index 0000000000..3bfe148db5
--- /dev/null
+++ b/src/Particle/InitMolecularSystemT.h
@@ -0,0 +1,79 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
+// Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
+//                    Illinois at Urbana-Champaign
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_INITMOLECULARSYSTEMT_H
+#define QMCPLUSPLUS_INITMOLECULARSYSTEMT_H
+
+#include "OhmmsData/OhmmsElementBase.h"
+#include "ParticleSetTraits.h"
+
+#include
+
+namespace qmcplusplus
+{
+template
+class ParticleSetT;
+template
+class ParticleSetPoolT;
+
+/* Engine to initialize the initial electronic structure for a molecular system
+ */
+template
+class InitMolecularSystemT : public OhmmsElementBase
+{
+public:
+    using RealType = typename ParticleSetTraits::RealType;
+
+    InitMolecularSystemT(ParticleSetPoolT& pset, const char* aname = "mosystem");
+
+    bool
+    get(std::ostream& os) const override;
+    bool
+    put(std::istream& is) override;
+    bool
+    put(xmlNodePtr cur) override;
+    void
+    reset() override;
+
+    /** initialize els positions for an atom (single-ion case)
+     */
+    void
+    initAtom(ParticleSetT* ions, ParticleSetT* els);
+    /** initialize els position for a molecule
+     *
+     * Use the valence of each ionic species on a sphere
+     */
+    void
+    initMolecule(ParticleSetT* ions, ParticleSetT* els);
+    /** initialize els for the systems with a mixed boundary
+     *
+     * Use the bound of the ionic systems and uniform random positions within a
+     * reduced box
+     */
+    void
+    initWithVolume(ParticleSetT* ions, ParticleSetT* els);
+
+private:
+    /** reference to the ParticleSetPool (held by reference, not owned)
+     *
+     * QMCHamiltonian needs to know which ParticleSet object
+     * is used as an input object for the evaluations.
+     * Any number of ParticleSet can be used to describe
+     * a QMCHamiltonian.
+     */
+    ParticleSetPoolT& ptclPool;
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/Particle/LongRange/StructFactT.cpp b/src/Particle/LongRange/StructFactT.cpp index 6f1dae8a9e..363d364c68 100644 --- a/src/Particle/LongRange/StructFactT.cpp +++ b/src/Particle/LongRange/StructFactT.cpp @@ -32,7 +32,7 @@ namespace qmcplusplus // Constructor - pass arguments to k_lists_' constructor template StructFactT::StructFactT( - const ParticleLayout& lattice, const KContainer& k_lists) : + const ParticleLayout& lattice, const KContainerT& k_lists) : SuperCellEnum(SUPERCELL_BULK), k_lists_(k_lists), StorePerParticle(false), diff --git a/src/Particle/LongRange/StructFactT.h b/src/Particle/LongRange/StructFactT.h index 218b3adf31..e61ed50beb 100644 --- a/src/Particle/LongRange/StructFactT.h +++ b/src/Particle/LongRange/StructFactT.h @@ -28,7 +28,8 @@ namespace qmcplusplus { template class ParticleSetT; -class KContainer; +template +class KContainerT; template struct SKMultiWalkerMemT; @@ -65,7 +66,7 @@ class StructFactT * At least in the batched version Structure factor is _NOT_ valid * after construction. */ - StructFactT(const ParticleLayout& lattice, const KContainer& k_lists); + StructFactT(const ParticleLayout& lattice, const KContainerT& k_lists); /// desructor ~StructFactT(); @@ -100,7 +101,7 @@ class StructFactT } /// accessor of k_lists_ - const KContainer& + const KContainerT& getKLists() const { return k_lists_; @@ -119,7 +120,7 @@ class StructFactT resize(int nkpts, int num_species, int num_ptcls); /// K-Vector List. - const KContainer& k_lists_; + const KContainerT& k_lists_; /** Whether intermediate data is stored per particle. default false * storing data per particle needs significant amount of memory but some * calculation may request it.
storing data per particle specie is more diff --git a/src/Particle/MCWalkerConfigurationT.cpp b/src/Particle/MCWalkerConfigurationT.cpp new file mode 100644 index 0000000000..1f3fcaa1c0 --- /dev/null +++ b/src/Particle/MCWalkerConfigurationT.cpp @@ -0,0 +1,313 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jordan E. Vincent, University of Illinois at +// Urbana-Champaign +// Bryan Clark, bclark@Princeton.edu, Princeton University +// Ken Esler, kpesler@gmail.com, University of Illinois at +// Urbana-Champaign Jeremy McMinnis, jmcminis@gmail.com, +// University of Illinois at Urbana-Champaign Jeongnim Kim, +// jeongnim.kim@gmail.com, University of Illinois at +// Urbana-Champaign Cynthia Gu, zg1@ornl.gov, Oak Ridge +// National Laboratory Ye Luo, yeluo@anl.gov, Argonne +// National Laboratory Mark A. 
Berrill, berrillma@ornl.gov,
+//                    Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "MCWalkerConfigurationT.h"
+
+#include "LongRange/StructFact.h"
+#include "Message/CommOperators.h"
+#include "Message/Communicate.h"
+#include "Particle/HDFWalkerOutput.h"
+#include "Particle/MCSample.h"
+#include "Particle/ReptileT.h"
+#include "ParticleBase/RandomSeqGenerator.h"
+#include "Utilities/IteratorUtility.h"
+#include "hdf/HDFVersion.h"
+#include "hdf/hdf_hyperslab.h"
+
+#include
+
+namespace qmcplusplus
+{
+template
+MCWalkerConfigurationT::MCWalkerConfigurationT(
+    const SimulationCellT& simulation_cell,
+    const DynamicCoordinateKind kind) :
+    ParticleSetT(simulation_cell, kind),
+    ReadyForPbyP(false),
+    UpdateMode(Update_Walker),
+    reptile(0),
+    Polymer(0)
+{
+}
+
+template
+MCWalkerConfigurationT::MCWalkerConfigurationT(
+    const MCWalkerConfigurationT& mcw) :
+    ParticleSetT(mcw),
+    ReadyForPbyP(false),
+    UpdateMode(Update_Walker),
+    reptile(0), Polymer(0) // reptile must not be left indeterminate in a copy
+{
+    samples.clearEnsemble();
+    samples.setMaxSamples(mcw.getMaxSamples());
+    setWalkerOffsets(mcw.getWalkerOffsets());
+    this->Properties = mcw.Properties;
+}
+
+template
+MCWalkerConfigurationT::~MCWalkerConfigurationT() = default;
+
+template
+void
+MCWalkerConfigurationT::createWalkers(int n)
+{
+    const int old_nw = getActiveWalkers();
+    WalkerConfigurations::createWalkers(n, this->TotalNum);
+    // no pre-existing walkers: initialize the new ones from this particle set.
+    if (old_nw == 0)
+        for (auto& awalker : walker_list_) {
+            awalker->R = this->R;
+            awalker->spins = this->spins;
+        }
+    resizeWalkerHistories();
+}
+
+template
+void
+MCWalkerConfigurationT::resize(int numWalkers, int numPtcls)
+{
+    if (this->TotalNum && walker_list_.size())
+        app_warning()
+            << "MCWalkerConfiguration::resize cleans up the walker list."
+            << std::endl;
+    const int old_nw = getActiveWalkers();
+    ParticleSetT::resize(unsigned(numPtcls));
+    WalkerConfigurations::resize(numWalkers, this->TotalNum);
+    // no pre-existing walkers: initialize the new ones from this particle set.
+    if (old_nw == 0)
+        for (auto& awalker : walker_list_) {
+            awalker->R = this->R;
+            awalker->spins = this->spins;
+        }
+}
+
+/** Obsolete: always throws std::runtime_error.
+ * @param it the iterator of the first walker to work on
+ * @param tauinv inverse of the time step
+ *
+ * R + D + X
+ */
+template
+void
+MCWalkerConfigurationT::sample(iterator it, RealType tauinv)
+{
+    throw std::runtime_error("MCWalkerConfiguration::sample obsolete");
+    // makeGaussRandom(R);
+    // R *= tauinv;
+    // R += (*it)->R + (*it)->Drift;
+}
+
+/** reset the Property container of all the walkers
+ */
+template
+void
+MCWalkerConfigurationT::resetWalkerProperty(int ncopy)
+{
+    int m(this->PropertyList.size());
+    app_log() << " Resetting Properties of the walkers " << ncopy << " x " << m
+              << std::endl;
+    try {
+        this->Properties.resize(ncopy, m);
+    }
+    catch (const std::domain_error& de) {
+        app_error() << de.what() << '\n'
+                    << "This is likely because some object has attempted to "
+                       "add walker properties\n"
+                    << " in excess of WALKER_MAX_PROPERTIES.\n"
+                    << "build with cmake ... "
+                       "-DWALKER_MAX_PROPERTIES=at_least_properties_required"
+                    << std::endl;
+        APP_ABORT("Fatal Exception");
+    }
+
+    for (auto& walker : walker_list_) {
+        walker->resizeProperty(ncopy, m);
+        walker->Weight = 1.0;
+    }
+    resizeWalkerHistories();
+}
+
+template
+void
+MCWalkerConfigurationT::resizeWalkerHistories()
+{
+    // using std::vector > is too costly.
+    int np = this->PropertyHistory.size();
+    if (np)
+        for (int iw = 0; iw < walker_list_.size(); ++iw)
+            walker_list_[iw]->PropertyHistory = this->PropertyHistory;
+    np = this->PHindex.size();
+    if (np)
+        for (int iw = 0; iw < walker_list_.size(); ++iw)
+            walker_list_[iw]->PHindex = this->PHindex;
+    ;
+}
+
+/** allocate the SampleStack
+ * @param n number of samples per thread
+ */
+template
+void
+MCWalkerConfigurationT::setNumSamples(int n)
+{
+    samples.clearEnsemble();
+    samples.setMaxSamples(n);
+}
+
+/** save the current walkers to SampleStack
+ */
+template
+void
+MCWalkerConfigurationT::saveEnsemble()
+{
+    saveEnsemble(walker_list_.begin(), walker_list_.end());
+}
+
+/** save the [first,last) walkers to SampleStack
+ */
+template
+void
+MCWalkerConfigurationT::saveEnsemble(iterator first, iterator last)
+{
+    for (; first != last; first++) {
+        samples.appendSample(MCSample(**first));
+    }
+}
+/** load a single sample from SampleStack
+ */
+template
+void
+MCWalkerConfigurationT::loadSample(ParticleSetT& pset, size_t iw) const
+{
+    samples.loadSample(pset, iw);
+}
+
+/** load SampleStack to walker_list_, then clear the SampleStack
+ */
+template
+void
+MCWalkerConfigurationT::loadEnsemble()
+{
+    using WP = WalkerProperties::Indexes;
+    int nsamples = std::min(samples.getMaxSamples(), samples.getNumSamples());
+    if (samples.empty() || nsamples == 0)
+        return;
+    Walker_t::PropertyContainer_t prop(
+        1, this->PropertyList.size(), 1, WP::MAXPROPERTIES);
+    walker_list_.resize(nsamples);
+    for (int i = 0; i < nsamples; ++i) {
+        auto awalker = std::make_unique(this->TotalNum);
+        awalker->Properties.copy(prop);
+        samples.getSample(i).convertToWalker(*awalker);
+        walker_list_[i] = std::move(awalker);
+    }
+    resizeWalkerHistories();
+    samples.clearEnsemble();
+}
+
+template
+bool
+MCWalkerConfigurationT::dumpEnsemble(
+    std::vector*>& others, HDFWalkerOutput& out,
+    int np, int nBlock)
+{
+    WalkerConfigurations wctemp;
+    for (auto* mcwc : others) {
+        const auto& astack(mcwc->getSampleStack());
+        const size_t sample_size =
+            std::min(mcwc->getMaxSamples(), mcwc->numSamples());
+        for (int j = 0; j < sample_size; ++j) {
+            const auto& sample = astack.getSample(j);
+            const size_t num_ptcls = sample.getNumPtcls();
+            auto awalker = std::make_unique(num_ptcls);
+            sample.convertToWalker(*awalker);
+            wctemp.push_back(std::move(awalker));
+        }
+    }
+    const int w = wctemp.getActiveWalkers();
+    if (w == 0)
+        return false;
+
+    // The following code assumes the same amount of active walkers on all the
+    // MPI ranks
+    std::vector nwoff(np + 1, 0);
+    for (int ip = 0; ip < np; ++ip)
+        nwoff[ip + 1] = nwoff[ip] + w;
+    wctemp.setWalkerOffsets(nwoff);
+    out.dump(wctemp, nBlock);
+    return true;
+}
+
+template
+int
+MCWalkerConfigurationT::getMaxSamples() const
+{
+    return samples.getMaxSamples();
+}
+
+template
+void
+MCWalkerConfigurationT::loadEnsemble(
+    std::vector*>& others, bool doclean)
+{
+    using WP = WalkerProperties::Indexes;
+    std::vector off(others.size() + 1, 0);
+    for (int i = 0; i < others.size(); ++i) {
+        off[i + 1] = off[i] +
+            std::min(others[i]->getMaxSamples(), others[i]->numSamples());
+    }
+    int nw_tot = off.back();
+    if (nw_tot) {
+        Walker_t::PropertyContainer_t prop(
+            1, this->PropertyList.size(), 1, WP::MAXPROPERTIES);
+        while (walker_list_.size())
+            pop_back();
+        walker_list_.resize(nw_tot);
+        for (int i = 0; i < others.size(); ++i) {
+            SampleStackT& astack(others[i]->getSampleStack());
+            for (int j = 0, iw = off[i]; iw < off[i + 1]; ++j, ++iw) {
+                auto awalker = std::make_unique(this->TotalNum);
+                awalker->Properties.copy(prop);
+                astack.getSample(j).convertToWalker(*awalker);
+                walker_list_[iw] = std::move(awalker);
+            }
+            if (doclean)
+                others[i]->clearEnsemble();
+        }
+    }
+    if (doclean)
+        resizeWalkerHistories();
+}
+
+template
+void
+MCWalkerConfigurationT::clearEnsemble()
+{
+    samples.clearEnsemble();
+}
+
+template class MCWalkerConfigurationT;
+template class MCWalkerConfigurationT;
+template class MCWalkerConfigurationT>;
+template class MCWalkerConfigurationT>;
+
+} // namespace qmcplusplus
diff --git a/src/Particle/MCWalkerConfigurationT.h b/src/Particle/MCWalkerConfigurationT.h
new file mode 100644
index 0000000000..49a159e51d
--- /dev/null
+++ b/src/Particle/MCWalkerConfigurationT.h
@@ -0,0 +1,244 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jordan E. Vincent, University of Illinois at
+// Urbana-Champaign
+//                    Ken Esler, kpesler@gmail.com, University of Illinois at
+//                    Urbana-Champaign Jeremy McMinnis, jmcminis@gmail.com,
+//                    University of Illinois at Urbana-Champaign Jeongnim Kim,
+//                    jeongnim.kim@gmail.com, University of Illinois at
+//                    Urbana-Champaign Cynthia Gu, zg1@ornl.gov, Oak Ridge
+//                    National Laboratory Raymond Clay III,
+//                    j.k.rofling@gmail.com, Lawrence Livermore National
+//                    Laboratory Ye Luo, yeluo@anl.gov, Argonne National
+//                    Laboratory Mark A. Berrill, berrillma@ornl.gov, Oak Ridge
+//                    National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+/** @file MCWalkerConfigurationT.h
+ * @brief Declaration of MCWalkerConfigurationT
+ */
+#ifndef QMCPLUSPLUS_MCWALKERCONFIGURATIONT_H
+#define QMCPLUSPLUS_MCWALKERCONFIGURATIONT_H
+#include "Particle/ParticleSetT.h"
+#include "Particle/SampleStackT.h"
+#include "Particle/Walker.h"
+#include "Particle/WalkerConfigurations.h"
+#include "Utilities/IteratorUtility.h"
+
+namespace qmcplusplus
+{
+// Forward declaration
+class MultiChain;
+class HDFWalkerOutput;
+template
+class ReptileT;
+
+/** A set of walkers that are to be advanced by Metropolis Monte Carlo.
+ *
+ * As a derived class from ParticleSet, MCWalkerConfiguration interacts with
+ * QMCHamiltonian and TrialWaveFunction as a ParticleSet, while QMCDrivers
+ * use it as multiple walkers whose configurations are advanced according
+ * to MC algorithms.
+ *
+ * Each walker is represented by a Walker_t and
+ * MCWalkerConfiguration contains a list of
+ * the walkers. This class enables two possible moves:
+ * <ul>
+ * <li> move the entire active walkers, similarly to molecular dynamics. Suitable for
+ * small and big systems with a small time step.
+ * <li> move a particle for each walker. Suitable for large systems.
+ *
+ * </ul>
+ */
+template
+class MCWalkerConfigurationT :
+    public ParticleSetT,
+    public WalkerConfigurations
+{
+public:
+    /**enumeration for update*/
+    enum
+    {
+        Update_All = 0, /// move all the active walkers
+        Update_Walker, /// move a walker by walker
+        Update_Particle /// move a particle by particle
+    };
+
+    using Walker_t = WalkerConfigurations::Walker_t;
+    /// container type of the Properties of a Walker
+    using PropertyContainer_t = Walker_t::PropertyContainer_t;
+    /// container type of Walkers
+    using WalkerList_t = std::vector>;
+    /// FIX: a type alias of iterator for an object should not be for just one
+    /// of many objects it holds.
+    using iterator = WalkerList_t::iterator;
+    /// const_iterator of Walker container
+    using const_iterator = WalkerList_t::const_iterator;
+
+    using ReptileList_t = UPtrVector>;
+
+    using RealType = typename ParticleSetT::RealType;
+
+    /// constructor from a simulation cell and a coordinate kind
+    MCWalkerConfigurationT(const SimulationCellT& simulation_cell,
+        const DynamicCoordinateKind kind = DynamicCoordinateKind::DC_POS);
+
+    /// copy constructor: copy only ParticleSet
+    MCWalkerConfigurationT(const MCWalkerConfigurationT& mcw);
+    ~MCWalkerConfigurationT();
+    /** create numWalkers Walkers
+     *
+     * Append Walkers to WalkerList.
+     */
+    void
+    createWalkers(int numWalkers);
+    /// clean up the walker list and make a new list
+    void
+    resize(int numWalkers, int numPtcls);
+
+    /// clean up the walker list
+    using WalkerConfigurations::clear;
+    /// resize Walker::PropertyHistory and Walker::PHindex:
+    void
+    resizeWalkerHistories();
+
+    /// make random moves for all the walkers
+    // void sample(iterator first, iterator last, value_type tauinv);
+    /// make a random move for a walker
+    void
+    sample(iterator it, RealType tauinv);
+
+    /// return the number of particles per walker
+    inline int
+    getParticleNum() const
+    {
+        return this->R.size();
+    }
+    /**@}*/
+
+    /** set LocalEnergy
+     * @param e current average Local Energy
+     */
+    inline void
+    setLocalEnergy(RealType e)
+    {
+        LocalEnergy = e;
+    }
+
+    /** return LocalEnergy
+     */
+    inline RealType
+    getLocalEnergy() const
+    {
+        return LocalEnergy;
+    }
+
+    inline MultiChain*
+    getPolymer()
+    {
+        return Polymer;
+    }
+
+    inline void
+    setPolymer(MultiChain* chain)
+    {
+        Polymer = chain;
+    }
+
+    void
+    resetWalkerProperty(int ncopy = 1);
+
+    inline bool
+    updatePbyP() const
+    {
+        return ReadyForPbyP;
+    }
+
+    //@{save/load/clear function for optimization
+    //
+    int
+    numSamples() const
+    {
+        return samples.getNumSamples();
+    }
+    /// set the number of max samples
+    void
+    setNumSamples(int n);
+    /// save the position of current walkers to SampleStack
+    void
+    saveEnsemble();
+    /// save the position of the walkers in [first,last) to SampleStack
+    void
+    saveEnsemble(iterator first, iterator last);
+    /// load a single sample from SampleStack
+    void
+    loadSample(ParticleSetT& pset, size_t iw) const;
+    /// load SampleStack data to the current list of walker configurations
+    void
+    loadEnsemble();
+    /// load the SampleStacks of others to the current list of walker
+    /// configurations
+    void
+    loadEnsemble(
+        std::vector*>& others, bool doclean = true);
+    /** dump Samples to a file
+     * @param others MCWalkerConfigurations whose samples will be collected
+     * @param out engine to write the samples to state_0/walkers
+     * @param np number of processors
+     * @return true with non-zero samples
+     *
+     * CAUTION: The current implementation assumes the same amount of active
+     * walkers on all the MPI ranks.
+     */
+    static bool
+    dumpEnsemble(std::vector*>& others,
+        HDFWalkerOutput& out, int np, int nBlock);
+    /// clear the ensemble
+    void
+    clearEnsemble();
+
+    const SampleStackT&
+    getSampleStack() const
+    {
+        return samples;
+    }
+    SampleStackT&
+    getSampleStack()
+    {
+        return samples;
+    }
+
+    /// Transitional forwarding methods
+    int
+    getMaxSamples() const;
+    //@}
+
+protected:
+    /// true if the buffer is ready for particle-by-particle updates
+    bool ReadyForPbyP;
+    /// update-mode index
+    int UpdateMode;
+
+    RealType LocalEnergy; // written by setLocalEnergy(), read by getLocalEnergy()
+
+public:
+    /// a collection of reptiles contained in MCWalkerConfiguration.
+    ReptileList_t ReptileList;
+    ReptileT* reptile; // raw pointer, publicly accessible
+
+    friend class MCPopulation;
+
+private:
+    MultiChain* Polymer; // accessed through getPolymer()/setPolymer()
+
+    SampleStackT samples; // backing store for the save/load/clear ensemble functions
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/Particle/ParticleIO/LatticeIO.cpp b/src/Particle/ParticleIO/LatticeIO.cpp index 2d8ea238e0..0fe1756969 100644 --- a/src/Particle/ParticleIO/LatticeIO.cpp +++ b/src/Particle/ParticleIO/LatticeIO.cpp @@ -233,6 +233,216 @@ bool LatticeParser::put(xmlNodePtr cur) } +template +bool LatticeParserT::put(xmlNodePtr cur) +{ + const int DIM = ParticleLayout::SingleParticlePos::Size; + double a0 = 1.0; + double rs = -1.0; + int nptcl = 0; + int nsh = 0; //for backwards compatibility w/ odd heg initialization style + int pol = 0; + using SingleParticleIndex = typename ParticleLayout::SingleParticleIndex; + TinyVector bconds("p"); + + Tensor lattice_in; + bool lattice_defined = false; + bool bconds_defined = false; + int boxsum = 0; + + app_summary() << std::endl; + app_summary() << " Lattice" << std::endl; + app_summary() << " -------" << std::endl; + cur = cur->xmlChildrenNode; + while (cur != NULL) + { + std::string cname((const char*)cur->name); + if (cname == 
"parameter") + { + const std::string aname(getXMLAttributeValue(cur, "name")); + if (aname == "scale") + { + putContent(a0, cur); + } + else if (aname == "lattice") + { + const std::string units_prop(getXMLAttributeValue(cur, "units")); + if (!units_prop.empty() && units_prop != "bohr") + { + std::ostringstream err_msg; + err_msg << "LatticeParser::put. Only atomic units (bohr) supported for lattice units. Input file uses: " + << units_prop; + throw UniformCommunicateError(err_msg.str()); + } + + putContent(lattice_in, cur); + lattice_defined = true; + //putContent(ref_.R,cur); + } + else if (aname == "bconds") + { + putContent(bconds, cur); + bconds_defined = true; + for (int idir = 0; idir < DIM; idir++) + { + char b = bconds[idir][0]; + if (b == 'n' || b == 'N') + { + ref_.BoxBConds[idir] = false; + } + else if (b == 'p' || b == 'P') + { + ref_.BoxBConds[idir] = true; + boxsum++; + } + else + { + std::ostringstream err_msg; + err_msg << "LatticeParser::put. Unknown label '" + bconds[idir] + + "' used for periodicity. Only 'p', 'P', 'n' and 'N' are valid!"; + throw UniformCommunicateError(err_msg.str()); + } + + // Protect BCs which are not implemented. + if (idir > 0 && !ref_.BoxBConds[idir - 1] && ref_.BoxBConds[idir]) + { + std::ostringstream err_msg; + err_msg + << "LatticeParser::put. 
In \"bconds\", non periodic directions must be placed after the periodic ones."; + throw UniformCommunicateError(err_msg.str()); + } + } + } + else if (aname == "vacuum") + { + putContent(ref_.VacuumScale, cur); + } + else if (aname == "LR_dim_cutoff") + { + putContent(ref_.LR_dim_cutoff, cur); + } + else if (aname == "LR_handler") + { + std::string handler_type("opt_breakup"); + //This chops whitespace so the simple str == comparisons work + putContent(handler_type, cur); + handler_type = lowerCase(handler_type); + if (handler_type == "ewald") + LRCoulombSingleton::this_lr_type = LRCoulombSingleton::EWALD; + else if (handler_type == "opt_breakup") + LRCoulombSingleton::this_lr_type = LRCoulombSingleton::ESLER; + else if (handler_type == "opt_breakup_original") + LRCoulombSingleton::this_lr_type = LRCoulombSingleton::NATOLI; + else if (handler_type == "ewald_strict2d") + { + LRCoulombSingleton::this_lr_type = LRCoulombSingleton::STRICT2D; + ref_.ndim = 2; + } + else if (handler_type == "ewald_quasi2d") + LRCoulombSingleton::this_lr_type = LRCoulombSingleton::QUASI2D; + else + throw UniformCommunicateError("LatticeParser::put. Long range breakup handler not recognized."); + } + else if (aname == "LR_tol") + { + putContent(ref_.LR_tol, cur); + } + else if (aname == "rs") + { + lattice_defined = true; + OhmmsAttributeSet rAttrib; + rAttrib.add(nptcl, "condition"); + rAttrib.add(pol, "polarized"); + rAttrib.add(nsh, "shell"); + rAttrib.put(cur); + putContent(rs, cur); + } + else if (aname == "nparticles") + { + putContent(nptcl, cur); + } + } + cur = cur->next; + } + + // checking boundary conditions + if (lattice_defined) + { + if (!bconds_defined) + { + app_log() << " Lattice is specified but boundary conditions are not. Assuming PBC." << std::endl; + ref_.BoxBConds = true; + } + } + else if (boxsum == 0) + app_log() << " Lattice is not specified for the Open BC. Add a huge box." << std::endl; + else + throw UniformCommunicateError("LatticeParser::put. 
Mixed boundary is supported only when a lattice is specified!"); + + //special heg processing + if (rs > 0.0) + { + HEGGrid heg(ref_); + if (pol == 0) + { + if (nsh > 0) + nptcl = 2 * heg.getNumberOfKpoints(nsh); + else + nsh = heg.getShellIndex(nptcl / 2); + } + else + { // spin polarized + if (nsh > 0) + nptcl = heg.getNumberOfKpoints(nsh); + else + nsh = heg.getShellIndex(nptcl); + } + typename ParticleLayout::Scalar_t acubic = heg.getCellLength(nptcl, rs); + app_log() << " " << OHMMS_DIM << "D HEG system" + << "\n rs = " << rs; + if (pol == 0) + { + app_log() << "\n number of up particles = " << nptcl / 2 << "\n number of dn particles = " << nptcl / 2; + } + else + { + app_log() << "\n number of up particles = " << nptcl; + } + app_log() << "\n filled kshells = " << nsh << "\n lattice constant = " << acubic << " bohr" + << std::endl; + lattice_in = 0.0; + for (int idim = 0; idim < DIM; idim++) + lattice_in(idim, idim) = acubic; + a0 = 1.0; + } + + if (lattice_defined) + { + lattice_in *= a0; + ref_.set(lattice_in); + } + + if (ref_.SuperCellEnum != SUPERCELL_SLAB && LRCoulombSingleton::isQuasi2D()) + throw UniformCommunicateError("LatticeParser::put. 
Quasi 2D Ewald only works with boundary condition 'p p n'!"); + + if (ref_.SuperCellEnum == SUPERCELL_OPEN) + ref_.WignerSeitzRadius = ref_.SimulationCellRadius; + + std::string unit_name = "bohr"; + app_log() << std::fixed; + app_log() << " Simulation cell radius = " << ref_.SimulationCellRadius << " " << unit_name << std::endl; + app_log() << " Wigner-Seitz cell radius = " << ref_.WignerSeitzRadius << " " << unit_name << std::endl; + app_log() << std::endl; + + return lattice_defined; +} + +template class LatticeParserT; +template class LatticeParserT; +template class LatticeParserT>; +template class LatticeParserT>; + + bool LatticeXMLWriter::get(std::ostream& os) const { os << "" << std::endl; diff --git a/src/Particle/ParticleIO/LatticeIO.h b/src/Particle/ParticleIO/LatticeIO.h index a52e17858d..41e3da8790 100644 --- a/src/Particle/ParticleIO/LatticeIO.h +++ b/src/Particle/ParticleIO/LatticeIO.h @@ -17,6 +17,7 @@ #include "OhmmsData/OhmmsElementBase.h" #include "Configuration.h" +#include "ParticleSetTraits.h" namespace qmcplusplus { @@ -31,6 +32,18 @@ class LatticeParser }; +template +class LatticeParserT +{ + using ParticleLayout = typename LatticeParticleTraits::ParticleLayout; + ParticleLayout& ref_; + +public: + LatticeParserT(ParticleLayout& lat) : ref_(lat) {} + bool put(xmlNodePtr cur); +}; + + class LatticeXMLWriter { using ParticleLayout = PtclOnLatticeTraits::ParticleLayout; diff --git a/src/Particle/ParticleIO/XMLParticleIO.cpp b/src/Particle/ParticleIO/XMLParticleIO.cpp index 26b9d658e7..e5c29cbc59 100644 --- a/src/Particle/ParticleIO/XMLParticleIO.cpp +++ b/src/Particle/ParticleIO/XMLParticleIO.cpp @@ -487,6 +487,404 @@ void XMLParticleParser::getPtclAttrib(xmlNodePtr cur, int in_offset, int copy_si } + +template +XMLParticleParserT::XMLParticleParserT(Particle_t& aptcl) : ref_(aptcl) +{ + //add ref particle attributes + ref_.createAttributeList(ref_AttribList); +} + +/** process xmlnode <particleset/> which contains everything about the particle 
set to initialize + *@param cur the xmlnode to work on + * + */ +template +bool XMLParticleParserT::readXML(xmlNodePtr cur) +{ + ReportEngine PRE("XMLParticleParser", "readXML"); + + if (ref_.getTotalNum()) + throw UniformCommunicateError("The ParticleSet object to load XML input was not empty. Report a bug!"); + + SpeciesSet& tspecies(ref_.getSpeciesSet()); + if (tspecies.size() != 0) + throw UniformCommunicateError("The SpeciesSet object to load XML input was not empty. Report a bug!"); + + // the total number of particles, once it is set non-zero, always check against it. + int nat = 0; + // the number of particles by group, once it is constructed, always check against it. + std::vector nat_group; + + std::string pname("none"); + std::string randomizeR("no"); + OhmmsAttributeSet pAttrib; + pAttrib.add(randomizeR, "random"); + pAttrib.add(nat, "size"); + pAttrib.add(pname, "name"); + pAttrib.put(cur); + + ref_.setName(pname.c_str()); + + if (nat != 0) + { + app_debug() << "Set the total size " << nat + << " by the 'size' attribute found in 'particleset' XML element node named '" << pname << "'." 
+ << std::endl; + } + + bool ionid_found = false; + { // parse all the 'group's to obtain or verify the total number of particles + //total count of the particles to be created + int ntot = 0; + int num_non_zero_group = 0; + bool group_found = false; + + processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { + if (cname == "atom") + throw UniformCommunicateError("XML element node atom is no more supported"); + else if (cname.find("ell") < cname.size()) //accept UnitCell, unitcell, supercell + throw UniformCommunicateError("Constructing cell inside particleset is illegal!"); + else if (cname == "group") + { + group_found = true; + std::string sname = getXMLAttributeValue(element, "name"); + if (sname.empty()) + throw UniformCommunicateError("'group' element node must include a name attribute!"); + else + { + const int sid = tspecies.addSpecies(sname); + setSpeciesProperty(tspecies, sid, element); + } + + int nat_per_group = 0; + OhmmsAttributeSet gAttrib; + gAttrib.add(nat_per_group, "size"); + gAttrib.put(element); + + nat_group.push_back(nat_per_group); + ntot += nat_per_group; + if (nat_per_group > 0) + num_non_zero_group++; + } + else if (cname == attrib_tag && getXMLAttributeValue(element, "name") == ionid_tag) + ionid_found = true; + }); + + if (!group_found) + throw UniformCommunicateError("No 'group' XML element node was found. Check XML input!"); + + if (nat != 0 && ntot != 0 && nat != ntot) + { + std::ostringstream msg; + msg << "The total number of particles deterimined previously was " << nat + << "but the sum of the sizes from all the 'group' XML element nodes is " << ntot + << ". Please check the 'particleset' XML element node!" << std::endl; + throw UniformCommunicateError(msg.str()); + } + + if (nat == 0 && ntot != 0) + { + nat = ntot; + app_debug() << "Set the total size " << nat << " by the sum of the 'size's on all the 'group' XML element nodes." 
+ << std::endl; + } + + if (ntot > 0 && num_non_zero_group != nat_group.size()) + throw UniformCommunicateError("Some 'group' XML element node doesn't contain a 'size' attribute! 'size = 0' is not allowed in the input. Make appropriate adjustments to the input or converter."); + } + + { // parse all the 'attrib's to obtain or verify the total number of particles + processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { + if (cname == attrib_tag) + { + std::string sname = getXMLAttributeValue(element, "name"); + if (sname.empty()) + throw UniformCommunicateError("'" + ParticleTags::attrib_tag + + "' XML element node must include a name attribute!"); + + int size_att = 0; + OhmmsAttributeSet aAttrib; + aAttrib.add(size_att, "size"); + aAttrib.put(element); + + if (nat != 0 && size_att != 0 && nat != size_att) + { + std::ostringstream msg; + msg << "The total number of particles deterimined previously was " << nat + << " but the 'size' atttribute found on the '" << ParticleTags::attrib_tag + << "' XML element nodes named '" << sname << "' is " << size_att + << ". Please check the 'particleset' XML element node!" << std::endl; + throw UniformCommunicateError(msg.str()); + } + + if (nat == 0 && size_att != 0) + { + nat = size_att; + app_debug() << "Set the total size " << nat << " by the 'size' on the '" << ParticleTags::attrib_tag + << "' XML element node named '" << sname << "'." << std::endl; + } + } + }); + } + + if (nat == 0) + throw UniformCommunicateError("Failed in figuring out the total number of particles. 
Check XML input!"); + + if (ionid_found) + { // parse ionid and construct input order to stored order + std::vector map_storage_to_input(nat); + processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { + if (cname == attrib_tag && getXMLAttributeValue(element, "name") == ionid_tag) + { + std::string datatype = getXMLAttributeValue(element, datatype_tag); + if (datatype != stringtype_tag) + throw UniformCommunicateError("'ionid' only supports datatype=\"" + stringtype_tag + "\""); + std::vector d_in(nat); + putContent(d_in, element); + bool input_ungrouped = false; + int storage_index = 0; + for (int ig = 0; ig < nat_group.size(); ig++) + { + const auto& group_species_name = tspecies.getSpeciesName(ig); + int count_group_size = 0; + for (int iat = 0; iat < nat; iat++) + { + const int element_index = tspecies.findSpecies(d_in[iat]); + if (element_index == tspecies.size()) + throw UniformCommunicateError("Element " + d_in[iat] + + " doesn't match any species from 'group' XML element nodes."); + if (element_index == ig) + { + if (iat != storage_index) + input_ungrouped = true; + count_group_size++; + map_storage_to_input[storage_index++] = iat; + } + } + + if (count_group_size == 0) + throw UniformCommunicateError("Element '" + group_species_name + "' not found in 'ionid'."); + + if (nat_group[ig] == 0) + nat_group[ig] = count_group_size; + else if (nat_group[ig] != count_group_size) + { + std::ostringstream msg; + msg << "The number of particles of element '" << group_species_name << "' from 'group' XML elment node was " + << nat_group[ig] << " but 'ionid' contains " << count_group_size << " entries." << std::endl; + throw UniformCommunicateError(msg.str()); + } + } + + if (input_ungrouped) + { + app_log() << " Input particle set is not grouped by species. Remapping particle position indices " + "internally." 
+ << std::endl; + app_debug() << " Species : input particle index -> internal particle index" << std::endl; + for (int new_idx = 0; new_idx < map_storage_to_input.size(); new_idx++) + { + int old_idx = map_storage_to_input[new_idx]; + if (new_idx != old_idx) + { + app_debug() << " " << d_in[old_idx] << " : " << old_idx << " -> " << new_idx << std::endl; + } + } + } + } + }); + + checkGrouping(nat, nat_group); + ref_.create(nat_group); + // save map_storage_to_input + ref_.setMapStorageToInput(map_storage_to_input); + + for (int iat = 0; iat < nat; iat++) + { + processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { + if (cname == attrib_tag && getXMLAttributeValue(element, "name") != ionid_tag) + getPtclAttrib(element, map_storage_to_input[iat], 1, iat); + }); + } + } + else + { + // fix old input with positions outside 'group' + if (nat_group.size() == 1 && nat_group[0] == 0) + nat_group[0] = nat; + + checkGrouping(nat, nat_group); + ref_.create(nat_group); + + // obtain 'attrib' inside 'group' + size_t start = 0; + size_t ig = 0; + processChildren(cur, [&](const std::string& cname, const xmlNodePtr child) { + if (cname == "group") + { + processChildren(child, [&](const std::string& cname, const xmlNodePtr element) { + if (cname == attrib_tag) + getPtclAttrib(element, 0, nat_group[ig], start); + }); + start += nat_group[ig]; + ig++; + } + else if (cname == attrib_tag) + { + if (nat_group.size() > 1) + throw UniformCommunicateError("An 'attrib' XML element node was found outside 'group'" + " without XML element node named 'ionid'." + " Cannot map particles to more than one species. 
Check XML input!"); + getPtclAttrib(child, 0, nat, 0); + } + }); + } + + if (ref_.getLattice().SuperCellEnum) + { + if (randomizeR == "yes") + { + makeUniformRandom(ref_.R); + ref_.R.setUnit(PosUnit::Lattice); + ref_.convert2Cart(ref_.R); + makeUniformRandom(ref_.spins); + ref_.spins *= 2 * M_PI; + } + else // put them [0,1) in the cell + ref_.applyBC(ref_.R); + } + + //this sets Mass, Z + ref_.resetGroups(); + ref_.createSK(); + + return true; +} + +template +void XMLParticleParserT::checkGrouping(int nat, const std::vector& nat_group) const +{ + app_debug() << "There are " << nat << " particles in " << nat_group.size() << " species containing:" << std::endl; + for (int ig = 0; ig < nat_group.size(); ig++) + { + const auto& group_species_name = ref_.getSpeciesSet().getSpeciesName(ig); + if (nat_group[ig] == 0) + throw UniformCommunicateError("Element '" + group_species_name + "' was provided but never referenced."); + app_debug() << " " << nat_group[ig] << " '" << group_species_name << "'" << std::endl; + } + + if (std::accumulate(nat_group.begin(), nat_group.end(), 0) != nat) + throw UniformCommunicateError( + "The total number of particles doesn't match the sum of the particle counts of all the species."); +} + +/** process xmlnode to reset the properties of a particle set + * @param cur current node + * @return true, if successful + * + * This resets or adds new attributes to a particle set. + * It cannot modify the size of the particle set. 
+ */ +template +bool XMLParticleParserT::reset(xmlNodePtr cur) +{ + ReportEngine PRE("XMLParticleParser", "reset"); + SpeciesSet& tspecies(ref_.getSpeciesSet()); + cur = cur->xmlChildrenNode; + while (cur != NULL) + { + std::string cname((const char*)cur->name); + if (cname == "group") + { + std::string sname; + OhmmsAttributeSet gAttrib; + gAttrib.add(sname, "name"); + gAttrib.put(cur); + if (sname.size()) + { + int sid = tspecies.addSpecies(sname); + setSpeciesProperty(tspecies, sid, cur); + } + } + cur = cur->next; + } + // //@todo Will add a member function to ParticleSet to handle these + // int massind=tspecies.addAttribute("mass"); + // for(int iat=0; iat +void XMLParticleParserT::getPtclAttrib(xmlNodePtr cur, int in_offset, int copy_size, int out_offset) +{ + std::string oname, otype; + int utype = 0; + int size_in = 0; + OhmmsAttributeSet pAttrib; + pAttrib.add(otype, datatype_tag); //datatype + pAttrib.add(oname, "name"); //name + pAttrib.add(utype, condition_tag); //condition + pAttrib.add(size_in, "size"); //size + pAttrib.put(cur); + if (oname.empty() || otype.empty()) + { + app_error() << " Missing attrib/@name or attrib/@datatype " << std::endl; + app_error() << R"( )" << std::endl; + return; + } + int t_id = ref_AttribList.getAttribType(otype); + + if (oname == ionid_tag) + throw UniformCommunicateError("'ionid' should not be parsed by getPtclAttrib."); + else + { + //very permissive in that a unregistered attribute will be created and stored by ParticleSet + //cloning is not going to work + if (t_id == PA_IndexType) + { + ParticleIndex* obj = nullptr; + obj = ref_AttribList.getAttribute(otype, oname, obj); + ParticleAttribXmlNode a(*obj, static_cast(utype)); + a.put(cur, in_offset, copy_size, out_offset); + } + else if (t_id == PA_ScalarType) + { + ParticleScalar* obj = nullptr; + obj = ref_AttribList.getAttribute(otype, oname, obj); + ParticleAttribXmlNode a(*obj, static_cast(utype)); + a.put(cur, in_offset, copy_size, out_offset); + } + else if 
(t_id == PA_PositionType) + { + ParticlePos* obj = nullptr; + obj = ref_AttribList.getAttribute(otype, oname, obj); + ParticleAttribXmlNode a(*obj, static_cast(utype)); + a.put(cur, in_offset, copy_size, out_offset); + } + else if (t_id == PA_TensorType) + { + ParticleTensor* obj = nullptr; + obj = ref_AttribList.getAttribute(otype, oname, obj); + ParticleAttribXmlNode a(*obj, static_cast(utype)); + a.put(cur, in_offset, copy_size, out_offset); + } + } +} + +template class XMLParticleParserT; +template class XMLParticleParserT; +template class XMLParticleParserT>; +template class XMLParticleParserT>; + + + XMLSaveParticle::XMLSaveParticle(Particle_t& pin) : ref_(pin) {} XMLSaveParticle::~XMLSaveParticle() {} diff --git a/src/Particle/ParticleIO/XMLParticleIO.h b/src/Particle/ParticleIO/XMLParticleIO.h index c05590ea3c..11a0c41a87 100644 --- a/src/Particle/ParticleIO/XMLParticleIO.h +++ b/src/Particle/ParticleIO/XMLParticleIO.h @@ -18,6 +18,7 @@ #include "OhmmsData/OhmmsElementBase.h" #include "OhmmsData/RecordProperty.h" #include "Particle/ParticleSet.h" +#include "Particle/ParticleSetT.h" namespace qmcplusplus { @@ -139,6 +140,41 @@ class XMLParticleParser : public ParticleTags bool reset(xmlNodePtr cur); }; +template +class XMLParticleParserT : public ParticleTags +{ + using Particle_t = ParticleSetT; + using ParticleIndex = typename Particle_t::ParticleIndex; + using ParticleScalar = typename Particle_t::ParticleScalar; + using ParticlePos = typename Particle_t::ParticlePos; + using ParticleTensor = typename Particle_t::ParticleTensor; + + Particle_t& ref_; + AttribListType ref_AttribList; + + /** read the data of a particle attribute + *@param cur the xmlnode + *@param in_offset the location offset to read from XML element node body. + *@param copy_size the number of particle attributes to be read + *@param out_offset the current local count to which copy_size particle attributes are added. 
+ */ + void getPtclAttrib(xmlNodePtr cur, int in_offset, int copy_size, int out_offset); + + void checkGrouping(int nat, const std::vector& nat_group) const; + +public: + /**constructor + *@param aptcl the particleset to be initialized + */ + XMLParticleParserT(Particle_t& aptcl); + + bool readXML(xmlNodePtr cur); + + /** reset the properties of a particle set + */ + bool reset(xmlNodePtr cur); +}; + class XMLSaveParticle : public ParticleTags, public RecordProperty { using Particle_t = ParticleSet; diff --git a/src/Particle/ParticleSetPoolT.cpp b/src/Particle/ParticleSetPoolT.cpp new file mode 100644 index 0000000000..7100822214 --- /dev/null +++ b/src/Particle/ParticleSetPoolT.cpp @@ -0,0 +1,278 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign +// Raymond Clay III, j.k.rofling@gmail.com, Lawrence +// Livermore National Laboratory Jeongnim Kim, +// jeongnim.kim@gmail.com, University of Illinois at +// Urbana-Champaign Mark A. Berrill, berrillma@ornl.gov, Oak +// Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +/**@file ParticleSetPool.cpp + * @brief Implements ParticleSetPool operators. 
+ */ +#include "ParticleSetPoolT.h" + +#include "LongRange/LRCoulombSingleton.h" +#include "OhmmsData/AttributeSet.h" +#include "OhmmsData/Libxml2Doc.h" +#include "Particle/InitMolecularSystemT.h" +#include "ParticleBase/RandomSeqGenerator.h" +#include "ParticleIO/LatticeIO.h" +#include "ParticleIO/XMLParticleIO.h" +#include "Utilities/ProgressReportEngine.h" +#include +#include + +namespace qmcplusplus +{ +template +ParticleSetPoolT::ParticleSetPoolT(Communicate* c, const char* aname) : + MPIObjectBase(c), + simulation_cell_(std::make_unique>()) +{ + ClassName = "ParticleSetPool"; + myName = aname; +} + +template +ParticleSetPoolT::ParticleSetPoolT(ParticleSetPoolT&& other) noexcept : + MPIObjectBase(other.myComm), + simulation_cell_(std::move(other.simulation_cell_)), + myPool(std::move(other.myPool)) +{ + ClassName = other.ClassName; + myName = other.myName; +} + +template +ParticleSetPoolT::~ParticleSetPoolT() = default; + +template +ParticleSetT* +ParticleSetPoolT::getParticleSet(const std::string& pname) +{ + if (auto pit = myPool.find(pname); pit == myPool.end()) + return nullptr; + else + return pit->second.get(); +} + +template +MCWalkerConfigurationT* +ParticleSetPoolT::getWalkerSet(const std::string& pname) +{ + auto mc = dynamic_cast*>(getParticleSet(pname)); + if (mc == nullptr) { + throw std::runtime_error( + "ParticleSePool::getWalkerSet missing " + pname); + } + return mc; +} + +template +void +ParticleSetPoolT::addParticleSet(std::unique_ptr>&& p) +{ + const auto pit(myPool.find(p->getName())); + if (pit == myPool.end()) { + auto& pname = p->getName(); + LOGMSG(" Adding " << pname << " ParticleSet to the pool") + if (&p->getSimulationCell() != simulation_cell_.get()) + throw std::runtime_error( + "Bug detected! ParticleSetPool::addParticleSet requires p " + "created with the simulation " + "cell from ParticleSetPool."); + myPool.emplace(pname, std::move(p)); + } + else + throw std::runtime_error( + p->getName() + " exists. 
Cannot be added again."); +} + +template +bool +ParticleSetPoolT::readSimulationCellXML(xmlNodePtr cur) +{ + ReportEngine PRE("ParticleSetPool", "putLattice"); + + bool lattice_defined = false; + try { + LatticeParserT a(simulation_cell_->lattice_); + lattice_defined = a.put(cur); + } + catch (const UniformCommunicateError& ue) { + myComm->barrier_and_abort(ue.what()); + } + + if (lattice_defined) { + app_log() << " Overwriting global supercell " << std::endl; + simulation_cell_->resetLRBox(); + if (outputManager.isHighActive()) + simulation_cell_->lattice_.print(app_log(), 2); + else + simulation_cell_->lattice_.print(app_summary(), 1); + } + return lattice_defined; +} + +/** process an xml element + * @param cur current xmlNodePtr + * @return true, if successful. + * + * Creating MCWalkerConfiguration for all the ParticleSet + * objects. + */ +template +bool +ParticleSetPoolT::put(xmlNodePtr cur) +{ + ReportEngine PRE("ParticleSetPool", "put"); + std::string id("e"); + std::string role("none"); + std::string randomR("no"); + std::string randomsrc; + std::string useGPU; + std::string spinor; + OhmmsAttributeSet pAttrib; + pAttrib.add(id, "id"); + pAttrib.add(id, "name"); + pAttrib.add(role, "role"); + pAttrib.add(randomR, "random"); + pAttrib.add(randomsrc, "randomsrc"); + pAttrib.add(randomsrc, "random_source"); + pAttrib.add(spinor, "spinor", {"no", "yes"}); + pAttrib.add(useGPU, "gpu", CPUOMPTargetSelector::candidate_values); + pAttrib.put(cur); + // backward compatibility + if (id == "e" && role == "none") + role = "MC"; + ParticleSetT* pTemp = getParticleSet(id); + if (pTemp == 0) { + const bool use_offload = CPUOMPTargetSelector::selectPlatform(useGPU) == + PlatformKind::OMPTARGET; + app_summary() << std::endl; + app_summary() << " Particle Set" << std::endl; + app_summary() << " ------------" << std::endl; + app_summary() << " Name: " << id + << " Offload : " << (use_offload ? 
"yes" : "no") + << std::endl; + app_summary() << std::endl; + + // select OpenMP offload implementation in ParticleSet. + if (use_offload) + pTemp = new MCWalkerConfigurationT( + *simulation_cell_, DynamicCoordinateKind::DC_POS_OFFLOAD); + else + pTemp = new MCWalkerConfigurationT( + *simulation_cell_, DynamicCoordinateKind::DC_POS); + + myPool.emplace(id, pTemp); + + try { + XMLParticleParserT pread(*pTemp); + pread.readXML(cur); + } + catch (const UniformCommunicateError& ue) { + myComm->barrier_and_abort(ue.what()); + } + + // if random_source is given, create a node + if (randomR == "yes" && !randomsrc.empty()) { + xmlNodePtr anode = xmlNewNode(NULL, (const xmlChar*)"init"); + xmlNewProp(anode, (const xmlChar*)"source", + (const xmlChar*)randomsrc.c_str()); + xmlNewProp( + anode, (const xmlChar*)"target", (const xmlChar*)id.c_str()); + randomize_nodes.push_back(anode); + } + pTemp->setName(id); + pTemp->setSpinor(spinor == "yes"); + app_summary() << " Particle set size: " << pTemp->getTotalNum() + << " Groups : " << pTemp->groups() << std::endl; + app_summary() << std::endl; + return true; + } + else { + app_warning() << "Particle set " << id + << " is already created. Ignoring this section." + << std::endl; + } + app_summary() << std::endl; + return true; +} + +template +void +ParticleSetPoolT::randomize() +{ + app_log() << "ParticleSetPool::randomize " << randomize_nodes.size() + << " ParticleSet" << (randomize_nodes.size() == 1 ? "" : "s") + << "." 
<< std::endl; + bool success = true; + for (int i = 0; i < randomize_nodes.size(); ++i) { + InitMolecularSystemT moinit(*this); + success &= moinit.put(randomize_nodes[i]); + xmlFreeNode(randomize_nodes[i]); + } + randomize_nodes.clear(); + if (!success) + throw std::runtime_error( + "ParticleSePool::randomize failed to randomize some Particlesets!"); +} + +template +bool +ParticleSetPoolT::get(std::ostream& os) const +{ + os << "ParticleSetPool has: " << std::endl << std::endl; + os.setf(std::ios::scientific, std::ios::floatfield); + os.precision(14); + for (const auto& [name, pset] : myPool) + if (outputManager.isDebugActive()) + pset->print(os, 0); + else + pset->print(os, 10 /* maxParticlesToPrint */); + return true; +} + +template +void +ParticleSetPoolT::output_particleset_info( + Libxml2Document& doc, xmlNodePtr root) +{ + xmlNodePtr particles_info = doc.addChild(root, "particles"); + typename PoolType::const_iterator it(myPool.begin()), it_end(myPool.end()); + while (it != it_end) { + xmlNodePtr particle = doc.addChild(particles_info, "particle"); + doc.addChild(particle, "name", (*it).second->getName()); + doc.addChild(particle, "size", (*it).second->getTotalNum()); + ++it; + } +} + +/** reset is used to initialize and evaluate the distance tables + */ +template +void +ParticleSetPoolT::reset() +{ + for (const auto& [key, pset] : myPool) + pset->update(); +} + +// explicit instantiations +template class ParticleSetPoolT; +template class ParticleSetPoolT; +template class ParticleSetPoolT>; +template class ParticleSetPoolT>; + +} // namespace qmcplusplus diff --git a/src/Particle/ParticleSetPoolT.h b/src/Particle/ParticleSetPoolT.h new file mode 100644 index 0000000000..da72817dfc --- /dev/null +++ b/src/Particle/ParticleSetPoolT.h @@ -0,0 +1,155 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. 
See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of +// Illinois at Urbana-Champaign Mark A. Berrill, +// berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_PARTICLESETPOOLT_H +#define QMCPLUSPLUS_PARTICLESETPOOLT_H + +#include "Message/MPIObjectBase.h" +#include "OhmmsData/OhmmsElementBase.h" +#include "Particle/MCWalkerConfigurationT.h" +#include "ParticleSetT.h" +#include "SimulationCellT.h" + +namespace qmcplusplus +{ +/** @ingroup qmcapp + * @brief Manage a collection of ParticleSet objects + * + * This object handles \ elements and + * functions as a builder class for ParticleSet objects. + */ +template +class ParticleSetPoolT : public MPIObjectBase +{ +public: + using PoolType = + std::map>>; + + /** constructor + * @param aname xml tag + */ + ParticleSetPoolT(Communicate* c, const char* aname = "particleset"); + ~ParticleSetPoolT(); + + ParticleSetPoolT(const ParticleSetPoolT&) = delete; + ParticleSetPoolT& + operator=(const ParticleSetPoolT&) = delete; + ParticleSetPoolT(ParticleSetPoolT&& pset) noexcept; + ParticleSetPoolT& + operator=(ParticleSetPoolT&&) = default; + + bool + put(xmlNodePtr cur); + bool + get(std::ostream& os) const; + void + reset(); + + void + output_particleset_info(Libxml2Document& doc, xmlNodePtr root); + + /** initialize the supercell shared by all the particle sets + * + * return value is never checked anywhere + * side effect simulation_cell_ UPtr is set + * to particle layout created on heap. + * This is later directly assigned to pset member variable Lattice. 
+ */ + bool + readSimulationCellXML(xmlNodePtr cur); + + /// return true, if the pool is empty + inline bool + empty() const + { + return myPool.empty(); + } + + /** add a ParticleSet* to the pool with its ownership transferred + * ParticleSet built outside the ParticleSetPool must be constructed with + * the simulation cell from this->simulation_cell_. + */ + void + addParticleSet(std::unique_ptr>&& p); + + /** get a named ParticleSet + * @param pname name of the ParticleSet + * @return a MCWalkerConfiguration object with pname + * + * When the named ParticleSet is not in this object, return 0. + */ + ParticleSetT* + getParticleSet(const std::string& pname); + + /** get a named MCWalkerConfiguration + * @param pname name of the MCWalkerConfiguration + * @return a MCWalkerConfiguration object with pname + * + * When the named MCWalkerConfiguration is not in this object, return 0. + */ + MCWalkerConfigurationT* + getWalkerSet(const std::string& pname); + + /** get the Pool object + */ + inline const PoolType& + getPool() const + { + return myPool; + } + + /// get simulation cell + const auto& + getSimulationCell() const + { + return *simulation_cell_; + } + + /// set simulation cell + void + setSimulationCell(const SimulationCellT& simulation_cell) + { + *simulation_cell_ = simulation_cell; + } + + /** randomize a particleset particleset/@random='yes' && + * particleset@random_source exists + */ + void + randomize(); + +private: + /** global simulation cell + * + * updated by + * - readSimulationCellXML() parsing element + * - setSimulationCell() + */ + std::unique_ptr> simulation_cell_; + /** List of ParticleSet owned + * + * Each ParticleSet has to have a unique name which is used as a key for the + * map. + */ + PoolType myPool; + /** xml node for random initialization. 
+ * + * randomize() process initializations just before starting qmc sections + */ + std::vector randomize_nodes; +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/ParticleSetT.BC.cpp b/src/Particle/ParticleSetT.BC.cpp new file mode 100644 index 0000000000..50c3f641e6 --- /dev/null +++ b/src/Particle/ParticleSetT.BC.cpp @@ -0,0 +1,194 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + + +/**@file ParticleSet.BC.cpp + * @brief definition of functions controlling Boundary Conditions + */ +#include "Particle/ParticleSetT.h" +#include "Particle/FastParticleOperators.h" +#include "Concurrency/OpenMP.h" +#include "LongRange/StructFactT.h" + +namespace qmcplusplus +{ +/** Creating StructureFactor + * + * Currently testing only 1 component for PBCs. + */ +void ParticleSet::createSK() +{ + if (structure_factor_) + throw std::runtime_error("Report bug! structure_factor_ has already been created. 
Unexpected call sequence."); + + auto& Lattice = getLattice(); + auto& LRBox = getLRBox(); + if (Lattice.explicitly_defined) + convert2Cart(R); //make sure that R is in Cartesian coordinates + + if (Lattice.SuperCellEnum != SUPERCELL_OPEN) + { + app_log() << "\n Creating Structure Factor for periodic systems " << LRBox.LR_kc << std::endl; + structure_factor_ = std::make_unique(LRBox, simulation_cell_.getKLists()); + } + + //set the mass array + int beforemass = my_species_.numAttributes(); + int massind = my_species_.addAttribute("mass"); + if (beforemass == massind) + { + app_log() << " ParticleSet::createSK setting mass of " << getName() << " to 1.0" << std::endl; + for (int ig = 0; ig < my_species_.getTotalNum(); ++ig) + my_species_(massind, ig) = 1.0; + } + for (int iat = 0; iat < GroupID.size(); iat++) + Mass[iat] = my_species_(massind, GroupID[iat]); + + coordinates_->setAllParticlePos(R); +} + +void ParticleSet::turnOnPerParticleSK() +{ + if (structure_factor_) + structure_factor_->turnOnStorePerParticle(*this); + else + throw std::runtime_error("ParticleSet::turnOnPerParticleSK trying to turn on per particle storage in " + "structure_factor_ but structure_factor_ has not been created."); +} + +bool ParticleSet::getPerParticleSKState() const +{ + bool isPerParticleOn = false; + if (structure_factor_) + isPerParticleOn = structure_factor_->isStorePerParticle(); + return isPerParticleOn; +} + +void ParticleSet::convert(const ParticlePos& pin, ParticlePos& pout) +{ + if (pin.getUnit() == pout.getUnit()) + { + pout = pin; + return; + } + if (pin.getUnit() == PosUnit::Lattice) + //convert to CartesianUnit + { + ConvertPosUnit::apply(pin, getLattice().R, pout, 0, pin.size()); + } + else + //convert to getLattice()Unit + { + ConvertPosUnit::apply(pin, getLattice().G, pout, 0, pin.size()); + } +} + +void ParticleSet::convert2Unit(const ParticlePos& pin, ParticlePos& pout) +{ + pout.setUnit(PosUnit::Lattice); + if (pin.getUnit() == PosUnit::Lattice) + pout = pin; + 
else + ConvertPosUnit::apply(pin, getLattice().G, pout, 0, pin.size()); +} + +void ParticleSet::convert2Cart(const ParticlePos& pin, ParticlePos& pout) +{ + pout.setUnit(PosUnit::Cartesian); + if (pin.getUnit() == PosUnit::Cartesian) + pout = pin; + else + ConvertPosUnit::apply(pin, getLattice().R, pout, 0, pin.size()); +} + +void ParticleSet::convert2Unit(ParticlePos& pinout) +{ + if (pinout.getUnit() == PosUnit::Lattice) + return; + else + { + pinout.setUnit(PosUnit::Lattice); + ConvertPosUnit::apply(pinout, getLattice().G, 0, pinout.size()); + } +} + +void ParticleSet::convert2Cart(ParticlePos& pinout) +{ + if (pinout.getUnit() == PosUnit::Cartesian) + return; + else + { + pinout.setUnit(PosUnit::Cartesian); + ConvertPosUnit::apply(pinout, getLattice().R, 0, pinout.size()); + } +} + +void ParticleSet::applyBC(const ParticlePos& pin, ParticlePos& pout) { applyBC(pin, pout, 0, pin.size()); } + +void ParticleSet::applyBC(const ParticlePos& pin, ParticlePos& pout, int first, int last) +{ + if (pin.getUnit() == PosUnit::Cartesian) + { + if (pout.getUnit() == PosUnit::Cartesian) + ApplyBConds::Cart2Cart(pin, getLattice().G, getLattice().R, pout, first, last); + else if (pout.getUnit() == PosUnit::Lattice) + ApplyBConds::Cart2Unit(pin, getLattice().G, pout, first, last); + else + throw std::runtime_error("Unknown unit conversion"); + } + else if (pin.getUnit() == PosUnit::Lattice) + { + if (pout.getUnit() == PosUnit::Cartesian) + ApplyBConds::Unit2Cart(pin, getLattice().R, pout, first, last); + else if (pout.getUnit() == PosUnit::Lattice) + ApplyBConds::Unit2Unit(pin, pout, first, last); + else + throw std::runtime_error("Unknown unit conversion"); + } + else + throw std::runtime_error("Unknown unit conversion"); +} + +void ParticleSet::applyBC(ParticlePos& pos) +{ + if (pos.getUnit() == PosUnit::Lattice) + { + ApplyBConds::Unit2Unit(pos, 0, TotalNum); + } + else + { + ApplyBConds::Cart2Cart(pos, getLattice().G, getLattice().R, 0, TotalNum); + } +} + +void 
ParticleSet::applyMinimumImage(ParticlePos& pinout) +{ + if (getLattice().SuperCellEnum == SUPERCELL_OPEN) + return; + for (int i = 0; i < pinout.size(); ++i) + getLattice().applyMinimumImage(pinout[i]); +} + +void ParticleSet::convert2UnitInBox(const ParticlePos& pin, ParticlePos& pout) +{ + pout.setUnit(PosUnit::Lattice); + convert2Unit(pin, pout); // convert to crystalline unit + put2box(pout); +} + +void ParticleSet::convert2CartInBox(const ParticlePos& pin, ParticlePos& pout) +{ + convert2UnitInBox(pin, pout); // convert to crystalline unit + convert2Cart(pout); +} +} // namespace qmcplusplus diff --git a/src/Particle/ParticleSetT.cpp b/src/Particle/ParticleSetT.cpp index 5b78bed54e..bc5f7518ab 100644 --- a/src/Particle/ParticleSetT.cpp +++ b/src/Particle/ParticleSetT.cpp @@ -23,8 +23,10 @@ #include "ParticleSetT.h" +#include "Concurrency/OpenMP.h" #include "Particle/DistanceTableT.h" #include "Particle/DynamicCoordinatesBuilder.h" +#include "Particle/FastParticleOperators.h" #include "Particle/LongRange/StructFactT.h" #include "Particle/createDistanceTableT.h" #include "ParticleBase/RandomSeqGeneratorGlobal.h" @@ -1124,6 +1126,216 @@ ParticleSetT::extractSKRefList( return sk_list; } +/** Creating StructureFactor + * + * Currently testing only 1 component for PBCs. + */ +template +void +ParticleSetT::createSK() +{ + if (structure_factor_) + throw std::runtime_error("Report bug! structure_factor_ has already " + "been created. 
Unexpected call sequence."); + + auto& Lattice = getLattice(); + auto& LRBox = getLRBox(); + if (Lattice.explicitly_defined) + convert2Cart(R); // make sure that R is in Cartesian coordinates + + if (Lattice.SuperCellEnum != SUPERCELL_OPEN) { + app_log() << "\n Creating Structure Factor for periodic systems " + << LRBox.LR_kc << std::endl; + structure_factor_ = std::make_unique>( + LRBox, simulation_cell_.getKLists()); + } + + // set the mass array + int beforemass = my_species_.numAttributes(); + int massind = my_species_.addAttribute("mass"); + if (beforemass == massind) { + app_log() << " ParticleSet::createSK setting mass of " << getName() + << " to 1.0" << std::endl; + for (int ig = 0; ig < my_species_.getTotalNum(); ++ig) + my_species_(massind, ig) = 1.0; + } + for (int iat = 0; iat < GroupID.size(); iat++) + Mass[iat] = my_species_(massind, GroupID[iat]); + + coordinates_->setAllParticlePos(R); +} + +template +void +ParticleSetT::turnOnPerParticleSK() +{ + if (structure_factor_) + structure_factor_->turnOnStorePerParticle(*this); + else + throw std::runtime_error( + "ParticleSet::turnOnPerParticleSK trying to turn on per particle " + "storage in " + "structure_factor_ but structure_factor_ has not been created."); +} + +template +bool +ParticleSetT::getPerParticleSKState() const +{ + bool isPerParticleOn = false; + if (structure_factor_) + isPerParticleOn = structure_factor_->isStorePerParticle(); + return isPerParticleOn; +} + +template +void +ParticleSetT::convert(const ParticlePos& pin, ParticlePos& pout) +{ + if (pin.getUnit() == pout.getUnit()) { + pout = pin; + return; + } + if (pin.getUnit() == PosUnit::Lattice) + // convert to CartesianUnit + { + ConvertPosUnit::apply( + pin, getLattice().R, pout, 0, pin.size()); + } + else + // convert to getLattice()Unit + { + ConvertPosUnit::apply( + pin, getLattice().G, pout, 0, pin.size()); + } +} + +template +void +ParticleSetT::convert2Unit(const ParticlePos& pin, ParticlePos& pout) +{ + 
pout.setUnit(PosUnit::Lattice); + if (pin.getUnit() == PosUnit::Lattice) + pout = pin; + else + ConvertPosUnit::apply( + pin, getLattice().G, pout, 0, pin.size()); +} + +template +void +ParticleSetT::convert2Cart(const ParticlePos& pin, ParticlePos& pout) +{ + pout.setUnit(PosUnit::Cartesian); + if (pin.getUnit() == PosUnit::Cartesian) + pout = pin; + else + ConvertPosUnit::apply( + pin, getLattice().R, pout, 0, pin.size()); +} + +template +void +ParticleSetT::convert2Unit(ParticlePos& pinout) +{ + if (pinout.getUnit() == PosUnit::Lattice) + return; + else { + pinout.setUnit(PosUnit::Lattice); + ConvertPosUnit::apply( + pinout, getLattice().G, 0, pinout.size()); + } +} + +template +void +ParticleSetT::convert2Cart(ParticlePos& pinout) +{ + if (pinout.getUnit() == PosUnit::Cartesian) + return; + else { + pinout.setUnit(PosUnit::Cartesian); + ConvertPosUnit::apply( + pinout, getLattice().R, 0, pinout.size()); + } +} + +template +void +ParticleSetT::applyBC(const ParticlePos& pin, ParticlePos& pout) +{ + applyBC(pin, pout, 0, pin.size()); +} + +template +void +ParticleSetT::applyBC( + const ParticlePos& pin, ParticlePos& pout, int first, int last) +{ + if (pin.getUnit() == PosUnit::Cartesian) { + if (pout.getUnit() == PosUnit::Cartesian) + ApplyBConds::Cart2Cart( + pin, getLattice().G, getLattice().R, pout, first, last); + else if (pout.getUnit() == PosUnit::Lattice) + ApplyBConds::Cart2Unit( + pin, getLattice().G, pout, first, last); + else + throw std::runtime_error("Unknown unit conversion"); + } + else if (pin.getUnit() == PosUnit::Lattice) { + if (pout.getUnit() == PosUnit::Cartesian) + ApplyBConds::Unit2Cart( + pin, getLattice().R, pout, first, last); + else if (pout.getUnit() == PosUnit::Lattice) + ApplyBConds::Unit2Unit( + pin, pout, first, last); + else + throw std::runtime_error("Unknown unit conversion"); + } + else + throw std::runtime_error("Unknown unit conversion"); +} + +template +void +ParticleSetT::applyBC(ParticlePos& pos) +{ + if (pos.getUnit() == 
PosUnit::Lattice) { + ApplyBConds::Unit2Unit(pos, 0, TotalNum); + } + else { + ApplyBConds::Cart2Cart( + pos, getLattice().G, getLattice().R, 0, TotalNum); + } +} + +template +void +ParticleSetT::applyMinimumImage(ParticlePos& pinout) +{ + if (getLattice().SuperCellEnum == SUPERCELL_OPEN) + return; + for (int i = 0; i < pinout.size(); ++i) + getLattice().applyMinimumImage(pinout[i]); +} + +template +void +ParticleSetT::convert2UnitInBox(const ParticlePos& pin, ParticlePos& pout) +{ + pout.setUnit(PosUnit::Lattice); + convert2Unit(pin, pout); // convert to crystalline unit + put2box(pout); +} + +template +void +ParticleSetT::convert2CartInBox(const ParticlePos& pin, ParticlePos& pout) +{ + convert2UnitInBox(pin, pout); // convert to crystalline unit + convert2Cart(pout); +} + // explicit instantiations template class ParticleSetT; template class ParticleSetT; diff --git a/src/Particle/ParticleSetT.h b/src/Particle/ParticleSetT.h index 138b352616..906e092adb 100644 --- a/src/Particle/ParticleSetT.h +++ b/src/Particle/ParticleSetT.h @@ -74,6 +74,7 @@ class ParticleSetT : public OhmmsElementBase using Index_t = typename LatticeParticleTraits::Index_t; using Scalar_t = typename LatticeParticleTraits::Scalar_t; + using Tensor_t = typename LatticeParticleTraits::Tensor_t; using ParticleLayout = typename LatticeParticleTraits::ParticleLayout; using SingleParticlePos = typename LatticeParticleTraits::SingleParticlePos; @@ -84,6 +85,7 @@ class ParticleSetT : public OhmmsElementBase typename LatticeParticleTraits::ParticleGradient; using ParticleLaplacian = typename LatticeParticleTraits::ParticleLaplacian; + using ParticleTensor = typename LatticeParticleTraits::ParticleTensor; /// walker type using Walker_t = Walker, LatticeParticleTraits>; @@ -99,6 +101,8 @@ class ParticleSetT : public OhmmsElementBase quantum }; + static constexpr auto DIM = ParticleSetTraits::DIM; + /// quantum_domain of the particles, default = classical quantum_domains quantum_domain; @@ -124,14 
+128,14 @@ class ParticleSetT : public OhmmsElementBase Index_t direction; /// Particle density in G-space for MPC interaction - std::vector> DensityReducedGvecs; + std::vector> DensityReducedGvecs; std::vector Density_G; - Array Density_r; + Array Density_r; /// DFT potential - std::vector> VHXCReducedGvecs; + std::vector> VHXCReducedGvecs; std::vector VHXC_G[2]; - Array VHXC_r[2]; + Array VHXC_r[2]; /** name-value map of Walker Properties * diff --git a/src/Particle/ParticleSetTraits.h b/src/Particle/ParticleSetTraits.h index 3ea028b54f..299687aeec 100644 --- a/src/Particle/ParticleSetTraits.h +++ b/src/Particle/ParticleSetTraits.h @@ -71,6 +71,7 @@ struct LatticeParticleTraits using Index_t = int; using Scalar_t = FullPrecRealType; using Complex_t = FullPrecComplexType; + using Tensor_t = ParticleTensorType; using ParticleIndex = ParticleAttrib; using ParticleScalar = ParticleAttrib; diff --git a/src/Particle/ReptileT.h b/src/Particle/ReptileT.h new file mode 100644 index 0000000000..ada42b2712 --- /dev/null +++ b/src/Particle/ReptileT.h @@ -0,0 +1,350 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign +// Raymond Clay III, j.k.rofling@gmail.com, Lawrence +// Livermore National Laboratory Mark A. 
Berrill, +// berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_REPTILET_H +#define QMCPLUSPLUS_REPTILET_H + +#include "Configuration.h" +#include "ParticleSetTraits.h" +#include "QMCDrivers/DriftOperators.h" +#include "QMCDrivers/WalkerProperties.h" +#include "Walker.h" + +namespace qmcplusplus +{ +template +class MCWalkerConfigurationT; + +template +class ReptileT +{ +public: + using WP = WalkerProperties::Indexes; + using Walker_t = typename MCWalkerConfigurationT::Walker_t; + // using Buffer_t = Walker_t::Buffer_t ; + // using Walker_t = MCWalkerConfiguration::Walker_t; + using WalkerIter_t = typename MCWalkerConfigurationT::iterator; + using ReptileConfig_t = std::vector; + using IndexType = typename ParticleSetTraits::IndexType; + using RealType = typename ParticleSetTraits::RealType; + + std::vector Action; + std::vector TransProb; + + RealType forwardprob; + RealType backwardprob; + RealType forwardaction; + RealType backwardaction; + + RealType tau; + + MCWalkerConfigurationT& w; + WalkerIter_t repstart, repend; + IndexType direction, headindex, nbeads; + Walker_t* prophead; + + inline ReptileT( + MCWalkerConfigurationT& W, WalkerIter_t start, WalkerIter_t end) : + w(W), + repstart(start), + repend(end), + direction(1), + headindex(0), + prophead(0) //, r2prop(0.0), r2accept(0.0),tau(0.0) + { + Action.resize(3); + Action[0] = w.addProperty("ActionBackward"); + Action[1] = w.addProperty("ActionForward"); + Action[2] = w.addProperty("ActionLocal"); + TransProb.resize(2); + TransProb[0] = w.addProperty("TransProbBackward"); + TransProb[1] = w.addProperty("TransProbForward"); + + nbeads = repend - repstart; + } + + ~ReptileT() + { + } + + inline IndexType + size() + { + return nbeads; + } + + inline Walker_t& + operator[](IndexType i) + { + 
return getWalker(getBeadIndex(i)); + } + + inline IndexType + wrapIndex(IndexType repindex) + { + return (repindex % nbeads + nbeads) % nbeads; + } + + inline Walker_t& + getWalker(IndexType i) + { + WalkerIter_t bead = repstart + wrapIndex(i); + return **bead; + } + + inline IndexType + getBeadIndex(IndexType i) + { + return wrapIndex(headindex + direction * i); + } + inline Walker_t& + getBead(IndexType i) + { + return getWalker(getBeadIndex(i)); + } + inline Walker_t& + getHead() + { + return getWalker(getBeadIndex(0)); + } + inline Walker_t& + getTail() + { + return getWalker(getBeadIndex(nbeads - 1)); + } + inline Walker_t& + getNext() + { + return getWalker(getBeadIndex(nbeads - 2)); + } + inline Walker_t& + getCenter() + { + return getWalker(getBeadIndex((nbeads - 1) / 2)); + } + // inline void setProposedHead(){ + + inline void + flip() + { + // direction*=-1; + // headindex = getBeadIndex(nbeads-1); + headindex = wrapIndex(headindex - direction); + direction *= -1; + } + + inline void + setDirection(IndexType dir) + { + direction = dir; + } + + inline void + setBead(Walker_t& walker, IndexType i) + { + IndexType index = getBeadIndex(i); + Walker_t& newbead(getWalker(index)); + newbead = walker; // This should be a hard copy + } + + inline void + setHead(Walker_t& overwrite) + { + // overwrite last element. + headindex = getBeadIndex(nbeads - 1); // sets to position of tail. + Walker_t& newhead(getBead(0)); + newhead = overwrite; + } + // This function does two things: 1.) Moves the reptile forward 1 + // step. 2.) Returns the new head. + inline Walker_t& + getNewHead() + { + // overwrite last element. + headindex = getBeadIndex(nbeads - 1); // sets to position of tail. 
+ return getWalker(headindex); + } + + void + saveAction(Walker_t& walker, IndexType d, RealType val, IndexType nPsi = 0) + { + // IndexType repdirection=circbuffer.get_direction(); + IndexType actionindex = 2; + if (direction != 0) + actionindex = (1 - d * direction) / 2; + walker.Properties(nPsi, Action[actionindex]) = val; + } + + RealType + getDirectionalAction(Walker_t& walker, IndexType d, IndexType nPsi = 0) + { + // IndexType repdirection=circbuffer.get_direction(); + IndexType actionindex = 2; + if (d != 0) + actionindex = (1 - direction * d) / 2; + + return walker.Properties(nPsi, Action[actionindex]); + } + + RealType + getLinkAction(Walker_t& new_walker, Walker_t& old_walker, IndexType d, + IndexType nPsi = 0) + { + RealType af = getDirectionalAction(old_walker, +1, nPsi); + RealType ab = getDirectionalAction(new_walker, -1, nPsi); + RealType a0 = getDirectionalAction(old_walker, 0, nPsi) + + getDirectionalAction(new_walker, 0, nPsi); + return af + ab + a0; + } + + void + saveTransProb( + Walker_t& walker, IndexType d, RealType val, IndexType nPsi = 0) + { + // IndexType repdirection=circbuffer.get_direction(); + IndexType transindex = (1 - d * direction) / 2; + walker.Properties(nPsi, TransProb[transindex]) = val; + } + + void + saveTransProb(ParticleSetT& W, IndexType d, RealType val, IndexType nPsi = 0) + { + // IndexType repdirection=circbuffer.get_direction(); + IndexType transindex = (1 - d * direction) / 2; + W.Properties(nPsi, TransProb[transindex]) = val; + } + RealType + getTransProb(Walker_t& walker, IndexType d, RealType nPsi = 0) + { + // IndexType repdirection=circbuffer.get_direction(); + IndexType transindex = (1 - d * direction) / 2; + return walker.Properties(nPsi, TransProb[transindex]); + } + RealType + getTransProb(ParticleSetT& W, IndexType d, RealType nPsi = 0) + { + // IndexType repdirection=circbuffer.get_direction(); + IndexType transindex = (1 - d * direction) / 2; + return W.Properties(nPsi, TransProb[transindex]); + } + + 
inline void + printState() + { + app_log() << "********PRINT REPTILE STATE*********\n"; + app_log() << "Direction=" << direction << " Headindex=" << headindex + << " tail=" << getBeadIndex(nbeads - 1) + << "\n next=" << getBeadIndex(nbeads - 2) + << " nbeads=" << nbeads << std::endl; + app_log() << "BeadIndex\tWrapIndex\tEnergy\tAction[0]\tAction[1]" + "\tAction[2]\t\n"; + for (int i = 0; i < nbeads; i++) { + app_log() << i << "\t" << getBeadIndex(i) << "\t" + << getBead(i).Properties(WP::LOCALENERGY) << "\t" + << getBead(i).Properties(Action[0]) << "\t" + << getBead(i).Properties(Action[1]) << "\t" + << getBead(i).Properties(Action[2]) << "\n"; + } + app_log() << "POSITIONS===============:\n"; + for (int i = 0; i < nbeads; i++) { + // app_log()<length of reptile, then return the last + // bead. if t<0; return the first bead. + inline typename Walker_t::ParticlePos + linearInterp(RealType t) + { + IndexType nbead = + IndexType(t / tau); // Calculate the lower bound on the timeslice. + // t is between binnum*Tau and (binnum+1)Tau + RealType beadfrac = + t / tau - nbead; // the fractional coordinate between n and n+1 bead + if (nbead <= 0) { + typename ParticleSetT::ParticlePos result = getHead().R; + return result; + } + else if (nbead >= nbeads - 1) { + typename ParticleSetT::ParticlePos result = getTail().R; + return result; + } + + else { + typename Walker_t::ParticlePos dR(getBead(nbead + 1).R), + interpR(getBead(nbead).R); + dR = dR - getBead(nbead).R; + + interpR = getBead(nbead).R + beadfrac * dR; + return interpR; + } + } + inline ReptileConfig_t + getReptileSlicePositions(RealType tau, RealType beta) + { + IndexType nbeads_new = IndexType(beta / tau); + ReptileConfig_t new_reptile_coords(0); + + for (IndexType i = 0; i < nbeads_new; i++) + new_reptile_coords.push_back(linearInterp(tau * i)); + + return new_reptile_coords; + } + + inline void + setReptileSlicePositions(ReptileConfig_t& rept) + { + if (rept.size() == nbeads) { + for (int i = 0; i < nbeads; 
i++) + getBead(i).R = rept[i]; + } + else + ; + } + + inline void + setReptileSlicePositions(typename Walker_t::ParticlePos R) + { + for (int i = 0; i < nbeads; i++) + getBead(i).R = R; + } +}; + +} // namespace qmcplusplus +#endif diff --git a/src/Particle/SampleStackT.cpp b/src/Particle/SampleStackT.cpp new file mode 100644 index 0000000000..a40acd9bb9 --- /dev/null +++ b/src/Particle/SampleStackT.cpp @@ -0,0 +1,81 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. +// +// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +// +// File created by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#include "SampleStackT.h" + +#include "Utilities/IteratorUtility.h" + +namespace qmcplusplus +{ + +/** allocate the SampleStack + * @param n number of samples per rank + * @param num_ranks number of ranks. Used to set global number of samples. 
+ */ +template +void +SampleStackT::setMaxSamples(size_t n, size_t num_ranks) +{ + max_samples_ = n; + global_num_samples_ = n * num_ranks; + current_sample_count_ = std::min(current_sample_count_, max_samples_); + sample_vector_.resize(n, MCSample(0)); +} + +template +const MCSample& +SampleStackT::getSample(size_t i) const +{ + return sample_vector_[i]; +} + +template +void +SampleStackT::appendSample(MCSample&& sample) +{ + // Ignore samples in excess of the expected number of samples + if (current_sample_count_ < max_samples_) { + sample_vector_[current_sample_count_] = std::move(sample); + current_sample_count_++; + } +} + +/** load a single sample from SampleStack + */ +template +void +SampleStackT::loadSample(ParticleSetT& pset, size_t iw) const +{ + pset.R = sample_vector_[iw].R; + pset.spins = sample_vector_[iw].spins; +} + +template +void +SampleStackT::clearEnsemble() +{ + sample_vector_.clear(); + current_sample_count_ = 0; +} + +template +void +SampleStackT::resetSampleCount() +{ + current_sample_count_ = 0; +} + +template class SampleStackT; +template class SampleStackT; +template class SampleStackT>; +template class SampleStackT>; + +} // namespace qmcplusplus diff --git a/src/Particle/SampleStackT.h b/src/Particle/SampleStackT.h new file mode 100644 index 0000000000..228a26e874 --- /dev/null +++ b/src/Particle/SampleStackT.h @@ -0,0 +1,84 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. 
+// +// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +// +// File created by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_SAMPLE_STACKT_H +#define QMCPLUSPLUS_SAMPLE_STACKT_H + +#include "Particle/MCSample.h" +#include "Particle/ParticleSetT.h" +#include "Particle/Walker.h" +#include "Particle/WalkerConfigurations.h" + +#include + +namespace qmcplusplus +{ +template +class SampleStackT +{ +public: + using PropertySetType = typename ParticleSetTraits::PropertySetType; + + size_t + getMaxSamples() const + { + return max_samples_; + } + + bool + empty() const + { + return sample_vector_.empty(); + } + + const MCSample& + getSample(size_t i) const; + + //@{save/load/clear function for optimization + inline size_t + getNumSamples() const + { + return current_sample_count_; + } + /// set the number of max samples per rank. + void + setMaxSamples(size_t n, size_t number_of_ranks = 1); + /// Global number of samples is number of samples per rank * number of ranks + size_t + getGlobalNumSamples() const + { + return global_num_samples_; + } + /// load a single sample from SampleStack + void + loadSample(ParticleSetT& pset, size_t iw) const; + + void + appendSample(MCSample&& sample); + + /// clear the ensemble + void + clearEnsemble(); + //@} + /// Set the sample count to zero but preserve the storage + void + resetSampleCount(); + +private: + size_t max_samples_{10}; + size_t current_sample_count_{0}; + size_t global_num_samples_{max_samples_}; + + std::vector sample_vector_; +}; + +} // namespace qmcplusplus +#endif diff --git a/src/Particle/SimulationCellT.h b/src/Particle/SimulationCellT.h index ff8240325a..7eb03fc3a4 100644 --- a/src/Particle/SimulationCellT.h +++ b/src/Particle/SimulationCellT.h @@ -17,7 +17,8 @@ namespace qmcplusplus { -class ParticleSetPool; +template +class ParticleSetPoolT; template class 
SimulationCellT @@ -65,7 +66,7 @@ class SimulationCellT /// K-Vector List. KContainerT k_lists_; - friend class ParticleSetPool; + friend class ParticleSetPoolT; }; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h index 2a69c08aaf..44b574fcd4 100644 --- a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h +++ b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h @@ -23,6 +23,7 @@ #include "mpi/collectives.h" #include "mpi/point2point.h" #include +#include "QMCWaveFunctions/EinsplineSetBuilder.h" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.cpp b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.cpp new file mode 100644 index 0000000000..bf6c0c7fff --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.cpp @@ -0,0 +1,259 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of +// Illinois at Urbana-Champaign +// Paul R. C. Kent, kentpr@ornl.gov, Oak Ridge National +// Laboratory Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge +// National Laboratory Ye Luo, yeluo@anl.gov, Argonne +// National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +/** @file BsplineReaderBase.cpp + * + * Implement super function + */ +#include "BsplineReaderBaseT.h" + +#include "Message/CommOperators.h" +#include "OhmmsData/AttributeSet.h" +#include "QMCWaveFunctions/EinsplineSetBuilderT.h" + +#include +#include + +namespace qmcplusplus +{ +template +BsplineReaderBaseT::BsplineReaderBaseT(EinsplineSetBuilderT* e) : + mybuilder(e), + MeshSize(0), + checkNorm(true), + saveSplineCoefs(false), + rotate(true) +{ + myComm = mybuilder->getCommunicator(); +} + +template +void +BsplineReaderBaseT::get_psi_g( + int ti, int spin, int ib, Vector>& cG) +{ + int ncg = 0; + if (myComm->rank() == 0) { + std::string path = psi_g_path(ti, spin, ib); + mybuilder->H5File.read(cG, path); + ncg = cG.size(); + } + myComm->bcast(ncg); + if (ncg != mybuilder->MaxNumGvecs) { + APP_ABORT("Failed : ncg != MaxNumGvecs"); + } + myComm->bcast(cG); +} + +template +BsplineReaderBaseT::~BsplineReaderBaseT() +{ +} + +inline std::string +make_bandinfo_filename(const std::string& root, int spin, int twist, + const Tensor& tilematrix, int gid) +{ + std::ostringstream oo; + oo << root << ".tile_" << tilematrix(0, 0) << tilematrix(0, 1) + << tilematrix(0, 2) << tilematrix(1, 0) << tilematrix(1, 1) + << tilematrix(1, 2) << tilematrix(2, 0) << tilematrix(2, 1) + << tilematrix(2, 2) << ".spin_" << spin << ".tw_" << twist; + if (gid >= 0) + oo << ".g" << gid; + return oo.str(); +} + +inline std::string +make_bandgroup_name(const std::string& root, int spin, int twist, + const Tensor& tilematrix, int first, int last) +{ + std::ostringstream oo; + oo << root << ".tile_" << tilematrix(0, 0) << tilematrix(0, 1) + << tilematrix(0, 2) << tilematrix(1, 0) << 
tilematrix(1, 1) + << tilematrix(1, 2) << tilematrix(2, 0) << tilematrix(2, 1) + << tilematrix(2, 2) << ".spin_" << spin << ".tw_" << twist << ".l" + << first << "u" << last; + return oo.str(); +} + +template +void +BsplineReaderBaseT::setCommon(xmlNodePtr cur) +{ + // check orbital normalization by default + std::string checkOrbNorm("yes"); + std::string saveCoefs("no"); + OhmmsAttributeSet a; + a.add(checkOrbNorm, "check_orb_norm"); + a.add(saveCoefs, "save_coefs"); + a.put(cur); + + // allow user to turn off norm check with a warning + if (checkOrbNorm == "no") { + app_log() << "WARNING: disable orbital normalization check!" + << std::endl; + checkNorm = false; + } + saveSplineCoefs = saveCoefs == "yes"; +} + +template +std::unique_ptr> +BsplineReaderBaseT::create_spline_set(int spin, xmlNodePtr cur) +{ + int ns(0); + std::string spo_object_name; + OhmmsAttributeSet a; + a.add(ns, "size"); + a.add(spo_object_name, "name"); + a.add(spo_object_name, "id"); + a.put(cur); + + if (ns == 0) + APP_ABORT_TRACE(__FILE__, __LINE__, "parameter/@size missing"); + + if (spo2band.empty()) + spo2band.resize(mybuilder->states.size()); + + std::vector& fullband = (*(mybuilder->FullBands[spin])); + + if (spo2band[spin].empty()) { + spo2band[spin].reserve(fullband.size()); + if (!mybuilder->states[spin]) + mybuilder->states[spin] = std::make_unique(); + mybuilder->clear_states(spin); + initialize_spo2band( + spin, fullband, *mybuilder->states[spin], spo2band[spin]); + } + + BandInfoGroup vals; + vals.TwistIndex = fullband[0].TwistIndex; + vals.GroupID = 0; + vals.myName = make_bandgroup_name(mybuilder->getName(), spin, + mybuilder->twist_num_, mybuilder->TileMatrix, 0, ns); + vals.selectBands(fullband, 0, ns, false); + + return create_spline_set(spo_object_name, spin, vals); +} + +template +std::unique_ptr> +BsplineReaderBaseT::create_spline_set( + int spin, xmlNodePtr cur, SPOSetInputInfo& input_info) +{ + std::string spo_object_name; + OhmmsAttributeSet a; + 
a.add(spo_object_name, "name"); + a.add(spo_object_name, "id"); + a.put(cur); + + if (spo2band.empty()) + spo2band.resize(mybuilder->states.size()); + + std::vector& fullband = (*(mybuilder->FullBands[spin])); + + if (spo2band[spin].empty()) { + spo2band[spin].reserve(fullband.size()); + if (!mybuilder->states[spin]) + mybuilder->states[spin] = std::make_unique(); + mybuilder->clear_states(spin); + initialize_spo2band( + spin, fullband, *mybuilder->states[spin], spo2band[spin]); + } + + BandInfoGroup vals; + vals.TwistIndex = fullband[0].TwistIndex; + vals.GroupID = 0; + vals.myName = make_bandgroup_name(mybuilder->getName(), spin, + mybuilder->twist_num_, mybuilder->TileMatrix, input_info.min_index(), + input_info.max_index()); + vals.selectBands(fullband, spo2band[spin][input_info.min_index()], + input_info.max_index() - input_info.min_index(), false); + + return create_spline_set(spo_object_name, spin, vals); +} + +/** build index tables to map a state to band with k-point folding + * @param bigspace full BandInfo constructed by EinsplineSetBuilder + * @param sposet SPOSetInfo owned by someone, most likely EinsplineSetBuilder + * @param spo2band spo2band[i] is the index in bigspace + * + * At gamma or arbitrary kpoints with complex wavefunctions, spo2band[i]==i + */ +template +void +BsplineReaderBaseT::initialize_spo2band(int spin, + const std::vector& bigspace, SPOSetInfo& sposet, + std::vector& spo2band) +{ + spo2band.reserve(bigspace.size()); + int ns = 0; + for (int i = 0; i < bigspace.size(); ++i) { + spo2band.push_back(i); + SPOInfo a(ns, bigspace[i].Energy); + sposet.add(a); + ns++; + if (bigspace[i].MakeTwoCopies) { + spo2band.push_back(i); + SPOInfo b(ns, bigspace[i].Energy); + sposet.add(b); + ns++; + } + } + + // write to a file + const Communicate* comm = myComm; + if (comm->rank()) + return; + + std::filesystem::path aname = make_bandinfo_filename(mybuilder->getName(), + spin, mybuilder->twist_num_, mybuilder->TileMatrix, comm->getGroupID()); + 
aname += ".bandinfo.dat"; + + std::ofstream o(aname.c_str()); + std::array s; + ns = 0; + using PosType = QMCTraits::PosType; + o << "# Band State TwistIndex BandIndex Energy Kx Ky " + "Kz K1 K2 K3 KmK " + << std::endl; + for (int i = 0; i < bigspace.size(); ++i) { + int ti = bigspace[i].TwistIndex; + int bi = bigspace[i].BandIndex; + double e = bigspace[i].Energy; + int nd = (bigspace[i].MakeTwoCopies) ? 2 : 1; + PosType k = mybuilder->PrimCell.k_cart(mybuilder->primcell_kpoints[ti]); + int s_size = std::snprintf(s.data(), s.size(), + "%8d %8d %8d %8d %12.6f %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f %6d\n", + i, ns, ti, bi, e, k[0], k[1], k[2], + mybuilder->primcell_kpoints[ti][0], + mybuilder->primcell_kpoints[ti][1], + mybuilder->primcell_kpoints[ti][2], nd); + if (s_size < 0) + throw std::runtime_error("Error generating bandinfo"); + o << s.data(); + ns += nd; + } +} + +template class BsplineReaderBaseT; +template class BsplineReaderBaseT; +template class BsplineReaderBaseT>; +template class BsplineReaderBaseT>; + +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h new file mode 100644 index 0000000000..5eab41dea5 --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h @@ -0,0 +1,228 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of +// Illinois at Urbana-Champaign +// Raymond Clay III, j.k.rofling@gmail.com, Lawrence +// Livermore National Laboratory Ye Luo, yeluo@anl.gov, +// Argonne National Laboratory Mark A. 
Berrill, +// berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_BSPLINE_READER_BASET_H +#define QMCPLUSPLUS_BSPLINE_READER_BASET_H + +#include "Containers/OhmmsPETE/TinyVector.h" +#include "QMCWaveFunctions/BandInfo.h" +#include "QMCWaveFunctions/SPOSetT.h" +#include "mpi/collectives.h" +#include "mpi/point2point.h" +#include + +namespace qmcplusplus +{ +struct SPOSetInputInfo; +template +class EinsplineSetBuilderT; + +/** + * Each SplineC2X needs a reader derived from BsplineReaderBase. + * This base class handles common chores + * - check_twists : read gvectors, set twists for folded bands if needed, and + * set the phase for the special K + * - set_grid : create the basic grid and boundary conditions for einspline + * Note that template is abused but it works. + */ +template +class BsplineReaderBaseT +{ +public: + /// pointer to the EinsplineSetBuilder + EinsplineSetBuilderT* mybuilder; + /// communicator + Communicate* myComm; + /// mesh size + TinyVector MeshSize; + /// check the norm of orbitals + bool checkNorm; + /// save spline coefficients to storage + bool saveSplineCoefs; + /// apply orbital rotations + bool rotate; + /// map from spo index to band index + std::vector> spo2band; + + BsplineReaderBaseT(EinsplineSetBuilderT* e); + + virtual ~BsplineReaderBaseT(); + + /** read gvectors and set the mesh, and prepare for einspline + */ + template + inline bool + set_grid(const TinyVector& halfg, GT* xyz_grid, BCT* xyz_bc) + { + // This sets MeshSize from the input file + bool havePsig = mybuilder->ReadGvectors_ESHDF(); + + // If this MeshSize is not initialized, use the meshsize set by the + // input based on FFT grid and meshfactor + if (MeshSize[0] == 0) + MeshSize = mybuilder->MeshSize; + + app_log() << " Using meshsize=" << MeshSize + << 
"\n vs input meshsize=" << mybuilder->MeshSize + << std::endl; + + for (int j = 0; j < 3; ++j) { + xyz_grid[j].start = 0.0; + xyz_grid[j].end = 1.0; + xyz_grid[j].num = MeshSize[j]; + + if (halfg[j]) { + xyz_bc[j].lCode = ANTIPERIODIC; + xyz_bc[j].rCode = ANTIPERIODIC; + } + else { + xyz_bc[j].lCode = PERIODIC; + xyz_bc[j].rCode = PERIODIC; + } + + xyz_bc[j].lVal = 0.0; + xyz_bc[j].rVal = 0.0; + } + return havePsig; + } + + /** initialize twist-related data for N orbitals + */ + template + inline void + check_twists(SPE* bspline, const BandInfoGroup& bandgroup) + { + // init(orbitalSet,bspline); + bspline->PrimLattice = mybuilder->PrimCell; + bspline->GGt = + dot(transpose(bspline->PrimLattice.G), bspline->PrimLattice.G); + + int N = bandgroup.getNumDistinctOrbitals(); + int numOrbs = bandgroup.getNumSPOs(); + + bspline->setOrbitalSetSize(numOrbs); + bspline->resizeStorage(N, N); + + bspline->first_spo = bandgroup.getFirstSPO(); + bspline->last_spo = bandgroup.getLastSPO(); + + int num = 0; + const std::vector& cur_bands = bandgroup.myBands; + for (int iorb = 0; iorb < N; iorb++) { + int ti = cur_bands[iorb].TwistIndex; + bspline->kPoints[iorb] = + mybuilder->PrimCell.k_cart(-mybuilder->primcell_kpoints[ti]); + bspline->MakeTwoCopies[iorb] = + (num < (numOrbs - 1)) && cur_bands[iorb].MakeTwoCopies; + num += bspline->MakeTwoCopies[iorb] ? 2 : 1; + } + + app_log() << "NumDistinctOrbitals " << N << " numOrbs = " << numOrbs + << std::endl; + + bspline->HalfG = 0; + TinyVector bconds = + mybuilder->TargetPtcl.getLattice().BoxBConds; + if (!bspline->isComplex()) { + // no k-point folding, single special k point (G, L ...) 
+ TinyVector twist0 = + mybuilder->primcell_kpoints[bandgroup.TwistIndex]; + for (int i = 0; i < 3; i++) + if (bconds[i] && + ((std::abs(std::abs(twist0[i]) - 0.5) < 1.0e-8))) + bspline->HalfG[i] = 1; + else + bspline->HalfG[i] = 0; + app_log() << " TwistIndex = " << cur_bands[0].TwistIndex + << " TwistAngle " << twist0 << std::endl; + app_log() << " HalfG = " << bspline->HalfG << std::endl; + } + app_log().flush(); + } + + /** return the path name in hdf5 + */ + inline std::string + psi_g_path(int ti, int spin, int ib) + { + std::ostringstream path; + path << "/electrons/kpoint_" << ti << "/spin_" << spin << "/state_" + << ib << "/psi_g"; + return path.str(); + } + + /** return the path name in hdf5 + */ + inline std::string + psi_r_path(int ti, int spin, int ib) + { + std::ostringstream path; + path << "/electrons/kpoint_" << ti << "/spin_" << spin << "/state_" + << ib << "/psi_r"; + return path.str(); + } + + /** read/bcast psi_g + * @param ti twist index + * @param spin spin index + * @param ib band index + * @param cG psi_g as stored in hdf5 + */ + void + get_psi_g(int ti, int spin, int ib, Vector>& cG); + + /** create the actual spline sets + */ + virtual std::unique_ptr> + create_spline_set(const std::string& my_name, int spin, + const BandInfoGroup& bandgroup) = 0; + + /** setting common parameters + */ + void + setCommon(xmlNodePtr cur); + + /** create the spline after one of the kind is created */ + std::unique_ptr> + create_spline_set(int spin, xmlNodePtr cur, SPOSetInputInfo& input_info); + + /** create the spline set */ + std::unique_ptr> + create_spline_set(int spin, xmlNodePtr cur); + + /** Set the checkNorm variable */ + inline void + setCheckNorm(bool new_checknorm) + { + checkNorm = new_checknorm; + }; + + /** Set the orbital rotation flag. Rotations are applied to balance the + * real/imaginary components. 
*/ + inline void + setRotate(bool new_rotate) + { + rotate = new_rotate; + }; + + void + initialize_spo2band(int spin, const std::vector& bigspace, + SPOSetInfo& sposet, std::vector& band2spo); +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h b/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h index 720d0bd5e9..9286624c92 100644 --- a/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h +++ b/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h @@ -225,8 +225,11 @@ class BsplineSetT : public SPOSetT } template - friend struct SplineSetReader; - friend struct BsplineReaderBase; + friend class SplineSetReaderT; + template + friend class BsplineReaderBaseT; + template + friend class HybridRepSetReaderT; protected: static const int D = QMCTraits::DIM; @@ -253,8 +256,5 @@ class BsplineSetT : public SPOSetT std::vector offset; }; - - - } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.cpp b/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.cpp new file mode 100644 index 0000000000..da978b3647 --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.cpp @@ -0,0 +1,23 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2021 QMCPACK developers. 
+// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + + +#include "HybridRepCenterOrbitalsT.h" + +namespace qmcplusplus +{ +template class AtomicOrbitalsT; +template class AtomicOrbitalsT; +template class HybridRepCenterOrbitalsT; +template class HybridRepCenterOrbitalsT; +template class HybridRepCenterOrbitalsT; +template class HybridRepCenterOrbitalsT; +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h new file mode 100644 index 0000000000..85bf667736 --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h @@ -0,0 +1,819 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2019 QMCPACK developers. 
+// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_HYBRIDREP_CENTER_ORBITALST_H +#define QMCPLUSPLUS_HYBRIDREP_CENTER_ORBITALST_H + +#include "Numerics/SmoothFunctions.hpp" +#include "Numerics/SoaSphericalTensor.h" +#include "Particle/DistanceTableT.h" +#include "Particle/VirtualParticleSetT.h" +#include "hdf/hdf_archive.h" +#include "spline2/MultiBspline1D.hpp" + +namespace qmcplusplus +{ +template +class HybridRepSetReaderT; + +template +class AtomicOrbitalsT +{ +public: + static const int D = 3; + using AtomicSplineType = typename bspline_traits::SplineType; + using AtomicBCType = typename bspline_traits::BCType; + using AtomicSingleSplineType = UBspline_1d_d; + using PointType = TinyVector; + using value_type = T; + + using vContainer_type = aligned_vector; + +private: + // near core cutoff + T rmin; + // far from core cutoff, rmin_sqrt>=rmin + T rmin_sqrt; + T cutoff, cutoff_buffer, spline_radius, non_overlapping_radius; + int spline_npoints, BaseN; + int NumBands, Npad; + PointType center_pos; + const int lmax, lm_tot; + SoaSphericalTensor Ylm; + vContainer_type l_vals; + vContainer_type r_power_minus_l; + /// 1D spline of radial functions of all the orbitals + std::shared_ptr> SplineInst; + + vContainer_type localV, localG, localL; + +public: + AtomicOrbitalsT(int Lmax) : + lmax(Lmax), + lm_tot((Lmax + 1) * (Lmax + 1)), + Ylm(Lmax) + { + r_power_minus_l.resize(lm_tot); + l_vals.resize(lm_tot); + for (int l = 0; l <= lmax; l++) + for (int m = -l; m <= l; m++) + l_vals[l * (l + 1) + m] = l; + rmin = std::exp( + std::log(std::numeric_limits::min()) / std::max(Lmax, 1)); + rmin = std::max(rmin, std::numeric_limits::epsilon()); + rmin_sqrt = + std::max(rmin, std::sqrt(std::numeric_limits::epsilon())); + } + + // accessing functions, const only + T + 
getCutoff() const + { + return cutoff; + } + T + getCutoffBuffer() const + { + return cutoff_buffer; + } + T + getSplineRadius() const + { + return spline_radius; + } + T + getNonOverlappingRadius() const + { + return non_overlapping_radius; + } + int + getSplineNpoints() const + { + return spline_npoints; + } + int + getLmax() const + { + return lmax; + } + const PointType& + getCenterPos() const + { + return center_pos; + } + + inline void + resizeStorage(size_t Nb) + { + NumBands = Nb; + Npad = getAlignedSize(Nb); + localV.resize(Npad * lm_tot); + localG.resize(Npad * lm_tot); + localL.resize(Npad * lm_tot); + create_spline(); + } + + void + bcast_tables(Communicate* comm) + { + chunked_bcast(comm, SplineInst->getSplinePtr()); + } + + void + gather_tables(Communicate* comm, std::vector& offset) + { + gatherv(comm, SplineInst->getSplinePtr(), Npad, offset); + } + + template + inline void + set_info(const PT& R, const VT& cutoff_in, const VT& cutoff_buffer_in, + const VT& spline_radius_in, const VT& non_overlapping_radius_in, + const int spline_npoints_in) + { + center_pos[0] = R[0]; + center_pos[1] = R[1]; + center_pos[2] = R[2]; + cutoff = cutoff_in; + cutoff_buffer = cutoff_buffer_in; + spline_radius = spline_radius_in; + spline_npoints = spline_npoints_in; + non_overlapping_radius = non_overlapping_radius_in; + BaseN = spline_npoints + 2; + } + + inline void + create_spline() + { + AtomicBCType bc; + bc.lCode = FLAT; + bc.rCode = NATURAL; + Ugrid grid; + grid.start = 0.0; + grid.end = spline_radius; + grid.num = spline_npoints; + SplineInst = std::make_shared>(); + SplineInst->create(grid, bc, lm_tot * Npad); + } + + inline size_t + getSplineSizeInBytes() const + { + return SplineInst->sizeInByte(); + } + + inline void + flush_zero() + { + SplineInst->flush_zero(); + } + + inline void + set_spline(AtomicSingleSplineType* spline, int lm, int ispline) + { + SplineInst->copy_spline(spline, lm * Npad + ispline, 0, BaseN); + } + + bool + read_splines(hdf_archive& 
h5f) + { + einspline_engine bigtable(SplineInst->getSplinePtr()); + int lmax_in = 0, spline_npoints_in = 0; + T spline_radius_in; + if (!h5f.readEntry(lmax_in, "l_max") || lmax_in != lmax) + return false; + if (!h5f.readEntry(spline_radius_in, "spline_radius") || + spline_radius_in != spline_radius) + return false; + if (!h5f.readEntry(spline_npoints_in, "spline_npoints") || + spline_npoints_in != spline_npoints) + return false; + return h5f.readEntry(bigtable, "radial_spline"); + } + + bool + write_splines(hdf_archive& h5f) + { + bool success = true; + success = success && h5f.writeEntry(spline_radius, "spline_radius"); + success = success && h5f.writeEntry(spline_npoints, "spline_npoints"); + success = success && h5f.writeEntry(lmax, "l_max"); + success = success && h5f.writeEntry(center_pos, "position"); + einspline_engine bigtable(SplineInst->getSplinePtr()); + success = success && h5f.writeEntry(bigtable, "radial_spline"); + return success; + } + + // evaluate only V + template + inline void + evaluate_v(const T& r, const PointType& dr, VV& myV) + { + if (r > std::numeric_limits::epsilon()) + Ylm.evaluateV(dr[0] / r, dr[1] / r, dr[2] / r); + else + Ylm.evaluateV(0, 0, 1); + const T* restrict Ylm_v = Ylm[0]; + + constexpr T czero(0); + T* restrict val = myV.data(); + T* restrict local_val = localV.data(); + std::fill(myV.begin(), myV.end(), czero); + + SplineInst->evaluate(r, localV); + + for (size_t lm = 0; lm < lm_tot; lm++) { +#pragma omp simd aligned(val, local_val : QMC_SIMD_ALIGNMENT) + for (size_t ib = 0; ib < myV.size(); ib++) + val[ib] += Ylm_v[lm] * local_val[ib]; + local_val += Npad; + } + } + + template + inline void + evaluateValues(const DISPL& Displacements, const int center_idx, const T& r, + VM& multi_myV) + { + if (r <= std::numeric_limits::epsilon()) + Ylm.evaluateV(0, 0, 1); + const T* restrict Ylm_v = Ylm[0]; + + const size_t m = multi_myV.cols(); + constexpr T czero(0); + std::fill(multi_myV.begin(), multi_myV.end(), czero); + 
SplineInst->evaluate(r, localV); + + for (int ivp = 0; ivp < Displacements.size(); ivp++) { + PointType dr = Displacements[ivp][center_idx]; + if (r > std::numeric_limits::epsilon()) + Ylm.evaluateV(-dr[0] / r, -dr[1] / r, -dr[2] / r); + + T* restrict val = multi_myV[ivp]; + T* restrict local_val = localV.data(); + for (size_t lm = 0; lm < lm_tot; lm++) { +#pragma omp simd aligned(val, local_val : QMC_SIMD_ALIGNMENT) + for (size_t ib = 0; ib < m; ib++) + val[ib] += Ylm_v[lm] * local_val[ib]; + local_val += Npad; + } + } + } + + // evaluate VGL + template + inline void + evaluate_vgl(const T& r, const PointType& dr, VV& myV, GV& myG, VV& myL) + { + T drx, dry, drz, rhatx, rhaty, rhatz, rinv; + if (r > rmin) { + rinv = 1.0 / r; + } + else { + rinv = 0; + } + drx = dr[0]; + dry = dr[1]; + drz = dr[2]; + rhatx = drx * rinv; + rhaty = dry * rinv; + rhatz = drz * rinv; + + Ylm.evaluateVGL(drx, dry, drz); + const T* restrict Ylm_v = Ylm[0]; + const T* restrict Ylm_gx = Ylm[1]; + const T* restrict Ylm_gy = Ylm[2]; + const T* restrict Ylm_gz = Ylm[3]; + + T* restrict g0 = myG.data(0); + T* restrict g1 = myG.data(1); + T* restrict g2 = myG.data(2); + constexpr T czero(0), cone(1), chalf(0.5); + std::fill(myV.begin(), myV.end(), czero); + std::fill(g0, g0 + Npad, czero); + std::fill(g1, g1 + Npad, czero); + std::fill(g2, g2 + Npad, czero); + std::fill(myL.begin(), myL.end(), czero); + T* restrict val = myV.data(); + T* restrict lapl = myL.data(); + T* restrict local_val = localV.data(); + T* restrict local_grad = localG.data(); + T* restrict local_lapl = localL.data(); + + SplineInst->evaluate_vgl(r, localV, localG, localL); + + if (r > rmin_sqrt) { + // far from core + r_power_minus_l[0] = cone; + T r_power_temp = cone; + for (int l = 1; l <= lmax; l++) { + r_power_temp *= rinv; + for (int m = -l, lm = l * l; m <= l; m++, lm++) + r_power_minus_l[lm] = r_power_temp; + } + + for (size_t lm = 0; lm < lm_tot; lm++) { + const T& l_val = l_vals[lm]; + const T& r_power = 
r_power_minus_l[lm]; + const T Ylm_rescale = Ylm_v[lm] * r_power; + const T rhat_dot_G = (rhatx * Ylm_gx[lm] + rhaty * Ylm_gy[lm] + + rhatz * Ylm_gz[lm]) * + r_power; +#pragma omp simd aligned( \ + val, g0, g1, g2, lapl, local_val, local_grad, local_lapl \ + : QMC_SIMD_ALIGNMENT) + for (size_t ib = 0; ib < myV.size(); ib++) { + const T local_v = local_val[ib]; + const T local_g = local_grad[ib]; + const T local_l = local_lapl[ib]; + // value + const T Vpart = l_val * rinv * local_v; + val[ib] += Ylm_rescale * local_v; + + // grad + const T factor1 = local_g * Ylm_rescale; + const T factor2 = local_v * r_power; + const T factor3 = -Vpart * Ylm_rescale; + g0[ib] += factor1 * rhatx + factor2 * Ylm_gx[lm] + + factor3 * rhatx; + g1[ib] += factor1 * rhaty + factor2 * Ylm_gy[lm] + + factor3 * rhaty; + g2[ib] += factor1 * rhatz + factor2 * Ylm_gz[lm] + + factor3 * rhatz; + + // laplacian + lapl[ib] += + (local_l + (local_g * (2 - l_val) - Vpart) * rinv) * + Ylm_rescale + + (local_g - Vpart) * rhat_dot_G; + } + local_val += Npad; + local_grad += Npad; + local_lapl += Npad; + } + } + else if (r > rmin) { + // the possibility of reaching here is very very low + std::cout + << "Warning: an electron is very close to an ion, distance=" + << r << " be careful!" 
<< std::endl; + // near core, kill divergence in the laplacian + r_power_minus_l[0] = cone; + T r_power_temp = cone; + for (int l = 1; l <= lmax; l++) { + r_power_temp *= rinv; + for (int m = -l, lm = l * l; m <= l; m++, lm++) + r_power_minus_l[lm] = r_power_temp; + } + + for (size_t lm = 0; lm < lm_tot; lm++) { + const T& l_val = l_vals[lm]; + const T& r_power = r_power_minus_l[lm]; + const T Ylm_rescale = Ylm_v[lm] * r_power; + const T rhat_dot_G = (Ylm_gx[lm] * rhatx + Ylm_gy[lm] * rhaty + + Ylm_gz[lm] * rhatz) * + r_power * r; +#pragma omp simd aligned( \ + val, g0, g1, g2, lapl, local_val, local_grad, local_lapl \ + : QMC_SIMD_ALIGNMENT) + for (size_t ib = 0; ib < myV.size(); ib++) { + const T local_v = local_val[ib]; + const T local_g = local_grad[ib]; + const T local_l = local_lapl[ib]; + // value + const T Vpart = Ylm_rescale * local_v; + val[ib] += Vpart; + + // grad + const T factor1 = local_g * Ylm_rescale; + const T factor2 = local_v * r_power; + const T factor3 = -l_val * Vpart * rinv; + g0[ib] += factor1 * rhatx + factor2 * Ylm_gx[lm] + + factor3 * rhatx; + g1[ib] += factor1 * rhaty + factor2 * Ylm_gy[lm] + + factor3 * rhaty; + g2[ib] += factor1 * rhatz + factor2 * Ylm_gz[lm] + + factor3 * rhatz; + + // laplacian + lapl[ib] += local_l * (cone - chalf * l_val) * + (3 * Ylm_rescale + rhat_dot_G); + } + local_val += Npad; + local_grad += Npad; + local_lapl += Npad; + } + } + else { + std::cout << "Warning: an electron is on top of an ion!" 
+ << std::endl; + // strictly zero + +#pragma omp simd aligned(val, lapl, local_val, local_lapl : QMC_SIMD_ALIGNMENT) + for (size_t ib = 0; ib < myV.size(); ib++) { + // value + val[ib] = Ylm_v[0] * local_val[ib]; + + // laplacian + lapl[ib] = local_lapl[ib] * static_cast(3) * Ylm_v[0]; + } + local_val += Npad; + local_grad += Npad; + local_lapl += Npad; + if (lm_tot > 0) { + // std::cout << std::endl; + for (size_t lm = 1; lm < 4; lm++) { +#pragma omp simd aligned(g0, g1, g2, local_grad : QMC_SIMD_ALIGNMENT) + for (size_t ib = 0; ib < myV.size(); ib++) { + const T local_g = local_grad[ib]; + // grad + g0[ib] += local_g * Ylm_gx[lm]; + g1[ib] += local_g * Ylm_gy[lm]; + g2[ib] += local_g * Ylm_gz[lm]; + } + local_grad += Npad; + } + } + } + } + + template + void + evaluate_vgh(const T& r, const PointType& dr, VV& myV, GV& myG, HT& myH) + { + // Needed to do tensor product here + APP_ABORT("AtomicOrbitals::evaluate_vgh"); + } +}; + +template +class HybridRepCenterOrbitalsT +{ +public: + static const int D = 3; + using PointType = typename AtomicOrbitalsT::PointType; + using RealType = typename DistanceTableT::RealType; + using PosType = typename DistanceTableT::PosType; + +private: + /// atomic centers + std::vector> AtomicCenters; + /// table index + int myTableID; + /// mapping supercell to primitive cell + std::vector Super2Prim; + /// r from distance table + RealType dist_r; + /// dr from distance table + PosType dist_dr; + /// for APBC + PointType r_image; + /// smooth function value + RealType f; + /// smooth function first derivative + RealType df_dr; + /// smooth function second derivative + RealType d2f_dr2; + /// smoothing schemes + enum class smoothing_schemes + { + CONSISTENT = 0, + SMOOTHALL, + SMOOTHPARTIAL + } smooth_scheme; + /// smoothing function + smoothing_functions smooth_func_id; + +public: + HybridRepCenterOrbitalsT() + { + } + + void + set_info(const ParticleSetT& ions, ParticleSetT& els, + const std::vector& mapping) + { + myTableID = 
els.addTable(ions, DTModes::NEED_VP_FULL_TABLE_ON_HOST); + Super2Prim = mapping; + } + + inline void + resizeStorage(size_t Nb) + { + size_t SplineCoefsBytes = 0; + + for (int ic = 0; ic < AtomicCenters.size(); ic++) { + AtomicCenters[ic].resizeStorage(Nb); + SplineCoefsBytes += AtomicCenters[ic].getSplineSizeInBytes(); + } + + app_log() + << "MEMORY " << SplineCoefsBytes / (1 << 20) << " MB allocated " + << "for the atomic radial splines in hybrid orbital representation" + << std::endl; + } + + void + bcast_tables(Communicate* comm) + { + for (int ic = 0; ic < AtomicCenters.size(); ic++) + AtomicCenters[ic].bcast_tables(comm); + } + + void + gather_atomic_tables(Communicate* comm, std::vector& offset) + { + if (comm->size() == 1) + return; + for (int ic = 0; ic < AtomicCenters.size(); ic++) + AtomicCenters[ic].gather_tables(comm, offset); + } + + inline void + flush_zero() + { + for (int ic = 0; ic < AtomicCenters.size(); ic++) + AtomicCenters[ic].flush_zero(); + } + + bool + read_splines(hdf_archive& h5f) + { + bool success = true; + size_t ncenter; + + try { + h5f.push("atomic_centers", false); + } + catch (...) { + success = false; + } + success = success && h5f.readEntry(ncenter, "number_of_centers"); + if (!success) + return success; + if (ncenter != AtomicCenters.size()) + success = false; + // read splines of each center + for (int ic = 0; ic < AtomicCenters.size(); ic++) { + std::ostringstream gname; + gname << "center_" << ic; + try { + h5f.push(gname.str().c_str(), false); + } + catch (...) { + success = false; + } + success = success && AtomicCenters[ic].read_splines(h5f); + h5f.pop(); + } + h5f.pop(); + return success; + } + + bool + write_splines(hdf_archive& h5f) + { + bool success = true; + int ncenter = AtomicCenters.size(); + try { + h5f.push("atomic_centers", true); + } + catch (...) 
{ + success = false; + } + success = success && h5f.writeEntry(ncenter, "number_of_centers"); + // write splines of each center + for (int ic = 0; ic < AtomicCenters.size(); ic++) { + std::ostringstream gname; + gname << "center_" << ic; + try { + h5f.push(gname.str().c_str(), true); + } + catch (...) { + success = false; + } + success = success && AtomicCenters[ic].write_splines(h5f); + h5f.pop(); + } + h5f.pop(); + return success; + } + + template + inline int + get_bc_sign( + const PointType& r, const Cell& PrimLattice, TinyVector& HalfG) + { + int bc_sign = 0; + PointType shift_unit = PrimLattice.toUnit(r - r_image); + for (int i = 0; i < D; i++) { + ST img = round(shift_unit[i]); + bc_sign += HalfG[i] * (int)img; + } + return bc_sign; + } + + // evaluate only V + template + inline RealType + evaluate_v(const ParticleSetT& P, const int iat, VV& myV) + { + const auto& ei_dist = P.getDistTableAB(myTableID); + const int center_idx = ei_dist.get_first_neighbor( + iat, dist_r, dist_dr, P.getActivePtcl() == iat); + if (center_idx < 0) + abort(); + auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; + if (dist_r < myCenter.getCutoff()) { + PointType dr(-dist_dr[0], -dist_dr[1], -dist_dr[2]); + r_image = myCenter.getCenterPos() + dr; + myCenter.evaluate_v(dist_r, dr, myV); + return smooth_function( + myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r); + } + return RealType(-1); + } + + /* check if the batched algorithm is safe to operate + * @param VP virtual particle set + * @return true if it is safe + * + * When the reference electron in the NLPP evaluation has a distance larger + * than the non-overlapping radius of the reference center, some quadrature + * points may get their SPOs evaluated from the nearest center which is not + * the reference center. The batched algorithm forces the evaluation on the + * reference center and introduces some error. In this case, the non-batched + * algorithm should be used. 
+ */ + bool + is_batched_safe(const VirtualParticleSetT& VP) + { + const int center_idx = VP.refSourcePtcl; + auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; + return VP.getRefPS().getDistTableAB(myTableID).getDistRow( + VP.refPtcl)[center_idx] < myCenter.getNonOverlappingRadius(); + } + + // C2C, C2R cases + template + inline RealType + evaluateValuesC2X(const VirtualParticleSetT& VP, VM& multi_myV) + { + const int center_idx = VP.refSourcePtcl; + dist_r = VP.getRefPS().getDistTableAB(myTableID).getDistRow( + VP.refPtcl)[center_idx]; + auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; + if (dist_r < myCenter.getCutoff()) { + myCenter.evaluateValues( + VP.getDistTableAB(myTableID).getDisplacements(), center_idx, + dist_r, multi_myV); + return smooth_function( + myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r); + } + return RealType(-1); + } + + // R2R case + template + inline RealType + evaluateValuesR2R(const VirtualParticleSetT& VP, + const Cell& PrimLattice, TinyVector& HalfG, VM& multi_myV, + SV& bc_signs) + { + const int center_idx = VP.refSourcePtcl; + dist_r = VP.getRefPS().getDistTableAB(myTableID).getDistRow( + VP.refPtcl)[center_idx]; + auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; + if (dist_r < myCenter.getCutoff()) { + const auto& displ = VP.getDistTableAB(myTableID).getDisplacements(); + for (int ivp = 0; ivp < VP.getTotalNum(); ivp++) { + r_image = myCenter.getCenterPos() - displ[ivp][center_idx]; + bc_signs[ivp] = get_bc_sign(VP.R[ivp], PrimLattice, HalfG); + ; + } + myCenter.evaluateValues(displ, center_idx, dist_r, multi_myV); + return smooth_function( + myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r); + } + return RealType(-1); + } + + // evaluate only VGL + template + inline RealType + evaluate_vgl( + const ParticleSetT& P, const int iat, VV& myV, GV& myG, VV& myL) + { + const auto& ei_dist = P.getDistTableAB(myTableID); + const int center_idx = ei_dist.get_first_neighbor( + iat, dist_r, dist_dr, 
P.getActivePtcl() == iat); + if (center_idx < 0) + abort(); + auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; + if (dist_r < myCenter.getCutoff()) { + PointType dr(-dist_dr[0], -dist_dr[1], -dist_dr[2]); + r_image = myCenter.getCenterPos() + dr; + myCenter.evaluate_vgl(dist_r, dr, myV, myG, myL); + return smooth_function( + myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r); + } + return RealType(-1); + } + + // evaluate only VGH + template + inline RealType + evaluate_vgh( + const ParticleSetT& P, const int iat, VV& myV, GV& myG, HT& myH) + { + const auto& ei_dist = P.getDistTableAB(myTableID); + const int center_idx = ei_dist.get_first_neighbor( + iat, dist_r, dist_dr, P.getActivePtcl() == iat); + if (center_idx < 0) + abort(); + auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; + if (dist_r < myCenter.getCutoff()) { + PointType dr(-dist_dr[0], -dist_dr[1], -dist_dr[2]); + r_image = myCenter.getCenterPos() + dr; + myCenter.evaluate_vgh(dist_r, dr, myV, myG, myH); + return smooth_function( + myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r); + } + return RealType(-1); + } + + // interpolate buffer region, value only + template + inline void + interpolate_buffer_v(VV& psi, const VV& psi_AO) const + { + const RealType cone(1); + for (size_t i = 0; i < psi.size(); i++) + psi[i] = psi_AO[i] * f + psi[i] * (cone - f); + } + + // interpolate buffer region, value, gradients and laplacian + template + inline void + interpolate_buffer_vgl(VV& psi, GV& dpsi, VV& d2psi, const VV& psi_AO, + const GV& dpsi_AO, const VV& d2psi_AO) const + { + const RealType cone(1), ctwo(2); + const RealType rinv(1.0 / dist_r); + if (smooth_scheme == smoothing_schemes::CONSISTENT) + for (size_t i = 0; i < psi.size(); + i++) { // psi, dpsi, d2psi are all consistent + d2psi[i] = d2psi_AO[i] * f + d2psi[i] * (cone - f) + + df_dr * rinv * ctwo * dot(dpsi[i] - dpsi_AO[i], dist_dr) + + (psi_AO[i] - psi[i]) * (d2f_dr2 + ctwo * rinv * df_dr); + dpsi[i] = dpsi_AO[i] * f + 
dpsi[i] * (cone - f) + + df_dr * rinv * dist_dr * (psi[i] - psi_AO[i]); + psi[i] = psi_AO[i] * f + psi[i] * (cone - f); + } + else if (smooth_scheme == smoothing_schemes::SMOOTHALL) + for (size_t i = 0; i < psi.size(); i++) { + d2psi[i] = d2psi_AO[i] * f + d2psi[i] * (cone - f); + dpsi[i] = dpsi_AO[i] * f + dpsi[i] * (cone - f); + psi[i] = psi_AO[i] * f + psi[i] * (cone - f); + } + else if (smooth_scheme == smoothing_schemes::SMOOTHPARTIAL) + for (size_t i = 0; i < psi.size(); + i++) { // dpsi, d2psi are consistent but psi is not. + d2psi[i] = d2psi_AO[i] * f + d2psi[i] * (cone - f) + + df_dr * rinv * ctwo * dot(dpsi[i] - dpsi_AO[i], dist_dr); + dpsi[i] = dpsi_AO[i] * f + dpsi[i] * (cone - f); + psi[i] = psi_AO[i] * f + psi[i] * (cone - f); + } + else + throw std::runtime_error("Unknown smooth scheme!"); + } + + inline RealType + smooth_function(const ST& cutoff_buffer, const ST& cutoff, const RealType r) + { + const RealType cone(1); + if (r < cutoff_buffer) + return cone; + const RealType scale = cone / (cutoff - cutoff_buffer); + const RealType x = (r - cutoff_buffer) * scale; + f = smoothing(smooth_func_id, x, df_dr, d2f_dr2); + df_dr *= scale; + d2f_dr2 *= scale * scale; + return f; + } + + template + friend class HybridRepSetReaderT; +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepCplxT.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepCplxT.h new file mode 100644 index 0000000000..6f3dd504a9 --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepCplxT.h @@ -0,0 +1,292 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2019 QMCPACK developers. 
+// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_HYBRIDREP_CPLXT_H +#define QMCPLUSPLUS_HYBRIDREP_CPLXT_H + +#include "QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h" +namespace qmcplusplus +{ +/** hybrid representation orbitals combining B-spline orbitals on a grid and + * atomic centered orbitals. + * @tparam SPLINEBASE B-spline orbital class. + * + * Only works with SPLINEBASE class containing complex splines + */ +template +class HybridRepCplxT : + public SPLINEBASE, + private HybridRepCenterOrbitalsT +{ +public: + using HYBRIDBASE = HybridRepCenterOrbitalsT; + using ST = typename SPLINEBASE::DataType; + using PointType = typename SPLINEBASE::PointType; + using SingleSplineType = typename SPLINEBASE::SingleSplineType; + using RealType = typename SPLINEBASE::RealType; + // types for evaluation results + using typename SPLINEBASE::GGGVector; + using typename SPLINEBASE::GradMatrix; + using typename SPLINEBASE::GradType; + using typename SPLINEBASE::GradVector; + using typename SPLINEBASE::HessVector; + using typename SPLINEBASE::OffloadMWVGLArray; + using typename SPLINEBASE::ValueMatrix; + using typename SPLINEBASE::ValueType; + using typename SPLINEBASE::ValueVector; + +private: + ValueVector psi_AO, d2psi_AO; + GradVector dpsi_AO; + Matrix> multi_myV; + + using SPLINEBASE::HalfG; + using SPLINEBASE::myG; + using SPLINEBASE::myH; + using SPLINEBASE::myL; + using SPLINEBASE::myV; + +public: + HybridRepCplxT(const std::string& my_name) : SPLINEBASE(my_name) + { + } + + std::string + getClassName() const final + { + return "Hybrid" + SPLINEBASE::getClassName(); + } + std::string + getKeyword() const final + { + return "Hybrid" + SPLINEBASE::getKeyword(); + } + bool + isOMPoffload() const final + { + return false; + } + + 
std::unique_ptr> + makeClone() const override + { + return std::make_unique(*this); + } + + inline void + resizeStorage(size_t n, size_t nvals) + { + SPLINEBASE::resizeStorage(n, nvals); + HYBRIDBASE::resizeStorage(myV.size()); + } + + void + bcast_tables(Communicate* comm) + { + SPLINEBASE::bcast_tables(comm); + HYBRIDBASE::bcast_tables(comm); + } + + void + gather_tables(Communicate* comm) + { + SPLINEBASE::gather_tables(comm); + HYBRIDBASE::gather_atomic_tables(comm, SPLINEBASE::offset); + } + + bool + read_splines(hdf_archive& h5f) + { + return HYBRIDBASE::read_splines(h5f) && SPLINEBASE::read_splines(h5f); + } + + bool + write_splines(hdf_archive& h5f) + { + return HYBRIDBASE::write_splines(h5f) && SPLINEBASE::write_splines(h5f); + } + + inline void + flush_zero() + { + // SPLINEBASE::flush_zero(); + HYBRIDBASE::flush_zero(); + } + + void + evaluateValue(const ParticleSetT& P, const int iat, + ValueVector& psi) override + { + const RealType smooth_factor = HYBRIDBASE::evaluate_v(P, iat, myV); + const RealType cone(1); + if (smooth_factor < 0) { + SPLINEBASE::evaluateValue(P, iat, psi); + } + else if (smooth_factor == cone) { + const PointType& r = P.activeR(iat); + SPLINEBASE::assign_v(r, myV, psi, 0, myV.size() / 2); + } + else { + const PointType& r = P.activeR(iat); + psi_AO.resize(psi.size()); + SPLINEBASE::assign_v(r, myV, psi_AO, 0, myV.size() / 2); + SPLINEBASE::evaluateValue(P, iat, psi); + HYBRIDBASE::interpolate_buffer_v(psi, psi_AO); + } + } + + void + evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, const ValueVector& psiinv, + std::vector& ratios) override + { + if (VP.isOnSphere()) { + // resize scratch space + psi_AO.resize(psi.size()); + if (multi_myV.rows() < VP.getTotalNum()) + multi_myV.resize(VP.getTotalNum(), myV.size()); + const RealType smooth_factor = + HYBRIDBASE::evaluateValuesC2X(VP, multi_myV); + const RealType cone(1); + for (int iat = 0; iat < VP.getTotalNum(); ++iat) { + if (smooth_factor < 0) + 
SPLINEBASE::evaluateValue(VP, iat, psi); + else if (smooth_factor == cone) { + const PointType& r = VP.R[iat]; + Vector> myV_one( + multi_myV[iat], myV.size()); + SPLINEBASE::assign_v(r, myV_one, psi, 0, myV.size() / 2); + } + else { + const PointType& r = VP.R[iat]; + Vector> myV_one( + multi_myV[iat], myV.size()); + SPLINEBASE::assign_v(r, myV_one, psi_AO, 0, myV.size() / 2); + SPLINEBASE::evaluateValue(VP, iat, psi); + HYBRIDBASE::interpolate_buffer_v(psi, psi_AO); + } + ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size()); + } + } + else { + for (int iat = 0; iat < VP.getTotalNum(); ++iat) { + evaluateValue(VP, iat, psi); + ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size()); + } + } + } + + void + mw_evaluateDetRatios( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& + vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const final + { + BsplineSetT::mw_evaluateDetRatios( + spo_list, vp_list, psi_list, invRow_ptr_list, ratios_list); + } + + void + evaluateVGL(const ParticleSetT& P, const int iat, + ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override + { + const RealType smooth_factor = + HYBRIDBASE::evaluate_vgl(P, iat, myV, myG, myL); + const RealType cone(1); + if (smooth_factor < 0) { + SPLINEBASE::evaluateVGL(P, iat, psi, dpsi, d2psi); + } + else if (smooth_factor == cone) { + const PointType& r = P.activeR(iat); + SPLINEBASE::assign_vgl_from_l(r, psi, dpsi, d2psi); + } + else { + const PointType& r = P.activeR(iat); + psi_AO.resize(psi.size()); + dpsi_AO.resize(psi.size()); + d2psi_AO.resize(psi.size()); + SPLINEBASE::assign_vgl_from_l(r, psi_AO, dpsi_AO, d2psi_AO); + SPLINEBASE::evaluateVGL(P, iat, psi, dpsi, d2psi); + HYBRIDBASE::interpolate_buffer_vgl( + psi, dpsi, d2psi, psi_AO, dpsi_AO, d2psi_AO); + } + } + + void + mw_evaluateVGL(const RefVectorWithLeader>& sa_list, + const RefVectorWithLeader>& P_list, int iat, + const RefVector& psi_v_list, 
+ const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const final + { + BsplineSetT::mw_evaluateVGL( + sa_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); + } + + void + mw_evaluateVGLandDetRatioGrads( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, + std::vector& grads) const final + { + BsplineSetT::mw_evaluateVGLandDetRatioGrads( + spo_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads); + } + + void + evaluateVGH(const ParticleSetT& P, const int iat, + ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi) override + { + APP_ABORT("HybridRepCplx::evaluate_vgh not implemented!"); + if (HYBRIDBASE::evaluate_vgh(P, iat, myV, myG, myH)) { + const PointType& r = P.activeR(iat); + SPLINEBASE::assign_vgh( + r, psi, dpsi, grad_grad_psi, 0, myV.size() / 2); + } + else + SPLINEBASE::evaluateVGH(P, iat, psi, dpsi, grad_grad_psi); + } + + void + evaluateVGHGH(const ParticleSetT& P, const int iat, + ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) override + { + APP_ABORT("HybridRepCplx::evaluate_vghgh not implemented!"); + } + + void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) final + { + // bypass SPLINEBASE::evaluate_notranspose + BsplineSetT::evaluate_notranspose( + P, first, last, logdet, dlogdet, d2logdet); + } + + template + friend class HybridRepSetReaderT; + template + friend class SplineSetReaderT; + template + friend class BsplineReaderBaseT; +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepRealT.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepRealT.h new file mode 100644 index 0000000000..eea06ea1d1 --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepRealT.h @@ -0,0 +1,303 @@ 
+////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2019 QMCPACK developers. +// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +////////////////////////////////////////////////////////////////////////////////////// + +/** @file HybridRepReal.h + * + * hold HybridRepReal + */ +#ifndef QMCPLUSPLUS_HYBRIDREP_REALT_H +#define QMCPLUSPLUS_HYBRIDREP_REALT_H + +#include "QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h" +namespace qmcplusplus +{ +/** hybrid representation orbitals combining B-spline orbitals on a grid and + * atomic centered orbitals. + * @tparam SPLINEBASE B-spline orbital class. + * + * Only works with SPLINEBASE class containing real splines + */ +template +class HybridRepRealT : + public SPLINEBASE, + private HybridRepCenterOrbitalsT +{ +public: + using HYBRIDBASE = HybridRepCenterOrbitalsT; + using ST = typename SPLINEBASE::DataType; + using PointType = typename SPLINEBASE::PointType; + using SingleSplineType = typename SPLINEBASE::SingleSplineType; + using RealType = typename SPLINEBASE::RealType; + // types for evaluation results + using typename SPLINEBASE::GGGVector; + using typename SPLINEBASE::GradMatrix; + using typename SPLINEBASE::GradType; + using typename SPLINEBASE::GradVector; + using typename SPLINEBASE::HessVector; + using typename SPLINEBASE::OffloadMWVGLArray; + using typename SPLINEBASE::ValueMatrix; + using typename SPLINEBASE::ValueType; + using typename SPLINEBASE::ValueVector; + +private: + ValueVector psi_AO, d2psi_AO; + GradVector dpsi_AO; + Matrix> multi_myV; + + using SPLINEBASE::HalfG; + using SPLINEBASE::myG; + using SPLINEBASE::myH; + using SPLINEBASE::myL; + using SPLINEBASE::myV; + using 
SPLINEBASE::PrimLattice; + +public: + HybridRepRealT(const std::string& my_name) : SPLINEBASE(my_name) + { + } + + std::string + getClassName() const final + { + return "Hybrid" + SPLINEBASE::getClassName(); + } + std::string + getKeyword() const final + { + return "Hybrid" + SPLINEBASE::getKeyword(); + } + bool + isOMPoffload() const final + { + return false; + } + + std::unique_ptr> + makeClone() const override + { + return std::make_unique(*this); + } + + inline void + resizeStorage(size_t n, size_t nvals) + { + SPLINEBASE::resizeStorage(n, nvals); + HYBRIDBASE::resizeStorage(myV.size()); + } + + void + bcast_tables(Communicate* comm) + { + SPLINEBASE::bcast_tables(comm); + HYBRIDBASE::bcast_tables(comm); + } + + void + gather_tables(Communicate* comm) + { + SPLINEBASE::gather_tables(comm); + HYBRIDBASE::gather_atomic_tables(comm, SPLINEBASE::offset); + } + + inline void + flush_zero() + { + // SPLINEBASE::flush_zero(); + HYBRIDBASE::flush_zero(); + } + + bool + read_splines(hdf_archive& h5f) + { + return HYBRIDBASE::read_splines(h5f) && SPLINEBASE::read_splines(h5f); + } + + bool + write_splines(hdf_archive& h5f) + { + return HYBRIDBASE::write_splines(h5f) && SPLINEBASE::write_splines(h5f); + } + + void + evaluateValue(const ParticleSetT& P, const int iat, + ValueVector& psi) override + { + const RealType smooth_factor = HYBRIDBASE::evaluate_v(P, iat, myV); + const RealType cone(1); + if (smooth_factor < 0) { + SPLINEBASE::evaluateValue(P, iat, psi); + } + else if (smooth_factor == cone) { + const PointType& r = P.activeR(iat); + int bc_sign = HYBRIDBASE::get_bc_sign(r, PrimLattice, HalfG); + SPLINEBASE::assign_v(bc_sign, myV, psi, 0, myV.size()); + } + else { + const PointType& r = P.activeR(iat); + psi_AO.resize(psi.size()); + int bc_sign = HYBRIDBASE::get_bc_sign(r, PrimLattice, HalfG); + SPLINEBASE::assign_v(bc_sign, myV, psi_AO, 0, myV.size()); + SPLINEBASE::evaluateValue(P, iat, psi); + HYBRIDBASE::interpolate_buffer_v(psi, psi_AO); + } + } + + void + 
evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, const ValueVector& psiinv, + std::vector& ratios) override + { + if (VP.isOnSphere() && HYBRIDBASE::is_batched_safe(VP)) { + // resize scratch space + psi_AO.resize(psi.size()); + if (multi_myV.rows() < VP.getTotalNum()) + multi_myV.resize(VP.getTotalNum(), myV.size()); + std::vector bc_signs(VP.getTotalNum()); + const RealType smooth_factor = HYBRIDBASE::evaluateValuesR2R( + VP, PrimLattice, HalfG, multi_myV, bc_signs); + const RealType cone(1); + for (int iat = 0; iat < VP.getTotalNum(); ++iat) { + if (smooth_factor < 0) + SPLINEBASE::evaluateValue(VP, iat, psi); + else if (smooth_factor == cone) { + Vector> myV_one( + multi_myV[iat], myV.size()); + SPLINEBASE::assign_v( + bc_signs[iat], myV_one, psi, 0, myV.size()); + } + else { + Vector> myV_one( + multi_myV[iat], myV.size()); + SPLINEBASE::assign_v( + bc_signs[iat], myV_one, psi_AO, 0, myV.size()); + SPLINEBASE::evaluateValue(VP, iat, psi); + HYBRIDBASE::interpolate_buffer_v(psi, psi_AO); + } + ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size()); + } + } + else { + for (int iat = 0; iat < VP.getTotalNum(); ++iat) { + evaluateValue(VP, iat, psi); + ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size()); + } + } + } + + void + mw_evaluateDetRatios( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& + vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const final + { + BsplineSetT::mw_evaluateDetRatios( + spo_list, vp_list, psi_list, invRow_ptr_list, ratios_list); + } + + void + evaluateVGL(const ParticleSetT& P, const int iat, + ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override + { + const RealType smooth_factor = + HYBRIDBASE::evaluate_vgl(P, iat, myV, myG, myL); + const RealType cone(1); + if (smooth_factor < 0) { + SPLINEBASE::evaluateVGL(P, iat, psi, dpsi, d2psi); + } + else if (smooth_factor == cone) { + const PointType& r = 
P.activeR(iat); + int bc_sign = HYBRIDBASE::get_bc_sign(r, PrimLattice, HalfG); + SPLINEBASE::assign_vgl_from_l(bc_sign, psi, dpsi, d2psi); + } + else { + const PointType& r = P.activeR(iat); + psi_AO.resize(psi.size()); + dpsi_AO.resize(psi.size()); + d2psi_AO.resize(psi.size()); + int bc_sign = HYBRIDBASE::get_bc_sign(r, PrimLattice, HalfG); + SPLINEBASE::assign_vgl_from_l(bc_sign, psi_AO, dpsi_AO, d2psi_AO); + SPLINEBASE::evaluateVGL(P, iat, psi, dpsi, d2psi); + HYBRIDBASE::interpolate_buffer_vgl( + psi, dpsi, d2psi, psi_AO, dpsi_AO, d2psi_AO); + } + } + + void + mw_evaluateVGL(const RefVectorWithLeader>& sa_list, + const RefVectorWithLeader>& P_list, int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const final + { + BsplineSetT::mw_evaluateVGL( + sa_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); + } + + void + mw_evaluateVGLandDetRatioGrads( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, + std::vector& grads) const final + { + BsplineSetT::mw_evaluateVGLandDetRatioGrads( + spo_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads); + } + + void + evaluateVGH(const ParticleSetT& P, const int iat, + ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi) override + { + APP_ABORT("HybridRepReal::evaluateVGH not implemented!"); + if (HYBRIDBASE::evaluate_vgh(P, iat, myV, myG, myH)) { + const PointType& r = P.activeR(iat); + int bc_sign = HYBRIDBASE::get_bc_sign(r, PrimLattice, HalfG); + SPLINEBASE::assign_vgh( + bc_sign, psi, dpsi, grad_grad_psi, 0, myV.size()); + } + else + SPLINEBASE::evaluateVGH(P, iat, psi, dpsi, grad_grad_psi); + } + + void + evaluateVGHGH(const ParticleSetT& P, const int iat, + ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) override + { + APP_ABORT("HybridRepCplx::evaluateVGHGH not 
implemented!"); + } + + void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) final + { + // bypass SPLINEBASE::evaluate_notranspose + BsplineSetT::evaluate_notranspose( + P, first, last, logdet, dlogdet, d2logdet); + } + + template + friend class HybridRepSetReaderT; + template + friend class SplineSetReaderT; + template + friend class BsplineReaderBaseT; +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReader.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReader.h index 1e25e2ae11..a54219c80c 100644 --- a/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReader.h +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReader.h @@ -21,6 +21,7 @@ #include "Numerics/Quadrature.h" #include "Numerics/Bessel.h" #include "QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h" +#include "QMCWaveFunctions/BsplineFactory/SplineSetReader.h" #include "OhmmsData/AttributeSet.h" #include "CPU/math.hpp" #include "Concurrency/OpenMP.h" diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h new file mode 100644 index 0000000000..affb06638c --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h @@ -0,0 +1,492 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2019 QMCPACK developers. 
+// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_HYBRIDREP_READERT_H +#define QMCPLUSPLUS_HYBRIDREP_READERT_H + +#include "CPU/math.hpp" +#include "Concurrency/OpenMP.h" +#include "Numerics/Bessel.h" +#include "Numerics/Quadrature.h" +#include "OhmmsData/AttributeSet.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepSetReader.h" +#include "QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h" + +namespace qmcplusplus +{ + +/** General HybridRepSetReader to handle any unitcell + */ +template +class HybridRepSetReaderT : public SplineSetReaderT +{ +public: + using BaseReader = SplineSetReaderT; + + using BaseReader::bspline; + using BaseReader::mybuilder; + using BaseReader::rotate_phase_i; + using BaseReader::rotate_phase_r; + using typename BaseReader::DataType; + using typename BaseReader::ValueType; + + HybridRepSetReaderT(EinsplineSetBuilderT* e) : BaseReader(e) + { + } + + /** initialize basic parameters of atomic orbitals */ + void + initialize_hybridrep_atomic_centers() override + { + OhmmsAttributeSet a; + std::string scheme_name("Consistent"); + std::string s_function_name("LEKS2018"); + a.add(scheme_name, "smoothing_scheme"); + a.add(s_function_name, "smoothing_function"); + a.put(mybuilder->XMLRoot); + // assign smooth_scheme + if (scheme_name == "Consistent") + this->bspline->smooth_scheme = SA::smoothing_schemes::CONSISTENT; + else if (scheme_name == "SmoothAll") + bspline->smooth_scheme = SA::smoothing_schemes::SMOOTHALL; + else if (scheme_name == "SmoothPartial") + bspline->smooth_scheme = SA::smoothing_schemes::SMOOTHPARTIAL; + else + APP_ABORT( + "initialize_hybridrep_atomic_centers wrong smoothing_scheme " + "name! 
Only allows Consistent, SmoothAll or " + "SmoothPartial."); + + // assign smooth_function + if (s_function_name == "LEKS2018") + bspline->smooth_func_id = smoothing_functions::LEKS2018; + else if (s_function_name == "coscos") + bspline->smooth_func_id = smoothing_functions::COSCOS; + else if (s_function_name == "linear") + bspline->smooth_func_id = smoothing_functions::LINEAR; + else + APP_ABORT( + "initialize_hybridrep_atomic_centers wrong smoothing_function " + "name! Only allows LEKS2018, coscos or linear."); + app_log() << "Hybrid orbital representation uses " << scheme_name + << " smoothing scheme and " << s_function_name + << " smoothing function." << std::endl; + + bspline->set_info(*(mybuilder->SourcePtcl), mybuilder->TargetPtcl, + mybuilder->Super2Prim); + auto& centers = bspline->AtomicCenters; + auto& ACInfo = mybuilder->AtomicCentersInfo; + // load atomic center info only when it is not initialized + if (centers.size() == 0) { + bool success = true; + app_log() << "Reading atomic center info for hybrid representation" + << std::endl; + for (int center_idx = 0; center_idx < ACInfo.Ncenters; + center_idx++) { + const int my_GroupID = ACInfo.GroupID[center_idx]; + if (ACInfo.cutoff[center_idx] < 0) { + app_error() << "Hybrid orbital representation needs " + "parameter 'cutoff_radius' for atom " + << center_idx << std::endl; + success = false; + } + + if (ACInfo.inner_cutoff[center_idx] < 0) { + const double inner_cutoff = + std::max(ACInfo.cutoff[center_idx] - 0.3, 0.0); + app_log() << "Hybrid orbital representation setting " + "'inner_cutoff' to " + << inner_cutoff << " for group " << my_GroupID + << " as atom " << center_idx << std::endl; + // overwrite the inner_cutoff of all the atoms of the same + // species + for (int id = 0; id < ACInfo.Ncenters; id++) + if (my_GroupID == ACInfo.GroupID[id]) + ACInfo.inner_cutoff[id] = inner_cutoff; + } + else if (ACInfo.inner_cutoff[center_idx] > + ACInfo.cutoff[center_idx]) { + app_error() + << "Hybrid orbital 
representation 'inner_cutoff' must " + "be smaller than 'spline_radius' for atom " + << center_idx << std::endl; + success = false; + } + + if (ACInfo.cutoff[center_idx] > 0) { + if (ACInfo.lmax[center_idx] < 0) { + app_error() << "Hybrid orbital representation needs " + "parameter 'lmax' for atom " + << center_idx << std::endl; + success = false; + } + + if (ACInfo.spline_radius[center_idx] < 0 && + ACInfo.spline_npoints[center_idx] < 0) { + app_log() << "Parameters 'spline_radius' and " + "'spline_npoints' for group " + << my_GroupID << " as atom " << center_idx + << " are not specified." << std::endl; + const double delta = + std::min(0.02, ACInfo.cutoff[center_idx] / 4.0); + const int n_grid_point = + std::ceil( + (ACInfo.cutoff[center_idx] + 1e-4) / delta) + + 3; + for (int id = 0; id < ACInfo.Ncenters; id++) + if (my_GroupID == ACInfo.GroupID[id]) { + ACInfo.spline_npoints[id] = n_grid_point; + ACInfo.spline_radius[id] = + (n_grid_point - 1) * delta; + } + app_log() << " Based on default grid point distance " + << delta << std::endl; + app_log() + << " Setting 'spline_npoints' to " + << ACInfo.spline_npoints[center_idx] << std::endl; + app_log() + << " Setting 'spline_radius' to " + << ACInfo.spline_radius[center_idx] << std::endl; + } + else { + if (ACInfo.spline_radius[center_idx] < 0) { + app_error() + << "Hybrid orbital representation needs " + "parameter 'spline_radius' for atom " + << center_idx << std::endl; + success = false; + } + + if (ACInfo.spline_npoints[center_idx] < 0) { + app_error() + << "Hybrid orbital representation needs " + "parameter 'spline_npoints' for atom " + << center_idx << std::endl; + success = false; + } + } + + // check maximally allowed cutoff_radius + double max_allowed_cutoff = + ACInfo.spline_radius[center_idx] - + 2.0 * ACInfo.spline_radius[center_idx] / + (ACInfo.spline_npoints[center_idx] - 1); + if (success && + ACInfo.cutoff[center_idx] > max_allowed_cutoff) { + app_error() << "Hybrid orbital representation requires " 
+ "cutoff_radius<=" + << max_allowed_cutoff + << " calculated by " + "spline_radius-2*spline_radius/" + "(spline_npoints-1) for atom " + << center_idx << std::endl; + success = false; + } + } + else { + // no atomic regions for this atom type + ACInfo.spline_radius[center_idx] = 0.0; + ACInfo.spline_npoints[center_idx] = 0; + ACInfo.lmax[center_idx] = 0; + } + } + if (!success) + BaseReader::myComm->barrier_and_abort( + "initialize_hybridrep_atomic_centers Failed to initialize " + "atomic centers " + "in hybrid orbital representation!"); + + for (int center_idx = 0; center_idx < ACInfo.Ncenters; + center_idx++) { + AtomicOrbitalsT oneCenter(ACInfo.lmax[center_idx]); + oneCenter.set_info(ACInfo.ion_pos[center_idx], + ACInfo.cutoff[center_idx], ACInfo.inner_cutoff[center_idx], + ACInfo.spline_radius[center_idx], + ACInfo.non_overlapping_radius[center_idx], + ACInfo.spline_npoints[center_idx]); + centers.push_back(oneCenter); + } + } + } + + /** initialize construct atomic orbital radial functions from plane waves */ + inline void + create_atomic_centers_Gspace(Vector>& cG, + Communicate& band_group_comm, int iorb) override + { + band_group_comm.bcast(rotate_phase_r); + band_group_comm.bcast(rotate_phase_i); + band_group_comm.bcast(cG); + // distribute G-vectors over processor groups + const int Ngvecs = mybuilder->Gvecs[0].size(); + const int Nprocs = band_group_comm.size(); + const int Ngvecgroups = std::min(Ngvecs, Nprocs); + Communicate gvec_group_comm(band_group_comm, Ngvecgroups); + std::vector gvec_groups(Ngvecgroups + 1, 0); + FairDivideLow(Ngvecs, Ngvecgroups, gvec_groups); + const int gvec_first = gvec_groups[gvec_group_comm.getGroupID()]; + const int gvec_last = gvec_groups[gvec_group_comm.getGroupID() + 1]; + + // prepare Gvecs Ylm(G) + using UnitCellType = + typename EinsplineSetBuilderT::UnitCellType; + Gvectors Gvecs(mybuilder->Gvecs[0], + mybuilder->PrimCell, bspline->HalfG, gvec_first, gvec_last); + // if(band_group_comm.isGroupLeader()) std::cout << 
"print band=" << + // iorb << " KE=" << Gvecs.evaluate_KE(cG) << std::endl; + + std::vector>& centers = bspline->AtomicCenters; + app_log() << "Transforming band " << iorb << " on Rank 0" << std::endl; + // collect atomic centers by group + std::vector uniq_species; + for (int center_idx = 0; center_idx < centers.size(); center_idx++) { + auto& ACInfo = mybuilder->AtomicCentersInfo; + const int my_GroupID = ACInfo.GroupID[center_idx]; + int found_idx = -1; + for (size_t idx = 0; idx < uniq_species.size(); idx++) + if (my_GroupID == uniq_species[idx]) { + found_idx = idx; + break; + } + if (found_idx < 0) + uniq_species.push_back(my_GroupID); + } + // construct group list + std::vector> group_list(uniq_species.size()); + for (int center_idx = 0; center_idx < centers.size(); center_idx++) { + auto& ACInfo = mybuilder->AtomicCentersInfo; + const int my_GroupID = ACInfo.GroupID[center_idx]; + for (size_t idx = 0; idx < uniq_species.size(); idx++) + if (my_GroupID == uniq_species[idx]) { + group_list[idx].push_back(center_idx); + break; + } + } + + for (int group_idx = 0; group_idx < group_list.size(); group_idx++) { + const auto& mygroup = group_list[group_idx]; + const double spline_radius = centers[mygroup[0]].getSplineRadius(); + const int spline_npoints = centers[mygroup[0]].getSplineNpoints(); + const int lmax = centers[mygroup[0]].getLmax(); + const double delta = + spline_radius / static_cast(spline_npoints - 1); + const int lm_tot = (lmax + 1) * (lmax + 1); + const size_t natoms = mygroup.size(); + const int policy = lm_tot > natoms ? 0 : 1; + + std::vector> i_power(lm_tot); + // rotate phase is introduced here. 
+ std::complex i_temp(rotate_phase_r, rotate_phase_i); + for (size_t l = 0; l <= lmax; l++) { + for (size_t lm = l * l; lm < (l + 1) * (l + 1); lm++) + i_power[lm] = i_temp; + i_temp *= std::complex(0.0, 1.0); + } + + std::vector> all_vals(natoms); + std::vector>> vals_local( + spline_npoints * omp_get_max_threads()); + VectorSoaContainer myRSoA(natoms); + for (size_t idx = 0; idx < natoms; idx++) { + all_vals[idx].resize(spline_npoints, lm_tot * 2); + all_vals[idx] = 0.0; + myRSoA(idx) = centers[mygroup[idx]].getCenterPos(); + } + +#pragma omp parallel + { + const size_t tid = omp_get_thread_num(); + const size_t nt = omp_get_num_threads(); + + for (int ip = 0; ip < spline_npoints; ip++) { + const size_t ip_idx = tid * spline_npoints + ip; + if (policy == 1) { + vals_local[ip_idx].resize(lm_tot * 2); + for (size_t lm = 0; lm < lm_tot * 2; lm++) { + auto& vals = vals_local[ip_idx][lm]; + vals.resize(natoms); + std::fill(vals.begin(), vals.end(), 0.0); + } + } + else { + vals_local[ip_idx].resize(natoms * 2); + for (size_t iat = 0; iat < natoms * 2; iat++) { + auto& vals = vals_local[ip_idx][iat]; + vals.resize(lm_tot); + std::fill(vals.begin(), vals.end(), 0.0); + } + } + } + + const size_t size_pw_tile = 32; + const size_t num_pw_tiles = + (Gvecs.NumGvecs + size_pw_tile - 1) / size_pw_tile; + aligned_vector j_lm_G(lm_tot, 0.0); + std::vector> phase_shift_r(size_pw_tile); + std::vector> phase_shift_i(size_pw_tile); + std::vector> YlmG(size_pw_tile); + for (size_t ig = 0; ig < size_pw_tile; ig++) { + phase_shift_r[ig].resize(natoms); + phase_shift_i[ig].resize(natoms); + YlmG[ig].resize(lm_tot); + } + SoaSphericalTensor Ylm(lmax); + +#pragma omp for + for (size_t tile_id = 0; tile_id < num_pw_tiles; tile_id++) { + const size_t ig_first = tile_id * size_pw_tile; + const size_t ig_last = + std::min((tile_id + 1) * size_pw_tile, Gvecs.NumGvecs); + for (size_t ig = ig_first; ig < ig_last; ig++) { + const size_t ig_local = ig - ig_first; + // calculate phase shift for 
all the centers of this + // group + Gvecs.calc_phase_shift(myRSoA, ig, + phase_shift_r[ig_local], phase_shift_i[ig_local]); + Gvecs.calc_Ylm_G(ig, Ylm, YlmG[ig_local]); + } + + for (int ip = 0; ip < spline_npoints; ip++) { + double r = delta * static_cast(ip); + const size_t ip_idx = tid * spline_npoints + ip; + + for (size_t ig = ig_first; ig < ig_last; ig++) { + const size_t ig_local = ig - ig_first; + // calculate spherical bessel function + Gvecs.calc_jlm_G(lmax, r, ig, j_lm_G); + for (size_t lm = 0; lm < lm_tot; lm++) + j_lm_G[lm] *= YlmG[ig_local][lm]; + + const double cG_r = cG[ig + gvec_first].real(); + const double cG_i = cG[ig + gvec_first].imag(); + if (policy == 1) { + for (size_t lm = 0; lm < lm_tot; lm++) { + double* restrict vals_r = + vals_local[ip_idx][lm * 2].data(); + double* restrict vals_i = + vals_local[ip_idx][lm * 2 + 1].data(); + const double* restrict ps_r_ptr = + phase_shift_r[ig_local].data(); + const double* restrict ps_i_ptr = + phase_shift_i[ig_local].data(); + double cG_j_r = cG_r * j_lm_G[lm]; + double cG_j_i = cG_i * j_lm_G[lm]; +#pragma omp simd aligned(vals_r, vals_i, ps_r_ptr, ps_i_ptr \ + : QMC_SIMD_ALIGNMENT) + for (size_t idx = 0; idx < natoms; idx++) { + const double ps_r = ps_r_ptr[idx]; + const double ps_i = ps_i_ptr[idx]; + vals_r[idx] += + cG_j_r * ps_r - cG_j_i * ps_i; + vals_i[idx] += + cG_j_i * ps_r + cG_j_r * ps_i; + } + } + } + else { + for (size_t idx = 0; idx < natoms; idx++) { + double* restrict vals_r = + vals_local[ip_idx][idx * 2].data(); + double* restrict vals_i = + vals_local[ip_idx][idx * 2 + 1].data(); + const double* restrict j_lm_G_ptr = + j_lm_G.data(); + double cG_ps_r = + cG_r * phase_shift_r[ig_local][idx] - + cG_i * phase_shift_i[ig_local][idx]; + double cG_ps_i = + cG_i * phase_shift_r[ig_local][idx] + + cG_r * phase_shift_i[ig_local][idx]; +#pragma omp simd aligned(vals_r, vals_i, j_lm_G_ptr : QMC_SIMD_ALIGNMENT) + for (size_t lm = 0; lm < lm_tot; lm++) { + const double jlm = j_lm_G_ptr[lm]; + 
vals_r[lm] += cG_ps_r * jlm; + vals_i[lm] += cG_ps_i * jlm; + } + } + } + } + } + } + +#pragma omp for collapse(2) + for (int ip = 0; ip < spline_npoints; ip++) + for (size_t idx = 0; idx < natoms; idx++) { + double* vals = all_vals[idx][ip]; + for (size_t tid = 0; tid < nt; tid++) + for (size_t lm = 0; lm < lm_tot; lm++) { + double vals_th_r, vals_th_i; + const size_t ip_idx = tid * spline_npoints + ip; + if (policy == 1) { + vals_th_r = vals_local[ip_idx][lm * 2][idx]; + vals_th_i = + vals_local[ip_idx][lm * 2 + 1][idx]; + } + else { + vals_th_r = vals_local[ip_idx][idx * 2][lm]; + vals_th_i = + vals_local[ip_idx][idx * 2 + 1][lm]; + } + const double real_tmp = + 4.0 * M_PI * i_power[lm].real(); + const double imag_tmp = + 4.0 * M_PI * i_power[lm].imag(); + vals[lm] += + vals_th_r * real_tmp - vals_th_i * imag_tmp; + vals[lm + lm_tot] += + vals_th_i * real_tmp + vals_th_r * imag_tmp; + } + } + } + // app_log() << "Building band " << iorb << " at center " << + // center_idx << std::endl; + + for (size_t idx = 0; idx < natoms; idx++) { + // reduce all_vals + band_group_comm.reduce_in_place( + all_vals[idx].data(), all_vals[idx].size()); + if (!band_group_comm.isGroupLeader()) + continue; +#pragma omp parallel for + for (int lm = 0; lm < lm_tot; lm++) { + auto& mycenter = centers[mygroup[idx]]; + aligned_vector splineData_r(spline_npoints); + UBspline_1d_d* atomic_spline_r = nullptr; + for (size_t ip = 0; ip < spline_npoints; ip++) + splineData_r[ip] = all_vals[idx][ip][lm]; + atomic_spline_r = einspline::create(atomic_spline_r, 0.0, + spline_radius, spline_npoints, splineData_r.data(), + ((lm == 0) || (lm > 3))); + if (!bspline->isComplex()) { + mycenter.set_spline(atomic_spline_r, lm, iorb); + einspline::destroy(atomic_spline_r); + } + else { + aligned_vector splineData_i(spline_npoints); + UBspline_1d_d* atomic_spline_i = nullptr; + for (size_t ip = 0; ip < spline_npoints; ip++) + splineData_i[ip] = all_vals[idx][ip][lm + lm_tot]; + atomic_spline_i = 
einspline::create(atomic_spline_i, + 0.0, spline_radius, spline_npoints, + splineData_i.data(), ((lm == 0) || (lm > 3))); + mycenter.set_spline(atomic_spline_r, lm, iorb * 2); + mycenter.set_spline(atomic_spline_i, lm, iorb * 2 + 1); + einspline::destroy(atomic_spline_r); + einspline::destroy(atomic_spline_i); + } + } + } + } + } +}; +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.cpp index 69cf51d09e..9c02ad06d2 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.cpp @@ -1,6 +1,6 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2020 QMCPACK developers. 
// @@ -9,1215 +9,1409 @@ // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory ////////////////////////////////////////////////////////////////////////////////////// - #include "SplineC2COMPTargetT.h" -#include "spline2/MultiBsplineEval.hpp" -#include "spline2/MultiBsplineEval_OMPoffload.hpp" -#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" -#include "Platforms/OMPTarget/ompReductionComplex.hpp" + #include "ApplyPhaseC2C.hpp" #include "Concurrency/OpenMP.h" +#include "Platforms/OMPTarget/ompReductionComplex.hpp" +#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" +#include "spline2/MultiBsplineEval.hpp" +#include "spline2/MultiBsplineEval_OMPoffload.hpp" namespace qmcplusplus { -template -SplineC2COMPTargetT::SplineC2COMPTargetT(const SplineC2COMPTargetT& in) = default; - -template -inline void SplineC2COMPTargetT::set_spline(SingleSplineType* spline_r, - SingleSplineType* spline_i, - int twist, - int ispline, - int level) +template +SplineC2COMPTargetT::SplineC2COMPTargetT( + const SplineC2COMPTargetT& in) = default; + +template +inline void +SplineC2COMPTargetT::set_spline(SingleSplineType* spline_r, + SingleSplineType* spline_i, int twist, int ispline, int level) { - SplineInst->copy_spline(spline_r, 2 * ispline); - SplineInst->copy_spline(spline_i, 2 * ispline + 1); + SplineInst->copy_spline(spline_r, 2 * ispline); + SplineInst->copy_spline(spline_i, 2 * ispline + 1); } -template -bool SplineC2COMPTargetT::read_splines(hdf_archive& h5f) +template +bool +SplineC2COMPTargetT::read_splines(hdf_archive& h5f) { - std::ostringstream o; - o << "spline_" << this->MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); + std::ostringstream o; + o << "spline_" << this->MyIndex; + einspline_engine bigtable(SplineInst->getSplinePtr()); + return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); } -template -bool 
SplineC2COMPTargetT::write_splines(hdf_archive& h5f) +template +bool +SplineC2COMPTargetT::write_splines(hdf_archive& h5f) { - std::ostringstream o; - o << "spline_" << this->MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); + std::ostringstream o; + o << "spline_" << this->MyIndex; + einspline_engine bigtable(SplineInst->getSplinePtr()); + return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); } -template -inline void SplineC2COMPTargetT::assign_v(const PointType& r, - const vContainer_type& myV, - ValueVector& psi, - int first, - int last) const +template +inline void +SplineC2COMPTargetT::assign_v(const PointType& r, + const vContainer_type& myV, ValueVector& psi, int first, int last) const { - // protect last - last = last > this->kPoints.size() ? this->kPoints.size() : last; + // protect last + last = last > this->kPoints.size() ? this->kPoints.size() : last; - const ST x = r[0], y = r[1], z = r[2]; - const ST* restrict kx = myKcart->data(0); - const ST* restrict ky = myKcart->data(1); - const ST* restrict kz = myKcart->data(2); + const ST x = r[0], y = r[1], z = r[2]; + const ST* restrict kx = myKcart->data(0); + const ST* restrict ky = myKcart->data(1); + const ST* restrict kz = myKcart->data(2); #pragma omp simd - for (size_t j = first; j < last; ++j) - { - ST s, c; - const ST val_r = myV[2 * j]; - const ST val_i = myV[2 * j + 1]; - omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); - psi[j + this->first_spo] = ComplexT(val_r * c - val_i * s, val_i * c + val_r * s); - } + for (size_t j = first; j < last; ++j) { + ST s, c; + const ST val_r = myV[2 * j]; + const ST val_i = myV[2 * j + 1]; + omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); + psi[j + this->first_spo] = + ComplexT(val_r * c - val_i * s, val_i * c + val_r * s); + } } -template -void SplineC2COMPTargetT::evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) 
+template +void +SplineC2COMPTargetT::evaluateValue( + const ParticleSetT& P, const int iat, ValueVector& psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel - { - int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type - FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(r, myV, psi, first / 2, last / 2); - } + { + int first, last; + // Factor of 2 because psi is complex and the spline storage and + // evaluation uses a real type + FairDivideAligned(2 * psi.size(), getAlignment(), + omp_get_num_threads(), omp_get_thread_num(), first, last); + + spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); + assign_v(r, myV, psi, first / 2, last / 2); + } } -template -void SplineC2COMPTargetT::evaluateDetRatios(const VirtualParticleSetT& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) +template +void +SplineC2COMPTargetT::evaluateDetRatios( + const VirtualParticleSetT& VP, ValueVector& psi, + const ValueVector& psiinv, std::vector& ratios) { - const int nVP = VP.getTotalNum(); - psiinv_pos_copy.resize(psiinv.size() + nVP * 3); + const int nVP = VP.getTotalNum(); + psiinv_pos_copy.resize(psiinv.size() + nVP * 3); + + // stage psiinv to psiinv_pos_copy + std::copy_n(psiinv.data(), psiinv.size(), psiinv_pos_copy.data()); + + // pack particle positions + auto* restrict pos_scratch = + reinterpret_cast(psiinv_pos_copy.data() + psiinv.size()); + for (int iat = 0; iat < nVP; ++iat) { + const PointType& r = VP.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); + pos_scratch[iat * 6] = r[0]; + pos_scratch[iat * 6 + 1] = r[1]; + pos_scratch[iat * 6 + 2] = r[2]; + pos_scratch[iat * 6 + 3] = 
ru[0]; + pos_scratch[iat * 6 + 4] = ru[1]; + pos_scratch[iat * 6 + 5] = ru[2]; + } - // stage psiinv to psiinv_pos_copy - std::copy_n(psiinv.data(), psiinv.size(), psiinv_pos_copy.data()); + const size_t ChunkSizePerTeam = 512; + const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + ratios_private.resize(nVP, NumTeams); + const auto padded_size = myV.size(); + offload_scratch.resize(padded_size * nVP); + const auto orb_size = psiinv.size(); + results_scratch.resize(padded_size * nVP); + + // Ye: need to extract sizes and pointers before entering target region + const auto* spline_ptr = SplineInst->getSplinePtr(); + auto* offload_scratch_ptr = offload_scratch.data(); + auto* results_scratch_ptr = results_scratch.data(); + const auto myKcart_padded_size = myKcart->capacity(); + auto* myKcart_ptr = myKcart->data(); + auto* psiinv_ptr = psiinv_pos_copy.data(); + auto* ratios_private_ptr = ratios_private.data(); + const size_t first_spo_local = this->first_spo; - // pack particle positions - auto* restrict pos_scratch = reinterpret_cast(psiinv_pos_copy.data() + psiinv.size()); - for (int iat = 0; iat < nVP; ++iat) - { - const PointType& r = VP.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - pos_scratch[iat * 6] = r[0]; - pos_scratch[iat * 6 + 1] = r[1]; - pos_scratch[iat * 6 + 2] = r[2]; - pos_scratch[iat * 6 + 3] = ru[0]; - pos_scratch[iat * 6 + 4] = ru[1]; - pos_scratch[iat * 6 + 5] = ru[2]; - } - - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - ratios_private.resize(nVP, NumTeams); - const auto padded_size = myV.size(); - offload_scratch.resize(padded_size * nVP); - const auto orb_size = psiinv.size(); - results_scratch.resize(padded_size * nVP); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* offload_scratch_ptr = offload_scratch.data(); - auto* results_scratch_ptr = 
results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* myKcart_ptr = myKcart->data(); - auto* psiinv_ptr = psiinv_pos_copy.data(); - auto* ratios_private_ptr = ratios_private.data(); - const size_t first_spo_local = this->first_spo; - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*nVP) \ + { + ScopedTimer offload(offload_timer_); + PRAGMA_OFFLOAD( + "omp target teams distribute collapse(2) num_teams(NumTeams*nVP) \ map(always, to: psiinv_ptr[0:psiinv_pos_copy.size()]) \ map(always, from: ratios_private_ptr[0:NumTeams*nVP])") - for (int iat = 0; iat < nVP; iat++) - for (int team_id = 0; team_id < NumTeams; team_id++) - { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, padded_size); - - auto* restrict offload_scratch_iat_ptr = offload_scratch_ptr + padded_size * iat; - auto* restrict psi_iat_ptr = results_scratch_ptr + padded_size * iat; - auto* restrict pos_scratch = reinterpret_cast(psiinv_ptr + orb_size); - - int ix, iy, iz; - ST a[4], b[4], c[4]; - spline2::computeLocationAndFractional(spline_ptr, ST(pos_scratch[iat * 6 + 3]), ST(pos_scratch[iat * 6 + 4]), - ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c); - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, - offload_scratch_iat_ptr + first + index); - const size_t first_cplx = first / 2; - const size_t last_cplx = omptarget::min(last / 2, orb_size); - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2C::assign_v(ST(pos_scratch[iat * 6]), ST(pos_scratch[iat * 6 + 1]), ST(pos_scratch[iat * 6 + 2]), - psi_iat_ptr, offload_scratch_iat_ptr, myKcart_ptr, myKcart_padded_size, first_spo_local, index); - - ComplexT sum(0); - PRAGMA_OFFLOAD("omp parallel for simd 
reduction(+:sum)") - for (int i = first_cplx; i < last_cplx; i++) - sum += psi_iat_ptr[i] * psiinv_ptr[i]; - ratios_private_ptr[iat * NumTeams + team_id] = sum; - } - } - - // do the reduction manually - for (int iat = 0; iat < nVP; ++iat) - { - ratios[iat] = ComplexT(0); - for (int tid = 0; tid < NumTeams; tid++) - ratios[iat] += ratios_private[iat][tid]; - } + for (int iat = 0; iat < nVP; iat++) + for (int team_id = 0; team_id < NumTeams; team_id++) { + const size_t first = ChunkSizePerTeam * team_id; + const size_t last = + omptarget::min(first + ChunkSizePerTeam, padded_size); + + auto* restrict offload_scratch_iat_ptr = + offload_scratch_ptr + padded_size * iat; + auto* restrict psi_iat_ptr = + results_scratch_ptr + padded_size * iat; + auto* restrict pos_scratch = + reinterpret_cast(psiinv_ptr + orb_size); + + int ix, iy, iz; + ST a[4], b[4], c[4]; + spline2::computeLocationAndFractional(spline_ptr, + ST(pos_scratch[iat * 6 + 3]), ST(pos_scratch[iat * 6 + 4]), + ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c); + + PRAGMA_OFFLOAD("omp parallel for") + for (int index = 0; index < last - first; index++) + spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, + first + index, a, b, c, + offload_scratch_iat_ptr + first + index); + const size_t first_cplx = first / 2; + const size_t last_cplx = omptarget::min(last / 2, orb_size); + PRAGMA_OFFLOAD("omp parallel for") + for (int index = first_cplx; index < last_cplx; index++) + C2C::assign_v(ST(pos_scratch[iat * 6]), + ST(pos_scratch[iat * 6 + 1]), + ST(pos_scratch[iat * 6 + 2]), psi_iat_ptr, + offload_scratch_iat_ptr, myKcart_ptr, + myKcart_padded_size, first_spo_local, index); + + ComplexT sum(0); + PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)") + for (int i = first_cplx; i < last_cplx; i++) + sum += psi_iat_ptr[i] * psiinv_ptr[i]; + ratios_private_ptr[iat * NumTeams + team_id] = sum; + } + } + + // do the reduction manually + for (int iat = 0; iat < nVP; ++iat) { + ratios[iat] = ComplexT(0); + 
for (int tid = 0; tid < NumTeams; tid++) + ratios[iat] += ratios_private[iat][tid]; + } } -template -void SplineC2COMPTargetT::mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader>& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const +template +void +SplineC2COMPTargetT::mw_evaluateDetRatios( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const { - assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.template getCastedLeader>(); - auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); - auto& det_ratios_buffer_H2D = mw_mem.det_ratios_buffer_H2D; - auto& mw_ratios_private = mw_mem.mw_ratios_private; - auto& mw_offload_scratch = mw_mem.mw_offload_scratch; - auto& mw_results_scratch = mw_mem.mw_results_scratch; - const size_t nw = spo_list.size(); - const size_t orb_size = phi_leader.size(); - - size_t mw_nVP = 0; - for (const VirtualParticleSetT& VP : vp_list) - mw_nVP += VP.getTotalNum(); - - const size_t packed_size = nw * sizeof(ValueType*) + mw_nVP * (6 * sizeof(ST) + sizeof(int)); - det_ratios_buffer_H2D.resize(packed_size); - - // pack invRow_ptr_list to det_ratios_buffer_H2D - Vector ptr_buffer(reinterpret_cast(det_ratios_buffer_H2D.data()), nw); - for (size_t iw = 0; iw < nw; iw++) - ptr_buffer[iw] = invRow_ptr_list[iw]; - - // pack particle positions - auto* pos_ptr = reinterpret_cast(det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*)); - auto* ref_id_ptr = - reinterpret_cast(det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(ST)); - size_t iVP = 0; - for (size_t iw = 0; iw < nw; iw++) - { - const VirtualParticleSetT& VP = vp_list[iw]; - assert(ratios_list[iw].size() == VP.getTotalNum()); - for (size_t iat = 0; iat < VP.getTotalNum(); ++iat, ++iVP) - { - ref_id_ptr[iVP] = iw; - const PointType& r 
= VP.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - pos_ptr[0] = r[0]; - pos_ptr[1] = r[1]; - pos_ptr[2] = r[2]; - pos_ptr[3] = ru[0]; - pos_ptr[4] = ru[1]; - pos_ptr[5] = ru[2]; - pos_ptr += 6; + assert(this == &spo_list.getLeader()); + auto& phi_leader = spo_list.template getCastedLeader(); + auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); + auto& det_ratios_buffer_H2D = mw_mem.det_ratios_buffer_H2D; + auto& mw_ratios_private = mw_mem.mw_ratios_private; + auto& mw_offload_scratch = mw_mem.mw_offload_scratch; + auto& mw_results_scratch = mw_mem.mw_results_scratch; + const size_t nw = spo_list.size(); + const size_t orb_size = phi_leader.size(); + + size_t mw_nVP = 0; + for (const VirtualParticleSetT& VP : vp_list) + mw_nVP += VP.getTotalNum(); + + const size_t packed_size = + nw * sizeof(ValueType*) + mw_nVP * (6 * sizeof(ST) + sizeof(int)); + det_ratios_buffer_H2D.resize(packed_size); + + // pack invRow_ptr_list to det_ratios_buffer_H2D + Vector ptr_buffer( + reinterpret_cast(det_ratios_buffer_H2D.data()), nw); + for (size_t iw = 0; iw < nw; iw++) + ptr_buffer[iw] = invRow_ptr_list[iw]; + + // pack particle positions + auto* pos_ptr = reinterpret_cast( + det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*)); + auto* ref_id_ptr = reinterpret_cast(det_ratios_buffer_H2D.data() + + nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(ST)); + size_t iVP = 0; + for (size_t iw = 0; iw < nw; iw++) { + const VirtualParticleSetT& VP = vp_list[iw]; + assert(ratios_list[iw].size() == VP.getTotalNum()); + for (size_t iat = 0; iat < VP.getTotalNum(); ++iat, ++iVP) { + ref_id_ptr[iVP] = iw; + const PointType& r = VP.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); + pos_ptr[0] = r[0]; + pos_ptr[1] = r[1]; + pos_ptr[2] = r[2]; + pos_ptr[3] = ru[0]; + pos_ptr[4] = ru[1]; + pos_ptr[5] = ru[2]; + pos_ptr += 6; + } } - } - - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - 
mw_ratios_private.resize(mw_nVP, NumTeams); - const auto padded_size = myV.size(); - mw_offload_scratch.resize(padded_size * mw_nVP); - mw_results_scratch.resize(padded_size * mw_nVP); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* offload_scratch_ptr = mw_offload_scratch.data(); - auto* results_scratch_ptr = mw_results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* myKcart_ptr = myKcart->data(); - auto* buffer_H2D_ptr = det_ratios_buffer_H2D.data(); - auto* ratios_private_ptr = mw_ratios_private.data(); - const size_t first_spo_local = this->first_spo; - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*mw_nVP) \ + + const size_t ChunkSizePerTeam = 512; + const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + mw_ratios_private.resize(mw_nVP, NumTeams); + const auto padded_size = myV.size(); + mw_offload_scratch.resize(padded_size * mw_nVP); + mw_results_scratch.resize(padded_size * mw_nVP); + + // Ye: need to extract sizes and pointers before entering target region + const auto* spline_ptr = SplineInst->getSplinePtr(); + auto* offload_scratch_ptr = mw_offload_scratch.data(); + auto* results_scratch_ptr = mw_results_scratch.data(); + const auto myKcart_padded_size = myKcart->capacity(); + auto* myKcart_ptr = myKcart->data(); + auto* buffer_H2D_ptr = det_ratios_buffer_H2D.data(); + auto* ratios_private_ptr = mw_ratios_private.data(); + const size_t first_spo_local = this->first_spo; + + { + ScopedTimer offload(offload_timer_); + PRAGMA_OFFLOAD( + "omp target teams distribute collapse(2) num_teams(NumTeams*mw_nVP) \ map(always, to: buffer_H2D_ptr[0:det_ratios_buffer_H2D.size()]) \ map(always, from: ratios_private_ptr[0:NumTeams*mw_nVP])") - for (int iat = 0; iat < mw_nVP; iat++) - for (int team_id = 0; team_id < NumTeams; team_id++) - { - const size_t 
first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, padded_size); - - auto* restrict offload_scratch_iat_ptr = offload_scratch_ptr + padded_size * iat; - auto* restrict psi_iat_ptr = results_scratch_ptr + padded_size * iat; - auto* ref_id_ptr = reinterpret_cast(buffer_H2D_ptr + nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(ST)); - auto* restrict psiinv_ptr = reinterpret_cast(buffer_H2D_ptr)[ref_id_ptr[iat]]; - auto* restrict pos_scratch = reinterpret_cast(buffer_H2D_ptr + nw * sizeof(ValueType*)); - - int ix, iy, iz; - ST a[4], b[4], c[4]; - spline2::computeLocationAndFractional(spline_ptr, pos_scratch[iat * 6 + 3], pos_scratch[iat * 6 + 4], - pos_scratch[iat * 6 + 5], ix, iy, iz, a, b, c); - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, - offload_scratch_iat_ptr + first + index); - const size_t first_cplx = first / 2; - const size_t last_cplx = omptarget::min(last / 2, orb_size); - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2C::assign_v(pos_scratch[iat * 6], pos_scratch[iat * 6 + 1], pos_scratch[iat * 6 + 2], psi_iat_ptr, - offload_scratch_iat_ptr, myKcart_ptr, myKcart_padded_size, first_spo_local, index); - - ComplexT sum(0); - PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)") - for (int i = first_cplx; i < last_cplx; i++) - sum += psi_iat_ptr[i] * psiinv_ptr[i]; - ratios_private_ptr[iat * NumTeams + team_id] = sum; - } - } - - // do the reduction manually - iVP = 0; - for (size_t iw = 0; iw < nw; iw++) - { - auto& ratios = ratios_list[iw]; - for (size_t iat = 0; iat < ratios.size(); iat++, iVP++) - { - ratios[iat] = ComplexT(0); - for (int tid = 0; tid < NumTeams; ++tid) - ratios[iat] += mw_ratios_private[iVP][tid]; + for (int iat = 0; iat < mw_nVP; iat++) + for (int team_id = 0; team_id < NumTeams; team_id++) { + const size_t 
first = ChunkSizePerTeam * team_id; + const size_t last = + omptarget::min(first + ChunkSizePerTeam, padded_size); + + auto* restrict offload_scratch_iat_ptr = + offload_scratch_ptr + padded_size * iat; + auto* restrict psi_iat_ptr = + results_scratch_ptr + padded_size * iat; + auto* ref_id_ptr = reinterpret_cast(buffer_H2D_ptr + + nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(ST)); + auto* restrict psiinv_ptr = reinterpret_cast( + buffer_H2D_ptr)[ref_id_ptr[iat]]; + auto* restrict pos_scratch = reinterpret_cast( + buffer_H2D_ptr + nw * sizeof(ValueType*)); + + int ix, iy, iz; + ST a[4], b[4], c[4]; + spline2::computeLocationAndFractional(spline_ptr, + pos_scratch[iat * 6 + 3], pos_scratch[iat * 6 + 4], + pos_scratch[iat * 6 + 5], ix, iy, iz, a, b, c); + + PRAGMA_OFFLOAD("omp parallel for") + for (int index = 0; index < last - first; index++) + spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, + first + index, a, b, c, + offload_scratch_iat_ptr + first + index); + const size_t first_cplx = first / 2; + const size_t last_cplx = omptarget::min(last / 2, orb_size); + PRAGMA_OFFLOAD("omp parallel for") + for (int index = first_cplx; index < last_cplx; index++) + C2C::assign_v(pos_scratch[iat * 6], + pos_scratch[iat * 6 + 1], pos_scratch[iat * 6 + 2], + psi_iat_ptr, offload_scratch_iat_ptr, myKcart_ptr, + myKcart_padded_size, first_spo_local, index); + + ComplexT sum(0); + PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)") + for (int i = first_cplx; i < last_cplx; i++) + sum += psi_iat_ptr[i] * psiinv_ptr[i]; + ratios_private_ptr[iat * NumTeams + team_id] = sum; + } + } + + // do the reduction manually + iVP = 0; + for (size_t iw = 0; iw < nw; iw++) { + auto& ratios = ratios_list[iw]; + for (size_t iat = 0; iat < ratios.size(); iat++, iVP++) { + ratios[iat] = ComplexT(0); + for (int tid = 0; tid < NumTeams; ++tid) + ratios[iat] += mw_ratios_private[iVP][tid]; + } } - } } -/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in 
cartesian - */ -template -inline void SplineC2COMPTargetT::assign_vgl_from_l(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) +/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in + * cartesian + */ +template +inline void +SplineC2COMPTargetT::assign_vgl_from_l( + const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { - constexpr ST two(2); - const ST x = r[0], y = r[1], z = r[2]; + constexpr ST two(2); + const ST x = r[0], y = r[1], z = r[2]; - const ST* restrict k0 = myKcart->data(0); - const ST* restrict k1 = myKcart->data(1); - const ST* restrict k2 = myKcart->data(2); + const ST* restrict k0 = myKcart->data(0); + const ST* restrict k1 = myKcart->data(1); + const ST* restrict k2 = myKcart->data(2); - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); + const ST* restrict g0 = myG.data(0); + const ST* restrict g1 = myG.data(1); + const ST* restrict g2 = myG.data(2); - const size_t N = this->last_spo - this->first_spo; + const size_t N = this->last_spo - this->first_spo; #pragma omp simd - for (size_t j = 0; j < N; ++j) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g0[jr]; - const ST dY_r = g1[jr]; - const ST dZ_r = g2[jr]; - - const ST dX_i = g0[ji]; - const ST dY_i = g1[ji]; - const ST dZ_i = g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const ST lap_r = myL[jr] + (*mKK)[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); 
- const ST lap_i = myL[ji] + (*mKK)[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - - const size_t psiIndex = j + this->first_spo; - psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); - dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); - dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); - dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - d2psi[psiIndex] = ComplexT(c * lap_r - s * lap_i, c * lap_i + s * lap_r); - } + for (size_t j = 0; j < N; ++j) { + const size_t jr = j << 1; + const size_t ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g0[jr]; + const ST dY_r = g1[jr]; + const ST dZ_r = g2[jr]; + + const ST dX_i = g0[ji]; + const ST dY_i = g1[ji]; + const ST dZ_i = g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + + const ST lap_r = myL[jr] + (*mKK)[j] * val_r + + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); + const ST lap_i = myL[ji] + (*mKK)[j] * val_i - + two * (kX * dX_r + kY * dY_r + kZ * dZ_r); + + const size_t psiIndex = j + this->first_spo; + psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); + dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); + dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); + dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); + d2psi[psiIndex] = + ComplexT(c * lap_r - s * lap_i, c * lap_i + s * lap_r); + } } -template -void SplineC2COMPTargetT::evaluateVGL(const ParticleSetT& P, - const int iat, - ValueVector& 
psi, - GradVector& dpsi, - ValueVector& d2psi) +template +void +SplineC2COMPTargetT::evaluateVGL(const ParticleSetT& P, + const int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - - const auto padded_size = myV.size(); - offload_scratch.resize(padded_size * SoAFields3D::NUM_FIELDS); - const auto orb_size = psi.size(); - // for V(1)G(3)L(1) final result - results_scratch.resize(padded_size * 5); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* offload_scratch_ptr = offload_scratch.data(); - auto* results_scratch_ptr = results_scratch.data(); - const auto x = r[0], y = r[1], z = r[2]; - const auto rux = ru[0], ruy = ru[1], ruz = ru[2]; - const auto myKcart_padded_size = myKcart->capacity(); - auto* mKK_ptr = mKK->data(); - auto* GGt_ptr = GGt_offload->data(); - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - auto* myKcart_ptr = myKcart->data(); - const size_t first_spo_local = this->first_spo; - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \ - map(always, from: results_scratch_ptr[0:padded_size*5])") - for (int team_id = 0; team_id < NumTeams; team_id++) + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); + + const size_t ChunkSizePerTeam = 512; + const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + + const auto padded_size = myV.size(); + offload_scratch.resize(padded_size * SoAFields3D::NUM_FIELDS); + const auto orb_size = psi.size(); + // for V(1)G(3)L(1) final result + results_scratch.resize(padded_size * 5); + + // Ye: need to extract sizes and pointers before entering target region + const auto* spline_ptr = 
SplineInst->getSplinePtr(); + auto* offload_scratch_ptr = offload_scratch.data(); + auto* results_scratch_ptr = results_scratch.data(); + const auto x = r[0], y = r[1], z = r[2]; + const auto rux = ru[0], ruy = ru[1], ruz = ru[2]; + const auto myKcart_padded_size = myKcart->capacity(); + auto* mKK_ptr = mKK->data(); + auto* GGt_ptr = GGt_offload->data(); + auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); + auto* myKcart_ptr = myKcart->data(); + const size_t first_spo_local = this->first_spo; + { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, padded_size); - - int ix, iy, iz; - ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; - spline2::computeLocationAndFractional(spline_ptr, rux, ruy, ruz, ix, iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c); - - const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2], - PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], - PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]}; - const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6], - GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - { - spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b, d2c, - offload_scratch_ptr + first + index, padded_size); - const int output_index = first + index; - offload_scratch_ptr[padded_size * SoAFields3D::LAPL + output_index] = - SymTrace(offload_scratch_ptr[padded_size * SoAFields3D::HESS00 + output_index], - offload_scratch_ptr[padded_size * SoAFields3D::HESS01 + output_index], - offload_scratch_ptr[padded_size * SoAFields3D::HESS02 + output_index], - offload_scratch_ptr[padded_size * SoAFields3D::HESS11 + output_index], - offload_scratch_ptr[padded_size * SoAFields3D::HESS12 + output_index], - offload_scratch_ptr[padded_size * 
SoAFields3D::HESS22 + output_index], symGGt); - } - - const size_t first_cplx = first / 2; - const size_t last_cplx = omptarget::min(last / 2, orb_size); - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2C::assign_vgl(x, y, z, results_scratch_ptr, padded_size, mKK_ptr, offload_scratch_ptr, padded_size, G, - myKcart_ptr, myKcart_padded_size, first_spo_local, index); + ScopedTimer offload(offload_timer_); + PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \ + map(always, from: results_scratch_ptr[0:padded_size*5])") + for (int team_id = 0; team_id < NumTeams; team_id++) { + const size_t first = ChunkSizePerTeam * team_id; + const size_t last = + omptarget::min(first + ChunkSizePerTeam, padded_size); + + int ix, iy, iz; + ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; + spline2::computeLocationAndFractional(spline_ptr, rux, ruy, ruz, ix, + iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c); + + const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], + PrimLattice_G_ptr[2], PrimLattice_G_ptr[3], + PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], + PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], + PrimLattice_G_ptr[8]}; + const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], + GGt_ptr[2] + GGt_ptr[6], GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], + GGt_ptr[8]}; + + PRAGMA_OFFLOAD("omp parallel for") + for (int index = 0; index < last - first; index++) { + spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, + first + index, a, b, c, da, db, dc, d2a, d2b, d2c, + offload_scratch_ptr + first + index, padded_size); + const int output_index = first + index; + offload_scratch_ptr[padded_size * SoAFields3D::LAPL + + output_index] = + SymTrace( + offload_scratch_ptr[padded_size * SoAFields3D::HESS00 + + output_index], + offload_scratch_ptr[padded_size * SoAFields3D::HESS01 + + output_index], + offload_scratch_ptr[padded_size * SoAFields3D::HESS02 + + output_index], + offload_scratch_ptr[padded_size * 
SoAFields3D::HESS11 + + output_index], + offload_scratch_ptr[padded_size * SoAFields3D::HESS12 + + output_index], + offload_scratch_ptr[padded_size * SoAFields3D::HESS22 + + output_index], + symGGt); + } + + const size_t first_cplx = first / 2; + const size_t last_cplx = omptarget::min(last / 2, orb_size); + PRAGMA_OFFLOAD("omp parallel for") + for (int index = first_cplx; index < last_cplx; index++) + C2C::assign_vgl(x, y, z, results_scratch_ptr, padded_size, + mKK_ptr, offload_scratch_ptr, padded_size, G, myKcart_ptr, + myKcart_padded_size, first_spo_local, index); + } + } + + for (size_t i = 0; i < orb_size; i++) { + psi[i] = results_scratch[i]; + dpsi[i][0] = results_scratch[i + padded_size]; + dpsi[i][1] = results_scratch[i + padded_size * 2]; + dpsi[i][2] = results_scratch[i + padded_size * 3]; + d2psi[i] = results_scratch[i + padded_size * 4]; } - } - - for (size_t i = 0; i < orb_size; i++) - { - psi[i] = results_scratch[i]; - dpsi[i][0] = results_scratch[i + padded_size]; - dpsi[i][1] = results_scratch[i + padded_size * 2]; - dpsi[i][2] = results_scratch[i + padded_size * 3]; - d2psi[i] = results_scratch[i + padded_size * 4]; - } } -template -void SplineC2COMPTargetT::evaluateVGLMultiPos(const Vector>& multi_pos, - Vector>& offload_scratch, - Vector>& results_scratch, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const +template +void +SplineC2COMPTargetT::evaluateVGLMultiPos( + const Vector>& multi_pos, + Vector>& offload_scratch, + Vector>& results_scratch, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const { - const size_t num_pos = psi_v_list.size(); - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - const auto padded_size = myV.size(); - offload_scratch.resize(padded_size * num_pos * SoAFields3D::NUM_FIELDS); - const auto orb_size = psi_v_list[0].get().size(); - // for V(1)G(3)L(1) 
final result - results_scratch.resize(padded_size * num_pos * 5); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* pos_copy_ptr = multi_pos.data(); - auto* offload_scratch_ptr = offload_scratch.data(); - auto* results_scratch_ptr = results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* mKK_ptr = mKK->data(); - auto* GGt_ptr = GGt_offload->data(); - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - auto* myKcart_ptr = myKcart->data(); - const size_t first_spo_local = this->first_spo; - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \ + const size_t num_pos = psi_v_list.size(); + const size_t ChunkSizePerTeam = 512; + const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + const auto padded_size = myV.size(); + offload_scratch.resize(padded_size * num_pos * SoAFields3D::NUM_FIELDS); + const auto orb_size = psi_v_list[0].get().size(); + // for V(1)G(3)L(1) final result + results_scratch.resize(padded_size * num_pos * 5); + + // Ye: need to extract sizes and pointers before entering target region + const auto* spline_ptr = SplineInst->getSplinePtr(); + auto* pos_copy_ptr = multi_pos.data(); + auto* offload_scratch_ptr = offload_scratch.data(); + auto* results_scratch_ptr = results_scratch.data(); + const auto myKcart_padded_size = myKcart->capacity(); + auto* mKK_ptr = mKK->data(); + auto* GGt_ptr = GGt_offload->data(); + auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); + auto* myKcart_ptr = myKcart->data(); + const size_t first_spo_local = this->first_spo; + + { + ScopedTimer offload(offload_timer_); + PRAGMA_OFFLOAD( + "omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \ map(always, to: pos_copy_ptr[0:num_pos*6]) \ map(always, from: results_scratch_ptr[0:padded_size*num_pos*5])") - for (int iw = 0; iw < 
num_pos; iw++) - for (int team_id = 0; team_id < NumTeams; team_id++) - { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, padded_size); - - auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + padded_size * iw * SoAFields3D::NUM_FIELDS; - auto* restrict psi_iw_ptr = results_scratch_ptr + padded_size * iw * 5; - - int ix, iy, iz; - ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; - spline2::computeLocationAndFractional(spline_ptr, pos_copy_ptr[iw * 6 + 3], pos_copy_ptr[iw * 6 + 4], - pos_copy_ptr[iw * 6 + 5], ix, iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c); - - const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2], - PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], - PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]}; - const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6], - GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - { - spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b, - d2c, offload_scratch_iw_ptr + first + index, padded_size); - const int output_index = first + index; - offload_scratch_iw_ptr[padded_size * SoAFields3D::LAPL + output_index] = - SymTrace(offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS00 + output_index], - offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS01 + output_index], - offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS02 + output_index], - offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS11 + output_index], - offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS12 + output_index], - offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS22 + output_index], symGGt); - } + for (int iw = 0; iw < num_pos; iw++) + for (int team_id = 0; team_id < NumTeams; team_id++) { + const size_t first = 
ChunkSizePerTeam * team_id; + const size_t last = + omptarget::min(first + ChunkSizePerTeam, padded_size); + + auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + + padded_size * iw * SoAFields3D::NUM_FIELDS; + auto* restrict psi_iw_ptr = + results_scratch_ptr + padded_size * iw * 5; + + int ix, iy, iz; + ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], + d2c[4]; + spline2::computeLocationAndFractional(spline_ptr, + pos_copy_ptr[iw * 6 + 3], pos_copy_ptr[iw * 6 + 4], + pos_copy_ptr[iw * 6 + 5], ix, iy, iz, a, b, c, da, db, dc, + d2a, d2b, d2c); + + const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], + PrimLattice_G_ptr[2], PrimLattice_G_ptr[3], + PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], + PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], + PrimLattice_G_ptr[8]}; + const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], + GGt_ptr[2] + GGt_ptr[6], GGt_ptr[4], + GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; + + PRAGMA_OFFLOAD("omp parallel for") + for (int index = 0; index < last - first; index++) { + spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, + first + index, a, b, c, da, db, dc, d2a, d2b, d2c, + offload_scratch_iw_ptr + first + index, padded_size); + const int output_index = first + index; + offload_scratch_iw_ptr[padded_size * SoAFields3D::LAPL + + output_index] = + SymTrace(offload_scratch_iw_ptr[padded_size * + SoAFields3D::HESS00 + + output_index], + offload_scratch_iw_ptr[padded_size * + SoAFields3D::HESS01 + + output_index], + offload_scratch_iw_ptr[padded_size * + SoAFields3D::HESS02 + + output_index], + offload_scratch_iw_ptr[padded_size * + SoAFields3D::HESS11 + + output_index], + offload_scratch_iw_ptr[padded_size * + SoAFields3D::HESS12 + + output_index], + offload_scratch_iw_ptr[padded_size * + SoAFields3D::HESS22 + + output_index], + symGGt); + } + + const size_t first_cplx = first / 2; + const size_t last_cplx = omptarget::min(last / 2, orb_size); + PRAGMA_OFFLOAD("omp parallel for") + for (int index = first_cplx; 
index < last_cplx; index++) + C2C::assign_vgl(pos_copy_ptr[iw * 6], + pos_copy_ptr[iw * 6 + 1], pos_copy_ptr[iw * 6 + 2], + psi_iw_ptr, padded_size, mKK_ptr, + offload_scratch_iw_ptr, padded_size, G, myKcart_ptr, + myKcart_padded_size, first_spo_local, index); + } + } - const size_t first_cplx = first / 2; - const size_t last_cplx = omptarget::min(last / 2, orb_size); - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2C::assign_vgl(pos_copy_ptr[iw * 6], pos_copy_ptr[iw * 6 + 1], pos_copy_ptr[iw * 6 + 2], psi_iw_ptr, - padded_size, mKK_ptr, offload_scratch_iw_ptr, padded_size, G, myKcart_ptr, - myKcart_padded_size, first_spo_local, index); - } - } - - for (int iw = 0; iw < num_pos; ++iw) - { - auto* restrict results_iw_ptr = results_scratch_ptr + padded_size * iw * 5; - ValueVector& psi_v(psi_v_list[iw]); - GradVector& dpsi_v(dpsi_v_list[iw]); - ValueVector& d2psi_v(d2psi_v_list[iw]); - for (size_t i = 0; i < orb_size; i++) - { - psi_v[i] = results_iw_ptr[i]; - dpsi_v[i][0] = results_iw_ptr[i + padded_size]; - dpsi_v[i][1] = results_iw_ptr[i + padded_size * 2]; - dpsi_v[i][2] = results_iw_ptr[i + padded_size * 3]; - d2psi_v[i] = results_iw_ptr[i + padded_size * 4]; + for (int iw = 0; iw < num_pos; ++iw) { + auto* restrict results_iw_ptr = + results_scratch_ptr + padded_size * iw * 5; + ValueVector& psi_v(psi_v_list[iw]); + GradVector& dpsi_v(dpsi_v_list[iw]); + ValueVector& d2psi_v(d2psi_v_list[iw]); + for (size_t i = 0; i < orb_size; i++) { + psi_v[i] = results_iw_ptr[i]; + dpsi_v[i][0] = results_iw_ptr[i + padded_size]; + dpsi_v[i][1] = results_iw_ptr[i + padded_size * 2]; + dpsi_v[i][2] = results_iw_ptr[i + padded_size * 3]; + d2psi_v[i] = results_iw_ptr[i + padded_size * 4]; + } } - } } -template -void SplineC2COMPTargetT::mw_evaluateVGL(const RefVectorWithLeader& sa_list, - const RefVectorWithLeader>& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& 
d2psi_v_list) const +template +void +SplineC2COMPTargetT::mw_evaluateVGL( + const RefVectorWithLeader>& sa_list, + const RefVectorWithLeader>& P_list, int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const { - assert(this == &sa_list.getLeader()); - auto& phi_leader = sa_list.template getCastedLeader>(); - auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); - auto& mw_pos_copy = mw_mem.mw_pos_copy; - auto& mw_offload_scratch = mw_mem.mw_offload_scratch; - auto& mw_results_scratch = mw_mem.mw_results_scratch; - const int nwalkers = sa_list.size(); - mw_pos_copy.resize(nwalkers * 6); - - // pack particle positions - for (int iw = 0; iw < nwalkers; ++iw) - { - const PointType& r = P_list[iw].activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - mw_pos_copy[iw * 6] = r[0]; - mw_pos_copy[iw * 6 + 1] = r[1]; - mw_pos_copy[iw * 6 + 2] = r[2]; - mw_pos_copy[iw * 6 + 3] = ru[0]; - mw_pos_copy[iw * 6 + 4] = ru[1]; - mw_pos_copy[iw * 6 + 5] = ru[2]; - } - - phi_leader.evaluateVGLMultiPos(mw_pos_copy, mw_offload_scratch, mw_results_scratch, psi_v_list, dpsi_v_list, - d2psi_v_list); + assert(this == &sa_list.getLeader()); + auto& phi_leader = sa_list.template getCastedLeader(); + auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); + auto& mw_pos_copy = mw_mem.mw_pos_copy; + auto& mw_offload_scratch = mw_mem.mw_offload_scratch; + auto& mw_results_scratch = mw_mem.mw_results_scratch; + const int nwalkers = sa_list.size(); + mw_pos_copy.resize(nwalkers * 6); + + // pack particle positions + for (int iw = 0; iw < nwalkers; ++iw) { + const PointType& r = P_list[iw].activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); + mw_pos_copy[iw * 6] = r[0]; + mw_pos_copy[iw * 6 + 1] = r[1]; + mw_pos_copy[iw * 6 + 2] = r[2]; + mw_pos_copy[iw * 6 + 3] = ru[0]; + mw_pos_copy[iw * 6 + 4] = ru[1]; + mw_pos_copy[iw * 6 + 5] = ru[2]; + } + + phi_leader.evaluateVGLMultiPos(mw_pos_copy, mw_offload_scratch, + 
mw_results_scratch, psi_v_list, dpsi_v_list, d2psi_v_list); } -template -void SplineC2COMPTargetT::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader>& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const +template +void +SplineC2COMPTargetT::mw_evaluateVGLandDetRatioGrads( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, + std::vector& grads) const { - assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.template getCastedLeader>(); - auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); - auto& buffer_H2D = mw_mem.buffer_H2D; - auto& rg_private = mw_mem.rg_private; - auto& mw_offload_scratch = mw_mem.mw_offload_scratch; - auto& mw_results_scratch = mw_mem.mw_results_scratch; - const int nwalkers = spo_list.size(); - buffer_H2D.resize(nwalkers, sizeof(ST) * 6 + sizeof(ValueType*)); - - // pack particle positions and invRow pointers. 
- for (int iw = 0; iw < nwalkers; ++iw) - { - const PointType& r = P_list[iw].activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - Vector pos_copy(reinterpret_cast(buffer_H2D[iw]), 6); - - pos_copy[0] = r[0]; - pos_copy[1] = r[1]; - pos_copy[2] = r[2]; - pos_copy[3] = ru[0]; - pos_copy[4] = ru[1]; - pos_copy[5] = ru[2]; - - auto& invRow_ptr = *reinterpret_cast(buffer_H2D[iw] + sizeof(ST) * 6); - invRow_ptr = invRow_ptr_list[iw]; - } - - const size_t num_pos = nwalkers; - const auto orb_size = phi_vgl_v.size(2); - const auto padded_size = myV.size(); - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - mw_offload_scratch.resize(padded_size * num_pos * SoAFields3D::NUM_FIELDS); - // for V(1)G(3)L(1) final result - mw_results_scratch.resize(padded_size * num_pos * 5); - // per team ratio and grads - rg_private.resize(num_pos, NumTeams * 4); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* buffer_H2D_ptr = buffer_H2D.data(); - auto* offload_scratch_ptr = mw_offload_scratch.data(); - auto* results_scratch_ptr = mw_results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* mKK_ptr = mKK->data(); - auto* GGt_ptr = GGt_offload->data(); - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - auto* myKcart_ptr = myKcart->data(); - auto* phi_vgl_ptr = phi_vgl_v.data(); - auto* rg_private_ptr = rg_private.data(); - const size_t buffer_H2D_stride = buffer_H2D.cols(); - const size_t first_spo_local = this->first_spo; - const size_t phi_vgl_stride = num_pos * orb_size; - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \ + assert(this == &spo_list.getLeader()); + auto& phi_leader = spo_list.template getCastedLeader(); + auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); + auto& buffer_H2D = 
mw_mem.buffer_H2D; + auto& rg_private = mw_mem.rg_private; + auto& mw_offload_scratch = mw_mem.mw_offload_scratch; + auto& mw_results_scratch = mw_mem.mw_results_scratch; + const int nwalkers = spo_list.size(); + buffer_H2D.resize(nwalkers, sizeof(ST) * 6 + sizeof(ValueType*)); + + // pack particle positions and invRow pointers. + for (int iw = 0; iw < nwalkers; ++iw) { + const PointType& r = P_list[iw].activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); + Vector pos_copy(reinterpret_cast(buffer_H2D[iw]), 6); + + pos_copy[0] = r[0]; + pos_copy[1] = r[1]; + pos_copy[2] = r[2]; + pos_copy[3] = ru[0]; + pos_copy[4] = ru[1]; + pos_copy[5] = ru[2]; + + auto& invRow_ptr = *reinterpret_cast( + buffer_H2D[iw] + sizeof(ST) * 6); + invRow_ptr = invRow_ptr_list[iw]; + } + + const size_t num_pos = nwalkers; + const auto orb_size = phi_vgl_v.size(2); + const auto padded_size = myV.size(); + const size_t ChunkSizePerTeam = 512; + const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + mw_offload_scratch.resize(padded_size * num_pos * SoAFields3D::NUM_FIELDS); + // for V(1)G(3)L(1) final result + mw_results_scratch.resize(padded_size * num_pos * 5); + // per team ratio and grads + rg_private.resize(num_pos, NumTeams * 4); + + // Ye: need to extract sizes and pointers before entering target region + const auto* spline_ptr = SplineInst->getSplinePtr(); + auto* buffer_H2D_ptr = buffer_H2D.data(); + auto* offload_scratch_ptr = mw_offload_scratch.data(); + auto* results_scratch_ptr = mw_results_scratch.data(); + const auto myKcart_padded_size = myKcart->capacity(); + auto* mKK_ptr = mKK->data(); + auto* GGt_ptr = GGt_offload->data(); + auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); + auto* myKcart_ptr = myKcart->data(); + auto* phi_vgl_ptr = phi_vgl_v.data(); + auto* rg_private_ptr = rg_private.data(); + const size_t buffer_H2D_stride = buffer_H2D.cols(); + const size_t first_spo_local = this->first_spo; + const size_t phi_vgl_stride = num_pos * 
orb_size; + + { + ScopedTimer offload(offload_timer_); + PRAGMA_OFFLOAD( + "omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \ map(always, to: buffer_H2D_ptr[:buffer_H2D.size()]) \ map(always, from: rg_private_ptr[0:rg_private.size()])") - for (int iw = 0; iw < num_pos; iw++) - for (int team_id = 0; team_id < NumTeams; team_id++) - { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, padded_size); - - auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + padded_size * iw * SoAFields3D::NUM_FIELDS; - auto* restrict psi_iw_ptr = results_scratch_ptr + padded_size * iw * 5; - const auto* restrict pos_iw_ptr = reinterpret_cast(buffer_H2D_ptr + buffer_H2D_stride * iw); - const auto* restrict invRow_iw_ptr = - *reinterpret_cast(buffer_H2D_ptr + buffer_H2D_stride * iw + sizeof(ST) * 6); - - int ix, iy, iz; - ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; - spline2::computeLocationAndFractional(spline_ptr, pos_iw_ptr[3], pos_iw_ptr[4], pos_iw_ptr[5], ix, iy, iz, a, b, - c, da, db, dc, d2a, d2b, d2c); - - const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2], - PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], - PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]}; - const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6], - GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - { - spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b, - d2c, offload_scratch_iw_ptr + first + index, padded_size); - const int output_index = first + index; - offload_scratch_iw_ptr[padded_size * SoAFields3D::LAPL + output_index] = - SymTrace(offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS00 + output_index], - offload_scratch_iw_ptr[padded_size * 
SoAFields3D::HESS01 + output_index], - offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS02 + output_index], - offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS11 + output_index], - offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS12 + output_index], - offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS22 + output_index], symGGt); - } + for (int iw = 0; iw < num_pos; iw++) + for (int team_id = 0; team_id < NumTeams; team_id++) { + const size_t first = ChunkSizePerTeam * team_id; + const size_t last = + omptarget::min(first + ChunkSizePerTeam, padded_size); + + auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + + padded_size * iw * SoAFields3D::NUM_FIELDS; + auto* restrict psi_iw_ptr = + results_scratch_ptr + padded_size * iw * 5; + const auto* restrict pos_iw_ptr = reinterpret_cast( + buffer_H2D_ptr + buffer_H2D_stride * iw); + const auto* restrict invRow_iw_ptr = + *reinterpret_cast(buffer_H2D_ptr + + buffer_H2D_stride * iw + sizeof(ST) * 6); + + int ix, iy, iz; + ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], + d2c[4]; + spline2::computeLocationAndFractional(spline_ptr, pos_iw_ptr[3], + pos_iw_ptr[4], pos_iw_ptr[5], ix, iy, iz, a, b, c, da, db, + dc, d2a, d2b, d2c); + + const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], + PrimLattice_G_ptr[2], PrimLattice_G_ptr[3], + PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], + PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], + PrimLattice_G_ptr[8]}; + const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], + GGt_ptr[2] + GGt_ptr[6], GGt_ptr[4], + GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; + + PRAGMA_OFFLOAD("omp parallel for") + for (int index = 0; index < last - first; index++) { + spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, + first + index, a, b, c, da, db, dc, d2a, d2b, d2c, + offload_scratch_iw_ptr + first + index, padded_size); + const int output_index = first + index; + offload_scratch_iw_ptr[padded_size * SoAFields3D::LAPL + + output_index] = + 
SymTrace(offload_scratch_iw_ptr[padded_size * + SoAFields3D::HESS00 + + output_index], + offload_scratch_iw_ptr[padded_size * + SoAFields3D::HESS01 + + output_index], + offload_scratch_iw_ptr[padded_size * + SoAFields3D::HESS02 + + output_index], + offload_scratch_iw_ptr[padded_size * + SoAFields3D::HESS11 + + output_index], + offload_scratch_iw_ptr[padded_size * + SoAFields3D::HESS12 + + output_index], + offload_scratch_iw_ptr[padded_size * + SoAFields3D::HESS22 + + output_index], + symGGt); + } + + const size_t first_cplx = first / 2; + const size_t last_cplx = omptarget::min(last / 2, orb_size); + PRAGMA_OFFLOAD("omp parallel for") + for (int index = first_cplx; index < last_cplx; index++) + C2C::assign_vgl(pos_iw_ptr[0], pos_iw_ptr[1], pos_iw_ptr[2], + psi_iw_ptr, padded_size, mKK_ptr, + offload_scratch_iw_ptr, padded_size, G, myKcart_ptr, + myKcart_padded_size, first_spo_local, index); + + ValueType* restrict psi = psi_iw_ptr; + ValueType* restrict dpsi_x = psi_iw_ptr + padded_size; + ValueType* restrict dpsi_y = psi_iw_ptr + padded_size * 2; + ValueType* restrict dpsi_z = psi_iw_ptr + padded_size * 3; + ValueType* restrict d2psi = psi_iw_ptr + padded_size * 4; + + ValueType* restrict out_phi = phi_vgl_ptr + iw * orb_size; + ValueType* restrict out_dphi_x = out_phi + phi_vgl_stride; + ValueType* restrict out_dphi_y = out_dphi_x + phi_vgl_stride; + ValueType* restrict out_dphi_z = out_dphi_y + phi_vgl_stride; + ValueType* restrict out_d2phi = out_dphi_z + phi_vgl_stride; + + ValueType ratio(0), grad_x(0), grad_y(0), grad_z(0); + PRAGMA_OFFLOAD("omp parallel for \ + reduction(+: ratio, grad_x, grad_y, grad_z)") + for (size_t j = first_cplx; j < last_cplx; j++) { + const size_t psiIndex = first_spo_local + j; + + out_phi[psiIndex] = psi[psiIndex]; + out_dphi_x[psiIndex] = dpsi_x[psiIndex]; + out_dphi_y[psiIndex] = dpsi_y[psiIndex]; + out_dphi_z[psiIndex] = dpsi_z[psiIndex]; + out_d2phi[psiIndex] = d2psi[psiIndex]; + + ratio += psi[psiIndex] * 
invRow_iw_ptr[psiIndex]; + grad_x += dpsi_x[psiIndex] * invRow_iw_ptr[psiIndex]; + grad_y += dpsi_y[psiIndex] * invRow_iw_ptr[psiIndex]; + grad_z += dpsi_z[psiIndex] * invRow_iw_ptr[psiIndex]; + } + + rg_private_ptr[(iw * NumTeams + team_id) * 4] = ratio; + rg_private_ptr[(iw * NumTeams + team_id) * 4 + 1] = grad_x; + rg_private_ptr[(iw * NumTeams + team_id) * 4 + 2] = grad_y; + rg_private_ptr[(iw * NumTeams + team_id) * 4 + 3] = grad_z; + } + } - const size_t first_cplx = first / 2; - const size_t last_cplx = omptarget::min(last / 2, orb_size); - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2C::assign_vgl(pos_iw_ptr[0], pos_iw_ptr[1], pos_iw_ptr[2], psi_iw_ptr, padded_size, mKK_ptr, - offload_scratch_iw_ptr, padded_size, G, myKcart_ptr, myKcart_padded_size, first_spo_local, - index); - - ValueType* restrict psi = psi_iw_ptr; - ValueType* restrict dpsi_x = psi_iw_ptr + padded_size; - ValueType* restrict dpsi_y = psi_iw_ptr + padded_size * 2; - ValueType* restrict dpsi_z = psi_iw_ptr + padded_size * 3; - ValueType* restrict d2psi = psi_iw_ptr + padded_size * 4; - - ValueType* restrict out_phi = phi_vgl_ptr + iw * orb_size; - ValueType* restrict out_dphi_x = out_phi + phi_vgl_stride; - ValueType* restrict out_dphi_y = out_dphi_x + phi_vgl_stride; - ValueType* restrict out_dphi_z = out_dphi_y + phi_vgl_stride; - ValueType* restrict out_d2phi = out_dphi_z + phi_vgl_stride; - - ValueType ratio(0), grad_x(0), grad_y(0), grad_z(0); - PRAGMA_OFFLOAD("omp parallel for reduction(+: ratio, grad_x, grad_y, grad_z)") - for (size_t j = first_cplx; j < last_cplx; j++) - { - const size_t psiIndex = first_spo_local + j; - - out_phi[psiIndex] = psi[psiIndex]; - out_dphi_x[psiIndex] = dpsi_x[psiIndex]; - out_dphi_y[psiIndex] = dpsi_y[psiIndex]; - out_dphi_z[psiIndex] = dpsi_z[psiIndex]; - out_d2phi[psiIndex] = d2psi[psiIndex]; - - ratio += psi[psiIndex] * invRow_iw_ptr[psiIndex]; - grad_x += dpsi_x[psiIndex] * 
invRow_iw_ptr[psiIndex]; - grad_y += dpsi_y[psiIndex] * invRow_iw_ptr[psiIndex]; - grad_z += dpsi_z[psiIndex] * invRow_iw_ptr[psiIndex]; + for (int iw = 0; iw < num_pos; iw++) { + ValueType ratio(0); + for (int team_id = 0; team_id < NumTeams; team_id++) + ratio += rg_private[iw][team_id * 4]; + ratios[iw] = ratio; + + ValueType grad_x(0), grad_y(0), grad_z(0); + for (int team_id = 0; team_id < NumTeams; team_id++) { + grad_x += rg_private[iw][team_id * 4 + 1]; + grad_y += rg_private[iw][team_id * 4 + 2]; + grad_z += rg_private[iw][team_id * 4 + 3]; } - - rg_private_ptr[(iw * NumTeams + team_id) * 4] = ratio; - rg_private_ptr[(iw * NumTeams + team_id) * 4 + 1] = grad_x; - rg_private_ptr[(iw * NumTeams + team_id) * 4 + 2] = grad_y; - rg_private_ptr[(iw * NumTeams + team_id) * 4 + 3] = grad_z; - } - } - - for (int iw = 0; iw < num_pos; iw++) - { - ValueType ratio(0); - for (int team_id = 0; team_id < NumTeams; team_id++) - ratio += rg_private[iw][team_id * 4]; - ratios[iw] = ratio; - - ValueType grad_x(0), grad_y(0), grad_z(0); - for (int team_id = 0; team_id < NumTeams; team_id++) - { - grad_x += rg_private[iw][team_id * 4 + 1]; - grad_y += rg_private[iw][team_id * 4 + 2]; - grad_z += rg_private[iw][team_id * 4 + 3]; + grads[iw] = GradType{grad_x / ratio, grad_y / ratio, grad_z / ratio}; } - grads[iw] = GradType{grad_x / ratio, grad_y / ratio, grad_z / ratio}; - } } -template -void SplineC2COMPTargetT::assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const + +template +void +SplineC2COMPTargetT::assign_vgh(const PointType& r, ValueVector& psi, + GradVector& dpsi, HessVector& grad_grad_psi, int first, int last) const { - // protect last - last = last > this->kPoints.size() ? 
this->kPoints.size() : last; - - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart->data(0); - const ST* restrict k1 = myKcart->data(1); - const ST* restrict k2 = myKcart->data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); + // protect last + last = last > this->kPoints.size() ? this->kPoints.size() : last; + + const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), + g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), + g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), + g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), + g22 = PrimLattice.G(8); + const ST x = r[0], y = r[1], z = r[2]; + + const ST* restrict k0 = myKcart->data(0); + const ST* restrict k1 = myKcart->data(1); + const ST* restrict k2 = myKcart->data(2); + + const ST* restrict g0 = myG.data(0); + const ST* restrict g1 = myG.data(1); + const ST* restrict g2 = myG.data(2); + const ST* restrict h00 = myH.data(0); + const ST* restrict h01 = myH.data(1); + const ST* restrict h02 = myH.data(2); + const ST* restrict h11 = myH.data(3); + const ST* restrict h12 = myH.data(4); + const ST* restrict h22 = myH.data(5); #pragma omp simd - for (size_t j = first; j < last; ++j) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const 
ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = j + this->first_spo; - psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); - dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); - dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); - dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - - const ST h_xx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i); - const ST h_xy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i); - const ST h_xz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i); - const ST h_yx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i); - const ST h_yy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i); - const ST h_yz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i); - const ST h_zx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i); - const ST h_zy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], 
g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i); - const ST h_zz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i); - - const ST h_xx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r); - const ST h_xy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r); - const ST h_xz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r); - const ST h_yx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r); - const ST h_yy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r); - const ST h_yz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r); - const ST h_zx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r); - const ST h_zy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r); - const ST h_zz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r); - - grad_grad_psi[psiIndex][0] = ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r); - grad_grad_psi[psiIndex][1] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); - grad_grad_psi[psiIndex][2] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); - grad_grad_psi[psiIndex][3] = ComplexT(c * h_yx_r - s * h_yx_i, c * h_yx_i + s * h_yx_r); - grad_grad_psi[psiIndex][4] = ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r); - grad_grad_psi[psiIndex][5] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); - grad_grad_psi[psiIndex][6] = ComplexT(c * h_zx_r - s * 
h_zx_i, c * h_zx_i + s * h_zx_r); - grad_grad_psi[psiIndex][7] = ComplexT(c * h_zy_r - s * h_zy_i, c * h_zy_i + s * h_zy_r); - grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r); - } + for (size_t j = first; j < last; ++j) { + int jr = j << 1; + int ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; + const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; + const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; + + const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; + const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; + const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + + const size_t psiIndex = j + this->first_spo; + psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); + dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); + dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); + dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); + + const ST h_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g00, g01, g02) + + kX * (gX_i + dX_i); + const ST h_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g10, g11, g12) + + kX * (gY_i + dY_i); + const ST h_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g20, g21, g22) + + kX * (gZ_i + dZ_i); + const ST h_yx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], 
h12[jr], + h22[jr], g10, g11, g12, g00, g01, g02) + + kY * (gX_i + dX_i); + const ST h_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g10, g11, g12, g10, g11, g12) + + kY * (gY_i + dY_i); + const ST h_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g10, g11, g12, g20, g21, g22) + + kY * (gZ_i + dZ_i); + const ST h_zx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g20, g21, g22, g00, g01, g02) + + kZ * (gX_i + dX_i); + const ST h_zy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g20, g21, g22, g10, g11, g12) + + kZ * (gY_i + dY_i); + const ST h_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g20, g21, g22, g20, g21, g22) + + kZ * (gZ_i + dZ_i); + + const ST h_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g00, g01, g02) - + kX * (gX_r + dX_r); + const ST h_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g10, g11, g12) - + kX * (gY_r + dY_r); + const ST h_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g20, g21, g22) - + kX * (gZ_r + dZ_r); + const ST h_yx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g10, g11, g12, g00, g01, g02) - + kY * (gX_r + dX_r); + const ST h_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g10, g11, g12, g10, g11, g12) - + kY * (gY_r + dY_r); + const ST h_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g10, g11, g12, g20, g21, g22) - + kY * (gZ_r + dZ_r); + const ST h_zx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g20, g21, g22, g00, g01, g02) - + kZ * (gX_r + dX_r); + const ST h_zy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g20, g21, g22, g10, g11, g12) - + kZ * (gY_r + dY_r); + const ST h_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g20, g21, g22, g20, g21, g22) - + kZ * (gZ_r + dZ_r); + + 
grad_grad_psi[psiIndex][0] = + ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r); + grad_grad_psi[psiIndex][1] = + ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); + grad_grad_psi[psiIndex][2] = + ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); + grad_grad_psi[psiIndex][3] = + ComplexT(c * h_yx_r - s * h_yx_i, c * h_yx_i + s * h_yx_r); + grad_grad_psi[psiIndex][4] = + ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r); + grad_grad_psi[psiIndex][5] = + ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); + grad_grad_psi[psiIndex][6] = + ComplexT(c * h_zx_r - s * h_zx_i, c * h_zx_i + s * h_zx_r); + grad_grad_psi[psiIndex][7] = + ComplexT(c * h_zy_r - s * h_zy_i, c * h_zy_i + s * h_zy_r); + grad_grad_psi[psiIndex][8] = + ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r); + } } -template -void SplineC2COMPTargetT::evaluateVGH(const ParticleSetT& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) +template +void +SplineC2COMPTargetT::evaluateVGH(const ParticleSetT& P, + const int iat, ValueVector& psi, GradVector& dpsi, + HessVector& grad_grad_psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel - { - int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type - FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2); - } + { + int first, last; + // Factor of 2 because psi is complex and the spline storage and + // evaluation uses a real type + FairDivideAligned(2 * psi.size(), getAlignment(), + omp_get_num_threads(), omp_get_thread_num(), first, last); + + spline2::evaluate3d_vgh( + 
SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); + assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2); + } } -template -void SplineC2COMPTargetT::assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first, - int last) const +template +void +SplineC2COMPTargetT::assign_vghgh(const PointType& r, ValueVector& psi, + GradVector& dpsi, HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, + int first, int last) const { - // protect last - last = last < 0 ? this->kPoints.size() : (last > this->kPoints.size() ? this->kPoints.size() : last); - - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart->data(0); - const ST* restrict k1 = myKcart->data(1); - const ST* restrict k2 = myKcart->data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - - const ST* restrict gh000 = mygH.data(0); - const ST* restrict gh001 = mygH.data(1); - const ST* restrict gh002 = mygH.data(2); - const ST* restrict gh011 = mygH.data(3); - const ST* restrict gh012 = mygH.data(4); - const ST* restrict gh022 = mygH.data(5); - const ST* restrict gh111 = mygH.data(6); - const ST* restrict gh112 = mygH.data(7); - const ST* restrict gh122 = mygH.data(8); - const ST* restrict gh222 = mygH.data(9); - -//SIMD doesn't work quite right yet. Comment out until further debugging. + // protect last + last = last < 0 ? 
+ this->kPoints.size() : + (last > this->kPoints.size() ? this->kPoints.size() : last); + + const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), + g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), + g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), + g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), + g22 = PrimLattice.G(8); + const ST x = r[0], y = r[1], z = r[2]; + + const ST* restrict k0 = myKcart->data(0); + const ST* restrict k1 = myKcart->data(1); + const ST* restrict k2 = myKcart->data(2); + + const ST* restrict g0 = myG.data(0); + const ST* restrict g1 = myG.data(1); + const ST* restrict g2 = myG.data(2); + const ST* restrict h00 = myH.data(0); + const ST* restrict h01 = myH.data(1); + const ST* restrict h02 = myH.data(2); + const ST* restrict h11 = myH.data(3); + const ST* restrict h12 = myH.data(4); + const ST* restrict h22 = myH.data(5); + + const ST* restrict gh000 = mygH.data(0); + const ST* restrict gh001 = mygH.data(1); + const ST* restrict gh002 = mygH.data(2); + const ST* restrict gh011 = mygH.data(3); + const ST* restrict gh012 = mygH.data(4); + const ST* restrict gh022 = mygH.data(5); + const ST* restrict gh111 = mygH.data(6); + const ST* restrict gh112 = mygH.data(7); + const ST* restrict gh122 = mygH.data(8); + const ST* restrict gh222 = mygH.data(9); + +// SIMD doesn't work quite right yet. Comment out until further debugging. 
#pragma omp simd - for (size_t j = first; j < last; ++j) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = j + this->first_spo; - psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); - dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); - dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); - dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. 
- const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); - const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); - const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); - const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12); - const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22); - const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22); - - const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02); - const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12); - const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22); - const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12); - const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22); - const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22); - - const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; - const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; - const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; - const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; - const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; - const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; - - const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; - const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; - const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; - const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY 
* val_i; - const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; - const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; - - grad_grad_psi[psiIndex][0] = ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r); - grad_grad_psi[psiIndex][1] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); - grad_grad_psi[psiIndex][2] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); - grad_grad_psi[psiIndex][3] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); - grad_grad_psi[psiIndex][4] = ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r); - grad_grad_psi[psiIndex][5] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); - grad_grad_psi[psiIndex][6] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); - grad_grad_psi[psiIndex][7] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); - grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r); - - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. 
- - const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], 
gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) - const ST gh_xxx_r = f3_xxx_r 
+ 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; - const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; - const ST gh_xxy_r = - f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; - const ST gh_xxy_i = - f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; - const ST gh_xxz_r = - f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; - const ST gh_xxz_i = - f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; - const ST gh_xyy_r = - f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; - const ST gh_xyy_i = - f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; - const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - - (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i; - const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - - (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r; - const ST gh_xzz_r = - f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; - const ST gh_xzz_i = - f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; - const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i; - const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r; - const ST gh_yyz_r = - f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; - const ST gh_yyz_i = - f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r; - const ST gh_yzz_r = - f3_yzz_r + 
(2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; - const ST gh_yzz_i = - f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; - const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i; - const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r; - - grad_grad_grad_psi[psiIndex][0][0] = ComplexT(c * gh_xxx_r - s * gh_xxx_i, c * gh_xxx_i + s * gh_xxx_r); - grad_grad_grad_psi[psiIndex][0][1] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); - grad_grad_grad_psi[psiIndex][0][2] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); - grad_grad_grad_psi[psiIndex][0][3] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); - grad_grad_grad_psi[psiIndex][0][4] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); - grad_grad_grad_psi[psiIndex][0][5] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][0][6] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); - grad_grad_grad_psi[psiIndex][0][7] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][0][8] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); - - grad_grad_grad_psi[psiIndex][1][0] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); - grad_grad_grad_psi[psiIndex][1][1] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); - grad_grad_grad_psi[psiIndex][1][2] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][1][3] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); - grad_grad_grad_psi[psiIndex][1][4] = ComplexT(c * gh_yyy_r - s * gh_yyy_i, c * gh_yyy_i + s * gh_yyy_r); - grad_grad_grad_psi[psiIndex][1][5] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * 
gh_yyz_i + s * gh_yyz_r); - grad_grad_grad_psi[psiIndex][1][6] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][1][7] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); - grad_grad_grad_psi[psiIndex][1][8] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); - - - grad_grad_grad_psi[psiIndex][2][0] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); - grad_grad_grad_psi[psiIndex][2][1] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][2][2] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); - grad_grad_grad_psi[psiIndex][2][3] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][2][4] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); - grad_grad_grad_psi[psiIndex][2][5] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); - grad_grad_grad_psi[psiIndex][2][6] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); - grad_grad_grad_psi[psiIndex][2][7] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); - grad_grad_grad_psi[psiIndex][2][8] = ComplexT(c * gh_zzz_r - s * gh_zzz_i, c * gh_zzz_i + s * gh_zzz_r); - } + for (size_t j = first; j < last; ++j) { + int jr = j << 1; + int ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; + const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; + const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; + + const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; + const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; + const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + 
g22 * g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + + const size_t psiIndex = j + this->first_spo; + psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); + dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); + dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); + dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); + + // intermediates for computation of hessian. \partial_i \partial_j phi + // in cartesian coordinates. + const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g00, g01, g02); + const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g10, g11, g12); + const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g20, g21, g22); + const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g10, g11, g12, g10, g11, g12); + const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g10, g11, g12, g20, g21, g22); + const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g20, g21, g22, g20, g21, g22); + + const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g00, g01, g02); + const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g10, g11, g12); + const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g20, g21, g22); + const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g10, g11, g12, g10, g11, g12); + const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g10, g11, g12, g20, g21, g22); + const ST f_zz_i = 
v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g20, g21, g22, g20, g21, g22); + + const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; + const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; + const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; + const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; + const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; + const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; + + const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; + const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; + const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; + const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i; + const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; + const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; + + grad_grad_psi[psiIndex][0] = + ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r); + grad_grad_psi[psiIndex][1] = + ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); + grad_grad_psi[psiIndex][2] = + ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); + grad_grad_psi[psiIndex][3] = + ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); + grad_grad_psi[psiIndex][4] = + ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r); + grad_grad_psi[psiIndex][5] = + ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); + grad_grad_psi[psiIndex][6] = + ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); + grad_grad_psi[psiIndex][7] = + ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); + grad_grad_psi[psiIndex][8] = + ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r); + + // These are the real and imaginary components of the third SPO + // derivative. _xxx denotes + // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, + // and z, and so on. 
+ + const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); + const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); + const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); + const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); + const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); + const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); + const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); + const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); + const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); + const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); + + const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], 
gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); + const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); + const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); + const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); + const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); + const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); + const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); + const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); + const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); + const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); + + // Here is where we build up the components of the physical hessian + // gradient, namely, 
d^3/dx^3(e^{-ik*r}\phi(r) + const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - + kX * kX * kX * val_i; + const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + + kX * kX * kX * val_r; + const ST gh_xxy_r = f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - + (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; + const ST gh_xxy_i = f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - + (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; + const ST gh_xxz_r = f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - + (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; + const ST gh_xxz_i = f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - + (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; + const ST gh_xyy_r = f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - + (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; + const ST gh_xyy_i = f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - + (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; + const ST gh_xyz_r = f3_xyz_r + + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - + (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - + kX * kY * kZ * val_i; + const ST gh_xyz_i = f3_xyz_i - + (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - + (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + + kX * kY * kZ * val_r; + const ST gh_xzz_r = f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - + (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; + const ST gh_xzz_i = f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - + (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; + const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - + kY * kY * kY * val_i; + const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + + kY * kY * kY * val_r; + const ST gh_yyz_r = f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - + (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; + const ST gh_yyz_i = f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - + (kY * kY * dZ_i + 2 * kY 
* kZ * dY_i) + kY * kY * kZ * val_r; + const ST gh_yzz_r = f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - + (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; + const ST gh_yzz_i = f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - + (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; + const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - + kZ * kZ * kZ * val_i; + const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + + kZ * kZ * kZ * val_r; + + grad_grad_grad_psi[psiIndex][0][0] = + ComplexT(c * gh_xxx_r - s * gh_xxx_i, c * gh_xxx_i + s * gh_xxx_r); + grad_grad_grad_psi[psiIndex][0][1] = + ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); + grad_grad_grad_psi[psiIndex][0][2] = + ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); + grad_grad_grad_psi[psiIndex][0][3] = + ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); + grad_grad_grad_psi[psiIndex][0][4] = + ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); + grad_grad_grad_psi[psiIndex][0][5] = + ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); + grad_grad_grad_psi[psiIndex][0][6] = + ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); + grad_grad_grad_psi[psiIndex][0][7] = + ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); + grad_grad_grad_psi[psiIndex][0][8] = + ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); + + grad_grad_grad_psi[psiIndex][1][0] = + ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); + grad_grad_grad_psi[psiIndex][1][1] = + ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); + grad_grad_grad_psi[psiIndex][1][2] = + ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); + grad_grad_grad_psi[psiIndex][1][3] = + ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); + grad_grad_grad_psi[psiIndex][1][4] = + ComplexT(c * gh_yyy_r - s * gh_yyy_i, c * 
gh_yyy_i + s * gh_yyy_r); + grad_grad_grad_psi[psiIndex][1][5] = + ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); + grad_grad_grad_psi[psiIndex][1][6] = + ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); + grad_grad_grad_psi[psiIndex][1][7] = + ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); + grad_grad_grad_psi[psiIndex][1][8] = + ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); + + grad_grad_grad_psi[psiIndex][2][0] = + ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); + grad_grad_grad_psi[psiIndex][2][1] = + ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); + grad_grad_grad_psi[psiIndex][2][2] = + ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); + grad_grad_grad_psi[psiIndex][2][3] = + ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); + grad_grad_grad_psi[psiIndex][2][4] = + ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); + grad_grad_grad_psi[psiIndex][2][5] = + ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); + grad_grad_grad_psi[psiIndex][2][6] = + ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); + grad_grad_grad_psi[psiIndex][2][7] = + ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); + grad_grad_grad_psi[psiIndex][2][8] = + ComplexT(c * gh_zzz_r - s * gh_zzz_i, c * gh_zzz_i + s * gh_zzz_r); + } } -template -void SplineC2COMPTargetT::evaluateVGHGH(const ParticleSetT& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) +template +void +SplineC2COMPTargetT::evaluateVGHGH(const ParticleSetT& P, + const int iat, ValueVector& psi, GradVector& dpsi, + HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType 
ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel - { - int first, last; - FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); - assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, last / 2); - } + { + int first, last; + FairDivideAligned(2 * psi.size(), getAlignment(), + omp_get_num_threads(), omp_get_thread_num(), first, last); + + spline2::evaluate3d_vghgh( + SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); + assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, + last / 2); + } } -template -void SplineC2COMPTargetT::evaluate_notranspose(const ParticleSetT& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) +template +void +SplineC2COMPTargetT::evaluate_notranspose(const ParticleSetT& P, + int first, int last, ValueMatrix& logdet, GradMatrix& dlogdet, + ValueMatrix& d2logdet) { - // chunk the [first, last) loop into blocks to save temporary memory usage - const int block_size = 16; - - // reference vectors refer to the rows of matrices - std::vector multi_psi_v; - std::vector multi_dpsi_v; - std::vector multi_d2psi_v; - RefVector psi_v_list; - RefVector dpsi_v_list; - RefVector d2psi_v_list; - - multi_psi_v.reserve(block_size); - multi_dpsi_v.reserve(block_size); - multi_d2psi_v.reserve(block_size); - psi_v_list.reserve(block_size); - dpsi_v_list.reserve(block_size); - d2psi_v_list.reserve(block_size); - - for (int iat = first, i = 0; iat < last; iat += block_size, i += block_size) - { - const int actual_block_size = std::min(last - iat, block_size); - multi_pos_copy.resize(actual_block_size * 6); - multi_psi_v.clear(); - multi_dpsi_v.clear(); - multi_d2psi_v.clear(); - psi_v_list.clear(); - dpsi_v_list.clear(); - d2psi_v_list.clear(); - - for (int ipos = 0; ipos < actual_block_size; ++ipos) - { - // 
pack particle positions - const PointType& r = P.activeR(iat + ipos); - PointType ru(PrimLattice.toUnit_floor(r)); - multi_pos_copy[ipos * 6] = r[0]; - multi_pos_copy[ipos * 6 + 1] = r[1]; - multi_pos_copy[ipos * 6 + 2] = r[2]; - multi_pos_copy[ipos * 6 + 3] = ru[0]; - multi_pos_copy[ipos * 6 + 4] = ru[1]; - multi_pos_copy[ipos * 6 + 5] = ru[2]; - - multi_psi_v.emplace_back(logdet[i + ipos], logdet.cols()); - multi_dpsi_v.emplace_back(dlogdet[i + ipos], dlogdet.cols()); - multi_d2psi_v.emplace_back(d2logdet[i + ipos], d2logdet.cols()); - - psi_v_list.push_back(multi_psi_v[ipos]); - dpsi_v_list.push_back(multi_dpsi_v[ipos]); - d2psi_v_list.push_back(multi_d2psi_v[ipos]); - } + // chunk the [first, last) loop into blocks to save temporary memory usage + const int block_size = 16; + + // reference vectors refer to the rows of matrices + std::vector multi_psi_v; + std::vector multi_dpsi_v; + std::vector multi_d2psi_v; + RefVector psi_v_list; + RefVector dpsi_v_list; + RefVector d2psi_v_list; + + multi_psi_v.reserve(block_size); + multi_dpsi_v.reserve(block_size); + multi_d2psi_v.reserve(block_size); + psi_v_list.reserve(block_size); + dpsi_v_list.reserve(block_size); + d2psi_v_list.reserve(block_size); + + for (int iat = first, i = 0; iat < last; + iat += block_size, i += block_size) { + const int actual_block_size = std::min(last - iat, block_size); + multi_pos_copy.resize(actual_block_size * 6); + multi_psi_v.clear(); + multi_dpsi_v.clear(); + multi_d2psi_v.clear(); + psi_v_list.clear(); + dpsi_v_list.clear(); + d2psi_v_list.clear(); + + for (int ipos = 0; ipos < actual_block_size; ++ipos) { + // pack particle positions + const PointType& r = P.activeR(iat + ipos); + PointType ru(PrimLattice.toUnit_floor(r)); + multi_pos_copy[ipos * 6] = r[0]; + multi_pos_copy[ipos * 6 + 1] = r[1]; + multi_pos_copy[ipos * 6 + 2] = r[2]; + multi_pos_copy[ipos * 6 + 3] = ru[0]; + multi_pos_copy[ipos * 6 + 4] = ru[1]; + multi_pos_copy[ipos * 6 + 5] = ru[2]; + + 
multi_psi_v.emplace_back(logdet[i + ipos], logdet.cols()); + multi_dpsi_v.emplace_back(dlogdet[i + ipos], dlogdet.cols()); + multi_d2psi_v.emplace_back(d2logdet[i + ipos], d2logdet.cols()); + + psi_v_list.push_back(multi_psi_v[ipos]); + dpsi_v_list.push_back(multi_dpsi_v[ipos]); + d2psi_v_list.push_back(multi_d2psi_v[ipos]); + } - evaluateVGLMultiPos(multi_pos_copy, offload_scratch, results_scratch, psi_v_list, dpsi_v_list, d2psi_v_list); - } + evaluateVGLMultiPos(multi_pos_copy, offload_scratch, results_scratch, + psi_v_list, dpsi_v_list, d2psi_v_list); + } } -template class SplineC2COMPTargetT; -template class SplineC2COMPTargetT; +template class SplineC2COMPTargetT>; +template class SplineC2COMPTargetT>; +template class SplineC2COMPTargetT>; +template class SplineC2COMPTargetT>; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.h index a1c7a2cd2f..86c20dfd5d 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.h @@ -1,6 +1,6 @@ - ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2020 QMCPACK developers. 
// @@ -9,316 +9,369 @@ // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory ////////////////////////////////////////////////////////////////////////////////////// - -/** @file SplineC2COMPTargetT.h +/** @file SplineC2COMPTarget.h * - * class to handle complex splines to complex orbitals with splines of arbitrary precision - * splines storage and computation is offloaded to accelerators using OpenMP target + * class to handle complex splines to complex orbitals with splines of arbitrary + * precision splines storage and computation is offloaded to accelerators using + * OpenMP target */ -#ifndef QMCPLUSPLUS_SPLINE_C2C_OMPTARGET_H -#define QMCPLUSPLUS_SPLINE_C2C_OMPTARGET_H +#ifndef QMCPLUSPLUS_SPLINE_C2C_OMPTARGETT_H +#define QMCPLUSPLUS_SPLINE_C2C_OMPTARGETT_H -#include -#include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "spline2/MultiBspline.hpp" #include "OMPTarget/OffloadAlignedAllocators.hpp" +#include "OhmmsSoA/VectorSoaContainer.h" +#include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h" +#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" +#include "SplineOMPTargetMultiWalkerMem.h" #include "Utilities/FairDivide.h" #include "Utilities/TimerManager.h" +#include "spline2/MultiBspline.hpp" #include -#include "SplineOMPTargetMultiWalkerMem.h" + +#include namespace qmcplusplus { -/** class to match std::complex spline with BsplineSetT::ValueType (complex) SPOs with OpenMP offload +/** class to match std::complex spline with BsplineSet::ValueType (complex) + * SPOs with OpenMP offload * @tparam ST precision of spline * * Requires temporage storage and multiplication of phase vectors - * The internal storage of complex spline coefficients uses double sized real arrays of ST type, aligned and padded. - * All the output orbitals are complex. + * The internal storage of complex spline coefficients uses double sized real + * arrays of ST type, aligned and padded. 
All the output orbitals are complex. */ -template -class SplineC2COMPTargetT : public BsplineSetT> +template +class SplineC2COMPTargetT : public BsplineSetT { public: - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = ST; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - // types for evaluation results - using ComplexT = std::complex; - using GGGVector = typename BsplineSetT::GGGVector; - using GradType = typename BsplineSetT::GradType; - using GradVector = typename BsplineSetT::GradVector; - using GradMatrix = typename BsplineSetT::GradMatrix; - using HessVector = typename BsplineSetT::HessVector; - using ValueVector = typename BsplineSetT::ValueVector; - using ValueMatrix = typename BsplineSetT::ValueMatrix; - using OffloadMWVGLArray = Array>; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - using ghContainer_type = VectorSoaContainer; - - using RealType = typename SPOSetT::RealType; - using ValueType = typename SPOSetT::ValueType; - - using SPOSet = SPOSetT; - using SplineC2COMPTarget = SplineC2COMPTargetT; - - template - using OffloadVector = Vector>; - template - using OffloadPosVector = VectorSoaContainer>; + using SplineType = typename bspline_traits::SplineType; + using BCType = typename bspline_traits::BCType; + using DataType = ST; + using PointType = TinyVector; + using SingleSplineType = UBspline_3d_d; + // types for evaluation results + using ComplexT = typename BsplineSetT::ValueType; + using typename BsplineSetT::ValueType; + using typename BsplineSetT::RealType; + using typename BsplineSetT::GradType; + using typename BsplineSetT::GGGVector; + using typename BsplineSetT::GradVector; + using typename BsplineSetT::GradMatrix; + using typename BsplineSetT::HessVector; + using typename BsplineSetT::ValueVector; + using typename BsplineSetT::ValueMatrix; + using typename 
BsplineSetT::OffloadMWVGLArray; + + using vContainer_type = Vector>; + using gContainer_type = VectorSoaContainer; + using hContainer_type = VectorSoaContainer; + using ghContainer_type = VectorSoaContainer; + + template + using OffloadVector = Vector>; + template + using OffloadPosVector = VectorSoaContainer>; private: - /// timer for offload portion - NewTimer& offload_timer_; - ///primitive cell - CrystalLattice PrimLattice; - ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian - Tensor GGt; - ///multi bspline set - std::shared_ptr, OffloadAllocator>> SplineInst; - - std::shared_ptr> mKK; - std::shared_ptr> myKcart; - std::shared_ptr> GGt_offload; - std::shared_ptr> PrimLattice_G_offload; - - ResourceHandle> mw_mem_handle_; - - ///team private ratios for reduction, numVP x numTeams - Matrix> ratios_private; - ///offload scratch space, dynamically resized to the maximal need - Vector> offload_scratch; - ///result scratch space, dynamically resized to the maximal need - Vector> results_scratch; - ///psiinv and position scratch space, used to avoid allocation on the fly and faster transfer - Vector> psiinv_pos_copy; - ///position scratch space, used to avoid allocation on the fly and faster transfer - Vector> multi_pos_copy; - - void evaluateVGLMultiPos(const Vector>& multi_pos_copy, - Vector>& offload_scratch, - Vector>& results_scratch, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const; + /// timer for offload portion + NewTimer& offload_timer_; + /// primitive cell + CrystalLattice PrimLattice; + ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to + /// CartesianUnit, e.g. 
Hessian + Tensor GGt; + /// multi bspline set + std::shared_ptr< + MultiBspline, OffloadAllocator>> + SplineInst; + + std::shared_ptr> mKK; + std::shared_ptr> myKcart; + std::shared_ptr> GGt_offload; + std::shared_ptr> PrimLattice_G_offload; + + ResourceHandle> mw_mem_handle_; + + /// team private ratios for reduction, numVP x numTeams + Matrix> ratios_private; + /// offload scratch space, dynamically resized to the maximal need + Vector> offload_scratch; + /// result scratch space, dynamically resized to the maximal need + Vector> results_scratch; + /// psiinv and position scratch space, used to avoid allocation on the fly + /// and faster transfer + Vector> psiinv_pos_copy; + /// position scratch space, used to avoid allocation on the fly and faster + /// transfer + Vector> multi_pos_copy; + + void + evaluateVGLMultiPos( + const Vector>& multi_pos_copy, + Vector>& offload_scratch, + Vector>& results_scratch, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const; protected: - /// intermediate result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; + /// intermediate result vectors + vContainer_type myV; + vContainer_type myL; + gContainer_type myG; + hContainer_type myH; + ghContainer_type mygH; public: - SplineC2COMPTargetT(const std::string& my_name) - : BsplineSetT(my_name), - offload_timer_(createGlobalTimer("SplineC2COMPTargetT::offload", timer_level_fine)), + SplineC2COMPTargetT(const std::string& my_name) : + BsplineSetT(my_name), + offload_timer_( + createGlobalTimer("SplineC2COMPTarget::offload", timer_level_fine)), GGt_offload(std::make_shared>(9)), PrimLattice_G_offload(std::make_shared>(9)) - {} - - SplineC2COMPTargetT(const SplineC2COMPTargetT& in); - - virtual std::string getClassName() const override { return "SplineC2COMPTargetT"; } - virtual std::string getKeyword() const override { return "SplineC2C"; } - bool isComplex() 
const override { return true; }; - virtual bool isOMPoffload() const override { return true; } - - void createResource(ResourceCollection& collection) const override - { - auto resource_index = collection.addResource(std::make_unique>()); - } - - void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const override - { - assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.template getCastedLeader(); - phi_leader.mw_mem_handle_ = collection.lendResource>(); - } - - void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const override - { - assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.template getCastedLeader(); - collection.takebackResource(phi_leader.mw_mem_handle_); - } - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - - inline void resizeStorage(size_t n, size_t nvals) - { - BsplineSetT::init_base(n); - size_t npad = getAlignedSize(2 * n); - myV.resize(npad); - myG.resize(npad); - myL.resize(npad); - myH.resize(npad); - mygH.resize(npad); - } - - void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } - - void gather_tables(Communicate* comm) - { - if (comm->size() == 1) - return; - const int Nbands = this->kPoints.size(); - const int Nbandgroups = comm->size(); - this->offset.resize(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, this->offset); - - for (size_t ib = 0; ib < this->offset.size(); ib++) - this->offset[ib] *= 2; - gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, this->offset); - } - - template - void create_spline(GT& xyz_g, BCT& xyz_bc) - { - resize_kpoints(); - SplineInst = std::make_shared, OffloadAllocator>>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - - app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " - << "for the coefficients in 3D spline orbital representation" << std::endl; - } - - 
/// this routine can not be called from threaded region - void finalizeConstruction() override - { - // map the SplineInst->getSplinePtr() structure to GPU - auto* MultiSpline = SplineInst->getSplinePtr(); - auto* restrict coefs = MultiSpline->coefs; - // attach pointers on the device to achieve deep copy - PRAGMA_OFFLOAD("omp target map(always, to: MultiSpline[0:1], coefs[0:MultiSpline->coefs_size])") { - MultiSpline->coefs = coefs; } - // transfer static data to GPU - auto* mKK_ptr = mKK->data(); - PRAGMA_OFFLOAD("omp target update to(mKK_ptr[0:mKK->size()])") - auto* myKcart_ptr = myKcart->data(); - PRAGMA_OFFLOAD("omp target update to(myKcart_ptr[0:myKcart->capacity()*3])") - for (size_t i = 0; i < 9; i++) + SplineC2COMPTargetT(const SplineC2COMPTargetT& in); + + virtual std::string + getClassName() const override { - (*GGt_offload)[i] = GGt[i]; - (*PrimLattice_G_offload)[i] = PrimLattice.G[i]; + return "SplineC2COMPTarget"; } - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - PRAGMA_OFFLOAD("omp target update to(PrimLattice_G_ptr[0:9])") - auto* GGt_ptr = GGt_offload->data(); - PRAGMA_OFFLOAD("omp target update to(GGt_ptr[0:9])") - } - - inline void flush_zero() { SplineInst->flush_zero(); } - - /** remap kPoints to pack the double copy */ - inline void resize_kpoints() - { - const size_t nk = this->kPoints.size(); - mKK = std::make_shared>(nk); - myKcart = std::make_shared>(nk); - for (size_t i = 0; i < nk; ++i) + virtual std::string + getKeyword() const override { - (*mKK)[i] = -dot(this->kPoints[i], this->kPoints[i]); - (*myKcart)(i) = this->kPoints[i]; + return "SplineC2C"; + } + bool + isComplex() const override + { + return true; + }; + virtual bool + isOMPoffload() const override + { + return true; + } + + void + createResource(ResourceCollection& collection) const override + { + auto resource_index = collection.addResource( + std::make_unique>()); + } + + void + acquireResource(ResourceCollection& collection, + const RefVectorWithLeader>& 
spo_list) const override + { + assert(this == &spo_list.getLeader()); + auto& phi_leader = + spo_list.template getCastedLeader(); + phi_leader.mw_mem_handle_ = + collection + .lendResource>(); + } + + void + releaseResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const override + { + assert(this == &spo_list.getLeader()); + auto& phi_leader = + spo_list.template getCastedLeader(); + collection.takebackResource(phi_leader.mw_mem_handle_); + } + + std::unique_ptr> + makeClone() const override + { + return std::make_unique(*this); + } + + inline void + resizeStorage(size_t n, size_t nvals) + { + this->init_base(n); + size_t npad = getAlignedSize(2 * n); + myV.resize(npad); + myG.resize(npad); + myL.resize(npad); + myH.resize(npad); + mygH.resize(npad); + } + + void + bcast_tables(Communicate* comm) + { + chunked_bcast(comm, SplineInst->getSplinePtr()); + } + + void + gather_tables(Communicate* comm) + { + if (comm->size() == 1) + return; + const int Nbands = this->kPoints.size(); + const int Nbandgroups = comm->size(); + this->offset.resize(Nbandgroups + 1, 0); + FairDivideLow(Nbands, Nbandgroups, this->offset); + + for (size_t ib = 0; ib < this->offset.size(); ib++) + this->offset[ib] *= 2; + gatherv(comm, SplineInst->getSplinePtr(), + SplineInst->getSplinePtr()->z_stride, this->offset); } - } - - void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); - - bool read_splines(hdf_archive& h5f); - - bool write_splines(hdf_archive& h5f); - - void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const; - - virtual void evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) override; - - virtual void evaluateDetRatios(const VirtualParticleSetT& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override; - - virtual void mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const 
RefVectorWithLeader>& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const override; - - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ - void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - virtual void evaluateVGL(const ParticleSetT& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) override; - - virtual void mw_evaluateVGL(const RefVectorWithLeader& sa_list, - const RefVectorWithLeader>& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const override; - - virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader>& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const override; - - void assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const; - - virtual void evaluateVGH(const ParticleSetT& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override; - - void assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first = 0, - int last = -1) const; - - virtual void evaluateVGHGH(const ParticleSetT& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - virtual void evaluate_notranspose(const ParticleSetT& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; -}; + template + void + create_spline(GT& xyz_g, BCT& xyz_bc) + { + resize_kpoints(); + SplineInst = std::make_shared, 
+ OffloadAllocator>>(); + SplineInst->create(xyz_g, xyz_bc, myV.size()); + + app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) + << " MB allocated " + << "for the coefficients in 3D spline orbital representation" + << std::endl; + } + + /// this routine can not be called from threaded region + void + finalizeConstruction() override + { + // map the SplineInst->getSplinePtr() structure to GPU + auto* MultiSpline = SplineInst->getSplinePtr(); + auto* restrict coefs = MultiSpline->coefs; + // attach pointers on the device to achieve deep copy + PRAGMA_OFFLOAD("omp target \ + map(always, to: MultiSpline[0:1], \ + coefs[0:MultiSpline->coefs_size])") + { + MultiSpline->coefs = coefs; + } + + // transfer static data to GPU + auto* mKK_ptr = mKK->data(); + PRAGMA_OFFLOAD("omp target update to(mKK_ptr[0:mKK->size()])") + auto* myKcart_ptr = myKcart->data(); + PRAGMA_OFFLOAD( + "omp target update to(myKcart_ptr[0:myKcart->capacity()*3])") + for (size_t i = 0; i < 9; i++) { + (*GGt_offload)[i] = GGt[i]; + (*PrimLattice_G_offload)[i] = PrimLattice.G[i]; + } + auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); + PRAGMA_OFFLOAD("omp target update to(PrimLattice_G_ptr[0:9])") + auto* GGt_ptr = GGt_offload->data(); + PRAGMA_OFFLOAD("omp target update to(GGt_ptr[0:9])") + } + + inline void + flush_zero() + { + SplineInst->flush_zero(); + } + + /** remap kPoints to pack the double copy */ + inline void + resize_kpoints() + { + const size_t nk = this->kPoints.size(); + mKK = std::make_shared>(nk); + myKcart = std::make_shared>(nk); + for (size_t i = 0; i < nk; ++i) { + (*mKK)[i] = -dot(this->kPoints[i], this->kPoints[i]); + (*myKcart)(i) = this->kPoints[i]; + } + } + + void + set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, + int twist, int ispline, int level); + + bool + read_splines(hdf_archive& h5f); + + bool + write_splines(hdf_archive& h5f); + + void + assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, + int first, int 
last) const; + + virtual void + evaluateValue( + const ParticleSetT& P, const int iat, ValueVector& psi) override; + + virtual void + evaluateDetRatios(const VirtualParticleSetT& VP, ValueVector& psi, + const ValueVector& psiinv, std::vector& ratios) override; + + virtual void + mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const override; + + /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in + * cartesian + */ + void + assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, + ValueVector& d2psi); + + virtual void + evaluateVGL(const ParticleSetT& P, const int iat, ValueVector& psi, + GradVector& dpsi, ValueVector& d2psi) override; + + virtual void + mw_evaluateVGL(const RefVectorWithLeader>& sa_list, + const RefVectorWithLeader>& P_list, int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const override; + + virtual void + mw_evaluateVGLandDetRatioGrads( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, + std::vector& grads) const override; + + void + assign_vgh(const PointType& r, ValueVector& psi, GradVector& dpsi, + HessVector& grad_grad_psi, int first, int last) const; + + virtual void + evaluateVGH(const ParticleSetT& P, const int iat, ValueVector& psi, + GradVector& dpsi, HessVector& grad_grad_psi) override; + + void + assign_vghgh(const PointType& r, ValueVector& psi, GradVector& dpsi, + HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, int first = 0, + int last = -1) const; + + virtual void + evaluateVGHGH(const ParticleSetT& P, const int iat, ValueVector& psi, + GradVector& dpsi, HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) override; + + virtual void + 
evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, + ValueMatrix& d2logdet) override; + + template + friend class SplineSetReaderT; + template + friend class BsplineReaderBaseT; +}; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp index e6b05e4cd3..90edda7a96 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp @@ -1,6 +1,6 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2019 QMCPACK developers. // @@ -10,56 +10,59 @@ // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. 
////////////////////////////////////////////////////////////////////////////////////// +#include "SplineC2CT.h" -#include +#include "CPU/BLAS.hpp" +#include "CPU/math.hpp" #include "Concurrency/OpenMP.h" -#include "SplineC2CT.h" -#include "spline2/MultiBsplineEval.hpp" #include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" -#include "CPU/math.hpp" +#include "spline2/MultiBsplineEval.hpp" + +#include namespace qmcplusplus { -template -SplineC2CT::SplineC2CT(const SplineC2CT& in) = default; - -template -inline void SplineC2CT::set_spline(SingleSplineType* spline_r, - SingleSplineType* spline_i, - int twist, - int ispline, - int level) +template +SplineC2CT::SplineC2CT(const SplineC2CT& in) = default; + +template +inline void +SplineC2CT::set_spline(SingleSplineType* spline_r, + SingleSplineType* spline_i, int twist, int ispline, int level) { - SplineInst->copy_spline(spline_r, 2 * ispline); - SplineInst->copy_spline(spline_i, 2 * ispline + 1); + SplineInst->copy_spline(spline_r, 2 * ispline); + SplineInst->copy_spline(spline_i, 2 * ispline + 1); } -template -bool SplineC2CT::read_splines(hdf_archive& h5f) +template +bool +SplineC2CT::read_splines(hdf_archive& h5f) { - std::ostringstream o; - o << "spline_" << this->MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); + std::ostringstream o; + o << "spline_" << this->MyIndex; + einspline_engine bigtable(SplineInst->getSplinePtr()); + return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); } -template -bool SplineC2CT::write_splines(hdf_archive& h5f) +template +bool +SplineC2CT::write_splines(hdf_archive& h5f) { - std::ostringstream o; - o << "spline_" << this->MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); + std::ostringstream o; + o << "spline_" << this->MyIndex; + einspline_engine bigtable(SplineInst->getSplinePtr()); + return 
h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); } -template -void SplineC2CT::storeParamsBeforeRotation() +template +void +SplineC2CT::storeParamsBeforeRotation() { - const auto spline_ptr = SplineInst->getSplinePtr(); - const auto coefs_tot_size = spline_ptr->coefs_size; - coef_copy_ = std::make_shared>(coefs_tot_size); + const auto spline_ptr = SplineInst->getSplinePtr(); + const auto coefs_tot_size = spline_ptr->coefs_size; + coef_copy_ = std::make_shared>(coefs_tot_size); - std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin()); + std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin()); } /* @@ -101,700 +104,834 @@ void SplineC2CT::storeParamsBeforeRotation() NB: For splines (typically) BasisSetSize >> OrbitalSetSize, so the spl_coefs "matrix" is very tall and skinny. */ -template -void SplineC2CT::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) +template +void +SplineC2CT::applyRotation( + const ValueMatrix& rot_mat, bool use_stored_copy) { - // SplineInst is a MultiBspline. See src/spline2/MultiBspline.hpp - const auto spline_ptr = SplineInst->getSplinePtr(); - assert(spline_ptr != nullptr); - const auto spl_coefs = spline_ptr->coefs; - const auto Nsplines = spline_ptr->num_splines; // May include padding - const auto coefs_tot_size = spline_ptr->coefs_size; - const auto basis_set_size = coefs_tot_size / Nsplines; - assert(this->OrbitalSetSize == rot_mat.rows()); - assert(this->OrbitalSetSize == rot_mat.cols()); - - if (!use_stored_copy) - { - assert(coef_copy_ != nullptr); - std::copy_n(spl_coefs, coefs_tot_size, coef_copy_->begin()); - } - - for (int i = 0; i < basis_set_size; i++) - for (int j = 0; j < this->OrbitalSetSize; j++) - { - // cur_elem points to the real componend of the coefficient. - // Imag component is adjacent in memory. 
- const auto cur_elem = Nsplines * i + 2 * j; - RealType newval_r{0.}; - RealType newval_i{0.}; - for (auto k = 0; k < this->OrbitalSetSize; k++) - { - const auto index = Nsplines * i + 2 * k; - RealType zr = (*coef_copy_)[index]; - RealType zi = (*coef_copy_)[index + 1]; - RealType wr = rot_mat[k][j].real(); - RealType wi = rot_mat[k][j].imag(); - newval_r += zr * wr - zi * wi; - newval_i += zr * wi + zi * wr; - } - spl_coefs[cur_elem] = newval_r; - spl_coefs[cur_elem + 1] = newval_i; + // SplineInst is a MultiBspline. See src/spline2/MultiBspline.hpp + const auto spline_ptr = SplineInst->getSplinePtr(); + assert(spline_ptr != nullptr); + const auto spl_coefs = spline_ptr->coefs; + const auto Nsplines = spline_ptr->num_splines; // May include padding + const auto coefs_tot_size = spline_ptr->coefs_size; + const auto basis_set_size = coefs_tot_size / Nsplines; + assert(this->OrbitalSetSize == rot_mat.rows()); + assert(this->OrbitalSetSize == rot_mat.cols()); + + if (!use_stored_copy) { + assert(coef_copy_ != nullptr); + std::copy_n(spl_coefs, coefs_tot_size, coef_copy_->begin()); + } + + if constexpr (std::is_same_v) { + // if ST is double, go ahead and use blas to make things faster + // Note that Nsplines needs to be divided by 2 since spl_coefs and + // coef_copy_ are stored as reals. 
Also casting them as ValueType so + // they are complex to do the correct gemm + BLAS::gemm('N', 'N', this->OrbitalSetSize, basis_set_size, + this->OrbitalSetSize, ValueType(1.0, 0.0), rot_mat.data(), + this->OrbitalSetSize, (ValueType*)coef_copy_->data(), Nsplines / 2, + ValueType(0.0, 0.0), (ValueType*)spl_coefs, Nsplines / 2); + } + else { + // if ST is float, RealType is double and ValueType is + // std::complex for C2C Just use naive matrix multiplication in + // order to avoid losing precision on rotation matrix + for (IndexType i = 0; i < basis_set_size; i++) + for (IndexType j = 0; j < this->OrbitalSetSize; j++) { + // cur_elem points to the real componend of the coefficient. + // Imag component is adjacent in memory. + const auto cur_elem = Nsplines * i + 2 * j; + ST newval_r{0.}; + ST newval_i{0.}; + for (IndexType k = 0; k < this->OrbitalSetSize; k++) { + const auto index = Nsplines * i + 2 * k; + ST zr = (*coef_copy_)[index]; + ST zi = (*coef_copy_)[index + 1]; + ST wr = rot_mat[k][j].real(); + ST wi = rot_mat[k][j].imag(); + newval_r += zr * wr - zi * wi; + newval_i += zr * wi + zi * wr; + } + spl_coefs[cur_elem] = newval_r; + spl_coefs[cur_elem + 1] = newval_i; + } } } -template -inline void SplineC2CT::assign_v(const PointType& r, - const vContainer_type& myV, - ValueVector& psi, - int first, - int last) const +template +inline void +SplineC2CT::assign_v(const PointType& r, const vContainer_type& myV, + ValueVector& psi, int first, int last) const { - const auto kPointsSize = this->kPoints.size(); - // protect last - last = last > kPointsSize ? kPointsSize : last; - - const RealType x = r[0], y = r[1], z = r[2]; - const RealType* restrict kx = myKcart.data(0); - const RealType* restrict ky = myKcart.data(1); - const RealType* restrict kz = myKcart.data(2); + // protect last + last = last > this->kPoints.size() ? 
this->kPoints.size() : last; + + const ST x = r[0], y = r[1], z = r[2]; + const ST* restrict kx = myKcart.data(0); + const ST* restrict ky = myKcart.data(1); + const ST* restrict kz = myKcart.data(2); #pragma omp simd - for (size_t j = first; j < last; ++j) - { - RealType s, c; - const RealType val_r = myV[2 * j]; - const RealType val_i = myV[2 * j + 1]; - qmcplusplus::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); - psi[j + this->first_spo] = ComplexT(val_r * c - val_i * s, val_i * c + val_r * s); - } + for (size_t j = first; j < last; ++j) { + ST s, c; + const ST val_r = myV[2 * j]; + const ST val_i = myV[2 * j + 1]; + qmcplusplus::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); + psi[j + this->first_spo] = + ComplexT(val_r * c - val_i * s, val_i * c + val_r * s); + } } -template -void SplineC2CT::evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) +template +void +SplineC2CT::evaluateValue( + const ParticleSetT& P, const int iat, ValueVector& psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel - { - int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type - FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(r, myV, psi, first / 2, last / 2); - } + { + int first, last; + // Factor of 2 because psi is complex and the spline storage and + // evaluation uses a real type + FairDivideAligned(2 * psi.size(), getAlignment(), + omp_get_num_threads(), omp_get_thread_num(), first, last); + + spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); + assign_v(r, myV, psi, first / 2, last / 2); + } } -template -void SplineC2CT::evaluateDetRatios(const VirtualParticleSetT& VP, - ValueVector& 
psi, - const ValueVector& psiinv, - std::vector& ratios) +template +void +SplineC2CT::evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, const ValueVector& psiinv, std::vector& ratios) { - const bool need_resize = ratios_private.rows() < VP.getTotalNum(); + const bool need_resize = ratios_private.rows() < VP.getTotalNum(); #pragma omp parallel - { - int tid = omp_get_thread_num(); - // initialize thread private ratios - if (need_resize) { - if (tid == 0) // just like #pragma omp master, but one fewer call to the runtime - ratios_private.resize(VP.getTotalNum(), omp_get_num_threads()); + int tid = omp_get_thread_num(); + // initialize thread private ratios + if (need_resize) { + if (tid == 0) // just like #pragma omp master, but one fewer call to + // the runtime + ratios_private.resize(VP.getTotalNum(), omp_get_num_threads()); #pragma omp barrier + } + int first, last; + // Factor of 2 because psi is complex and the spline storage and + // evaluation uses a real type + FairDivideAligned(2 * psi.size(), getAlignment(), + omp_get_num_threads(), tid, first, last); + const int first_cplx = first / 2; + const int last_cplx = + this->kPoints.size() < last / 2 ? this->kPoints.size() : last / 2; + + for (int iat = 0; iat < VP.getTotalNum(); ++iat) { + const PointType& r = VP.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); + + spline2::evaluate3d( + SplineInst->getSplinePtr(), ru, myV, first, last); + assign_v(r, myV, psi, first_cplx, last_cplx); + ratios_private[iat][tid] = simd::dot(psi.data() + first_cplx, + psiinv.data() + first_cplx, last_cplx - first_cplx); + } } - int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type - FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), tid, first, last); - const int first_cplx = first / 2; - const auto kPointsSize = this->kPoints.size(); - const int last_cplx = kPointsSize < last / 2 ? 
kPointsSize : last / 2; - - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - const PointType& r = VP.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(r, myV, psi, first_cplx, last_cplx); - ratios_private[iat][tid] = simd::dot(psi.data() + first_cplx, psiinv.data() + first_cplx, last_cplx - first_cplx); + // do the reduction manually + for (int iat = 0; iat < VP.getTotalNum(); ++iat) { + ratios[iat] = ComplexT(0); + for (int tid = 0; tid < ratios_private.cols(); tid++) + ratios[iat] += ratios_private[iat][tid]; } - } - - // do the reduction manually - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - ratios[iat] = ComplexT(0); - for (int tid = 0; tid < ratios_private.cols(); tid++) - ratios[iat] += ratios_private[iat][tid]; - } } /** assign_vgl - */ -template -inline void SplineC2CT::assign_vgl(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi, - int first, - int last) const + */ +template +inline void +SplineC2CT::assign_vgl(const PointType& r, ValueVector& psi, + GradVector& dpsi, ValueVector& d2psi, int first, int last) const { - // protect last - const auto kPointsSize = this->kPoints.size(); - last = last > kPointsSize ? 
kPointsSize : last; - - constexpr RealType zero(0); - constexpr RealType two(2); - const RealType g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const RealType x = r[0], y = r[1], z = r[2]; - const RealType symGG[6] = {GGt[0], GGt[1] + GGt[3], GGt[2] + GGt[6], GGt[4], GGt[5] + GGt[7], GGt[8]}; - - const RealType* restrict k0 = myKcart.data(0); - const RealType* restrict k1 = myKcart.data(1); - const RealType* restrict k2 = myKcart.data(2); - - const RealType* restrict g0 = myG.data(0); - const RealType* restrict g1 = myG.data(1); - const RealType* restrict g2 = myG.data(2); - const RealType* restrict h00 = myH.data(0); - const RealType* restrict h01 = myH.data(1); - const RealType* restrict h02 = myH.data(2); - const RealType* restrict h11 = myH.data(3); - const RealType* restrict h12 = myH.data(4); - const RealType* restrict h22 = myH.data(5); + // protect last + last = last > this->kPoints.size() ? 
this->kPoints.size() : last; + + constexpr ST zero(0); + constexpr ST two(2); + const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), + g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), + g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), + g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), + g22 = PrimLattice.G(8); + const ST x = r[0], y = r[1], z = r[2]; + const ST symGG[6] = {GGt[0], GGt[1] + GGt[3], GGt[2] + GGt[6], GGt[4], + GGt[5] + GGt[7], GGt[8]}; + + const ST* restrict k0 = myKcart.data(0); + const ST* restrict k1 = myKcart.data(1); + const ST* restrict k2 = myKcart.data(2); + + const ST* restrict g0 = myG.data(0); + const ST* restrict g1 = myG.data(1); + const ST* restrict g2 = myG.data(2); + const ST* restrict h00 = myH.data(0); + const ST* restrict h01 = myH.data(1); + const ST* restrict h02 = myH.data(2); + const ST* restrict h11 = myH.data(3); + const ST* restrict h12 = myH.data(4); + const ST* restrict h22 = myH.data(5); #pragma omp simd - for (size_t j = first; j < last; ++j) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const RealType kX = k0[j]; - const RealType kY = k1[j]; - const RealType kZ = k2[j]; - const RealType val_r = myV[jr]; - const RealType val_i = myV[ji]; - - //phase - RealType s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const RealType dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const RealType dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const RealType dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const RealType dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const RealType dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const RealType dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const RealType gX_r = dX_r + val_i * kX; - const RealType gY_r = dY_r + val_i * kY; - const RealType gZ_r = dZ_r + val_i * kZ; - const RealType gX_i = dX_i - val_r * kX; - const RealType gY_i = dY_i - 
val_r * kY; - const RealType gZ_i = dZ_i - val_r * kZ; - - const RealType lcart_r = SymTrace(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], symGG); - const RealType lcart_i = SymTrace(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], symGG); - const RealType lap_r = lcart_r + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const RealType lap_i = lcart_i + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - const size_t psiIndex = j + this->first_spo; - psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); - dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); - dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); - dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - d2psi[psiIndex] = ComplexT(c * lap_r - s * lap_i, c * lap_i + s * lap_r); - } + for (size_t j = first; j < last; ++j) { + const size_t jr = j << 1; + const size_t ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; + const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; + const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; + + const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; + const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; + const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + + const ST lcart_r = SymTrace( + h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], symGG); + const ST lcart_i = SymTrace( + h00[ji], h01[ji], 
h02[ji], h11[ji], h12[ji], h22[ji], symGG); + const ST lap_r = lcart_r + mKK[j] * val_r + + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); + const ST lap_i = lcart_i + mKK[j] * val_i - + two * (kX * dX_r + kY * dY_r + kZ * dZ_r); + const size_t psiIndex = j + this->first_spo; + psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); + dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); + dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); + dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); + d2psi[psiIndex] = + ComplexT(c * lap_r - s * lap_i, c * lap_i + s * lap_r); + } } -/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ -template -inline void SplineC2CT::assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in + * cartesian + */ +template +inline void +SplineC2CT::assign_vgl_from_l( + const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { - constexpr RealType two(2); - const RealType x = r[0], y = r[1], z = r[2]; + constexpr ST two(2); + const ST x = r[0], y = r[1], z = r[2]; - const RealType* restrict k0 = myKcart.data(0); - const RealType* restrict k1 = myKcart.data(1); - const RealType* restrict k2 = myKcart.data(2); + const ST* restrict k0 = myKcart.data(0); + const ST* restrict k1 = myKcart.data(1); + const ST* restrict k2 = myKcart.data(2); - const RealType* restrict g0 = myG.data(0); - const RealType* restrict g1 = myG.data(1); - const RealType* restrict g2 = myG.data(2); + const ST* restrict g0 = myG.data(0); + const ST* restrict g1 = myG.data(1); + const ST* restrict g2 = myG.data(2); - const size_t N = this->last_spo - this->first_spo; + const size_t N = this->last_spo - this->first_spo; #pragma omp simd - for (size_t j = 0; j < N; ++j) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - 
const RealType kX = k0[j]; - const RealType kY = k1[j]; - const RealType kZ = k2[j]; - const RealType val_r = myV[jr]; - const RealType val_i = myV[ji]; - - //phase - RealType s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const RealType dX_r = g0[jr]; - const RealType dY_r = g1[jr]; - const RealType dZ_r = g2[jr]; - - const RealType dX_i = g0[ji]; - const RealType dY_i = g1[ji]; - const RealType dZ_i = g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const RealType gX_r = dX_r + val_i * kX; - const RealType gY_r = dY_r + val_i * kY; - const RealType gZ_r = dZ_r + val_i * kZ; - const RealType gX_i = dX_i - val_r * kX; - const RealType gY_i = dY_i - val_r * kY; - const RealType gZ_i = dZ_i - val_r * kZ; - - const RealType lap_r = myL[jr] + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const RealType lap_i = myL[ji] + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - - const size_t psiIndex = j + this->first_spo; - psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); - dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); - dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); - dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - d2psi[psiIndex] = ComplexT(c * lap_r - s * lap_i, c * lap_i + s * lap_r); - } + for (size_t j = 0; j < N; ++j) { + const size_t jr = j << 1; + const size_t ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g0[jr]; + const ST dY_r = g1[jr]; + const ST dZ_r = g2[jr]; + + const ST dX_i = g0[ji]; + const ST dY_i = g1[ji]; + const ST dZ_i = g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + 
const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + + const ST lap_r = myL[jr] + mKK[j] * val_r + + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); + const ST lap_i = myL[ji] + mKK[j] * val_i - + two * (kX * dX_r + kY * dY_r + kZ * dZ_r); + + const size_t psiIndex = j + this->first_spo; + psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); + dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); + dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); + dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); + d2psi[psiIndex] = + ComplexT(c * lap_r - s * lap_i, c * lap_i + s * lap_r); + } } -template -void SplineC2CT::evaluateVGL(const ParticleSetT& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) +template +void +SplineC2CT::evaluateVGL(const ParticleSetT& P, const int iat, + ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel - { - int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type - FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgl(r, psi, dpsi, d2psi, first / 2, last / 2); - } + { + int first, last; + // Factor of 2 because psi is complex and the spline storage and + // evaluation uses a real type + FairDivideAligned(2 * psi.size(), getAlignment(), + omp_get_num_threads(), omp_get_thread_num(), first, last); + + spline2::evaluate3d_vgh( + SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); + assign_vgl(r, psi, dpsi, d2psi, first / 2, last / 2); + } } 
-template -void SplineC2CT::assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const +template +void +SplineC2CT::assign_vgh(const PointType& r, ValueVector& psi, + GradVector& dpsi, HessVector& grad_grad_psi, int first, int last) const { - // protect last - const auto kPointsSize = this->kPoints.size(); - last = last > kPointsSize ? kPointsSize : last; - - const RealType g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const RealType x = r[0], y = r[1], z = r[2]; - - const RealType* restrict k0 = myKcart.data(0); - const RealType* restrict k1 = myKcart.data(1); - const RealType* restrict k2 = myKcart.data(2); - - const RealType* restrict g0 = myG.data(0); - const RealType* restrict g1 = myG.data(1); - const RealType* restrict g2 = myG.data(2); - const RealType* restrict h00 = myH.data(0); - const RealType* restrict h01 = myH.data(1); - const RealType* restrict h02 = myH.data(2); - const RealType* restrict h11 = myH.data(3); - const RealType* restrict h12 = myH.data(4); - const RealType* restrict h22 = myH.data(5); + // protect last + last = last > this->kPoints.size() ? 
this->kPoints.size() : last; + + const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), + g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), + g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), + g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), + g22 = PrimLattice.G(8); + const ST x = r[0], y = r[1], z = r[2]; + + const ST* restrict k0 = myKcart.data(0); + const ST* restrict k1 = myKcart.data(1); + const ST* restrict k2 = myKcart.data(2); + + const ST* restrict g0 = myG.data(0); + const ST* restrict g1 = myG.data(1); + const ST* restrict g2 = myG.data(2); + const ST* restrict h00 = myH.data(0); + const ST* restrict h01 = myH.data(1); + const ST* restrict h02 = myH.data(2); + const ST* restrict h11 = myH.data(3); + const ST* restrict h12 = myH.data(4); + const ST* restrict h22 = myH.data(5); #pragma omp simd - for (size_t j = first; j < last; ++j) - { - int jr = j << 1; - int ji = jr + 1; - - const RealType kX = k0[j]; - const RealType kY = k1[j]; - const RealType kZ = k2[j]; - const RealType val_r = myV[jr]; - const RealType val_i = myV[ji]; - - //phase - RealType s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const RealType dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const RealType dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const RealType dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const RealType dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const RealType dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const RealType dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const RealType gX_r = dX_r + val_i * kX; - const RealType gY_r = dY_r + val_i * kY; - const RealType gZ_r = dZ_r + val_i * kZ; - const RealType gX_i = dX_i - val_r * kX; - const RealType gY_i = dY_i - val_r * kY; - const RealType gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = j + this->first_spo; - psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * 
val_r); - dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); - dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); - dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - - const RealType h_xx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i); - const RealType h_xy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i); - const RealType h_xz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i); - const RealType h_yx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i); - const RealType h_yy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i); - const RealType h_yz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i); - const RealType h_zx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i); - const RealType h_zy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i); - const RealType h_zz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i); - - const RealType h_xx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r); - const RealType h_xy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r); - const RealType h_xz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r); - const RealType h_yx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], 
h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r); - const RealType h_yy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r); - const RealType h_yz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r); - const RealType h_zx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r); - const RealType h_zy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r); - const RealType h_zz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r); - - grad_grad_psi[psiIndex][0] = ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r); - grad_grad_psi[psiIndex][1] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); - grad_grad_psi[psiIndex][2] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); - grad_grad_psi[psiIndex][3] = ComplexT(c * h_yx_r - s * h_yx_i, c * h_yx_i + s * h_yx_r); - grad_grad_psi[psiIndex][4] = ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r); - grad_grad_psi[psiIndex][5] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); - grad_grad_psi[psiIndex][6] = ComplexT(c * h_zx_r - s * h_zx_i, c * h_zx_i + s * h_zx_r); - grad_grad_psi[psiIndex][7] = ComplexT(c * h_zy_r - s * h_zy_i, c * h_zy_i + s * h_zy_r); - grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r); - } + for (size_t j = first; j < last; ++j) { + int jr = j << 1; + int ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; + const ST dY_r = g10 * 
g0[jr] + g11 * g1[jr] + g12 * g2[jr]; + const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; + + const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; + const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; + const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + + const size_t psiIndex = j + this->first_spo; + psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); + dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); + dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); + dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); + + const ST h_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g00, g01, g02) + + kX * (gX_i + dX_i); + const ST h_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g10, g11, g12) + + kX * (gY_i + dY_i); + const ST h_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g20, g21, g22) + + kX * (gZ_i + dZ_i); + const ST h_yx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g10, g11, g12, g00, g01, g02) + + kY * (gX_i + dX_i); + const ST h_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g10, g11, g12, g10, g11, g12) + + kY * (gY_i + dY_i); + const ST h_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g10, g11, g12, g20, g21, g22) + + kY * (gZ_i + dZ_i); + const ST h_zx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g20, g21, g22, g00, g01, g02) + + kZ * (gX_i + dX_i); + const ST h_zy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g20, g21, g22, g10, g11, g12) + + kZ * (gY_i + dY_i); + const ST 
h_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g20, g21, g22, g20, g21, g22) + + kZ * (gZ_i + dZ_i); + + const ST h_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g00, g01, g02) - + kX * (gX_r + dX_r); + const ST h_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g10, g11, g12) - + kX * (gY_r + dY_r); + const ST h_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g20, g21, g22) - + kX * (gZ_r + dZ_r); + const ST h_yx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g10, g11, g12, g00, g01, g02) - + kY * (gX_r + dX_r); + const ST h_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g10, g11, g12, g10, g11, g12) - + kY * (gY_r + dY_r); + const ST h_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g10, g11, g12, g20, g21, g22) - + kY * (gZ_r + dZ_r); + const ST h_zx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g20, g21, g22, g00, g01, g02) - + kZ * (gX_r + dX_r); + const ST h_zy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g20, g21, g22, g10, g11, g12) - + kZ * (gY_r + dY_r); + const ST h_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g20, g21, g22, g20, g21, g22) - + kZ * (gZ_r + dZ_r); + + grad_grad_psi[psiIndex][0] = + ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r); + grad_grad_psi[psiIndex][1] = + ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); + grad_grad_psi[psiIndex][2] = + ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); + grad_grad_psi[psiIndex][3] = + ComplexT(c * h_yx_r - s * h_yx_i, c * h_yx_i + s * h_yx_r); + grad_grad_psi[psiIndex][4] = + ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r); + grad_grad_psi[psiIndex][5] = + ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); + grad_grad_psi[psiIndex][6] = + ComplexT(c * h_zx_r - s * h_zx_i, c * h_zx_i + s * 
h_zx_r); + grad_grad_psi[psiIndex][7] = + ComplexT(c * h_zy_r - s * h_zy_i, c * h_zy_i + s * h_zy_r); + grad_grad_psi[psiIndex][8] = + ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r); + } } -template -void SplineC2CT::evaluateVGH(const ParticleSetT& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) +template +void +SplineC2CT::evaluateVGH(const ParticleSetT& P, const int iat, + ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel - { - int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type - FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2); - } + { + int first, last; + // Factor of 2 because psi is complex and the spline storage and + // evaluation uses a real type + FairDivideAligned(2 * psi.size(), getAlignment(), + omp_get_num_threads(), omp_get_thread_num(), first, last); + + spline2::evaluate3d_vgh( + SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); + assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2); + } } -template -void SplineC2CT::assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first, - int last) const +template +void +SplineC2CT::assign_vghgh(const PointType& r, ValueVector& psi, + GradVector& dpsi, HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, + int first, int last) const { - // protect last - const auto kPointsSize = this->kPoints.size(); - last = last < 0 ? kPointsSize : (last > kPointsSize ? 
kPointsSize : last); - - const RealType g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const RealType x = r[0], y = r[1], z = r[2]; - - const RealType* restrict k0 = myKcart.data(0); - const RealType* restrict k1 = myKcart.data(1); - const RealType* restrict k2 = myKcart.data(2); - - const RealType* restrict g0 = myG.data(0); - const RealType* restrict g1 = myG.data(1); - const RealType* restrict g2 = myG.data(2); - const RealType* restrict h00 = myH.data(0); - const RealType* restrict h01 = myH.data(1); - const RealType* restrict h02 = myH.data(2); - const RealType* restrict h11 = myH.data(3); - const RealType* restrict h12 = myH.data(4); - const RealType* restrict h22 = myH.data(5); - - const RealType* restrict gh000 = mygH.data(0); - const RealType* restrict gh001 = mygH.data(1); - const RealType* restrict gh002 = mygH.data(2); - const RealType* restrict gh011 = mygH.data(3); - const RealType* restrict gh012 = mygH.data(4); - const RealType* restrict gh022 = mygH.data(5); - const RealType* restrict gh111 = mygH.data(6); - const RealType* restrict gh112 = mygH.data(7); - const RealType* restrict gh122 = mygH.data(8); - const RealType* restrict gh222 = mygH.data(9); - -//SIMD doesn't work quite right yet. Comment out until further debugging. + // protect last + last = last < 0 ? + this->kPoints.size() : + (last > this->kPoints.size() ? 
this->kPoints.size() : last); + + const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), + g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), + g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), + g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), + g22 = PrimLattice.G(8); + const ST x = r[0], y = r[1], z = r[2]; + + const ST* restrict k0 = myKcart.data(0); + const ST* restrict k1 = myKcart.data(1); + const ST* restrict k2 = myKcart.data(2); + + const ST* restrict g0 = myG.data(0); + const ST* restrict g1 = myG.data(1); + const ST* restrict g2 = myG.data(2); + const ST* restrict h00 = myH.data(0); + const ST* restrict h01 = myH.data(1); + const ST* restrict h02 = myH.data(2); + const ST* restrict h11 = myH.data(3); + const ST* restrict h12 = myH.data(4); + const ST* restrict h22 = myH.data(5); + + const ST* restrict gh000 = mygH.data(0); + const ST* restrict gh001 = mygH.data(1); + const ST* restrict gh002 = mygH.data(2); + const ST* restrict gh011 = mygH.data(3); + const ST* restrict gh012 = mygH.data(4); + const ST* restrict gh022 = mygH.data(5); + const ST* restrict gh111 = mygH.data(6); + const ST* restrict gh112 = mygH.data(7); + const ST* restrict gh122 = mygH.data(8); + const ST* restrict gh222 = mygH.data(9); + +// SIMD doesn't work quite right yet. Comment out until further debugging. 
#pragma omp simd - for (size_t j = first; j < last; ++j) - { - int jr = j << 1; - int ji = jr + 1; - - const RealType kX = k0[j]; - const RealType kY = k1[j]; - const RealType kZ = k2[j]; - const RealType val_r = myV[jr]; - const RealType val_i = myV[ji]; - - //phase - RealType s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const RealType dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const RealType dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const RealType dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const RealType dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const RealType dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const RealType dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const RealType gX_r = dX_r + val_i * kX; - const RealType gY_r = dY_r + val_i * kY; - const RealType gZ_r = dZ_r + val_i * kZ; - const RealType gX_i = dX_i - val_r * kX; - const RealType gY_i = dY_i - val_r * kY; - const RealType gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = j + this->first_spo; - psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); - dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); - dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); - dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. 
- const RealType f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); - const RealType f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); - const RealType f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); - const RealType f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12); - const RealType f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22); - const RealType f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22); - - const RealType f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02); - const RealType f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12); - const RealType f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22); - const RealType f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12); - const RealType f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22); - const RealType f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22); - - const RealType h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; - const RealType h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; - const RealType h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; - const RealType h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; - const RealType h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; - const RealType h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; - - const RealType h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; - const RealType h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; - 
const RealType h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; - const RealType h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i; - const RealType h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; - const RealType h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; - - grad_grad_psi[psiIndex][0] = ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r); - grad_grad_psi[psiIndex][1] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); - grad_grad_psi[psiIndex][2] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); - grad_grad_psi[psiIndex][3] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); - grad_grad_psi[psiIndex][4] = ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r); - grad_grad_psi[psiIndex][5] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); - grad_grad_psi[psiIndex][6] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); - grad_grad_psi[psiIndex][7] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); - grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r); - - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. 
- - const RealType f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const RealType f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const RealType f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const RealType f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const RealType f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const RealType f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const RealType f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const RealType f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const RealType f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const RealType f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - const 
RealType f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const RealType f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const RealType f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const RealType f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const RealType f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const RealType f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const RealType f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const RealType f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const RealType f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const RealType f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - //Here is where we 
build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) - const RealType gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; - const RealType gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; - const RealType gh_xxy_r = - f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; - const RealType gh_xxy_i = - f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; - const RealType gh_xxz_r = - f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; - const RealType gh_xxz_i = - f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; - const RealType gh_xyy_r = - f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; - const RealType gh_xyy_i = - f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; - const RealType gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - - (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i; - const RealType gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - - (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r; - const RealType gh_xzz_r = - f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; - const RealType gh_xzz_i = - f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; - const RealType gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i; - const RealType gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r; - const RealType gh_yyz_r = - f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r 
+ 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; - const RealType gh_yyz_i = - f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r; - const RealType gh_yzz_r = - f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; - const RealType gh_yzz_i = - f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; - const RealType gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i; - const RealType gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r; - - grad_grad_grad_psi[psiIndex][0][0] = ComplexT(c * gh_xxx_r - s * gh_xxx_i, c * gh_xxx_i + s * gh_xxx_r); - grad_grad_grad_psi[psiIndex][0][1] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); - grad_grad_grad_psi[psiIndex][0][2] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); - grad_grad_grad_psi[psiIndex][0][3] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); - grad_grad_grad_psi[psiIndex][0][4] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); - grad_grad_grad_psi[psiIndex][0][5] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][0][6] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); - grad_grad_grad_psi[psiIndex][0][7] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][0][8] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); - - grad_grad_grad_psi[psiIndex][1][0] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); - grad_grad_grad_psi[psiIndex][1][1] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); - grad_grad_grad_psi[psiIndex][1][2] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][1][3] = ComplexT(c * 
gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); - grad_grad_grad_psi[psiIndex][1][4] = ComplexT(c * gh_yyy_r - s * gh_yyy_i, c * gh_yyy_i + s * gh_yyy_r); - grad_grad_grad_psi[psiIndex][1][5] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); - grad_grad_grad_psi[psiIndex][1][6] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][1][7] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); - grad_grad_grad_psi[psiIndex][1][8] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); - - - grad_grad_grad_psi[psiIndex][2][0] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); - grad_grad_grad_psi[psiIndex][2][1] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][2][2] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); - grad_grad_grad_psi[psiIndex][2][3] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][2][4] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); - grad_grad_grad_psi[psiIndex][2][5] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); - grad_grad_grad_psi[psiIndex][2][6] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); - grad_grad_grad_psi[psiIndex][2][7] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); - grad_grad_grad_psi[psiIndex][2][8] = ComplexT(c * gh_zzz_r - s * gh_zzz_i, c * gh_zzz_i + s * gh_zzz_r); - } + for (size_t j = first; j < last; ++j) { + int jr = j << 1; + int ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; + const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 
* g2[jr]; + const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; + + const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; + const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; + const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + + const size_t psiIndex = j + this->first_spo; + psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); + dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); + dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); + dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); + + // intermediates for computation of hessian. \partial_i \partial_j phi + // in cartesian coordinates. + const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g00, g01, g02); + const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g10, g11, g12); + const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g20, g21, g22); + const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g10, g11, g12, g10, g11, g12); + const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g10, g11, g12, g20, g21, g22); + const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g20, g21, g22, g20, g21, g22); + + const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g00, g01, g02); + const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g10, g11, g12); + const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g20, g21, g22); + 
const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g10, g11, g12, g10, g11, g12); + const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g10, g11, g12, g20, g21, g22); + const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g20, g21, g22, g20, g21, g22); + + const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; + const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; + const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; + const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; + const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; + const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; + + const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; + const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; + const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; + const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i; + const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; + const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; + + grad_grad_psi[psiIndex][0] = + ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r); + grad_grad_psi[psiIndex][1] = + ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); + grad_grad_psi[psiIndex][2] = + ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); + grad_grad_psi[psiIndex][3] = + ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); + grad_grad_psi[psiIndex][4] = + ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r); + grad_grad_psi[psiIndex][5] = + ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); + grad_grad_psi[psiIndex][6] = + ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); + grad_grad_psi[psiIndex][7] = + ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); + grad_grad_psi[psiIndex][8] = + ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r); + + // These 
are the real and imaginary components of the third SPO + // derivative. _xxx denotes + // third derivative w.r.t. x, _xyz, a derivative with respect to x,y, + // and z, and so on. + + const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); + const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); + const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); + const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); + const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); + const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); + const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); + const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); + const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); + const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], +
gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); + + const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); + const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); + const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); + const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); + const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); + const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); + const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); + const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); + const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); + const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], 
gh122[ji], + gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); + + // Here is where we build up the components of the physical hessian + // gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r)) + const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - + kX * kX * kX * val_i; + const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + + kX * kX * kX * val_r; + const ST gh_xxy_r = f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - + (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; + const ST gh_xxy_i = f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - + (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; + const ST gh_xxz_r = f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - + (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; + const ST gh_xxz_i = f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - + (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; + const ST gh_xyy_r = f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - + (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; + const ST gh_xyy_i = f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - + (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; + const ST gh_xyz_r = f3_xyz_r + + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - + (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - + kX * kY * kZ * val_i; + const ST gh_xyz_i = f3_xyz_i - + (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - + (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + + kX * kY * kZ * val_r; + const ST gh_xzz_r = f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - + (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; + const ST gh_xzz_i = f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - + (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; + const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - + kY * kY * kY * val_i; + const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + + kY * kY * kY * val_r; + const ST gh_yyz_r = f3_yyz_r + (kZ * f_yy_i + 2 * kY *
f_yz_i) - + (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; + const ST gh_yyz_i = f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - + (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r; + const ST gh_yzz_r = f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - + (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; + const ST gh_yzz_i = f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - + (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; + const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - + kZ * kZ * kZ * val_i; + const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + + kZ * kZ * kZ * val_r; + + grad_grad_grad_psi[psiIndex][0][0] = + ComplexT(c * gh_xxx_r - s * gh_xxx_i, c * gh_xxx_i + s * gh_xxx_r); + grad_grad_grad_psi[psiIndex][0][1] = + ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); + grad_grad_grad_psi[psiIndex][0][2] = + ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); + grad_grad_grad_psi[psiIndex][0][3] = + ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); + grad_grad_grad_psi[psiIndex][0][4] = + ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); + grad_grad_grad_psi[psiIndex][0][5] = + ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); + grad_grad_grad_psi[psiIndex][0][6] = + ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); + grad_grad_grad_psi[psiIndex][0][7] = + ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); + grad_grad_grad_psi[psiIndex][0][8] = + ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); + + grad_grad_grad_psi[psiIndex][1][0] = + ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); + grad_grad_grad_psi[psiIndex][1][1] = + ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); + grad_grad_grad_psi[psiIndex][1][2] = + ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); + 
grad_grad_grad_psi[psiIndex][1][3] = + ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); + grad_grad_grad_psi[psiIndex][1][4] = + ComplexT(c * gh_yyy_r - s * gh_yyy_i, c * gh_yyy_i + s * gh_yyy_r); + grad_grad_grad_psi[psiIndex][1][5] = + ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); + grad_grad_grad_psi[psiIndex][1][6] = + ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); + grad_grad_grad_psi[psiIndex][1][7] = + ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); + grad_grad_grad_psi[psiIndex][1][8] = + ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); + + grad_grad_grad_psi[psiIndex][2][0] = + ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); + grad_grad_grad_psi[psiIndex][2][1] = + ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); + grad_grad_grad_psi[psiIndex][2][2] = + ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); + grad_grad_grad_psi[psiIndex][2][3] = + ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); + grad_grad_grad_psi[psiIndex][2][4] = + ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); + grad_grad_grad_psi[psiIndex][2][5] = + ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); + grad_grad_grad_psi[psiIndex][2][6] = + ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); + grad_grad_grad_psi[psiIndex][2][7] = + ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); + grad_grad_grad_psi[psiIndex][2][8] = + ComplexT(c * gh_zzz_r - s * gh_zzz_i, c * gh_zzz_i + s * gh_zzz_r); + } } -template -void SplineC2CT::evaluateVGHGH(const ParticleSetT& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) +template +void +SplineC2CT::evaluateVGHGH(const ParticleSetT& P, const int iat, + ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi, + GGGVector& 
grad_grad_grad_psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel - { - int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type - FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); - assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, last / 2); - } + { + int first, last; + // Factor of 2 because psi is complex and the spline storage and + // evaluation uses a real type + FairDivideAligned(2 * psi.size(), getAlignment(), + omp_get_num_threads(), omp_get_thread_num(), first, last); + + spline2::evaluate3d_vghgh( + SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); + assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, + last / 2); + } } -template class SplineC2CT>; -template class SplineC2CT>; +template class SplineC2CT>; +template class SplineC2CT>; +template class SplineC2CT>; +template class SplineC2CT>; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h index a7ba99e272..e48a285ef1 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h @@ -1,6 +1,6 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2019 QMCPACK developers. 
// @@ -10,227 +10,262 @@ // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. ////////////////////////////////////////////////////////////////////////////////////// - /** @file * - * class to handle complex splines to complex orbitals with splines of arbitrary precision + * class to handle complex splines to complex orbitals with splines of arbitrary + * precision */ #ifndef QMCPLUSPLUS_SPLINE_C2CT_H #define QMCPLUSPLUS_SPLINE_C2CT_H -#include -#include "BsplineSetT.h" #include "OhmmsSoA/VectorSoaContainer.h" -#include "spline2/MultiBspline.hpp" +#include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h" #include "Utilities/FairDivide.h" +#include "spline2/MultiBspline.hpp" + +#include namespace qmcplusplus { -/** class to match std::complex spline with BsplineSet::ValueType (complex) SPOs - * @tparam T precision of spline +/** class to match std::complex spline with BsplineSet::ValueType (complex) + * SPOs + * @tparam ST precision of spline * * Requires temporage storage and multiplication of phase vectors - * The internal storage of complex spline coefficients uses double sized real arrays of T type, aligned and padded. - * All the output orbitals are complex. + * The internal storage of complex spline coefficients uses double sized real + * arrays of ST type, aligned and padded. All the output orbitals are complex. 
*/ -template -class SplineC2CT : public BsplineSetT +template +class SplineC2CT : public BsplineSetT { public: - using RealType = typename BsplineSetT::RealType; - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = RealType; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - - - // types for evaluation results - // only works for Complex - using ComplexT = T; - using ValueType = typename BsplineSetT::ValueType; - using GGGVector = typename BsplineSetT::GGGVector; - using GradVector = typename BsplineSetT::GradVector; - using HessVector = typename BsplineSetT::HessVector; - using ValueVector = typename BsplineSetT::ValueVector; - using ValueMatrix = typename BsplineSetT::ValueMatrix; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - using ghContainer_type = VectorSoaContainer; + using SplineType = typename bspline_traits::SplineType; + using BCType = typename bspline_traits::BCType; + using DataType = ST; + using PointType = TinyVector; + using SingleSplineType = UBspline_3d_d; + + // types for evaluation results + using ComplexT = typename BsplineSetT::ValueType; + using typename BsplineSetT::IndexType; + using typename BsplineSetT::ValueType; + using typename BsplineSetT::RealType; + using typename BsplineSetT::GGGVector; + using typename BsplineSetT::GradVector; + using typename BsplineSetT::HessVector; + using typename BsplineSetT::ValueVector; + using typename BsplineSetT::ValueMatrix; + + using vContainer_type = Vector>; + using gContainer_type = VectorSoaContainer; + using hContainer_type = VectorSoaContainer; + using ghContainer_type = VectorSoaContainer; + +private: + /// primitive cell + CrystalLattice PrimLattice; + ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to + ///CartesianUnit, e.g. 
Hessian + Tensor GGt; + /// multi bspline set + std::shared_ptr> SplineInst; + + /// Copy of original splines for orbital rotation + std::shared_ptr> coef_copy_; + + vContainer_type mKK; + VectorSoaContainer myKcart; + + /// thread private ratios for reduction when using nested threading, numVP x + /// numThread + Matrix ratios_private; + +protected: + /// intermediate result vectors + vContainer_type myV; + vContainer_type myL; + gContainer_type myG; + hContainer_type myH; + ghContainer_type mygH; public: - SplineC2CT(const std::string& my_name) : BsplineSetT(my_name) {} - - SplineC2CT(const SplineC2CT& in); - virtual std::string getClassName() const final { return "SplineC2C"; } - virtual std::string getKeyword() const final { return "SplineC2C"; } - bool isComplex() const final { return true; }; - - std::unique_ptr> makeClone() const final { return std::make_unique>(*this); } - - bool isRotationSupported() const final { return true; } - - /// Store an original copy of the spline coefficients for orbital rotation - void storeParamsBeforeRotation() final; - - /* - Implements orbital rotations via [1,2]. - Should be called by RotatedSPOs::apply_rotation() - This implementation requires that NSPOs > Nelec. In other words, - if you want to run a orbopt wfn, you must include some virtual orbitals! - Some results (using older Berkeley branch) were published in [3]. 
- [1] Filippi & Fahy, JCP 112, (2000) - [2] Toulouse & Umrigar, JCP 126, (2007) - [3] Townsend et al., PRB 102, (2020) - */ - void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) final; - - inline void resizeStorage(size_t n, size_t nvals) - { - this->init_base(n); - size_t npad = getAlignedSize(2 * n); - myV.resize(npad); - myG.resize(npad); - myL.resize(npad); - myH.resize(npad); - mygH.resize(npad); - } - - void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } - - void gather_tables(Communicate* comm) - { - if (comm->size() == 1) - return; - const int Nbands = this->kPoints.size(); - const int Nbandgroups = comm->size(); - - auto& offset = this->offset; - offset.resize(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, offset); - for (size_t ib = 0; ib < offset.size(); ib++) - offset[ib] *= 2; - gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, offset); - } - - template - void create_spline(GT& xyz_g, BCT& xyz_bc) - { - resize_kpoints(); - SplineInst = std::make_shared>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " - << "for the coefficients in 3D spline orbital representation" << std::endl; - } - - inline void flush_zero() { SplineInst->flush_zero(); } - - /** remap kPoints to pack the double copy */ - inline void resize_kpoints() - { - const auto& kPoints = this->kPoints; - const size_t nk = kPoints.size(); - mKK.resize(nk); - myKcart.resize(nk); - for (size_t i = 0; i < nk; ++i) + SplineC2CT(const std::string& my_name) : BsplineSetT(my_name) { - mKK[i] = -dot(kPoints[i], kPoints[i]); - myKcart(i) = kPoints[i]; } - } - - void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); - - bool read_splines(hdf_archive& h5f); - - bool write_splines(hdf_archive& h5f); - - void assign_v(const PointType& r, const vContainer_type& myV, 
ValueVector& psi, int first, int last) const; - - void evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) override; - - void evaluateDetRatios(const VirtualParticleSetT& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override; - - /** assign_vgl - */ - void assign_vgl(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last) - const; - - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ - void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - void evaluateVGL(const ParticleSetT& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) override; - - void assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const; - - void evaluateVGH(const ParticleSetT& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override; - - void assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first = 0, - int last = -1) const; - - void evaluateVGHGH(const ParticleSetT& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; -protected: - /// intermediate result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; + SplineC2CT(const SplineC2CT& in); + virtual std::string + getClassName() const override + { + return "SplineC2C"; + } + virtual std::string + getKeyword() const override + { + return "SplineC2C"; + } + bool + isComplex() const override + { + return true; + }; -private: - ///primitive cell - CrystalLattice PrimLattice; - ///\f$GGt=G^t G \f$, transformation 
for tensor in LatticeUnit to CartesianUnit, e.g. Hessian - Tensor GGt; - ///multi bspline set - std::shared_ptr> SplineInst; + std::unique_ptr> + makeClone() const override + { + return std::make_unique(*this); + } - ///Copy of original splines for orbital rotation - std::shared_ptr> coef_copy_; + bool + isRotationSupported() const override + { + return true; + } - vContainer_type mKK; - VectorSoaContainer myKcart; + /// Store an original copy of the spline coefficients for orbital rotation + void + storeParamsBeforeRotation() override; + + /* + Implements orbital rotations via [1,2]. + Should be called by RotatedSPOs::apply_rotation() + This implementation requires that NSPOs > Nelec. In other words, + if you want to run a orbopt wfn, you must include some virtual orbitals! + Some results (using older Berkeley branch) were published in [3]. + [1] Filippi & Fahy, JCP 112, (2000) + [2] Toulouse & Umrigar, JCP 126, (2007) + [3] Townsend et al., PRB 102, (2020) + */ + void + applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) override; + + inline void + resizeStorage(size_t n, size_t nvals) + { + this->init_base(n); + size_t npad = getAlignedSize(2 * n); + myV.resize(npad); + myG.resize(npad); + myL.resize(npad); + myH.resize(npad); + mygH.resize(npad); + } - ///thread private ratios for reduction when using nested threading, numVP x numThread - Matrix ratios_private; -}; + void + bcast_tables(Communicate* comm) + { + chunked_bcast(comm, SplineInst->getSplinePtr()); + } + + void + gather_tables(Communicate* comm) + { + if (comm->size() == 1) + return; + const int Nbands = this->kPoints.size(); + const int Nbandgroups = comm->size(); + this->offset.resize(Nbandgroups + 1, 0); + FairDivideLow(Nbands, Nbandgroups, this->offset); + for (size_t ib = 0; ib < this->offset.size(); ib++) + this->offset[ib] *= 2; + gatherv(comm, SplineInst->getSplinePtr(), + SplineInst->getSplinePtr()->z_stride, this->offset); + } -extern template class SplineC2CT; -extern template 
class SplineC2CT; + template + void + create_spline(GT& xyz_g, BCT& xyz_bc) + { + resize_kpoints(); + SplineInst = std::make_shared>(); + SplineInst->create(xyz_g, xyz_bc, myV.size()); + app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) + << " MB allocated " + << "for the coefficients in 3D spline orbital representation" + << std::endl; + } + + inline void + flush_zero() + { + SplineInst->flush_zero(); + } + + /** remap kPoints to pack the double copy */ + inline void + resize_kpoints() + { + const size_t nk = this->kPoints.size(); + mKK.resize(nk); + myKcart.resize(nk); + for (size_t i = 0; i < nk; ++i) { + mKK[i] = -dot(this->kPoints[i], this->kPoints[i]); + myKcart(i) = this->kPoints[i]; + } + } + + void + set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, + int twist, int ispline, int level); + + bool + read_splines(hdf_archive& h5f); + + bool + write_splines(hdf_archive& h5f); + + void + assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, + int first, int last) const; + + void + evaluateValue( + const ParticleSetT& P, const int iat, ValueVector& psi) override; + + void + evaluateDetRatios(const VirtualParticleSetT& VP, ValueVector& psi, + const ValueVector& psiinv, std::vector& ratios) override; + + /** assign_vgl + */ + void + assign_vgl(const PointType& r, ValueVector& psi, GradVector& dpsi, + ValueVector& d2psi, int first, int last) const; + + /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in + * cartesian + */ + void + assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, + ValueVector& d2psi); + + void + evaluateVGL(const ParticleSetT& P, const int iat, ValueVector& psi, + GradVector& dpsi, ValueVector& d2psi) override; + + void + assign_vgh(const PointType& r, ValueVector& psi, GradVector& dpsi, + HessVector& grad_grad_psi, int first, int last) const; + + void + evaluateVGH(const ParticleSetT& P, const int iat, ValueVector& psi, + GradVector& dpsi, 
HessVector& grad_grad_psi) override; + + void + assign_vghgh(const PointType& r, ValueVector& psi, GradVector& dpsi, + HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, int first = 0, + int last = -1) const; + + void + evaluateVGHGH(const ParticleSetT& P, const int iat, ValueVector& psi, + GradVector& dpsi, HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) override; + + template + friend class SplineSetReaderT; + template + friend class BsplineReaderBaseT; +}; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2RTOMPTarget.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.cpp similarity index 96% rename from src/QMCWaveFunctions/BsplineFactory/SplineC2RTOMPTarget.cpp rename to src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.cpp index 93ada7660a..1e3e02cd6a 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2RTOMPTarget.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.cpp @@ -9,7 +9,7 @@ // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory ////////////////////////////////////////////////////////////////////////////////////// -#include "SplineC2RTOMPTarget.h" +#include "SplineC2ROMPTargetT.h" #include "ApplyPhaseC2R.hpp" #include "Concurrency/OpenMP.h" @@ -19,22 +19,22 @@ namespace qmcplusplus { -template -SplineC2RTOMPTarget::SplineC2RTOMPTarget( - const SplineC2RTOMPTarget& in) = default; +template +SplineC2ROMPTargetT::SplineC2ROMPTargetT( + const SplineC2ROMPTargetT& in) = default; -template +template inline void -SplineC2RTOMPTarget::set_spline(SingleSplineType* spline_r, +SplineC2ROMPTargetT::set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level) { SplineInst->copy_spline(spline_r, 2 * ispline); SplineInst->copy_spline(spline_i, 2 * ispline + 1); } -template +template bool -SplineC2RTOMPTarget::read_splines(hdf_archive& h5f) +SplineC2ROMPTargetT::read_splines(hdf_archive& h5f) { std::ostringstream o; 
o << "spline_" << this->MyIndex; @@ -42,9 +42,9 @@ SplineC2RTOMPTarget::read_splines(hdf_archive& h5f) return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); } -template +template bool -SplineC2RTOMPTarget::write_splines(hdf_archive& h5f) +SplineC2ROMPTargetT::write_splines(hdf_archive& h5f) { std::ostringstream o; o << "spline_" << this->MyIndex; @@ -52,9 +52,9 @@ SplineC2RTOMPTarget::write_splines(hdf_archive& h5f) return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); } -template +template inline void -SplineC2RTOMPTarget::assign_v(const PointType& r, +SplineC2ROMPTargetT::assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const { // protect last @@ -89,10 +89,10 @@ SplineC2RTOMPTarget::assign_v(const PointType& r, } } -template +template void -SplineC2RTOMPTarget::evaluateValue( - const ParticleSetT& P, const int iat, ValueVector& psi) +SplineC2ROMPTargetT::evaluateValue( + const ParticleSetT& P, const int iat, ValueVector& psi) { const PointType& r = P.activeR(iat); PointType ru(PrimLattice.toUnit_floor(r)); @@ -166,10 +166,11 @@ SplineC2RTOMPTarget::evaluateValue( } } -template +template void -SplineC2RTOMPTarget::evaluateDetRatios(const VirtualParticleSetT& VP, - ValueVector& psi, const ValueVector& psiinv, std::vector& ratios) +SplineC2ROMPTargetT::evaluateDetRatios( + const VirtualParticleSetT& VP, ValueVector& psi, + const ValueVector& psiinv, std::vector& ratios) { const int nVP = VP.getTotalNum(); psiinv_pos_copy.resize(psiinv.size() + nVP * 6); @@ -271,18 +272,17 @@ SplineC2RTOMPTarget::evaluateDetRatios(const VirtualParticleSetT& VP, } } -template +template void -SplineC2RTOMPTarget::mw_evaluateDetRatios( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& vp_list, +SplineC2ROMPTargetT::mw_evaluateDetRatios( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, const RefVector& psi_list, const std::vector& invRow_ptr_list, std::vector>& 
ratios_list) const { assert(this == &spo_list.getLeader()); - auto& phi_leader = - spo_list.template getCastedLeader>(); + auto& phi_leader = spo_list.template getCastedLeader(); auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); auto& det_ratios_buffer_H2D = mw_mem.det_ratios_buffer_H2D; auto& mw_ratios_private = mw_mem.mw_ratios_private; @@ -292,7 +292,7 @@ SplineC2RTOMPTarget::mw_evaluateDetRatios( const size_t requested_orb_size = phi_leader.size(); size_t mw_nVP = 0; - for (const VirtualParticleSetT& VP : vp_list) + for (const VirtualParticleSetT& VP : vp_list) mw_nVP += VP.getTotalNum(); const size_t packed_size = @@ -312,7 +312,7 @@ SplineC2RTOMPTarget::mw_evaluateDetRatios( nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(TT)); size_t iVP = 0; for (size_t iw = 0; iw < nw; iw++) { - const VirtualParticleSetT& VP = vp_list[iw]; + const VirtualParticleSetT& VP = vp_list[iw]; assert(ratios_list[iw].size() == VP.getTotalNum()); for (size_t iat = 0; iat < VP.getTotalNum(); ++iat, ++iVP) { ref_id_ptr[iVP] = iw; @@ -420,9 +420,9 @@ SplineC2RTOMPTarget::mw_evaluateDetRatios( /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in * cartesian */ -template +template inline void -SplineC2RTOMPTarget::assign_vgl_from_l( +SplineC2ROMPTargetT::assign_vgl_from_l( const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { constexpr ST two(2); @@ -539,10 +539,10 @@ SplineC2RTOMPTarget::assign_vgl_from_l( } } -template +template void -SplineC2RTOMPTarget::evaluateVGL(const ParticleSetT& P, const int iat, - ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +SplineC2ROMPTargetT::evaluateVGL(const ParticleSetT& P, + const int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { const PointType& r = P.activeR(iat); PointType ru(PrimLattice.toUnit_floor(r)); @@ -643,9 +643,9 @@ SplineC2RTOMPTarget::evaluateVGL(const ParticleSetT& P, const int iat, } } -template +template void -SplineC2RTOMPTarget::evaluateVGLMultiPos( 
+SplineC2ROMPTargetT::evaluateVGLMultiPos( const Vector>& multi_pos, Vector>& offload_scratch, Vector>& results_scratch, @@ -771,18 +771,17 @@ SplineC2RTOMPTarget::evaluateVGLMultiPos( } } -template +template void -SplineC2RTOMPTarget::mw_evaluateVGL( - const RefVectorWithLeader>& sa_list, - const RefVectorWithLeader>& P_list, int iat, +SplineC2ROMPTargetT::mw_evaluateVGL( + const RefVectorWithLeader>& sa_list, + const RefVectorWithLeader>& P_list, int iat, const RefVector& psi_v_list, const RefVector& dpsi_v_list, const RefVector& d2psi_v_list) const { assert(this == &sa_list.getLeader()); - auto& phi_leader = - sa_list.template getCastedLeader>(); + auto& phi_leader = sa_list.template getCastedLeader(); auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); auto& mw_pos_copy = mw_mem.mw_pos_copy; auto& mw_offload_scratch = mw_mem.mw_offload_scratch; @@ -806,18 +805,17 @@ SplineC2RTOMPTarget::mw_evaluateVGL( mw_results_scratch, psi_v_list, dpsi_v_list, d2psi_v_list); } -template +template void -SplineC2RTOMPTarget::mw_evaluateVGLandDetRatioGrads( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, +SplineC2ROMPTargetT::mw_evaluateVGLandDetRatioGrads( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, const std::vector& invRow_ptr_list, OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, std::vector& grads) const { assert(this == &spo_list.getLeader()); - auto& phi_leader = - spo_list.template getCastedLeader>(); + auto& phi_leader = spo_list.template getCastedLeader(); auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); auto& buffer_H2D = mw_mem.buffer_H2D; auto& rg_private = mw_mem.rg_private; @@ -1012,9 +1010,9 @@ SplineC2RTOMPTarget::mw_evaluateVGLandDetRatioGrads( } } -template +template void -SplineC2RTOMPTarget::assign_vgh(const PointType& r, ValueVector& psi, +SplineC2ROMPTargetT::assign_vgh(const PointType& r, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi, int first, 
int last) const { // protect last @@ -1269,10 +1267,11 @@ SplineC2RTOMPTarget::assign_vgh(const PointType& r, ValueVector& psi, } } -template +template void -SplineC2RTOMPTarget::evaluateVGH(const ParticleSetT& P, const int iat, - ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi) +SplineC2ROMPTargetT::evaluateVGH(const ParticleSetT& P, + const int iat, ValueVector& psi, GradVector& dpsi, + HessVector& grad_grad_psi) { const PointType& r = P.activeR(iat); PointType ru(PrimLattice.toUnit_floor(r)); @@ -1288,16 +1287,15 @@ SplineC2RTOMPTarget::evaluateVGH(const ParticleSetT& P, const int iat, } } -template +template void -SplineC2RTOMPTarget::assign_vghgh(const PointType& r, ValueVector& psi, +SplineC2ROMPTargetT::assign_vghgh(const PointType& r, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, int first, int last) const { // protect last - last = last < 0 ? - this->kPoints.size() : - (last > this->kPoints.size() ? this->kPoints.size() : last); + last = last < 0 ? this->kPoints.size() : + (last > this->kPoints.size() ? 
this->kPoints.size() : last); const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), @@ -1847,11 +1845,11 @@ SplineC2RTOMPTarget::assign_vghgh(const PointType& r, ValueVector& psi, } } -template +template void -SplineC2RTOMPTarget::evaluateVGHGH(const ParticleSetT& P, const int iat, - ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) +SplineC2ROMPTargetT::evaluateVGHGH(const ParticleSetT& P, + const int iat, ValueVector& psi, GradVector& dpsi, + HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi) { const PointType& r = P.activeR(iat); PointType ru(PrimLattice.toUnit_floor(r)); @@ -1868,9 +1866,9 @@ SplineC2RTOMPTarget::evaluateVGHGH(const ParticleSetT& P, const int iat, } } -template +template void -SplineC2RTOMPTarget::evaluate_notranspose(const ParticleSetT& P, +SplineC2ROMPTargetT::evaluate_notranspose(const ParticleSetT& P, int first, int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) { @@ -1928,7 +1926,9 @@ SplineC2RTOMPTarget::evaluate_notranspose(const ParticleSetT& P, } } -template class SplineC2RTOMPTarget; -template class SplineC2RTOMPTarget; +template class SplineC2ROMPTargetT; +template class SplineC2ROMPTargetT; +template class SplineC2ROMPTargetT; +template class SplineC2ROMPTargetT; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2RTOMPTarget.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.h similarity index 82% rename from src/QMCWaveFunctions/BsplineFactory/SplineC2RTOMPTarget.h rename to src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.h index e79ee57450..0d3aef1f2d 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2RTOMPTarget.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.h @@ -15,8 +15,8 @@ * precision splines storage and computation is offloaded to accelerators using * OpenMP target */ -#ifndef QMCPLUSPLUS_SPLINE_C2RT_OMPTARGET_H -#define 
QMCPLUSPLUS_SPLINE_C2RT_OMPTARGET_H +#ifndef QMCPLUSPLUS_SPLINE_C2R_OMPTARGETT_H +#define QMCPLUSPLUS_SPLINE_C2R_OMPTARGETT_H #include "OMPTarget/OffloadAlignedAllocators.hpp" #include "OhmmsSoA/VectorSoaContainer.h" @@ -42,8 +42,8 @@ namespace qmcplusplus * orbital. All the output orbitals are real (C2R). The maximal number of output * orbitals is OrbitalSetSize. */ -template -class SplineC2RTOMPTarget : public BsplineSetT +template +class SplineC2ROMPTargetT : public BsplineSetT { public: using SplineType = typename bspline_traits::SplineType; @@ -52,16 +52,16 @@ class SplineC2RTOMPTarget : public BsplineSetT using PointType = TinyVector; using SingleSplineType = UBspline_3d_d; // types for evaluation results - using TT = typename BsplineSetT::ValueType; - using ValueType = typename BsplineSetT::ValueType; - using GradType = typename BsplineSetT::GradType; - using GGGVector = typename BsplineSetT::GGGVector; - using GradVector = typename BsplineSetT::GradVector; - using HessVector = typename BsplineSetT::HessVector; - using ValueVector = typename BsplineSetT::ValueVector; - using ValueMatrix = typename BsplineSetT::ValueMatrix; - using GradMatrix = typename BsplineSetT::GradMatrix; - using OffloadMWVGLArray = typename BsplineSetT::OffloadMWVGLArray; + using TT = typename BsplineSetT::ValueType; + using typename BsplineSetT::ValueType; + using typename BsplineSetT::GradType; + using typename BsplineSetT::GGGVector; + using typename BsplineSetT::GradVector; + using typename BsplineSetT::GradMatrix; + using typename BsplineSetT::HessVector; + using typename BsplineSetT::ValueVector; + using typename BsplineSetT::ValueMatrix; + using typename BsplineSetT::OffloadMWVGLArray; using vContainer_type = Vector>; using gContainer_type = VectorSoaContainer; @@ -126,8 +126,8 @@ class SplineC2RTOMPTarget : public BsplineSetT ghContainer_type mygH; public: - SplineC2RTOMPTarget(const std::string& my_name) : - BsplineSetT(my_name), + SplineC2ROMPTargetT(const std::string& 
my_name) : + BsplineSetT(my_name), offload_timer_( createGlobalTimer("SplineC2ROMPTarget::offload", timer_level_fine)), nComplexBands(0), @@ -136,7 +136,7 @@ class SplineC2RTOMPTarget : public BsplineSetT { } - SplineC2RTOMPTarget(const SplineC2RTOMPTarget& in); + SplineC2ROMPTargetT(const SplineC2ROMPTargetT& in); virtual std::string getClassName() const override @@ -168,27 +168,29 @@ class SplineC2RTOMPTarget : public BsplineSetT void acquireResource(ResourceCollection& collection, - const RefVectorWithLeader>& spo_list) const override + const RefVectorWithLeader>& spo_list) const override { assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.template getCastedLeader>(); + auto& phi_leader = + spo_list.template getCastedLeader(); phi_leader.mw_mem_handle_ = collection.lendResource>(); } void releaseResource(ResourceCollection& collection, - const RefVectorWithLeader>& spo_list) const override + const RefVectorWithLeader>& spo_list) const override { assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.template getCastedLeader>(); + auto& phi_leader = + spo_list.template getCastedLeader(); collection.takebackResource(phi_leader.mw_mem_handle_); } - std::unique_ptr> + std::unique_ptr> makeClone() const override { - return std::make_unique(*this); + return std::make_unique(*this); } inline void @@ -248,8 +250,9 @@ class SplineC2RTOMPTarget : public BsplineSetT auto* MultiSpline = SplineInst->getSplinePtr(); auto* restrict coefs = MultiSpline->coefs; // attach pointers on the device to achieve deep copy - PRAGMA_OFFLOAD("omp target map(always, to: MultiSpline[0:1], \ - coefs[0:MultiSpline->coefs_size])") + PRAGMA_OFFLOAD("omp target \ + map(always, to: MultiSpline[0:1], \ + coefs[0:MultiSpline->coefs_size])") { MultiSpline->coefs = coefs; } @@ -306,15 +309,15 @@ class SplineC2RTOMPTarget : public BsplineSetT virtual void evaluateValue( - const ParticleSetT& P, const int iat, ValueVector& psi) override; + const ParticleSetT& P, const 
int iat, ValueVector& psi) override; virtual void - evaluateDetRatios(const VirtualParticleSetT& VP, ValueVector& psi, + evaluateDetRatios(const VirtualParticleSetT& VP, ValueVector& psi, const ValueVector& psiinv, std::vector& ratios) override; virtual void - mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& vp_list, + mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, const RefVector& psi_list, const std::vector& invRow_ptr_list, std::vector>& ratios_list) const override; @@ -327,20 +330,20 @@ class SplineC2RTOMPTarget : public BsplineSetT ValueVector& d2psi); virtual void - evaluateVGL(const ParticleSetT& P, const int iat, ValueVector& psi, + evaluateVGL(const ParticleSetT& P, const int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; virtual void - mw_evaluateVGL(const RefVectorWithLeader>& sa_list, - const RefVectorWithLeader>& P_list, int iat, + mw_evaluateVGL(const RefVectorWithLeader>& sa_list, + const RefVectorWithLeader>& P_list, int iat, const RefVector& psi_v_list, const RefVector& dpsi_v_list, const RefVector& d2psi_v_list) const override; virtual void mw_evaluateVGLandDetRatioGrads( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, const std::vector& invRow_ptr_list, OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, std::vector& grads) const override; @@ -350,7 +353,7 @@ class SplineC2RTOMPTarget : public BsplineSetT HessVector& grad_grad_psi, int first, int last) const; virtual void - evaluateVGH(const ParticleSetT& P, const int iat, ValueVector& psi, + evaluateVGH(const ParticleSetT& P, const int iat, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi) override; void @@ -359,18 +362,20 @@ class SplineC2RTOMPTarget : public BsplineSetT int last = -1) const; virtual void - evaluateVGHGH(const ParticleSetT& P, 
const int iat, ValueVector& psi, + evaluateVGHGH(const ParticleSetT& P, const int iat, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi) override; virtual void - evaluate_notranspose(const ParticleSetT& P, int first, int last, + evaluate_notranspose(const ParticleSetT& P, int first, int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) override; template - friend struct SplineSetReader; - friend struct BsplineReaderBase; + friend class SplineSetReaderT; + template + friend class BsplineReaderBaseT; }; + } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.cpp index fd0e182bc0..6e5bf82b72 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.cpp @@ -1,1191 +1,1328 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2019 QMCPACK developers. // -// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@intel.com, University of Illinois at Urbana-Champaign -// Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// Anouar Benali, benali@anl.gov, Argonne National Laboratory -// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@intel.com, University of +// Illinois at Urbana-Champaign Ye Luo, yeluo@anl.gov, +// Argonne National Laboratory Anouar Benali, benali@anl.gov, +// Argonne National Laboratory Mark A. Berrill, +// berrillma@ornl.gov, Oak Ridge National Laboratory // -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// +#include "SplineC2RT.h" +#include "CPU/math.hpp" #include "Concurrency/OpenMP.h" -#include "SplineC2RT.h" -#include "spline2/MultiBsplineEval.hpp" #include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" -#include "CPU/math.hpp" +#include "spline2/MultiBsplineEval.hpp" namespace qmcplusplus { -template -SplineC2RT::SplineC2RT(const SplineC2RT& in) = default; - -template -inline void SplineC2RT::set_spline(SingleSplineType* spline_r, - SingleSplineType* spline_i, - int twist, - int ispline, - int level) +template +SplineC2RT::SplineC2RT(const SplineC2RT& in) = default; + +template +inline void +SplineC2RT::set_spline(SingleSplineType* spline_r, + SingleSplineType* spline_i, int twist, int ispline, int level) { - SplineInst->copy_spline(spline_r, 2 * ispline); - SplineInst->copy_spline(spline_i, 2 * ispline + 1); + SplineInst->copy_spline(spline_r, 2 * ispline); + SplineInst->copy_spline(spline_i, 2 * ispline + 1); } -template -bool SplineC2RT::read_splines(hdf_archive& h5f) +template +bool +SplineC2RT::read_splines(hdf_archive& h5f) { - std::ostringstream o; - o << "spline_" << this->MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); + 
std::ostringstream o; + o << "spline_" << this->MyIndex; + einspline_engine bigtable(SplineInst->getSplinePtr()); + return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); } -template -bool SplineC2RT::write_splines(hdf_archive& h5f) +template +bool +SplineC2RT::write_splines(hdf_archive& h5f) { - std::ostringstream o; - o << "spline_" << this->MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); + std::ostringstream o; + o << "spline_" << this->MyIndex; + einspline_engine bigtable(SplineInst->getSplinePtr()); + return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); } -template -inline void SplineC2RT::assign_v(const PointType& r, - const vContainer_type& myV, - ValueVector& psi, - int first, - int last) const +template +inline void +SplineC2RT::assign_v(const PointType& r, const vContainer_type& myV, + ValueVector& psi, int first, int last) const { - // protect last - last = last > this->kPoints.size() ? this->kPoints.size() : last; + // protect last + last = last > this->kPoints.size() ? 
this->kPoints.size() : last; - const ST x = r[0], y = r[1], z = r[2]; - const ST* restrict kx = myKcart.data(0); - const ST* restrict ky = myKcart.data(1); - const ST* restrict kz = myKcart.data(2); + const ST x = r[0], y = r[1], z = r[2]; + const ST* restrict kx = myKcart.data(0); + const ST* restrict ky = myKcart.data(1); + const ST* restrict kz = myKcart.data(2); - TT* restrict psi_s = psi.data() + this->first_spo; - const size_t requested_orb_size = psi.size(); + TT* restrict psi_s = psi.data() + this->first_spo; + const size_t requested_orb_size = psi.size(); #pragma omp simd - for (size_t j = first; j < std::min(nComplexBands, last); j++) - { - ST s, c; - const size_t jr = j << 1; - const size_t ji = jr + 1; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - qmcplusplus::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); - if (jr < requested_orb_size) - psi_s[jr] = val_r * c - val_i * s; - if (ji < requested_orb_size) - psi_s[ji] = val_i * c + val_r * s; - } - - psi_s += nComplexBands; + for (size_t j = first; j < std::min(nComplexBands, last); j++) { + ST s, c; + const size_t jr = j << 1; + const size_t ji = jr + 1; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + qmcplusplus::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); + if (jr < requested_orb_size) + psi_s[jr] = val_r * c - val_i * s; + if (ji < requested_orb_size) + psi_s[ji] = val_i * c + val_r * s; + } + + psi_s += nComplexBands; #pragma omp simd - for (size_t j = std::max(nComplexBands, first); j < last; j++) - { - ST s, c; - const ST val_r = myV[2 * j]; - const ST val_i = myV[2 * j + 1]; - qmcplusplus::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); - if (j < requested_orb_size) - psi_s[j] = val_r * c - val_i * s; - } + for (size_t j = std::max(nComplexBands, first); j < last; j++) { + ST s, c; + const ST val_r = myV[2 * j]; + const ST val_i = myV[2 * j + 1]; + qmcplusplus::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); + if (j < requested_orb_size) + psi_s[j] 
= val_r * c - val_i * s; + } } -template -void SplineC2RT::evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) +template +void +SplineC2RT::evaluateValue( + const ParticleSetT& P, const int iat, ValueVector& psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel - { - int first, last; - FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); + { + int first, last; + FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), + omp_get_thread_num(), first, last); - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(r, myV, psi, first / 2, last / 2); - } + spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); + assign_v(r, myV, psi, first / 2, last / 2); + } } -template -void SplineC2RT::evaluateDetRatios(const VirtualParticleSetT& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) +template +void +SplineC2RT::evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, const ValueVector& psiinv, std::vector& ratios) { - const bool need_resize = ratios_private.rows() < VP.getTotalNum(); + const bool need_resize = ratios_private.rows() < VP.getTotalNum(); #pragma omp parallel - { - int tid = omp_get_thread_num(); - // initialize thread private ratios - if (need_resize) { - if (tid == 0) // just like #pragma omp master, but one fewer call to the runtime - ratios_private.resize(VP.getTotalNum(), omp_get_num_threads()); + int tid = omp_get_thread_num(); + // initialize thread private ratios + if (need_resize) { + if (tid == 0) // just like #pragma omp master, but one fewer call to + // the runtime + ratios_private.resize(VP.getTotalNum(), omp_get_num_threads()); #pragma omp barrier + } + int first, last; + FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), + 
tid, first, last); + const int first_cplx = first / 2; + const int last_cplx = + this->kPoints.size() < last / 2 ? this->kPoints.size() : last / 2; + + for (int iat = 0; iat < VP.getTotalNum(); ++iat) { + const PointType& r = VP.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); + + spline2::evaluate3d( + SplineInst->getSplinePtr(), ru, myV, first, last); + assign_v(r, myV, psi, first_cplx, last_cplx); + + const int first_real = + first_cplx + std::min(nComplexBands, first_cplx); + const int last_real = + last_cplx + std::min(nComplexBands, last_cplx); + ratios_private[iat][tid] = simd::dot(psi.data() + first_real, + psiinv.data() + first_real, last_real - first_real); + } } - int first, last; - FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), tid, first, last); - const int first_cplx = first / 2; - const int last_cplx = this->kPoints.size() < last / 2 ? this->kPoints.size() : last / 2; - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - const PointType& r = VP.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(r, myV, psi, first_cplx, last_cplx); - - const int first_real = first_cplx + std::min(nComplexBands, first_cplx); - const int last_real = last_cplx + std::min(nComplexBands, last_cplx); - ratios_private[iat][tid] = simd::dot(psi.data() + first_real, psiinv.data() + first_real, last_real - first_real); + // do the reduction manually + for (int iat = 0; iat < VP.getTotalNum(); ++iat) { + ratios[iat] = TT(0); + for (int tid = 0; tid < ratios_private.cols(); tid++) + ratios[iat] += ratios_private[iat][tid]; } - } - - // do the reduction manually - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - ratios[iat] = TT(0); - for (int tid = 0; tid < ratios_private.cols(); tid++) - ratios[iat] += ratios_private[iat][tid]; - } } /** assign_vgl - */ -template -inline void SplineC2RT::assign_vgl(const PointType& r, - ValueVector& psi, - 
GradVector& dpsi, - ValueVector& d2psi, - int first, - int last) const + */ +template +inline void +SplineC2RT::assign_vgl(const PointType& r, ValueVector& psi, + GradVector& dpsi, ValueVector& d2psi, int first, int last) const { - // protect last - last = last > this->kPoints.size() ? this->kPoints.size() : last; - - constexpr ST two(2); - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - const ST symGG[6] = {GGt[0], GGt[1] + GGt[3], GGt[2] + GGt[6], GGt[4], GGt[5] + GGt[7], GGt[8]}; - - const ST* restrict k0 = myKcart.data(0); - ASSUME_ALIGNED(k0); - const ST* restrict k1 = myKcart.data(1); - ASSUME_ALIGNED(k1); - const ST* restrict k2 = myKcart.data(2); - ASSUME_ALIGNED(k2); - - const ST* restrict g0 = myG.data(0); - ASSUME_ALIGNED(g0); - const ST* restrict g1 = myG.data(1); - ASSUME_ALIGNED(g1); - const ST* restrict g2 = myG.data(2); - ASSUME_ALIGNED(g2); - const ST* restrict h00 = myH.data(0); - ASSUME_ALIGNED(h00); - const ST* restrict h01 = myH.data(1); - ASSUME_ALIGNED(h01); - const ST* restrict h02 = myH.data(2); - ASSUME_ALIGNED(h02); - const ST* restrict h11 = myH.data(3); - ASSUME_ALIGNED(h11); - const ST* restrict h12 = myH.data(4); - ASSUME_ALIGNED(h12); - const ST* restrict h22 = myH.data(5); - ASSUME_ALIGNED(h22); - - const size_t requested_orb_size = psi.size(); + // protect last + last = last > this->kPoints.size() ? 
this->kPoints.size() : last; + + constexpr ST two(2); + const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), + g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), + g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), + g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), + g22 = PrimLattice.G(8); + const ST x = r[0], y = r[1], z = r[2]; + const ST symGG[6] = {GGt[0], GGt[1] + GGt[3], GGt[2] + GGt[6], GGt[4], + GGt[5] + GGt[7], GGt[8]}; + + const ST* restrict k0 = myKcart.data(0); + ASSUME_ALIGNED(k0); + const ST* restrict k1 = myKcart.data(1); + ASSUME_ALIGNED(k1); + const ST* restrict k2 = myKcart.data(2); + ASSUME_ALIGNED(k2); + + const ST* restrict g0 = myG.data(0); + ASSUME_ALIGNED(g0); + const ST* restrict g1 = myG.data(1); + ASSUME_ALIGNED(g1); + const ST* restrict g2 = myG.data(2); + ASSUME_ALIGNED(g2); + const ST* restrict h00 = myH.data(0); + ASSUME_ALIGNED(h00); + const ST* restrict h01 = myH.data(1); + ASSUME_ALIGNED(h01); + const ST* restrict h02 = myH.data(2); + ASSUME_ALIGNED(h02); + const ST* restrict h11 = myH.data(3); + ASSUME_ALIGNED(h11); + const ST* restrict h12 = myH.data(4); + ASSUME_ALIGNED(h12); + const ST* restrict h22 = myH.data(5); + ASSUME_ALIGNED(h22); + + const size_t requested_orb_size = psi.size(); #pragma omp simd - for (size_t j = first; j < std::min(nComplexBands, last); j++) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; 
- - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const ST lcart_r = SymTrace(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], symGG); - const ST lcart_i = SymTrace(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], symGG); - const ST lap_r = lcart_r + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = lcart_i + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - - const size_t psiIndex = this->first_spo + jr; - if (psiIndex < requested_orb_size) - { - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - d2psi[psiIndex] = c * lap_r - s * lap_i; + for (size_t j = first; j < std::min(nComplexBands, last); j++) { + const size_t jr = j << 1; + const size_t ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; + const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; + const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; + + const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; + const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; + const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + + const ST lcart_r = SymTrace( + h00[jr], h01[jr], 
h02[jr], h11[jr], h12[jr], h22[jr], symGG); + const ST lcart_i = SymTrace( + h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], symGG); + const ST lap_r = lcart_r + mKK[j] * val_r + + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); + const ST lap_i = lcart_i + mKK[j] * val_i - + two * (kX * dX_r + kY * dY_r + kZ * dZ_r); + + const size_t psiIndex = this->first_spo + jr; + if (psiIndex < requested_orb_size) { + psi[psiIndex] = c * val_r - s * val_i; + dpsi[psiIndex][0] = c * gX_r - s * gX_i; + dpsi[psiIndex][1] = c * gY_r - s * gY_i; + dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; + d2psi[psiIndex] = c * lap_r - s * lap_i; + } + if (psiIndex + 1 < requested_orb_size) { + psi[psiIndex + 1] = c * val_i + s * val_r; + dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; + dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; + dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; + d2psi[psiIndex + 1] = c * lap_i + s * lap_r; + } } - if (psiIndex + 1 < requested_orb_size) - { - psi[psiIndex + 1] = c * val_i + s * val_r; - dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; - dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; - dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - d2psi[psiIndex + 1] = c * lap_i + s * lap_r; - } - } #pragma omp simd - for (size_t j = std::max(nComplexBands, first); j < last; j++) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - 
const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - if (const size_t psiIndex = this->first_spo + nComplexBands + j; psiIndex < requested_orb_size) - { - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - const ST lcart_r = SymTrace(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], symGG); - const ST lcart_i = SymTrace(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], symGG); - const ST lap_r = lcart_r + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = lcart_i + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - d2psi[psiIndex] = c * lap_r - s * lap_i; + for (size_t j = std::max(nComplexBands, first); j < last; j++) { + const size_t jr = j << 1; + const size_t ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; + const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; + const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; + + const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; + const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; + const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + + if (const size_t psiIndex = this->first_spo + nComplexBands + j; + psiIndex < 
requested_orb_size) { + psi[psiIndex] = c * val_r - s * val_i; + dpsi[psiIndex][0] = c * gX_r - s * gX_i; + dpsi[psiIndex][1] = c * gY_r - s * gY_i; + dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; + + const ST lcart_r = SymTrace( + h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], symGG); + const ST lcart_i = SymTrace( + h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], symGG); + const ST lap_r = lcart_r + mKK[j] * val_r + + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); + const ST lap_i = lcart_i + mKK[j] * val_i - + two * (kX * dX_r + kY * dY_r + kZ * dZ_r); + d2psi[psiIndex] = c * lap_r - s * lap_i; + } } - } } -/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ -template -inline void SplineC2RT::assign_vgl_from_l(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) +/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in + * cartesian + */ +template +inline void +SplineC2RT::assign_vgl_from_l( + const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { - constexpr ST two(2); - const ST x = r[0], y = r[1], z = r[2]; + constexpr ST two(2); + const ST x = r[0], y = r[1], z = r[2]; - const ST* restrict k0 = myKcart.data(0); - ASSUME_ALIGNED(k0); - const ST* restrict k1 = myKcart.data(1); - ASSUME_ALIGNED(k1); - const ST* restrict k2 = myKcart.data(2); - ASSUME_ALIGNED(k2); + const ST* restrict k0 = myKcart.data(0); + ASSUME_ALIGNED(k0); + const ST* restrict k1 = myKcart.data(1); + ASSUME_ALIGNED(k1); + const ST* restrict k2 = myKcart.data(2); + ASSUME_ALIGNED(k2); - const ST* restrict g0 = myG.data(0); - ASSUME_ALIGNED(g0); - const ST* restrict g1 = myG.data(1); - ASSUME_ALIGNED(g1); - const ST* restrict g2 = myG.data(2); - ASSUME_ALIGNED(g2); + const ST* restrict g0 = myG.data(0); + ASSUME_ALIGNED(g0); + const ST* restrict g1 = myG.data(1); + ASSUME_ALIGNED(g1); + const ST* restrict g2 = myG.data(2); + ASSUME_ALIGNED(g2); - const size_t N = 
this->kPoints.size(); + const size_t N = this->kPoints.size(); #pragma omp simd - for (size_t j = 0; j < nComplexBands; j++) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g0[jr]; - const ST dY_r = g1[jr]; - const ST dZ_r = g2[jr]; - - const ST dX_i = g0[ji]; - const ST dY_i = g1[ji]; - const ST dZ_i = g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const ST lap_r = myL[jr] + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = myL[ji] + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - - const size_t psiIndex = this->first_spo + jr; - psi[psiIndex] = c * val_r - s * val_i; - psi[psiIndex + 1] = c * val_i + s * val_r; - d2psi[psiIndex] = c * lap_r - s * lap_i; - d2psi[psiIndex + 1] = c * lap_i + s * lap_r; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; - dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; - dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - } + for (size_t j = 0; j < nComplexBands; j++) { + const size_t jr = j << 1; + const size_t ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g0[jr]; + const ST dY_r = g1[jr]; + const ST dZ_r = g2[jr]; + + const ST dX_i = g0[ji]; + const ST dY_i 
= g1[ji]; + const ST dZ_i = g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + + const ST lap_r = myL[jr] + mKK[j] * val_r + + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); + const ST lap_i = myL[ji] + mKK[j] * val_i - + two * (kX * dX_r + kY * dY_r + kZ * dZ_r); + + const size_t psiIndex = this->first_spo + jr; + psi[psiIndex] = c * val_r - s * val_i; + psi[psiIndex + 1] = c * val_i + s * val_r; + d2psi[psiIndex] = c * lap_r - s * lap_i; + d2psi[psiIndex + 1] = c * lap_i + s * lap_r; + dpsi[psiIndex][0] = c * gX_r - s * gX_i; + dpsi[psiIndex][1] = c * gY_r - s * gY_i; + dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; + dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; + dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; + dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; + } #pragma omp simd - for (size_t j = nComplexBands; j < N; j++) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g0[jr]; - const ST dY_r = g1[jr]; - const ST dZ_r = g2[jr]; - - const ST dX_i = g0[ji]; - const ST dY_i = g1[ji]; - const ST dZ_i = g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - const size_t psiIndex = this->first_spo + nComplexBands + j; - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * 
gZ_i; - - const ST lap_r = myL[jr] + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = myL[ji] + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - d2psi[psiIndex] = c * lap_r - s * lap_i; - } + for (size_t j = nComplexBands; j < N; j++) { + const size_t jr = j << 1; + const size_t ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g0[jr]; + const ST dY_r = g1[jr]; + const ST dZ_r = g2[jr]; + + const ST dX_i = g0[ji]; + const ST dY_i = g1[ji]; + const ST dZ_i = g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + const size_t psiIndex = this->first_spo + nComplexBands + j; + psi[psiIndex] = c * val_r - s * val_i; + dpsi[psiIndex][0] = c * gX_r - s * gX_i; + dpsi[psiIndex][1] = c * gY_r - s * gY_i; + dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; + + const ST lap_r = myL[jr] + mKK[j] * val_r + + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); + const ST lap_i = myL[ji] + mKK[j] * val_i - + two * (kX * dX_r + kY * dY_r + kZ * dZ_r); + d2psi[psiIndex] = c * lap_r - s * lap_i; + } } -template -void SplineC2RT::evaluateVGL(const ParticleSetT& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) +template +void +SplineC2RT::evaluateVGL(const ParticleSetT& P, const int iat, + ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel - { - int first, last; - FairDivideAligned(myV.size(), 
getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); + { + int first, last; + FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), + omp_get_thread_num(), first, last); - spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgl(r, psi, dpsi, d2psi, first / 2, last / 2); - } + spline2::evaluate3d_vgh( + SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); + assign_vgl(r, psi, dpsi, d2psi, first / 2, last / 2); + } } -template -void SplineC2RT::assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const +template +void +SplineC2RT::assign_vgh(const PointType& r, ValueVector& psi, + GradVector& dpsi, HessVector& grad_grad_psi, int first, int last) const { - // protect last - last = last > this->kPoints.size() ? this->kPoints.size() : last; - - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart.data(0); - const ST* restrict k1 = myKcart.data(1); - const ST* restrict k2 = myKcart.data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); + // protect last + last = last > this->kPoints.size() ? 
this->kPoints.size() : last; + + const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), + g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), + g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), + g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), + g22 = PrimLattice.G(8); + const ST x = r[0], y = r[1], z = r[2]; + + const ST* restrict k0 = myKcart.data(0); + const ST* restrict k1 = myKcart.data(1); + const ST* restrict k2 = myKcart.data(2); + + const ST* restrict g0 = myG.data(0); + const ST* restrict g1 = myG.data(1); + const ST* restrict g2 = myG.data(2); + const ST* restrict h00 = myH.data(0); + const ST* restrict h01 = myH.data(1); + const ST* restrict h02 = myH.data(2); + const ST* restrict h11 = myH.data(3); + const ST* restrict h12 = myH.data(4); + const ST* restrict h22 = myH.data(5); #pragma omp simd - for (size_t j = first; j < std::min(nComplexBands, last); j++) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = this->first_spo + jr; - - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - 
dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - psi[psiIndex + 1] = c * val_i + s * val_r; - dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; - dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; - dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - - const ST h_xx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i); - const ST h_xy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i); - const ST h_xz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i); - const ST h_yx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i); - const ST h_yy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i); - const ST h_yz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i); - const ST h_zx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i); - const ST h_zy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i); - const ST h_zz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i); - - const ST h_xx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r); - const ST h_xy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r); - const ST h_xz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r); - const ST h_yx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r); - const ST h_yy_i = - 
v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r); - const ST h_yz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r); - const ST h_zx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r); - const ST h_zy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r); - const ST h_zz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r); - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i; - grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - - grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r; - grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r; - grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r; - grad_grad_psi[psiIndex + 1][3] = c * h_yx_i + s * h_yx_r; - grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r; - grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r; - grad_grad_psi[psiIndex + 1][6] = c * h_zx_i + s * h_zx_r; - grad_grad_psi[psiIndex + 1][7] = c * h_zy_i + s * h_zy_r; - grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r; - } + for (size_t j = first; j < std::min(nComplexBands, last); j++) { + int jr = j << 1; + int ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + qmcplusplus::sincos(-(x * kX + y * kY + z 
* kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; + const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; + const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; + + const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; + const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; + const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + + const size_t psiIndex = this->first_spo + jr; + + psi[psiIndex] = c * val_r - s * val_i; + dpsi[psiIndex][0] = c * gX_r - s * gX_i; + dpsi[psiIndex][1] = c * gY_r - s * gY_i; + dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; + + psi[psiIndex + 1] = c * val_i + s * val_r; + dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; + dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; + dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; + + const ST h_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g00, g01, g02) + + kX * (gX_i + dX_i); + const ST h_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g10, g11, g12) + + kX * (gY_i + dY_i); + const ST h_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g20, g21, g22) + + kX * (gZ_i + dZ_i); + const ST h_yx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g10, g11, g12, g00, g01, g02) + + kY * (gX_i + dX_i); + const ST h_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g10, g11, g12, g10, g11, g12) + + kY * (gY_i + dY_i); + const ST h_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g10, g11, g12, g20, g21, g22) + + kY * (gZ_i + dZ_i); + const ST h_zx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], 
g20, g21, g22, g00, g01, g02) + + kZ * (gX_i + dX_i); + const ST h_zy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g20, g21, g22, g10, g11, g12) + + kZ * (gY_i + dY_i); + const ST h_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g20, g21, g22, g20, g21, g22) + + kZ * (gZ_i + dZ_i); + + const ST h_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g00, g01, g02) - + kX * (gX_r + dX_r); + const ST h_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g10, g11, g12) - + kX * (gY_r + dY_r); + const ST h_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g20, g21, g22) - + kX * (gZ_r + dZ_r); + const ST h_yx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g10, g11, g12, g00, g01, g02) - + kY * (gX_r + dX_r); + const ST h_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g10, g11, g12, g10, g11, g12) - + kY * (gY_r + dY_r); + const ST h_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g10, g11, g12, g20, g21, g22) - + kY * (gZ_r + dZ_r); + const ST h_zx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g20, g21, g22, g00, g01, g02) - + kZ * (gX_r + dX_r); + const ST h_zy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g20, g21, g22, g10, g11, g12) - + kZ * (gY_r + dY_r); + const ST h_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g20, g21, g22, g20, g21, g22) - + kZ * (gZ_r + dZ_r); + + grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; + grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; + grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; + grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i; + grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; + grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; + grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i; + grad_grad_psi[psiIndex][7] = c * h_zy_r - s * 
h_zy_i; + grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; + + grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r; + grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r; + grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r; + grad_grad_psi[psiIndex + 1][3] = c * h_yx_i + s * h_yx_r; + grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r; + grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r; + grad_grad_psi[psiIndex + 1][6] = c * h_zx_i + s * h_zx_r; + grad_grad_psi[psiIndex + 1][7] = c * h_zy_i + s * h_zy_r; + grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r; + } #pragma omp simd - for (size_t j = std::max(nComplexBands, first); j < last; j++) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = this->first_spo + nComplexBands + j; - - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - const ST h_xx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i); - const ST 
h_xy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i); - const ST h_xz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i); - const ST h_yx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i); - const ST h_yy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i); - const ST h_yz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i); - const ST h_zx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i); - const ST h_zy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i); - const ST h_zz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i); - - const ST h_xx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r); - const ST h_xy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r); - const ST h_xz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r); - const ST h_yx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r); - const ST h_yy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r); - const ST h_yz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r); - const ST h_zx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + 
dX_r); - const ST h_zy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r); - const ST h_zz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r); - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i; - grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - } + for (size_t j = std::max(nComplexBands, first); j < last; j++) { + int jr = j << 1; + int ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; + const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; + const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; + + const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; + const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; + const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + + const size_t psiIndex = this->first_spo + nComplexBands + j; + + psi[psiIndex] = c * val_r - s * val_i; + dpsi[psiIndex][0] = c * gX_r - s * gX_i; + dpsi[psiIndex][1] = c * gY_r - s * gY_i; + dpsi[psiIndex][2] = c * gZ_r - 
s * gZ_i; + + const ST h_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g00, g01, g02) + + kX * (gX_i + dX_i); + const ST h_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g10, g11, g12) + + kX * (gY_i + dY_i); + const ST h_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g20, g21, g22) + + kX * (gZ_i + dZ_i); + const ST h_yx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g10, g11, g12, g00, g01, g02) + + kY * (gX_i + dX_i); + const ST h_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g10, g11, g12, g10, g11, g12) + + kY * (gY_i + dY_i); + const ST h_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g10, g11, g12, g20, g21, g22) + + kY * (gZ_i + dZ_i); + const ST h_zx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g20, g21, g22, g00, g01, g02) + + kZ * (gX_i + dX_i); + const ST h_zy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g20, g21, g22, g10, g11, g12) + + kZ * (gY_i + dY_i); + const ST h_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g20, g21, g22, g20, g21, g22) + + kZ * (gZ_i + dZ_i); + + const ST h_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g00, g01, g02) - + kX * (gX_r + dX_r); + const ST h_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g10, g11, g12) - + kX * (gY_r + dY_r); + const ST h_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g20, g21, g22) - + kX * (gZ_r + dZ_r); + const ST h_yx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g10, g11, g12, g00, g01, g02) - + kY * (gX_r + dX_r); + const ST h_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g10, g11, g12, g10, g11, g12) - + kY * (gY_r + dY_r); + const ST h_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], 
g10, g11, g12, g20, g21, g22) - + kY * (gZ_r + dZ_r); + const ST h_zx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g20, g21, g22, g00, g01, g02) - + kZ * (gX_r + dX_r); + const ST h_zy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g20, g21, g22, g10, g11, g12) - + kZ * (gY_r + dY_r); + const ST h_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g20, g21, g22, g20, g21, g22) - + kZ * (gZ_r + dZ_r); + + grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; + grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; + grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; + grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i; + grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; + grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; + grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i; + grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i; + grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; + } } -template -void SplineC2RT::evaluateVGH(const ParticleSetT& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) +template +void +SplineC2RT::evaluateVGH(const ParticleSetT& P, const int iat, + ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel - { - int first, last; - FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); + { + int first, last; + FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), + omp_get_thread_num(), first, last); - spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2); - } + spline2::evaluate3d_vgh( + SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); + assign_vgh(r, psi, dpsi, 
grad_grad_psi, first / 2, last / 2); + } } -template -void SplineC2RT::assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first, - int last) const +template +void +SplineC2RT::assign_vghgh(const PointType& r, ValueVector& psi, + GradVector& dpsi, HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, + int first, int last) const { - // protect last - last = last < 0 ? this->kPoints.size() : (last > this->kPoints.size() ? this->kPoints.size() : last); - - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart.data(0); - const ST* restrict k1 = myKcart.data(1); - const ST* restrict k2 = myKcart.data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - - const ST* restrict gh000 = mygH.data(0); - const ST* restrict gh001 = mygH.data(1); - const ST* restrict gh002 = mygH.data(2); - const ST* restrict gh011 = mygH.data(3); - const ST* restrict gh012 = mygH.data(4); - const ST* restrict gh022 = mygH.data(5); - const ST* restrict gh111 = mygH.data(6); - const ST* restrict gh112 = mygH.data(7); - const ST* restrict gh122 = mygH.data(8); - const ST* restrict gh222 = mygH.data(9); - -//SIMD doesn't work quite right yet. Comment out until further debugging. + // protect last + last = last < 0 ? this->kPoints.size() : + (last > this->kPoints.size() ? 
this->kPoints.size() : last); + + const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), + g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), + g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), + g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), + g22 = PrimLattice.G(8); + const ST x = r[0], y = r[1], z = r[2]; + + const ST* restrict k0 = myKcart.data(0); + const ST* restrict k1 = myKcart.data(1); + const ST* restrict k2 = myKcart.data(2); + + const ST* restrict g0 = myG.data(0); + const ST* restrict g1 = myG.data(1); + const ST* restrict g2 = myG.data(2); + const ST* restrict h00 = myH.data(0); + const ST* restrict h01 = myH.data(1); + const ST* restrict h02 = myH.data(2); + const ST* restrict h11 = myH.data(3); + const ST* restrict h12 = myH.data(4); + const ST* restrict h22 = myH.data(5); + + const ST* restrict gh000 = mygH.data(0); + const ST* restrict gh001 = mygH.data(1); + const ST* restrict gh002 = mygH.data(2); + const ST* restrict gh011 = mygH.data(3); + const ST* restrict gh012 = mygH.data(4); + const ST* restrict gh022 = mygH.data(5); + const ST* restrict gh111 = mygH.data(6); + const ST* restrict gh112 = mygH.data(7); + const ST* restrict gh122 = mygH.data(8); + const ST* restrict gh222 = mygH.data(9); + +// SIMD doesn't work quite right yet. Comment out until further debugging. 
#pragma omp simd - for (size_t j = first; j < std::min(nComplexBands, last); j++) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = this->first_spo + jr; - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - psi[psiIndex + 1] = c * val_i + s * val_r; - dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; - dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; - dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. 
- const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); - const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); - const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); - const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12); - const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22); - const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22); - - const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02); - const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12); - const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22); - const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12); - const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22); - const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22); - - const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; - const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; - const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; - const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; - const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; - const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; - - const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; - const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; - const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; - const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY 
* val_i; - const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; - const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - - grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r; - grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r; - grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r; - grad_grad_psi[psiIndex + 1][3] = c * h_xy_i + s * h_xy_r; - grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r; - grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r; - grad_grad_psi[psiIndex + 1][6] = c * h_xz_i + s * h_xz_r; - grad_grad_psi[psiIndex + 1][7] = c * h_yz_i + s * h_yz_r; - grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r; - - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. 
- - const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], 
gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) - const ST gh_xxx_r = f3_xxx_r 
+ 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; - const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; - const ST gh_xxy_r = - f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; - const ST gh_xxy_i = - f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; - const ST gh_xxz_r = - f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; - const ST gh_xxz_i = - f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; - const ST gh_xyy_r = - f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; - const ST gh_xyy_i = - f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; - const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - - (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i; - const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - - (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r; - const ST gh_xzz_r = - f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; - const ST gh_xzz_i = - f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; - const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i; - const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r; - const ST gh_yyz_r = - f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; - const ST gh_yyz_i = - f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r; - const ST gh_yzz_r = - f3_yzz_r + 
(2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; - const ST gh_yzz_i = - f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; - const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i; - const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r; - - grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i; - grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i; - - grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i; - grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i; - - grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i; - 
grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i; - - grad_grad_grad_psi[psiIndex + 1][0][0] = c * gh_xxx_i + s * gh_xxx_r; - grad_grad_grad_psi[psiIndex + 1][0][1] = c * gh_xxy_i + s * gh_xxy_r; - grad_grad_grad_psi[psiIndex + 1][0][2] = c * gh_xxz_i + s * gh_xxz_r; - grad_grad_grad_psi[psiIndex + 1][0][3] = c * gh_xxy_i + s * gh_xxy_r; - grad_grad_grad_psi[psiIndex + 1][0][4] = c * gh_xyy_i + s * gh_xyy_r; - grad_grad_grad_psi[psiIndex + 1][0][5] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][0][6] = c * gh_xxz_i + s * gh_xxz_r; - grad_grad_grad_psi[psiIndex + 1][0][7] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][0][8] = c * gh_xzz_i + s * gh_xzz_r; - - grad_grad_grad_psi[psiIndex + 1][1][0] = c * gh_xxy_i + s * gh_xxy_r; - grad_grad_grad_psi[psiIndex + 1][1][1] = c * gh_xyy_i + s * gh_xyy_r; - grad_grad_grad_psi[psiIndex + 1][1][2] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][1][3] = c * gh_xyy_i + s * gh_xyy_r; - grad_grad_grad_psi[psiIndex + 1][1][4] = c * gh_yyy_i + s * gh_yyy_r; - grad_grad_grad_psi[psiIndex + 1][1][5] = c * gh_yyz_i + s * gh_yyz_r; - grad_grad_grad_psi[psiIndex + 1][1][6] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][1][7] = c * gh_yyz_i + s * gh_yyz_r; - grad_grad_grad_psi[psiIndex + 1][1][8] = c * gh_yzz_i + s * gh_yzz_r; - - grad_grad_grad_psi[psiIndex + 1][2][0] = c * gh_xxz_i + s * gh_xxz_r; - grad_grad_grad_psi[psiIndex + 1][2][1] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][2][2] = c * gh_xzz_i + s * gh_xzz_r; - grad_grad_grad_psi[psiIndex + 1][2][3] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][2][4] = c * gh_yyz_i + s * gh_yyz_r; - grad_grad_grad_psi[psiIndex + 1][2][5] = c * gh_yzz_i + s * 
gh_yzz_r; - grad_grad_grad_psi[psiIndex + 1][2][6] = c * gh_xzz_i + s * gh_xzz_r; - grad_grad_grad_psi[psiIndex + 1][2][7] = c * gh_yzz_i + s * gh_yzz_r; - grad_grad_grad_psi[psiIndex + 1][2][8] = c * gh_zzz_i + s * gh_zzz_r; - } + for (size_t j = first; j < std::min(nComplexBands, last); j++) { + int jr = j << 1; + int ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; + const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; + const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; + + const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; + const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; + const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + + const size_t psiIndex = this->first_spo + jr; + psi[psiIndex] = c * val_r - s * val_i; + dpsi[psiIndex][0] = c * gX_r - s * gX_i; + dpsi[psiIndex][1] = c * gY_r - s * gY_i; + dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; + + psi[psiIndex + 1] = c * val_i + s * val_r; + dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; + dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; + dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; + + // intermediates for computation of hessian. \partial_i \partial_j phi + // in cartesian coordinates. 
+ const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g00, g01, g02); + const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g10, g11, g12); + const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g20, g21, g22); + const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g10, g11, g12, g10, g11, g12); + const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g10, g11, g12, g20, g21, g22); + const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g20, g21, g22, g20, g21, g22); + + const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g00, g01, g02); + const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g10, g11, g12); + const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g20, g21, g22); + const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g10, g11, g12, g10, g11, g12); + const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g10, g11, g12, g20, g21, g22); + const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g20, g21, g22, g20, g21, g22); + + const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; + const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; + const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; + const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; + const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; + const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; + + const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; + const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; + const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; + const ST h_yy_i = f_yy_i - 
2 * kY * dY_r - kY * kY * val_i; + const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; + const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; + + grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; + grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; + grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; + grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i; + grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; + grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; + grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i; + grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i; + grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; + + grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r; + grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r; + grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r; + grad_grad_psi[psiIndex + 1][3] = c * h_xy_i + s * h_xy_r; + grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r; + grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r; + grad_grad_psi[psiIndex + 1][6] = c * h_xz_i + s * h_xz_r; + grad_grad_psi[psiIndex + 1][7] = c * h_yz_i + s * h_yz_r; + grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r; + + // These are the real and imaginary components of the third SPO + // derivative. _xxx denotes + // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, + // and z, and so on. 
+ + const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); + const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); + const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); + const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); + const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); + const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); + const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); + const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); + const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); + const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); + + const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], 
gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); + const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); + const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); + const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); + const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); + const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); + const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); + const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); + const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); + const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); + + // Here is where we build up the components of the physical hessian + // gradient, namely, 
d^3/dx^3(e^{-ik*r}\phi(r) + const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - + kX * kX * kX * val_i; + const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + + kX * kX * kX * val_r; + const ST gh_xxy_r = f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - + (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; + const ST gh_xxy_i = f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - + (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; + const ST gh_xxz_r = f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - + (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; + const ST gh_xxz_i = f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - + (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; + const ST gh_xyy_r = f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - + (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; + const ST gh_xyy_i = f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - + (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; + const ST gh_xyz_r = f3_xyz_r + + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - + (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - + kX * kY * kZ * val_i; + const ST gh_xyz_i = f3_xyz_i - + (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - + (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + + kX * kY * kZ * val_r; + const ST gh_xzz_r = f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - + (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; + const ST gh_xzz_i = f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - + (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; + const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - + kY * kY * kY * val_i; + const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + + kY * kY * kY * val_r; + const ST gh_yyz_r = f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - + (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; + const ST gh_yyz_i = f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - + (kY * kY * dZ_i + 2 * kY 
* kZ * dY_i) + kY * kY * kZ * val_r; + const ST gh_yzz_r = f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - + (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; + const ST gh_yzz_i = f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - + (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; + const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - + kZ * kZ * kZ * val_i; + const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + + kZ * kZ * kZ * val_r; + + grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i; + grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i; + grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i; + grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i; + grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i; + grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i; + grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i; + grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i; + grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i; + + grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i; + grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i; + grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i; + grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i; + grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i; + grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i; + grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i; + grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i; + grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i; + + grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i; + grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i; + grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i; + grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i; + 
grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i; + grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i; + grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i; + grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i; + grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i; + + grad_grad_grad_psi[psiIndex + 1][0][0] = c * gh_xxx_i + s * gh_xxx_r; + grad_grad_grad_psi[psiIndex + 1][0][1] = c * gh_xxy_i + s * gh_xxy_r; + grad_grad_grad_psi[psiIndex + 1][0][2] = c * gh_xxz_i + s * gh_xxz_r; + grad_grad_grad_psi[psiIndex + 1][0][3] = c * gh_xxy_i + s * gh_xxy_r; + grad_grad_grad_psi[psiIndex + 1][0][4] = c * gh_xyy_i + s * gh_xyy_r; + grad_grad_grad_psi[psiIndex + 1][0][5] = c * gh_xyz_i + s * gh_xyz_r; + grad_grad_grad_psi[psiIndex + 1][0][6] = c * gh_xxz_i + s * gh_xxz_r; + grad_grad_grad_psi[psiIndex + 1][0][7] = c * gh_xyz_i + s * gh_xyz_r; + grad_grad_grad_psi[psiIndex + 1][0][8] = c * gh_xzz_i + s * gh_xzz_r; + + grad_grad_grad_psi[psiIndex + 1][1][0] = c * gh_xxy_i + s * gh_xxy_r; + grad_grad_grad_psi[psiIndex + 1][1][1] = c * gh_xyy_i + s * gh_xyy_r; + grad_grad_grad_psi[psiIndex + 1][1][2] = c * gh_xyz_i + s * gh_xyz_r; + grad_grad_grad_psi[psiIndex + 1][1][3] = c * gh_xyy_i + s * gh_xyy_r; + grad_grad_grad_psi[psiIndex + 1][1][4] = c * gh_yyy_i + s * gh_yyy_r; + grad_grad_grad_psi[psiIndex + 1][1][5] = c * gh_yyz_i + s * gh_yyz_r; + grad_grad_grad_psi[psiIndex + 1][1][6] = c * gh_xyz_i + s * gh_xyz_r; + grad_grad_grad_psi[psiIndex + 1][1][7] = c * gh_yyz_i + s * gh_yyz_r; + grad_grad_grad_psi[psiIndex + 1][1][8] = c * gh_yzz_i + s * gh_yzz_r; + + grad_grad_grad_psi[psiIndex + 1][2][0] = c * gh_xxz_i + s * gh_xxz_r; + grad_grad_grad_psi[psiIndex + 1][2][1] = c * gh_xyz_i + s * gh_xyz_r; + grad_grad_grad_psi[psiIndex + 1][2][2] = c * gh_xzz_i + s * gh_xzz_r; + grad_grad_grad_psi[psiIndex + 1][2][3] = c * gh_xyz_i + s * gh_xyz_r; + grad_grad_grad_psi[psiIndex + 1][2][4] = c * gh_yyz_i + s * gh_yyz_r; 
+ grad_grad_grad_psi[psiIndex + 1][2][5] = c * gh_yzz_i + s * gh_yzz_r; + grad_grad_grad_psi[psiIndex + 1][2][6] = c * gh_xzz_i + s * gh_xzz_r; + grad_grad_grad_psi[psiIndex + 1][2][7] = c * gh_yzz_i + s * gh_yzz_r; + grad_grad_grad_psi[psiIndex + 1][2][8] = c * gh_zzz_i + s * gh_zzz_r; + } #pragma omp simd - for (size_t j = std::max(nComplexBands, first); j < last; j++) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = this->first_spo + nComplexBands + j; - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. 
- const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); - const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); - const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); - const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12); - const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22); - const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22); - - const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02); - const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12); - const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22); - const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12); - const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22); - const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22); - - const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; - const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; - const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; - const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; - const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; - const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; - - const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; - const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; - const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; - const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY 
* val_i; - const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; - const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. - - const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST 
f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], 
gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) - const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; - const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; - const ST gh_xxy_r = - f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; - const ST gh_xxy_i = - f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; - const ST gh_xxz_r = - f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; - const ST gh_xxz_i = - f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; - const ST gh_xyy_r = - f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; - const ST gh_xyy_i = - f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; - const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - - (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i; - const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - 
- (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r; - const ST gh_xzz_r = - f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; - const ST gh_xzz_i = - f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; - const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i; - const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r; - const ST gh_yyz_r = - f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; - const ST gh_yyz_i = - f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r; - const ST gh_yzz_r = - f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; - const ST gh_yzz_i = - f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; - const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i; - const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r; - //[x][xx] //These are the unique entries - grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i; - grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i; - - grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i; - 
grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i; - grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i; - - grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i; - } + for (size_t j = std::max(nComplexBands, first); j < last; j++) { + int jr = j << 1; + int ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; + const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; + const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; + + const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; + const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; + const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; 
+ const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + + const size_t psiIndex = this->first_spo + nComplexBands + j; + psi[psiIndex] = c * val_r - s * val_i; + dpsi[psiIndex][0] = c * gX_r - s * gX_i; + dpsi[psiIndex][1] = c * gY_r - s * gY_i; + dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; + + // intermediates for computation of hessian. \partial_i \partial_j phi + // in cartesian coordinates. + const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g00, g01, g02); + const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g10, g11, g12); + const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g00, g01, g02, g20, g21, g22); + const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g10, g11, g12, g10, g11, g12); + const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g10, g11, g12, g20, g21, g22); + const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], + h22[jr], g20, g21, g22, g20, g21, g22); + + const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g00, g01, g02); + const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g10, g11, g12); + const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g00, g01, g02, g20, g21, g22); + const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g10, g11, g12, g10, g11, g12); + const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g10, g11, g12, g20, g21, g22); + const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], + h22[ji], g20, g21, g22, g20, g21, g22); + + const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; + const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; + const ST 
h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; + const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; + const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; + const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; + + const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; + const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; + const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; + const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i; + const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; + const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; + + grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; + grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; + grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; + grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i; + grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; + grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; + grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i; + grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i; + grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; + + // These are the real and imaginary components of the third SPO + // derivative. _xxx denotes + // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, + // and z, and so on. 
+ + const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); + const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); + const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); + const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); + const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); + const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); + const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); + const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); + const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); + const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], + gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], + gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); + + const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], 
gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); + const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); + const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); + const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); + const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); + const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); + const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); + const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); + const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); + const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], + gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], + gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); + + // Here is where we build up the components of the physical hessian + // gradient, namely, 
d^3/dx^3(e^{-ik*r}\phi(r) + const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - + kX * kX * kX * val_i; + const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + + kX * kX * kX * val_r; + const ST gh_xxy_r = f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - + (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; + const ST gh_xxy_i = f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - + (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; + const ST gh_xxz_r = f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - + (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; + const ST gh_xxz_i = f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - + (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; + const ST gh_xyy_r = f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - + (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; + const ST gh_xyy_i = f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - + (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; + const ST gh_xyz_r = f3_xyz_r + + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - + (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - + kX * kY * kZ * val_i; + const ST gh_xyz_i = f3_xyz_i - + (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - + (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + + kX * kY * kZ * val_r; + const ST gh_xzz_r = f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - + (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; + const ST gh_xzz_i = f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - + (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; + const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - + kY * kY * kY * val_i; + const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + + kY * kY * kY * val_r; + const ST gh_yyz_r = f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - + (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; + const ST gh_yyz_i = f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - + (kY * kY * dZ_i + 2 * kY 
* kZ * dY_i) + kY * kY * kZ * val_r; + const ST gh_yzz_r = f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - + (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; + const ST gh_yzz_i = f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - + (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; + const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - + kZ * kZ * kZ * val_i; + const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + + kZ * kZ * kZ * val_r; + //[x][xx] //These are the unique entries + grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i; + grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i; + grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i; + grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i; + grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i; + grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i; + grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i; + grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i; + grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i; + + grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i; + grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i; + grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i; + grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i; + grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i; + grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i; + grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i; + grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i; + grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i; + + grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i; + grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i; + grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i; + 
grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i; + grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i; + grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i; + grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i; + grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i; + grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i; + } } -template -void SplineC2RT::evaluateVGHGH(const ParticleSetT& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) +template +void +SplineC2RT::evaluateVGHGH(const ParticleSetT& P, const int iat, + ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel - { - int first, last; - FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); - assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, last / 2); - } + { + int first, last; + FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), + omp_get_thread_num(), first, last); + + spline2::evaluate3d_vghgh( + SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); + assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, + last / 2); + } } -template class SplineC2RT; -template class SplineC2RT; +template class SplineC2RT; +template class SplineC2RT; +template class SplineC2RT; +template class SplineC2RT; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.h index cd6b45c240..b7cf9e109d 100644 --- 
a/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.h @@ -1,214 +1,255 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2019 QMCPACK developers. // -// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@intel.com, University of Illinois at Urbana-Champaign -// Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// Anouar Benali, benali@anl.gov, Argonne National Laboratory -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@intel.com, University of +// Illinois at Urbana-Champaign Ye Luo, yeluo@anl.gov, +// Argonne National Laboratory Anouar Benali, benali@anl.gov, +// Argonne National Laboratory Mark A. 
Berrill, +// berrillma@ornl.gov, Oak Ridge National Laboratory // -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// - /** @file * - * class to handle complex splines to real orbitals with splines of arbitrary precision + * class to handle complex splines to real orbitals with splines of arbitrary + * precision */ #ifndef QMCPLUSPLUS_SPLINE_C2RT_H #define QMCPLUSPLUS_SPLINE_C2RT_H -#include -#include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h" #include "OhmmsSoA/VectorSoaContainer.h" -#include "spline2/MultiBspline.hpp" +#include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h" #include "Utilities/FairDivide.h" +#include "spline2/MultiBspline.hpp" + +#include namespace qmcplusplus { -/** class to match std::complex spline with BsplineSet::ValueType (real) SPOs +/** class to match std::complex spline with BsplineSet::ValueType (real) + * SPOs * @tparam ST precision of spline * * Requires temporage storage and multiplication of phase vectors - * The internal storage of complex spline coefficients uses double sized real arrays of ST type, aligned and padded. - * The first nComplexBands complex splines produce 2 real orbitals. - * The rest complex splines produce 1 real orbital. - * All the output orbitals are real (C2R). The maximal number of output orbitals is OrbitalSetSize. + * The internal storage of complex spline coefficients uses double sized real + * arrays of ST type, aligned and padded. The first nComplexBands complex + * splines produce 2 real orbitals. The rest complex splines produce 1 real + * orbital. All the output orbitals are real (C2R). The maximal number of output + * orbitals is OrbitalSetSize. 
*/ -template -class SplineC2RT : public BsplineSetT +template +class SplineC2RT : public BsplineSetT { public: - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = ST; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - // types for evaluation results - using TT = typename BsplineSetT::ValueType; - using ValueVector = typename BsplineSetT::ValueVector; - using GGGVector = typename BsplineSetT::GGGVector; - using GradVector = typename BsplineSetT::GradVector; - using HessVector = typename BsplineSetT::HessVector; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - - using ghContainer_type = VectorSoaContainer; + using SplineType = typename bspline_traits::SplineType; + using BCType = typename bspline_traits::BCType; + using DataType = ST; + using PointType = TinyVector; + using SingleSplineType = UBspline_3d_d; + // types for evaluation results + using TT = typename BsplineSetT::ValueType; + using typename BsplineSetT::GGGVector; + using typename BsplineSetT::GradVector; + using typename BsplineSetT::HessVector; + using typename BsplineSetT::ValueVector; + + using vContainer_type = Vector>; + using gContainer_type = VectorSoaContainer; + using hContainer_type = VectorSoaContainer; + using ghContainer_type = VectorSoaContainer; private: - ///primitive cell - CrystalLattice PrimLattice; - ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. 
Hessian - Tensor GGt; - ///number of complex bands - int nComplexBands; - ///multi bspline set - std::shared_ptr> SplineInst; - - vContainer_type mKK; - VectorSoaContainer myKcart; - - ///thread private ratios for reduction when using nested threading, numVP x numThread - Matrix ratios_private; + /// primitive cell + CrystalLattice PrimLattice; + ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to + ///CartesianUnit, e.g. Hessian + Tensor GGt; + /// number of complex bands + int nComplexBands; + /// multi bspline set + std::shared_ptr> SplineInst; + + vContainer_type mKK; + VectorSoaContainer myKcart; + + /// thread private ratios for reduction when using nested threading, numVP x + /// numThread + Matrix ratios_private; protected: - /// intermediate result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; + /// intermediate result vectors + vContainer_type myV; + vContainer_type myL; + gContainer_type myG; + hContainer_type myH; + ghContainer_type mygH; public: - SplineC2RT(const std::string& my_name) : BsplineSetT(my_name), nComplexBands(0) {} - - SplineC2RT(const SplineC2RT& in); - virtual std::string getClassName() const override { return "SplineC2R"; } - virtual std::string getKeyword() const override { return "SplineC2R"; } - bool isComplex() const override { return true; }; - - std::unique_ptr> makeClone() const override { return std::make_unique>(*this); } - - inline void resizeStorage(size_t n, size_t nvals) - { - this->init_base(n); - size_t npad = getAlignedSize(2 * n); - myV.resize(npad); - myG.resize(npad); - myL.resize(npad); - myH.resize(npad); - mygH.resize(npad); - } - - void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } - - void gather_tables(Communicate* comm) - { - if (comm->size() == 1) - return; - const int Nbands = this->kPoints.size(); - const int Nbandgroups = comm->size(); - this->offset.resize(Nbandgroups + 1, 0); 
- FairDivideLow(Nbands, Nbandgroups, this->offset); - - for (size_t ib = 0; ib < this->offset.size(); ib++) - this->offset[ib] = this->offset[ib] * 2; - gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, this->offset); - } - - template - void create_spline(GT& xyz_g, BCT& xyz_bc) - { - resize_kpoints(); - SplineInst = std::make_shared>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - - app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " - << "for the coefficients in 3D spline orbital representation" << std::endl; - } - - inline void flush_zero() { SplineInst->flush_zero(); } - - /** remap kPoints to pack the double copy */ - inline void resize_kpoints() - { - nComplexBands = this->remap_kpoints(); - const int nk = this->kPoints.size(); - mKK.resize(nk); - myKcart.resize(nk); - for (size_t i = 0; i < nk; ++i) + SplineC2RT(const std::string& my_name) : + BsplineSetT(my_name), + nComplexBands(0) + { + } + + SplineC2RT(const SplineC2RT& in); + virtual std::string + getClassName() const override + { + return "SplineC2R"; + } + virtual std::string + getKeyword() const override + { + return "SplineC2R"; + } + bool + isComplex() const override + { + return true; + }; + + std::unique_ptr> + makeClone() const override { - mKK[i] = -dot(this->kPoints[i], this->kPoints[i]); - myKcart(i) = this->kPoints[i]; + return std::make_unique(*this); } - } - - void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); - - bool read_splines(hdf_archive& h5f); - - bool write_splines(hdf_archive& h5f); - - void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const; - - void evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) override; - - void evaluateDetRatios(const VirtualParticleSetT& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override; - - /** assign_vgl - */ - void 
assign_vgl(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last) - const; - - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ - void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - void evaluateVGL(const ParticleSetT& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) override; - - void assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const; - - void evaluateVGH(const ParticleSetT& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override; - - void assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first = 0, - int last = -1) const; - - void evaluateVGHGH(const ParticleSetT& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; + + inline void + resizeStorage(size_t n, size_t nvals) + { + this->init_base(n); + size_t npad = getAlignedSize(2 * n); + myV.resize(npad); + myG.resize(npad); + myL.resize(npad); + myH.resize(npad); + mygH.resize(npad); + } + + void + bcast_tables(Communicate* comm) + { + chunked_bcast(comm, SplineInst->getSplinePtr()); + } + + void + gather_tables(Communicate* comm) + { + if (comm->size() == 1) + return; + const int Nbands = this->kPoints.size(); + const int Nbandgroups = comm->size(); + this->offset.resize(Nbandgroups + 1, 0); + FairDivideLow(Nbands, Nbandgroups, this->offset); + + for (size_t ib = 0; ib < this->offset.size(); ib++) + this->offset[ib] = this->offset[ib] * 2; + gatherv(comm, SplineInst->getSplinePtr(), + SplineInst->getSplinePtr()->z_stride, this->offset); + } + + template + void + 
create_spline(GT& xyz_g, BCT& xyz_bc) + { + resize_kpoints(); + SplineInst = std::make_shared>(); + SplineInst->create(xyz_g, xyz_bc, myV.size()); + + app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) + << " MB allocated " + << "for the coefficients in 3D spline orbital representation" + << std::endl; + } + + inline void + flush_zero() + { + SplineInst->flush_zero(); + } + + /** remap kPoints to pack the double copy */ + inline void + resize_kpoints() + { + nComplexBands = this->remap_kpoints(); + const int nk = this->kPoints.size(); + mKK.resize(nk); + myKcart.resize(nk); + for (size_t i = 0; i < nk; ++i) { + mKK[i] = -dot(this->kPoints[i], this->kPoints[i]); + myKcart(i) = this->kPoints[i]; + } + } + + void + set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, + int twist, int ispline, int level); + + bool + read_splines(hdf_archive& h5f); + + bool + write_splines(hdf_archive& h5f); + + void + assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, + int first, int last) const; + + void + evaluateValue( + const ParticleSetT& P, const int iat, ValueVector& psi) override; + + void + evaluateDetRatios(const VirtualParticleSetT& VP, ValueVector& psi, + const ValueVector& psiinv, std::vector& ratios) override; + + /** assign_vgl + */ + void + assign_vgl(const PointType& r, ValueVector& psi, GradVector& dpsi, + ValueVector& d2psi, int first, int last) const; + + /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in + * cartesian + */ + void + assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, + ValueVector& d2psi); + + void + evaluateVGL(const ParticleSetT& P, const int iat, ValueVector& psi, + GradVector& dpsi, ValueVector& d2psi) override; + + void + assign_vgh(const PointType& r, ValueVector& psi, GradVector& dpsi, + HessVector& grad_grad_psi, int first, int last) const; + + void + evaluateVGH(const ParticleSetT& P, const int iat, ValueVector& psi, + GradVector& dpsi, 
HessVector& grad_grad_psi) override; + + void + assign_vghgh(const PointType& r, ValueVector& psi, GradVector& dpsi, + HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, int first = 0, + int last = -1) const; + + void + evaluateVGHGH(const ParticleSetT& P, const int iat, ValueVector& psi, + GradVector& dpsi, HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) override; + + template + friend class SplineSetReaderT; + template + friend class BsplineReaderBaseT; }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp index 176cb5dee8..ce4bb5e8aa 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp @@ -23,20 +23,20 @@ namespace qmcplusplus { -template -SplineR2RT::SplineR2RT(const SplineR2RT& in) = default; +template +SplineR2RT::SplineR2RT(const SplineR2RT& in) = default; -template +template inline void -SplineR2RT::set_spline(SingleSplineType* spline_r, +SplineR2RT::set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level) { SplineInst->copy_spline(spline_r, ispline); } -template +template bool -SplineR2RT::read_splines(hdf_archive& h5f) +SplineR2RT::read_splines(hdf_archive& h5f) { std::ostringstream o; o << "spline_" << this->MyIndex; @@ -44,9 +44,9 @@ SplineR2RT::read_splines(hdf_archive& h5f) return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); } -template +template bool -SplineR2RT::write_splines(hdf_archive& h5f) +SplineR2RT::write_splines(hdf_archive& h5f) { std::ostringstream o; o << "spline_" << this->MyIndex; @@ -54,13 +54,13 @@ SplineR2RT::write_splines(hdf_archive& h5f) return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); } -template +template void -SplineR2RT::storeParamsBeforeRotation() +SplineR2RT::storeParamsBeforeRotation() { const auto spline_ptr = SplineInst->getSplinePtr(); const auto coefs_tot_size = 
spline_ptr->coefs_size; - coef_copy_ = std::make_shared>(coefs_tot_size); + coef_copy_ = std::make_shared>(coefs_tot_size); std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin()); } @@ -104,9 +104,10 @@ SplineR2RT::storeParamsBeforeRotation() NB: For splines (typically) BasisSetSize >> OrbitalSetSize, so the spl_coefs "matrix" is very tall and skinny. */ -template +template void -SplineR2RT::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) +SplineR2RT::applyRotation( + const ValueMatrix& rot_mat, bool use_stored_copy) { // SplineInst is a MultiBspline. See src/spline2/MultiBspline.hpp const auto spline_ptr = SplineInst->getSplinePtr(); @@ -138,9 +139,9 @@ SplineR2RT::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) } } -template +template inline void -SplineR2RT::assign_v(int bc_sign, const vContainer_type& myV, +SplineR2RT::assign_v(int bc_sign, const vContainer_type& myV, ValueVector& psi, int first, int last) const { // protect last @@ -152,10 +153,10 @@ SplineR2RT::assign_v(int bc_sign, const vContainer_type& myV, psi[this->first_spo + j] = signed_one * myV[j]; } -template +template void -SplineR2RT::evaluateValue( - const ParticleSetT& P, const int iat, ValueVector& psi) +SplineR2RT::evaluateValue( + const ParticleSetT& P, const int iat, ValueVector& psi) { const PointType& r = P.activeR(iat); PointType ru; @@ -172,9 +173,9 @@ SplineR2RT::evaluateValue( } } -template +template void -SplineR2RT::evaluateDetRatios(const VirtualParticleSetT& VP, +SplineR2RT::evaluateDetRatios(const VirtualParticleSetT& VP, ValueVector& psi, const ValueVector& psiinv, std::vector& ratios) { const bool need_resize = ratios_private.rows() < VP.getTotalNum(); @@ -216,9 +217,9 @@ SplineR2RT::evaluateDetRatios(const VirtualParticleSetT& VP, } } -template +template inline void -SplineR2RT::assign_vgl(int bc_sign, ValueVector& psi, GradVector& dpsi, +SplineR2RT::assign_vgl(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int 
first, int last) const { // protect last @@ -261,9 +262,9 @@ SplineR2RT::assign_vgl(int bc_sign, ValueVector& psi, GradVector& dpsi, /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in * cartesian */ -template +template inline void -SplineR2RT::assign_vgl_from_l( +SplineR2RT::assign_vgl_from_l( int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { const ST signed_one = (bc_sign & 1) ? -1 : 1; @@ -283,9 +284,9 @@ SplineR2RT::assign_vgl_from_l( } } -template +template void -SplineR2RT::evaluateVGL(const ParticleSetT& P, const int iat, +SplineR2RT::evaluateVGL(const ParticleSetT& P, const int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { const PointType& r = P.activeR(iat); @@ -304,9 +305,9 @@ SplineR2RT::evaluateVGL(const ParticleSetT& P, const int iat, } } -template +template void -SplineR2RT::assign_vgh(int bc_sign, ValueVector& psi, GradVector& dpsi, +SplineR2RT::assign_vgh(int bc_sign, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi, int first, int last) const { // protect last @@ -373,9 +374,9 @@ SplineR2RT::assign_vgh(int bc_sign, ValueVector& psi, GradVector& dpsi, } } -template +template void -SplineR2RT::evaluateVGH(const ParticleSetT& P, const int iat, +SplineR2RT::evaluateVGH(const ParticleSetT& P, const int iat, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi) { const PointType& r = P.activeR(iat); @@ -394,11 +395,11 @@ SplineR2RT::evaluateVGH(const ParticleSetT& P, const int iat, } } -template +template void -SplineR2RT::assign_vghgh(int bc_sign, ValueVector& psi, GradVector& dpsi, - HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, int first, - int last) const +SplineR2RT::assign_vghgh(int bc_sign, ValueVector& psi, + GradVector& dpsi, HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, + int first, int last) const { // protect last last = last < 0 ? 
@@ -574,9 +575,9 @@ SplineR2RT::assign_vghgh(int bc_sign, ValueVector& psi, GradVector& dpsi, } } -template +template void -SplineR2RT::evaluateVGHGH(const ParticleSetT& P, const int iat, +SplineR2RT::evaluateVGHGH(const ParticleSetT& P, const int iat, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi) { @@ -597,7 +598,9 @@ SplineR2RT::evaluateVGHGH(const ParticleSetT& P, const int iat, } } -template class SplineR2RT; -template class SplineR2RT; +template class SplineR2RT; +template class SplineR2RT; // do we need this one? +template class SplineR2RT; +template class SplineR2RT; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h index f265561e18..ece156ac1a 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h @@ -33,8 +33,8 @@ namespace qmcplusplus * Requires temporage storage and multiplication of the sign of the real part of * the phase Internal storage ST type arrays are aligned and padded. 
*/ -template -class SplineR2RT : public BsplineSetT +template +class SplineR2RT : public BsplineSetT { public: using SplineType = typename bspline_traits::SplineType; @@ -43,19 +43,19 @@ class SplineR2RT : public BsplineSetT using PointType = TinyVector; using SingleSplineType = UBspline_3d_d; // types for evaluation results - using TT = typename BsplineSetT::ValueType; - using GGGVector = typename BsplineSetT::GGGVector; - using ValueMatrix = typename BsplineSetT::ValueMatrix; - using GradVector = typename BsplineSetT::GradVector; - using HessVector = typename BsplineSetT::HessVector; - using ValueVector = typename BsplineSetT::ValueVector; + using TT = typename BsplineSetT::ValueType; + using GGGVector = typename BsplineSetT::GGGVector; + using ValueMatrix = typename BsplineSetT::ValueMatrix; + using GradVector = typename BsplineSetT::GradVector; + using HessVector = typename BsplineSetT::HessVector; + using ValueVector = typename BsplineSetT::ValueVector; using vContainer_type = Vector>; using gContainer_type = VectorSoaContainer; using hContainer_type = VectorSoaContainer; using ghContainer_type = VectorSoaContainer; - using RealType = typename SPOSetT::RealType; + using RealType = typename SPOSetT::RealType; private: bool IsGamma; @@ -66,7 +66,7 @@ class SplineR2RT : public BsplineSetT std::shared_ptr> SplineInst; /// Copy of original splines for orbital rotation - std::shared_ptr> coef_copy_; + std::shared_ptr> coef_copy_; /// thread private ratios for reduction when using nested threading, numVP x /// numThread @@ -83,7 +83,7 @@ class SplineR2RT : public BsplineSetT ghContainer_type mygH; public: - SplineR2RT(const std::string& my_name) : BsplineSetT(my_name) + SplineR2RT(const std::string& my_name) : BsplineSetT(my_name) { } @@ -109,10 +109,10 @@ class SplineR2RT : public BsplineSetT return true; } - std::unique_ptr> + std::unique_ptr> makeClone() const override { - return std::make_unique>(*this); + return std::make_unique>(*this); } /// Store an original 
copy of the spline coefficients for orbital rotation @@ -222,10 +222,10 @@ class SplineR2RT : public BsplineSetT void evaluateValue( - const ParticleSetT& P, const int iat, ValueVector& psi) override; + const ParticleSetT& P, const int iat, ValueVector& psi) override; void - evaluateDetRatios(const VirtualParticleSetT& VP, ValueVector& psi, + evaluateDetRatios(const VirtualParticleSetT& VP, ValueVector& psi, const ValueVector& psiinv, std::vector& ratios) override; void @@ -240,7 +240,7 @@ class SplineR2RT : public BsplineSetT int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); void - evaluateVGL(const ParticleSetT& P, const int iat, ValueVector& psi, + evaluateVGL(const ParticleSetT& P, const int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; void @@ -248,7 +248,7 @@ class SplineR2RT : public BsplineSetT HessVector& grad_grad_psi, int first, int last) const; void - evaluateVGH(const ParticleSetT& P, const int iat, ValueVector& psi, + evaluateVGH(const ParticleSetT& P, const int iat, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi) override; void @@ -257,13 +257,14 @@ class SplineR2RT : public BsplineSetT int last = -1) const; void - evaluateVGHGH(const ParticleSetT& P, const int iat, ValueVector& psi, + evaluateVGHGH(const ParticleSetT& P, const int iat, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi) override; template - friend struct SplineSetReader; - friend struct BsplineReaderBase; + friend class SplineSetReaderT; + template + friend class BsplineReaderBaseT; }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h b/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h index dbeb68ff3c..97ba261ddd 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h @@ -28,6 +28,10 @@ #include "mpi/collectives.h" #include "mpi/point2point.h" #include 
"Utilities/FairDivide.h" +#include "QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h" +#include "Utilities/ProgressReportEngine.h" +#include "QMCWaveFunctions/einspline_helper.hpp" +#include namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h b/src/QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h new file mode 100644 index 0000000000..816561008c --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h @@ -0,0 +1,322 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2019 QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of +// Illinois at Urbana-Champaign Ye Luo, yeluo@anl.gov, +// Argonne National Laboratory Mark A. Berrill, +// berrillma@ornl.gov, Oak Ridge National Laboratory Jeongnim +// Kim, jeongnim.kim@inte.com, Intel Corp. 
+// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_SPLINESET_READERT_H +#define QMCPLUSPLUS_SPLINESET_READERT_H +#include "BsplineFactory/BsplineReaderBaseT.h" +#include "Utilities/FairDivide.h" +#include "mpi/collectives.h" +#include "mpi/point2point.h" + +namespace qmcplusplus +{ +/** General SplineSetReader to handle any unitcell + */ +template +class SplineSetReaderT : public BsplineReaderBaseT +{ +public: + using splineset_t = SA; + using DataType = typename splineset_t::DataType; + using SplineType = typename splineset_t::SplineType; + using ValueType = typename splineset_t::ValueType; + + Array, 3> FFTbox; + Array splineData_r, splineData_i; + double rotate_phase_r, rotate_phase_i; + UBspline_3d_d* spline_r; + UBspline_3d_d* spline_i; + splineset_t* bspline; + fftw_plan FFTplan; + + SplineSetReaderT(EinsplineSetBuilderT* e) : + BsplineReaderBaseT(e), + spline_r(nullptr), + spline_i(nullptr), + bspline(nullptr), + FFTplan(nullptr) + { + } + + ~SplineSetReaderT() override + { + clear(); + } + + void + clear() + { + einspline::destroy(spline_r); + einspline::destroy(spline_i); + if (FFTplan != nullptr) + fftw_destroy_plan(FFTplan); + FFTplan = nullptr; + } + + // set info for Hybrid + virtual void + initialize_hybridrep_atomic_centers() + { + } + // transform cG to radial functions + virtual void + create_atomic_centers_Gspace(Vector>& cG, + Communicate& band_group_comm, int iorb) + { + } + + std::unique_ptr> + create_spline_set(const std::string& my_name, int spin, + const BandInfoGroup& bandgroup) override + { + ReportEngine PRE("SplineSetReader", "create_spline_set(spin,SPE*)"); + // Timer c_prep, c_unpack,c_fft, c_phase, c_spline, c_newphase, c_h5, + // c_init; double t_prep=0.0, t_unpack=0.0, t_fft=0.0, t_phase=0.0, + // t_spline=0.0, t_newphase=0.0, t_h5=0.0, t_init=0.0; + bspline = new 
splineset_t(my_name); + app_log() << " ClassName = " << bspline->getClassName() << std::endl; + if (bspline->isComplex()) + app_log() << " Using complex einspline table" << std::endl; + else + app_log() << " Using real einspline table" << std::endl; + + // set info for Hybrid + this->initialize_hybridrep_atomic_centers(); + + // baseclass handles twists + this->check_twists(bspline, bandgroup); + + Ugrid xyz_grid[3]; + + typename splineset_t::BCType xyz_bc[3]; + bool havePsig = this->set_grid(bspline->HalfG, xyz_grid, xyz_bc); + if (!havePsig) + this->myComm->barrier_and_abort( + "SplineSetReader needs psi_g. Set precision=\"double\"."); + bspline->create_spline(xyz_grid, xyz_bc); + + std::ostringstream oo; + oo << bandgroup.myName << ".g" << this->MeshSize[0] << "x" + << this->MeshSize[1] << "x" << this->MeshSize[2] << ".h5"; + + const std::string splinefile(oo.str()); + bool root = (this->myComm->rank() == 0); + int foundspline = 0; + Timer now; + if (root) { + now.restart(); + hdf_archive h5f(this->myComm); + foundspline = h5f.open(splinefile, H5F_ACC_RDONLY); + if (foundspline) { + std::string aname("none"); + foundspline = h5f.readEntry(aname, "class_name"); + foundspline = + (aname.find(bspline->getKeyword()) != std::string::npos); + } + if (foundspline) { + int sizeD = 0; + foundspline = h5f.readEntry(sizeD, "sizeof"); + foundspline = (sizeD == sizeof(DataType)); + } + if (foundspline) { + foundspline = bspline->read_splines(h5f); + if (foundspline) + app_log() << " Successfully restored coefficients from " + << splinefile << ". The reading time is " + << now.elapsed() << " sec." << std::endl; + } + h5f.close(); + } + this->myComm->bcast(foundspline); + if (foundspline) { + now.restart(); + bspline->bcast_tables(this->myComm); + app_log() << " SplineSetReader bcast the full table " + << now.elapsed() << " sec." 
<< std::endl; + app_log().flush(); + } + else { + bspline->flush_zero(); + + int nx = this->MeshSize[0]; + int ny = this->MeshSize[1]; + int nz = this->MeshSize[2]; + if (havePsig) // perform FFT using FFTW + { + FFTbox.resize(nx, ny, nz); + FFTplan = fftw_plan_dft_3d(nx, ny, nz, + reinterpret_cast(FFTbox.data()), + reinterpret_cast(FFTbox.data()), +1, + FFTW_ESTIMATE); + splineData_r.resize(nx, ny, nz); + if (bspline->isComplex()) + splineData_i.resize(nx, ny, nz); + + TinyVector start(0.0); + TinyVector end(1.0); + spline_r = einspline::create( + spline_r, start, end, this->MeshSize, bspline->HalfG); + if (bspline->isComplex()) + spline_i = einspline::create( + spline_i, start, end, this->MeshSize, bspline->HalfG); + + now.restart(); + initialize_spline_pio_gather(spin, bandgroup); + app_log() << " SplineSetReader initialize_spline_pio " + << now.elapsed() << " sec" << std::endl; + + fftw_destroy_plan(FFTplan); + FFTplan = NULL; + } + else // why, don't know + initialize_spline_psi_r(spin, bandgroup); + if (this->saveSplineCoefs && root) { + now.restart(); + hdf_archive h5f; + h5f.create(splinefile); + std::string classname = bspline->getClassName(); + h5f.write(classname, "class_name"); + int sizeD = sizeof(DataType); + h5f.write(sizeD, "sizeof"); + bspline->write_splines(h5f); + h5f.close(); + app_log() << " Stored spline coefficients in " << splinefile + << " for potential reuse. The writing time is " + << now.elapsed() << " sec." 
<< std::endl; + } + } + + clear(); + return std::unique_ptr>{bspline}; + } + + /** fft and spline cG + * @param cG psi_g to be processed + * @param ti twist index + * @param iorb orbital index + * + * Perform FFT and spline to spline_r and spline_i + */ + inline void + fft_spline(Vector>& cG, int ti) + { + unpack4fftw(cG, this->mybuilder->Gvecs[0], this->MeshSize, FFTbox); + fftw_execute(FFTplan); + if (bspline->isComplex()) { + if (this->rotate) + fix_phase_rotate_c2c(FFTbox, splineData_r, splineData_i, + this->mybuilder->primcell_kpoints[ti], rotate_phase_r, + rotate_phase_i); + else { + split_real_components_c2c(FFTbox, splineData_r, splineData_i); + rotate_phase_r = 1.0; + rotate_phase_i = 0.0; + } + einspline::set(spline_r, splineData_r.data()); + einspline::set(spline_i, splineData_i.data()); + } + else { + fix_phase_rotate_c2r(FFTbox, splineData_r, + this->mybuilder->primcell_kpoints[ti], rotate_phase_r, + rotate_phase_i); + einspline::set(spline_r, splineData_r.data()); + } + } + + /** initialize the splines + */ + void + initialize_spline_pio_gather(int spin, const BandInfoGroup& bandgroup) + { + // distribute bands over processor groups + int Nbands = bandgroup.getNumDistinctOrbitals(); + const int Nprocs = this->myComm->size(); + const int Nbandgroups = std::min(Nbands, Nprocs); + Communicate band_group_comm(*this->myComm, Nbandgroups); + std::vector band_groups(Nbandgroups + 1, 0); + FairDivideLow(Nbands, Nbandgroups, band_groups); + int iorb_first = band_groups[band_group_comm.getGroupID()]; + int iorb_last = band_groups[band_group_comm.getGroupID() + 1]; + + app_log() << "Start transforming plane waves to 3D B-Splines." 
+ << std::endl; + hdf_archive h5f(&band_group_comm, false); + Vector> cG(this->mybuilder->Gvecs[0].size()); + const std::vector& cur_bands = bandgroup.myBands; + if (band_group_comm.isGroupLeader()) + h5f.open(this->mybuilder->H5FileName, H5F_ACC_RDONLY); + for (int iorb = iorb_first; iorb < iorb_last; iorb++) { + if (band_group_comm.isGroupLeader()) { + int iorb_h5 = bspline->BandIndexMap[iorb]; + int ti = cur_bands[iorb_h5].TwistIndex; + std::string s = + this->psi_g_path(ti, spin, cur_bands[iorb_h5].BandIndex); + if (!h5f.readEntry(cG, s)) { + std::ostringstream msg; + msg << "SplineSetReader Failed to read band(s) from h5 " + "file. " + << "Attempted dataset " << s << " with " << cG.size() + << " complex numbers." << std::endl; + throw std::runtime_error(msg.str()); + } + double total_norm = compute_norm(cG); + if ((this->checkNorm) && + (std::abs(total_norm - 1.0) > PW_COEFF_NORM_TOLERANCE)) { + std::ostringstream msg; + msg << "SplineSetReader The orbital " << iorb_h5 + << " has a wrong norm " << total_norm + << ", computed from plane wave coefficients!" + << std::endl + << "This may indicate a problem with the HDF5 library " + "versions used " + << "during wavefunction conversion or read." 
+ << std::endl; + throw std::runtime_error(msg.str()); + } + fft_spline(cG, ti); + bspline->set_spline( + spline_r, spline_i, cur_bands[iorb_h5].TwistIndex, iorb, 0); + } + this->create_atomic_centers_Gspace(cG, band_group_comm, iorb); + } + + this->myComm->barrier(); + Timer now; + if (band_group_comm.isGroupLeader()) { + now.restart(); + bspline->gather_tables(band_group_comm.getGroupLeaderComm()); + app_log() << " Time to gather the table = " << now.elapsed() + << std::endl; + } + now.restart(); + bspline->bcast_tables(this->myComm); + app_log() << " Time to bcast the table = " << now.elapsed() + << std::endl; + } + + void + initialize_spline_psi_r(int spin, const BandInfoGroup& bandgroup) + { + // old implementation buried in the history + this->myComm->barrier_and_abort( + "SplineSetReaderP initialize_spline_psi_r " + "implementation not finished."); + } +}; +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.cpp b/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.cpp new file mode 100644 index 0000000000..f7b5e17c77 --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.cpp @@ -0,0 +1,331 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. 
+////////////////////////////////////////////////////////////////////////////////////// + +#include "QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h" + +#include "CPU/SIMD/vmath.hpp" +#include "CPU/e2iphi.h" +#include "QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h" +#include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepCplxT.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepRealT.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h" +#include "QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.h" +#include "QMCWaveFunctions/BsplineFactory/SplineC2CT.h" +#include "QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.h" +#include "QMCWaveFunctions/BsplineFactory/SplineC2RT.h" +#include "QMCWaveFunctions/BsplineFactory/SplineR2RT.h" +#include "QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h" +#include "QMCWaveFunctions/EinsplineSetBuilderT.h" +#include "QMCWaveFunctions/einspline_helper.hpp" +#include "Utilities/ProgressReportEngine.h" +#include +#include + +namespace qmcplusplus +{ +template +struct CreateComplexHelper +{ + static inline std::unique_ptr> + createDouble( + EinsplineSetBuilderT* e, bool hybrid_rep, const std::string& useGPU) + { + using RealType = typename EinsplineSetBuilderT::RealType; + std::unique_ptr> aReader; + + app_summary() + << " Using real valued spline SPOs with complex double " + "precision storage (C2R)." + << std::endl; + if (CPUOMPTargetSelector::selectPlatform(useGPU) == + PlatformKind::OMPTARGET) { + app_summary() << " Running OpenMP offload code path." + << std::endl; + if (hybrid_rep) { + app_summary() + << " Using hybrid orbital representation." << std::endl; + aReader = std::make_unique>>>(e); + } + else + aReader = std::make_unique< + SplineSetReaderT>>(e); + } + else { + app_summary() << " Running on CPU." << std::endl; + if (hybrid_rep) { + app_summary() + << " Using hybrid orbital representation." 
<< std::endl; + aReader = std::make_unique< + HybridRepSetReaderT>>>( + e); + } + else + aReader = + std::make_unique>>( + e); + } + + return aReader; + } + + static inline std::unique_ptr> + createSingle( + EinsplineSetBuilderT* e, bool hybrid_rep, const std::string& useGPU) + { + using RealType = typename EinsplineSetBuilderT::RealType; + std::unique_ptr> aReader; + + app_summary() + << " Using real valued spline SPOs with complex single " + "precision storage (C2R)." + << std::endl; + if (CPUOMPTargetSelector::selectPlatform(useGPU) == + PlatformKind::OMPTARGET) { + app_summary() << " Running OpenMP offload code path." + << std::endl; + if (hybrid_rep) { + app_summary() + << " Using hybrid orbital representation." << std::endl; + aReader = std::make_unique>>>(e); + } + else + aReader = std::make_unique< + SplineSetReaderT>>(e); + } + else { + app_summary() << " Running on CPU." << std::endl; + if (hybrid_rep) { + app_summary() + << " Using hybrid orbital representation." << std::endl; + aReader = std::make_unique< + HybridRepSetReaderT>>>( + e); + } + else + aReader = + std::make_unique>>(e); + } + + return aReader; + } +}; + +template +struct CreateComplexHelper> +{ + using ValueType = std::complex; + using RealType = typename EinsplineSetBuilderT::RealType; + + static inline std::unique_ptr> + createDouble(EinsplineSetBuilderT* e, bool hybrid_rep, + const std::string& useGPU) + { + std::unique_ptr> aReader; + + app_summary() + << " Using complex valued spline SPOs with complex double " + "precision storage (C2C)." + << std::endl; + if (CPUOMPTargetSelector::selectPlatform(useGPU) == + PlatformKind::OMPTARGET) { + app_summary() << " Running OpenMP offload code path." + << std::endl; + if (hybrid_rep) { + app_summary() + << " Using hybrid orbital representation." << std::endl; + aReader = std::make_unique>>>(e); + } + else + aReader = std::make_unique< + SplineSetReaderT>>(e); + } + else { + app_summary() << " Running on CPU." 
<< std::endl; + if (hybrid_rep) { + app_summary() + << " Using hybrid orbital representation." << std::endl; + aReader = std::make_unique>>>(e); + } + else + aReader = std::make_unique< + SplineSetReaderT>>(e); + } + + return aReader; + } + + static inline std::unique_ptr> + createSingle(EinsplineSetBuilderT* e, bool hybrid_rep, + const std::string& useGPU) + { + std::unique_ptr> aReader; + + app_summary() + << " Using complex valued spline SPOs with complex single " + "precision storage (C2C)." + << std::endl; + if (CPUOMPTargetSelector::selectPlatform(useGPU) == + PlatformKind::OMPTARGET) { + app_summary() << " Running OpenMP offload code path." + << std::endl; + if (hybrid_rep) { + app_summary() + << " Using hybrid orbital representation." << std::endl; + aReader = std::make_unique>>>(e); + } + else + aReader = std::make_unique< + SplineSetReaderT>>(e); + } + else { + app_summary() << " Running on CPU." << std::endl; + if (hybrid_rep) { + app_summary() + << " Using hybrid orbital representation." 
<< std::endl; + aReader = std::make_unique>>>(e); + } + else + aReader = std::make_unique< + SplineSetReaderT>>(e); + } + + return aReader; + } +}; + +template +std::unique_ptr> +createBsplineComplexDoubleT( + EinsplineSetBuilderT* e, bool hybrid_rep, const std::string& useGPU) +{ + return CreateComplexHelper::createDouble(e, hybrid_rep, useGPU); +} + +template std::unique_ptr>> +createBsplineComplexDoubleT>( + EinsplineSetBuilderT>* e, bool hybrid_rep, + const std::string& useGPU); + +template std::unique_ptr>> +createBsplineComplexDoubleT>( + EinsplineSetBuilderT>* e, bool hybrid_rep, + const std::string& useGPU); + +template std::unique_ptr> +createBsplineComplexDoubleT( + EinsplineSetBuilderT* e, bool hybrid_rep, const std::string& useGPU); + +template std::unique_ptr> +createBsplineComplexDoubleT(EinsplineSetBuilderT* e, + bool hybrid_rep, const std::string& useGPU); + +template +std::unique_ptr> +createBsplineComplexSingleT( + EinsplineSetBuilderT* e, bool hybrid_rep, const std::string& useGPU) +{ + return CreateComplexHelper::createSingle(e, hybrid_rep, useGPU); +} + +template std::unique_ptr>> +createBsplineComplexSingleT>( + EinsplineSetBuilderT>* e, bool hybrid_rep, + const std::string& useGPU); + +template std::unique_ptr>> +createBsplineComplexSingleT>( + EinsplineSetBuilderT>* e, bool hybrid_rep, + const std::string& useGPU); + +template std::unique_ptr> +createBsplineComplexSingleT( + EinsplineSetBuilderT* e, bool hybrid_rep, const std::string& useGPU); + +template std::unique_ptr> +createBsplineComplexSingleT(EinsplineSetBuilderT* e, + bool hybrid_rep, const std::string& useGPU); + +template +std::unique_ptr> +createBsplineRealDoubleT( + EinsplineSetBuilderT* e, bool hybrid_rep, const std::string& useGPU) +{ + app_summary() << " Using real valued spline SPOs with real double " + "precision storage (R2R)." 
+ << std::endl; + if (CPUOMPTargetSelector::selectPlatform(useGPU) == PlatformKind::OMPTARGET) + app_summary() << "OpenMP offload has not been implemented to support " + "real valued spline SPOs with real storage!" + << std::endl; + app_summary() << " Running on CPU." << std::endl; + + std::unique_ptr> aReader; + if (hybrid_rep) { + app_summary() << " Using hybrid orbital representation." + << std::endl; + aReader = std::make_unique< + HybridRepSetReaderT>>>(e); + } + else + aReader = std::make_unique>>(e); + return aReader; +} + +template std::unique_ptr> +createBsplineRealDoubleT( + EinsplineSetBuilderT* e, bool hybrid_rep, const std::string& useGPU); + +template std::unique_ptr> +createBsplineRealDoubleT(EinsplineSetBuilderT* e, + bool hybrid_rep, const std::string& useGPU); + +template +std::unique_ptr> +createBsplineRealSingleT( + EinsplineSetBuilderT* e, bool hybrid_rep, const std::string& useGPU) +{ + app_summary() << " Using real valued spline SPOs with real single " + "precision storage (R2R)." + << std::endl; + if (CPUOMPTargetSelector::selectPlatform(useGPU) == PlatformKind::OMPTARGET) + app_summary() << "OpenMP offload has not been implemented to support " + "real valued spline SPOs with real storage!" + << std::endl; + app_summary() << " Running on CPU." << std::endl; + + std::unique_ptr> aReader; + if (hybrid_rep) { + app_summary() << " Using hybrid orbital representation." 
+ << std::endl; + aReader = std::make_unique< + HybridRepSetReaderT>>>(e); + } + else + aReader = std::make_unique>>(e); + return aReader; +} + +template std::unique_ptr> +createBsplineRealSingleT( + EinsplineSetBuilderT* e, bool hybrid_rep, const std::string& useGPU); + +template std::unique_ptr> +createBsplineRealSingleT(EinsplineSetBuilderT* e, + bool hybrid_rep, const std::string& useGPU); + +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h b/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h new file mode 100644 index 0000000000..898d8f2a2e --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h @@ -0,0 +1,59 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2019 QMCPACK developers. +// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_CREATE_BSPLINE_READERT_H +#define QMCPLUSPLUS_CREATE_BSPLINE_READERT_H + +#include +#include + +namespace qmcplusplus +{ +/// forward declaration +template +class BsplineReaderBaseT; +template +class EinsplineSetBuilderT; + +/** create a reader which handles complex (double size real) splines, C2R or C2C + * case spline storage and computation precision is double + */ +template +std::unique_ptr> +createBsplineComplexDoubleT( + EinsplineSetBuilderT* e, bool hybrid_rep, const std::string& useGPU); + +/** create a reader which handles complex (double size real) splines, C2R or C2C + * case spline storage and computation precision is float + */ +template +std::unique_ptr> +createBsplineComplexSingleT( + EinsplineSetBuilderT* e, bool hybrid_rep, 
const std::string& useGPU); + +/** create a reader which handles real splines, R2R case + * spline storage and computation precision is double + */ +template +std::unique_ptr> +createBsplineRealDoubleT( + EinsplineSetBuilderT* e, bool hybrid_rep, const std::string& useGPU); + +/** create a reader which handles real splines, R2R case + * spline storage and computation precision is float + */ +template +std::unique_ptr> +createBsplineRealSingleT( + EinsplineSetBuilderT* e, bool hybrid_rep, const std::string& useGPU); + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt index 909893abe6..05c1fe018b 100644 --- a/src/QMCWaveFunctions/CMakeLists.txt +++ b/src/QMCWaveFunctions/CMakeLists.txt @@ -108,28 +108,43 @@ if(OHMMS_DIM MATCHES 3) if(HAVE_EINSPLINE) set(FERMION_SRCS ${FERMION_SRCS} + EinsplineSetBuilderT.cpp EinsplineSetBuilderCommon.cpp EinsplineSetBuilderOld.cpp EinsplineSetBuilderReadBands_ESHDF.cpp EinsplineSetBuilderESHDF.fft.cpp EinsplineSetBuilder_createSPOs.cpp + BsplineFactory/createBsplineReaderT.cpp BsplineFactory/createComplexDouble.cpp BsplineFactory/createComplexSingle.cpp BsplineFactory/HybridRepCenterOrbitals.cpp + BsplineFactory/HybridRepCenterOrbitalsT.cpp BandInfo.cpp - BsplineFactory/BsplineReaderBase.cpp) + BsplineFactory/SplineC2RT.cpp + BsplineFactory/SplineR2RT.cpp + BsplineFactory/SplineC2CT.cpp + BsplineFactory/BsplineReaderBase.cpp + BsplineFactory/BsplineReaderBaseT.cpp) set(FERMION_OMPTARGET_SRCS Fermion/DiracDeterminantBatched.cpp Fermion/MultiDiracDeterminant.2.cpp - BsplineFactory/SplineC2RTOMPTarget.cpp + BsplineFactory/SplineC2ROMPTargetT.cpp + BsplineFactory/SplineC2COMPTargetT.cpp ) if(QMC_COMPLEX) - set(FERMION_SRCS ${FERMION_SRCS} EinsplineSpinorSetBuilder.cpp BsplineFactory/SplineC2C.cpp BsplineFactory/SplineC2CT.cpp) - set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS} BsplineFactory/SplineC2COMPTarget.cpp BsplineFactory/SplineC2COMPTargetT.cpp) + 
set(FERMION_SRCS ${FERMION_SRCS} + EinsplineSpinorSetBuilder.cpp + BsplineFactory/SplineC2C.cpp) + set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS} + BsplineFactory/SplineC2COMPTarget.cpp) else(QMC_COMPLEX) - set(FERMION_SRCS ${FERMION_SRCS} BsplineFactory/createRealSingle.cpp BsplineFactory/createRealDouble.cpp - BsplineFactory/SplineC2R.cpp BsplineFactory/SplineC2RT.cpp BsplineFactory/SplineR2R.cpp BsplineFactory/SplineR2RT.cpp) - set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS} BsplineFactory/SplineC2ROMPTarget.cpp) + set(FERMION_SRCS ${FERMION_SRCS} + BsplineFactory/createRealSingle.cpp + BsplineFactory/createRealDouble.cpp + BsplineFactory/SplineC2R.cpp + BsplineFactory/SplineR2R.cpp) + set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS} + BsplineFactory/SplineC2ROMPTarget.cpp) endif(QMC_COMPLEX) endif(HAVE_EINSPLINE) diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderT.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderT.cpp new file mode 100644 index 0000000000..f48ea6348a --- /dev/null +++ b/src/QMCWaveFunctions/EinsplineSetBuilderT.cpp @@ -0,0 +1,1815 @@ + +#include "QMCWaveFunctions/EinsplineSetBuilderT.h" + +#include "CPU/SIMD/vmath.hpp" +#include "CPU/e2iphi.h" +#include "CPU/math.hpp" +#include "Message/CommOperators.h" +#include "Message/Communicate.h" +#include "OhmmsData/AttributeSet.h" +#include "Particle/DistanceTableT.h" +#include "ParticleBase/RandomSeqGenerator.h" +#include "QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h" +#include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h" +#include "QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h" +#include "QMCWaveFunctions/WaveFunctionComponentBuilder.h" +#include "QMCWaveFunctions/einspline_helper.hpp" +#include "Utilities/ProgressReportEngine.h" +#include "Utilities/Timer.h" +#include "Utilities/qmc_common.h" +#include +#include +#include + +#include +#include +#include + +namespace qmcplusplus +{ +// std::map EinsplineSetBuilder::SPOSetMap; +// 
std::map,EinsplineSetBuilder::OrbType*,Int4less> +// EinsplineSetBuilder::OrbitalMap; +////std::map +/// EinsplineSetBuilder::ExtendedMap_z; +////std::map +/// EinsplineSetBuilder::ExtendedMap_d; + +template +EinsplineSetBuilderT::EinsplineSetBuilderT(ParticleSetT& p, + const PSetMap& psets, Communicate* comm, xmlNodePtr cur) : + SPOSetBuilderT("spline", comm), + ParticleSets(psets), + TargetPtcl(p), + XMLRoot(cur), + Format(QMCPACK), + NumBands(0), + NumElectrons(0), + NumSpins(0), + NumTwists(0), + MeshFactor(1.0), + MeshSize(0, 0, 0), + twist_num_(-1), + LastSpinSet(-1), + NumOrbitalsRead(-1), + makeRotations(false) +{ + this->ClassName = "EinsplineSetBuilder"; + + MatchingTol = 10 * std::numeric_limits::epsilon(); + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + TileMatrix(i, j) = 0; + + // invalidate states by the basis class + this->states.clear(); + this->states.resize(p.groups()); + + // create vectors with nullptr + FullBands.resize(p.groups()); +} + +template +inline TinyVector +IntPart(const TinyVector& twist) +{ + return TinyVector(round(twist[0] - 1.0e-6), round(twist[1] - 1.0e-6), + round(twist[2] - 1.0e-6)); +} + +template +inline TinyVector +FracPart(const TinyVector& twist) +{ + return twist - IntPart(twist); +} + +template +EinsplineSetBuilderT::~EinsplineSetBuilderT() +{ + DEBUG_MEMORY("EinsplineSetBuilder::~EinsplineSetBuilder"); +} + +template +bool +EinsplineSetBuilderT::CheckLattice() +{ + double diff = 0.0; + for (int i = 0; i < OHMMS_DIM; i++) + for (int j = 0; j < OHMMS_DIM; j++) { + double max_abs = std::max(std::abs(SuperLattice(i, j)), + static_cast(std::abs(TargetPtcl.getLattice().R(i, j)))); + if (max_abs > MatchingTol) + diff = std::max(diff, + std::abs( + SuperLattice(i, j) - TargetPtcl.getLattice().R(i, j)) / + max_abs); + } + + if (diff > MatchingTol) { + std::ostringstream o; + o.setf(std::ios::scientific, std::ios::floatfield); + o.precision(6); + o << "EinsplineSetBuilder::ReadOrbitalInfo_ESHDF \n" + << "Mismatched 
supercell lattices.\n"; + o << " Lattice in ESHDF5 " << std::endl; + o << SuperLattice << std::endl; + o << " Lattice in xml" << std::endl; + o << TargetPtcl.getLattice().R << std::endl; + o << " Difference " << std::endl; + o << SuperLattice - TargetPtcl.getLattice().R << std::endl; + o << " Max relative error = " << diff << std::endl; + o << " Tolerance = " << MatchingTol << std::endl; + app_error() << o.str(); + return false; + } + return true; +} + +template +void +EinsplineSetBuilderT::BroadcastOrbitalInfo() +{ + if (this->myComm->size() == 1) + return; + int numIons = IonTypes.size(); + int numDensityGvecs = TargetPtcl.DensityReducedGvecs.size(); + PooledData abuffer; + PooledData aibuffer; + aibuffer.add(Version.begin(), Version.end()); // myComm->bcast(Version); + aibuffer.add(Format); + abuffer.add(Lattice.begin(), Lattice.end()); // myComm->bcast(Lattice); + abuffer.add(RecipLattice.begin(), + RecipLattice.end()); // myComm->bcast(RecipLattice); + abuffer.add(SuperLattice.begin(), + SuperLattice.end()); // myComm->bcast(SuperLattice); + abuffer.add( + LatticeInv.begin(), LatticeInv.end()); // myComm->bcast(LatticeInv); + aibuffer.add(NumBands); // myComm->bcast(NumBands); + aibuffer.add(NumElectrons); // myComm->bcast(NumElectrons); + aibuffer.add(NumSpins); // myComm->bcast(NumSpins); + aibuffer.add(NumTwists); // myComm->bcast(NumTwists); + aibuffer.add(numIons); // myComm->bcast(numIons); + aibuffer.add(numDensityGvecs); + aibuffer.add(HaveOrbDerivs); + this->myComm->bcast(abuffer); + this->myComm->bcast(aibuffer); + if (this->myComm->rank()) { + abuffer.rewind(); + aibuffer.rewind(); + aibuffer.get(Version.begin(), Version.end()); + aibuffer.get(Format); + abuffer.get(Lattice.begin(), Lattice.end()); + abuffer.get(RecipLattice.begin(), RecipLattice.end()); + abuffer.get(SuperLattice.begin(), SuperLattice.end()); + abuffer.get(LatticeInv.begin(), LatticeInv.end()); + aibuffer.get(NumBands); + aibuffer.get(NumElectrons); + aibuffer.get(NumSpins); + 
aibuffer.get(NumTwists); + aibuffer.get(numIons); + aibuffer.get(numDensityGvecs); + aibuffer.get(HaveOrbDerivs); + TargetPtcl.DensityReducedGvecs.resize(numDensityGvecs); + TargetPtcl.Density_G.resize(numDensityGvecs); + } + if (IonTypes.size() != numIons) { + IonTypes.resize(numIons); + IonPos.resize(numIons); + } + // new buffer + PooledData bbuffer; + PooledData bibuffer; + for (int i = 0; i < numIons; ++i) + bibuffer.add(IonTypes[i]); + // myComm->bcast(IonTypes); + bbuffer.add(&IonPos[0][0], &IonPos[0][0] + OHMMS_DIM * numIons); + // myComm->bcast(IonPos); + if (primcell_kpoints.size() != NumTwists) + primcell_kpoints.resize(NumTwists); + bbuffer.add(&primcell_kpoints[0][0], + &primcell_kpoints[0][0] + OHMMS_DIM * NumTwists); + bibuffer.add(&(TargetPtcl.DensityReducedGvecs[0][0]), + &(TargetPtcl.DensityReducedGvecs[0][0]) + numDensityGvecs * OHMMS_DIM); + bbuffer.add(&(TargetPtcl.Density_G[0]), + &(TargetPtcl.Density_G[0]) + numDensityGvecs); + this->myComm->bcast(bbuffer); + this->myComm->bcast(bibuffer); + if (this->myComm->rank()) { + bbuffer.rewind(); + bibuffer.rewind(); + for (int i = 0; i < numIons; ++i) + bibuffer.get(IonTypes[i]); + bbuffer.get(&IonPos[0][0], &IonPos[0][0] + OHMMS_DIM * numIons); + bbuffer.get(&primcell_kpoints[0][0], + &primcell_kpoints[0][0] + OHMMS_DIM * NumTwists); + bibuffer.get(&(TargetPtcl.DensityReducedGvecs[0][0]), + &(TargetPtcl.DensityReducedGvecs[0][0]) + + numDensityGvecs * OHMMS_DIM); + bbuffer.get(&(TargetPtcl.Density_G[0]), + &(TargetPtcl.Density_G[0]) + numDensityGvecs); + } + // buffer to bcast hybrid representation atomic orbital info + PooledData cbuffer; + PooledData cibuffer; + this->myComm->bcast(cbuffer); + this->myComm->bcast(cibuffer); + AtomicCentersInfo.resize(numIons); + Super2Prim.resize(SourcePtcl->R.size()); + cbuffer.add(AtomicCentersInfo.inner_cutoff.begin(), + AtomicCentersInfo.inner_cutoff.end()); + cbuffer.add(AtomicCentersInfo.non_overlapping_radius.begin(), + 
AtomicCentersInfo.non_overlapping_radius.end()); + cbuffer.add( + AtomicCentersInfo.cutoff.begin(), AtomicCentersInfo.cutoff.end()); + cbuffer.add(AtomicCentersInfo.spline_radius.begin(), + AtomicCentersInfo.spline_radius.end()); + cibuffer.add(Super2Prim.begin(), Super2Prim.end()); + cibuffer.add(AtomicCentersInfo.lmax.begin(), AtomicCentersInfo.lmax.end()); + cibuffer.add( + AtomicCentersInfo.GroupID.begin(), AtomicCentersInfo.GroupID.end()); + cibuffer.add(AtomicCentersInfo.spline_npoints.begin(), + AtomicCentersInfo.spline_npoints.end()); + this->myComm->bcast(cbuffer); + this->myComm->bcast(cibuffer); + if (this->myComm->rank()) { + cbuffer.rewind(); + cibuffer.rewind(); + cbuffer.get(AtomicCentersInfo.inner_cutoff.begin(), + AtomicCentersInfo.inner_cutoff.end()); + cbuffer.get(AtomicCentersInfo.non_overlapping_radius.begin(), + AtomicCentersInfo.non_overlapping_radius.end()); + cbuffer.get( + AtomicCentersInfo.cutoff.begin(), AtomicCentersInfo.cutoff.end()); + cbuffer.get(AtomicCentersInfo.spline_radius.begin(), + AtomicCentersInfo.spline_radius.end()); + cibuffer.get(Super2Prim.begin(), Super2Prim.end()); + cibuffer.get( + AtomicCentersInfo.lmax.begin(), AtomicCentersInfo.lmax.end()); + cibuffer.get( + AtomicCentersInfo.GroupID.begin(), AtomicCentersInfo.GroupID.end()); + cibuffer.get(AtomicCentersInfo.spline_npoints.begin(), + AtomicCentersInfo.spline_npoints.end()); + for (int i = 0; i < numIons; i++) + AtomicCentersInfo.ion_pos[i] = IonPos[i]; + } +} + +//////////////////////////////////////////////////////////////// +//// Create the ion ParticleSet from the data in the HDF file // +//////////////////////////////////////////////////////////////// +// void +// EinsplineSetBuilder::CreateIonParticleSet( std::string sourceName) +//{ +// // ParticleSet &pTemp = *(new MCWalkerConfiguration); +// ParticleSet &pTemp = *(new ParticleSet); +// pTemp.setName (sourceName); +// SpeciesSet& tspecies(pTemp.getSpeciesSet()); +// ParticleSets[sourceName] = &pTemp; +// } 
+// + +template +void +EinsplineSetBuilderT::TileIons() +{ + // set the primitive lattice + SourcePtcl->getPrimitiveLattice().set(Lattice); + + for (int j = 0; j < IonPos.size(); ++j) + IonPos[j] = + FracPart(SourcePtcl->getPrimitiveLattice().toUnit(IonPos[j])); + + IonPos.resize(SourcePtcl->getTotalNum()); + IonTypes.resize(SourcePtcl->getTotalNum()); + std::copy(SourcePtcl->R.begin(), SourcePtcl->R.end(), IonPos.begin()); + std::copy(SourcePtcl->GroupID.begin(), SourcePtcl->GroupID.end(), + IonTypes.begin()); + + // app_log() << " Primitive Cell\n"; + // SourcePtcl->getPrimitiveLattice().print(app_log()); + // app_log() << " Super Cell\n"; + // SourcePtcl->Lattice.print(app_log()); + + // Don't need to do this, already one by ParticleSetPool.cpp + // Vector > primPos = IonPos; + // Vector primTypes = IonTypes; + // int numCopies = std::abs(det(TileMatrix)); + // IonTypes.resize(primPos.size()*numCopies); + // IonPos.resize (primPos.size()*numCopies); + // int maxCopies = 10; + // using Vec3 = TinyVector; + // int index=0; + // for (int i0=-maxCopies; i0<=maxCopies; i0++) + // for (int i1=-maxCopies; i1<=maxCopies; i1++) + // for (int i2=-maxCopies; i2<=maxCopies; i2++) + // for (int iat=0; iat < primPos.size(); iat++) + // { + // Vec3 r = primPos[iat]; + // Vec3 uPrim = PrimCell.toUnit(r); + // for (int i=0; i<3; i++) + // uPrim[i] -= std::floor(uPrim[i]); + // r = PrimCell.toCart(uPrim) + (double)i0*PrimCell.a(0) + + // (double)i1*PrimCell.a(1) + (double)i2*PrimCell.a(2); + // Vec3 uSuper = SuperCell.toUnit(r); + // if ((uSuper[0] >= -1.0e-4) && (uSuper[0] < 0.9999) && + // (uSuper[1] >= -1.0e-4) && (uSuper[1] < 0.9999) && + // (uSuper[2] >= -1.0e-4) && (uSuper[2] < 0.9999)) + // { + // IonPos[index]= r; + // IonTypes[index]= primTypes[iat]; + // index++; + // } + // } + // if (index != primPos.size()*numCopies) + // { + // app_error() << "The number of tiled ions, " << IonPos.size() + // << ", does not match the expected number of " + // << 
primPos.size()*numCopies << " or the index "<< index + // <<". Aborting.\n"; + // APP_ABORT("EinsplineSetBuilder::TileIons()"); + // } + // if (myComm->rank() == 0) + // { + // char buf[1000]; + // snprintf (buf, 1000, "Supercell reduced ion positions = \n"); + // app_log() << buf; + // app_log().flush(); + // for (int i=0; i +bool +EinsplineSetBuilderT::TwistPair(PosType a, PosType b) const +{ + bool pair = true; + for (int n = 0; n < OHMMS_DIM; n++) { + double d = a[n] + b[n]; + if (std::abs(d - round(d)) > MatchingTol) + pair = false; + } + return pair; +} + +template +void +EinsplineSetBuilderT::AnalyzeTwists2( + const int twist_num_inp, const TinyVector& twist_inp) +{ + Tensor S; + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + S(i, j) = (double)TileMatrix(i, j); + + const int num_prim_kpoints = primcell_kpoints.size(); + + // build a list of unique super twists that all the primitive cell k-point + // correspond to. + std::vector superFracs; // twist super twist coordinates + std::vector + superIndex; // the indices of the super twists that correpsond to all + // the primitive cell k-points in the unique list. + { + // scan all the primitive cell k-points + for (int ki = 0; ki < num_prim_kpoints; ki++) { + PosType primTwist = primcell_kpoints[ki]; + PosType superTwist = dot(S, primTwist); + PosType kp = PrimCell.k_cart(primTwist); + PosType ks = SuperCell.k_cart(superTwist); + // check the consistency of tiling, primitive and super cells. + if (dot(ks - kp, ks - kp) > 1.0e-6) { + app_error() << "Primitive and super k-points do not agree. " + "Error in coding.\n"; + APP_ABORT("EinsplineSetBuilder::AnalyzeTwists2"); + } + PosType frac = FracPart(superTwist); + // verify if the super twist that correpsonds to this primitive cell + // k-point exists in the unique list or not. 
+ bool found = false; + for (int j = 0; j < superFracs.size(); j++) { + PosType diff = frac - superFracs[j]; + if (dot(diff, diff) < 1.0e-6) { + found = true; + superIndex.push_back(j); + } + } + if (!found) { + superIndex.push_back(superFracs.size()); + superFracs.push_back(frac); + } + } + assert(superIndex.size() == num_prim_kpoints); + } + + const int numSuperTwists = superFracs.size(); + { + app_log() << "Found " << numSuperTwists << " distinct supercell twist" + << (numSuperTwists > 1 ? "s" : "") << " based on " + << num_prim_kpoints << " primitive cell k-point" + << (num_prim_kpoints > 1 ? "s" : "") << std::endl; + if (this->myComm->rank() == 0) { + int n_tot_irred(0); + for (int si = 0; si < numSuperTwists; si++) { + std::array buf; + int length = std::snprintf(buf.data(), buf.size(), + "Super twist #%d: [ %9.5f %9.5f %9.5f ]\n", si, + superFracs[si][0], superFracs[si][1], superFracs[si][2]); + if (length < 0) + throw std::runtime_error( + "Error converting Super twist to a string"); + app_log() << std::string_view(buf.data(), length); + app_log().flush(); + } + } + } + + // For each supercell twist, create a list of primitive twists which + // correspond to it. + std::vector> superSets; + { + superSets.resize(numSuperTwists); + for (int ki = 0; ki < num_prim_kpoints; ki++) + superSets[superIndex[ki]].push_back(ki); + } + + { // look up a super cell twist and return its index in the unique list of + // super cell twists. + std::function find_twist = + [&](const TinyVector& twist) { + int twist_num = -1; + PosType gtFrac = FracPart(twist); + float eps = 1e-5; + for (int si = 0; si < numSuperTwists; si++) { + PosType locDiff = gtFrac - superFracs[si]; + if (dot(locDiff, locDiff) < eps) + twist_num = si; + } + + if (twist_num < 0) { + std::array buf; + int length = std::snprintf(buf.data(), buf.size(), + "AnalyzeTwists2. 
Input twist [ %9.5f %9.5f %9.5f] not " + "found in the list of super twists above.\n", + twist[0], twist[1], twist[2]); + if (length < 0) + throw std::runtime_error( + "Error generating error message"); + throw UniformCommunicateError(buf.data()); + } + return twist_num; + }; + + if (twist_inp[0] > TWIST_NO_INPUT || twist_inp[1] > TWIST_NO_INPUT || + twist_inp[2] > TWIST_NO_INPUT) { + if (twist_num_inp != TWISTNUM_NO_INPUT) + app_warning() + << "twist attribute exists. twistnum attribute ignored. " + "To prevent this message, remove twistnum from input." + << std::endl; + + twist_num_ = find_twist(twist_inp); + } + else if (twist_num_inp != TWISTNUM_NO_INPUT) { + app_warning() << "twist attribute does't exist but twistnum " + "attribute was found. " + << "This is potentially ambiguous. Specifying twist " + "attribute is preferred." + << std::endl; + if (twist_num_inp < 0 || twist_num_inp >= numSuperTwists) { + std::ostringstream msg; + msg << "AnalyzeTwists2. twistnum input value " << twist_num_inp + << " is outside the acceptable range [0, " << numSuperTwists + << ")." << std::endl; + throw UniformCommunicateError(msg.str()); + } + twist_num_ = twist_num_inp; + } + else { + app_log() << "twist attribte does't exist. Set Gamma point." 
+ << std::endl; + twist_num_ = find_twist({0, 0, 0}); + } + + assert(twist_num_ >= 0 && twist_num_ < numSuperTwists); + + std::array buf; + int length = std::snprintf(buf.data(), buf.size(), + " Using supercell twist %d: [ %9.5f %9.5f %9.5f]", twist_num_, + superFracs[twist_num_][0], superFracs[twist_num_][1], + superFracs[twist_num_][2]); + if (length < 0) + throw std::runtime_error( + "Error converting supercell twist to a string"); + app_log() << std::string_view(buf.data(), length) << std::endl; + } + + TargetPtcl.setTwist(superFracs[twist_num_]); +#ifndef QMC_COMPLEX + // Check to see if supercell twist is okay to use with real wave + // functions + for (int dim = 0; dim < OHMMS_DIM; dim++) { + double t = 2.0 * superFracs[twist_num_][dim]; + if (std::abs(t - round(t)) > MatchingTol * 100) { + app_error() + << "Cannot use this super twist with real wavefunctions.\n" + << "Please recompile with QMC_COMPLEX=1.\n"; + APP_ABORT("EinsplineSetBuilder::AnalyzeTwists2"); + } + } +#endif + // Now check to see that each supercell twist has the right twists + // to tile the primitive cell orbitals. 
+ const int numTwistsNeeded = std::abs(det(TileMatrix)); + for (int si = 0; si < numSuperTwists; si++) { + // First make sure we have enough points + if (superSets[si].size() != numTwistsNeeded) { + std::array buf; + int length = std::snprintf(buf.data(), buf.size(), + "Super twist %d should own %d k-points, but owns %d.\n", si, + numTwistsNeeded, static_cast(superSets[si].size())); + if (length < 0) + throw std::runtime_error("Error generating Super twist string"); + app_error() << std::string_view(buf.data(), length); + if (si == twist_num_) { + APP_ABORT("EinsplineSetBuilder::AnalyzeTwists2"); + } + else + continue; + } + // Now, make sure they are all distinct + int N = superSets[si].size(); + for (int i = 0; i < N; i++) { + PosType twistPrim_i = primcell_kpoints[superSets[si][i]]; + PosType twistSuper_i = dot(S, twistPrim_i); + PosType superInt_i = IntPart(twistSuper_i); + for (int j = i + 1; j < N; j++) { + PosType twistPrim_j = primcell_kpoints[superSets[si][j]]; + PosType twistSuper_j = dot(S, twistPrim_j); + PosType superInt_j = IntPart(twistSuper_j); + if (dot(superInt_i - superInt_j, superInt_i - superInt_j) < + 1.0e-6) { + app_error() + << "Identical k-points detected in super twist set " + << si << std::endl; + APP_ABORT_TRACE(__FILE__, __LINE__, "AnalyzeTwists2"); + } + } + } + } + app_log().flush(); + // Finally, record which k-points to include on this group of + // processors, which have been assigned supercell twist twist_num_ + IncludeTwists.clear(); + for (int i = 0; i < superSets[twist_num_].size(); i++) + IncludeTwists.push_back(superSets[twist_num_][i]); + // Now, find out which twists are distinct + DistinctTwists.clear(); +#ifndef QMC_COMPLEX + std::vector copyTwists; + for (int i = 0; i < IncludeTwists.size(); i++) { + int ti = IncludeTwists[i]; + PosType twist_i = primcell_kpoints[ti]; + bool distinct = true; + for (int j = i + 1; j < IncludeTwists.size(); j++) { + int tj = IncludeTwists[j]; + PosType twist_j = primcell_kpoints[tj]; + 
PosType sum = twist_i + twist_j; + PosType diff = twist_i - twist_j; + if (TwistPair(twist_i, twist_j)) + distinct = false; + } + if (distinct) + DistinctTwists.push_back(ti); + else + copyTwists.push_back(ti); + } + // Now determine which distinct twists require two copies + MakeTwoCopies.resize(DistinctTwists.size()); + for (int i = 0; i < DistinctTwists.size(); i++) { + MakeTwoCopies[i] = false; + int ti = DistinctTwists[i]; + PosType twist_i = primcell_kpoints[ti]; + for (int j = 0; j < copyTwists.size(); j++) { + int tj = copyTwists[j]; + PosType twist_j = primcell_kpoints[tj]; + if (TwistPair(twist_i, twist_j)) + MakeTwoCopies[i] = true; + } + if (this->myComm->rank() == 0) { + std::array buf; + int length = std::snprintf(buf.data(), buf.size(), + "Using %d copies of twist angle [%6.3f, %6.3f, %6.3f]\n", + MakeTwoCopies[i] ? 2 : 1, twist_i[0], twist_i[1], twist_i[2]); + if (length < 0) + throw std::runtime_error("Error generating string"); + app_log() << std::string_view(buf.data(), length); + app_log().flush(); + } + } + // Find out if we can make real orbitals + use_real_splines_ = true; + for (int i = 0; i < DistinctTwists.size(); i++) { + int ti = DistinctTwists[i]; + PosType twist = primcell_kpoints[ti]; + for (int j = 0; j < OHMMS_DIM; j++) + if (std::abs(twist[j] - 0.0) > MatchingTol && + std::abs(twist[j] - 0.5) > MatchingTol && + std::abs(twist[j] + 0.5) > MatchingTol) + use_real_splines_ = false; + } + if (use_real_splines_ && (DistinctTwists.size() > 1)) { + app_log() << "***** Use of real orbitals is possible, but not " + "currently implemented\n" + << " with more than one twist angle.\n"; + use_real_splines_ = false; + } + if (use_real_splines_) + app_log() << "Using real splines.\n"; + else + app_log() << "Using complex splines.\n"; +#else + DistinctTwists.resize(IncludeTwists.size()); + MakeTwoCopies.resize(IncludeTwists.size()); + for (int i = 0; i < IncludeTwists.size(); i++) { + DistinctTwists[i] = IncludeTwists[i]; + MakeTwoCopies[i] = 
false; + } + use_real_splines_ = false; +#endif +} + +template +void +EinsplineSetBuilderT::OccupyBands( + int spin, int sortBands, int numOrbs, bool skipChecks) +{ + if (this->myComm->rank() != 0) + return; + if (spin >= NumSpins && !skipChecks) { + app_error() << "To developer: User is requesting for orbitals in an " + "invalid spin group " + << spin << ". Current h5 file only contains spin groups " + << "[0.." << NumSpins - 1 << "]." << std::endl; + app_error() << "To user: Orbital H5 file contains no spin down data " + "and is appropriate only for spin unpolarized " + "calculations. " + << "If this is your intent, please replace 'spindataset=1' " + "with 'spindataset=0' in the input file." + << std::endl; + abort(); + } + if (Format == ESHDF) { + OccupyBands_ESHDF(spin, sortBands, numOrbs); + return; + } + std::string eigenstatesGroup; + if (Version[0] == 0 && Version[1] == 11) + eigenstatesGroup = "/eigenstates_3"; + else if (Version[0] == 0 && Version[1] == 20) + eigenstatesGroup = "/eigenstates"; + + if (FullBands[spin]->size()) { + app_log() << " FullBand[" << spin << "] exists. Reuse it. 
" + << std::endl; + return; + } + + std::vector& SortBands(*FullBands[spin]); + + SortBands.clear(); + for (int ti = 0; ti < DistinctTwists.size(); ti++) { + int tindex = DistinctTwists[ti]; + // First, read valence states + for (int bi = 0; bi < NumBands; bi++) { + BandInfo band; + band.TwistIndex = tindex; + band.BandIndex = bi; + band.MakeTwoCopies = MakeTwoCopies[ti]; + // Read eigenenergy from file + std::ostringstream ePath, sPath; + if ((Version[0] == 0 && Version[1] == 11) || NumTwists > 1) { + ePath << eigenstatesGroup << "/twist_" << tindex << "/band_" + << bi << "/eigenvalue"; + sPath << eigenstatesGroup << "/twist_" << tindex << "/band_" + << bi << "/spin"; + } + else if (NumBands > 1) { + ePath << eigenstatesGroup << "/twist/band_" << bi + << "/eigenvalue"; + sPath << eigenstatesGroup << "/twist/band_" << bi << "/spin"; + } + else { + ePath << eigenstatesGroup << "/twist/band/eigenvalue"; + sPath << eigenstatesGroup << "/twist/band/spin"; + } + band.Energy = -1.01e100; + H5File.read(band.Energy, ePath.str()); + if (band.Energy > -1.0e100) { + H5File.read(band.Spin, sPath.str()); + if (band.Spin == spin) + SortBands.push_back(band); + } + } + } + int orbIndex = 0; + int numOrbs_counter = 0; + while (numOrbs_counter < numOrbs) { + if (SortBands[orbIndex].MakeTwoCopies) + numOrbs_counter += 2; + else + numOrbs_counter++; + orbIndex++; + } + NumDistinctOrbitals = orbIndex; + app_log() << "We will read " << NumDistinctOrbitals + << " distinct orbitals.\n"; +} + +template +void +EinsplineSetBuilderT::bcastSortBands(int spin, int n, bool root) +{ + std::vector& SortBands(*FullBands[spin]); + + TinyVector nbands(int(SortBands.size()), n); + mpi::bcast(*this->myComm, nbands); + + // buffer to serialize BandInfo + PooledData misc(nbands[0] * 4); + n = NumDistinctOrbitals = nbands[1]; + + if (root) { + misc.rewind(); + for (int i = 0; i < n; ++i) { + misc.put(SortBands[i].TwistIndex); + misc.put(SortBands[i].BandIndex); + misc.put(SortBands[i].Energy); + 
misc.put(SortBands[i].MakeTwoCopies); + } + + for (int i = n; i < SortBands.size(); ++i) { + misc.put(SortBands[i].TwistIndex); + misc.put(SortBands[i].BandIndex); + misc.put(SortBands[i].Energy); + misc.put(SortBands[i].MakeTwoCopies); + } + } + this->myComm->bcast(misc); + + if (!root) { + SortBands.resize(nbands[0]); + misc.rewind(); + for (int i = 0; i < n; ++i) { + misc.get(SortBands[i].TwistIndex); + misc.get(SortBands[i].BandIndex); + misc.get(SortBands[i].Energy); + misc.get(SortBands[i].MakeTwoCopies); + } + for (int i = n; i < SortBands.size(); ++i) { + misc.get(SortBands[i].TwistIndex); + misc.get(SortBands[i].BandIndex); + misc.get(SortBands[i].Energy); + misc.get(SortBands[i].MakeTwoCopies); + } + } +} + +inline bool +sortByIndex(BandInfo leftB, BandInfo rightB) +{ + if (leftB.BandIndex == rightB.BandIndex) { + if ((leftB.Energy < rightB.Energy + 1e-6) && + (leftB.Energy > rightB.Energy - 1e-6)) + return leftB.TwistIndex < rightB.TwistIndex; + else + return leftB.Energy < rightB.Energy; + } + else + return (leftB.BandIndex < rightB.BandIndex); +}; + +template +bool +EinsplineSetBuilderT::ReadOrbitalInfo_ESHDF(bool skipChecks) +{ + app_log() << " Reading orbital file in ESHDF format.\n"; + H5File.read(Version, "/version"); + app_log() << " ESHDF orbital file version " << Version[0] << "." + << Version[1] << "." 
<< Version[2] << std::endl; + H5File.read(Lattice, "/supercell/primitive_vectors"); + RecipLattice = 2.0 * M_PI * inverse(Lattice); + SuperLattice = dot(TileMatrix, Lattice); + std::array buff; + int length = std::snprintf(buff.data(), buff.size(), + " Lattice = \n [ %9.6f %9.6f %9.6f\n" + " %9.6f %9.6f %9.6f\n" + " %9.6f %9.6f %9.6f ]\n", + Lattice(0, 0), Lattice(0, 1), Lattice(0, 2), Lattice(1, 0), + Lattice(1, 1), Lattice(1, 2), Lattice(2, 0), Lattice(2, 1), + Lattice(2, 2)); + if (length < 0) + throw std::runtime_error("Error converting lattice to a string"); + app_log() << std::string_view(buff.data(), length); + length = std::snprintf(buff.data(), buff.size(), + " SuperLattice = \n [ %9.6f %9.6f %9.6f\n" + " %9.6f %9.6f %9.6f\n" + " %9.6f %9.6f %9.6f ]\n", + SuperLattice(0, 0), SuperLattice(0, 1), SuperLattice(0, 2), + SuperLattice(1, 0), SuperLattice(1, 1), SuperLattice(1, 2), + SuperLattice(2, 0), SuperLattice(2, 1), SuperLattice(2, 2)); + if (length < 0) + throw std::runtime_error("Error converting SuperLattice to a string"); + app_log() << std::string_view(buff.data(), length) << std::endl; + if (!CheckLattice()) + throw std::runtime_error("CheckLattice failed"); + PrimCell.set(Lattice); + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + LatticeInv(i, j) = RecipLattice(i, j) / (2.0 * M_PI); + int have_dpsi = false; + NumTwists = NumSpins = NumBands = 0; + NumElectrons = TargetPtcl.getTotalNum(); + H5File.read(NumBands, "/electrons/kpoint_0/spin_0/number_of_states"); + H5File.readEntry(NumSpins, "/electrons/number_of_spins"); + H5File.read(NumTwists, "/electrons/number_of_kpoints"); + H5File.readEntry(have_dpsi, "/electrons/have_dpsi"); + HaveOrbDerivs = have_dpsi; + app_log() << "bands=" << NumBands << ", elecs=" << NumElectrons + << ", spins=" << NumSpins << ", twists=" << NumTwists + << std::endl; + ////////////////////////////////// + // Read ion types and locations // + ////////////////////////////////// + Vector species_ids; + 
H5File.read(species_ids, "/atoms/species_ids"); + int num_species; + H5File.read(num_species, "/atoms/number_of_species"); + std::vector atomic_numbers(num_species); + for (int isp = 0; isp < num_species; isp++) { + std::ostringstream name; + name << "/atoms/species_" << isp << "/atomic_number"; + H5File.readEntry(atomic_numbers[isp], name.str()); + } + IonTypes.resize(species_ids.size()); + for (int i = 0; i < species_ids.size(); i++) + IonTypes[i] = atomic_numbers[species_ids[i]]; + H5File.read(IonPos, "/atoms/positions"); + for (int i = 0; i < IonTypes.size(); i++) + app_log() << "Atom type(" << i << ") = " << IonTypes[i] << std::endl; + ///////////////////////////////////// + // Read atom orbital info from xml // + ///////////////////////////////////// + // construct Super2Prim mapping. + if (Super2Prim.size() == 0) { + // SourcePtcl->convert2Cart(SourcePtcl->R); + Super2Prim.resize(SourcePtcl->R.size(), -1); + std::vector prim_atom_counts; + prim_atom_counts.resize(IonPos.size(), 0); + for (int i = 0; i < SourcePtcl->R.size(); i++) { + PosType ref = PrimCell.toUnit_floor(SourcePtcl->R[i]); + for (int j = 0; j < IonPos.size(); j++) { + PosType dr = PrimCell.toUnit_floor(IonPos[j]) - ref; + for (int k = 0; k < OHMMS_DIM; k++) + dr[k] -= round(dr[k]); + if (dot(dr, dr) < MatchingTol) { + if (Super2Prim[i] < 0) { + Super2Prim[i] = j; + prim_atom_counts[j]++; + } + else { + app_error() + << "Supercell ion " << i << " at " + << SourcePtcl->R[j] + << " was found twice in the primitive cell as ion " + << Super2Prim[i] << " and " << j << std::endl; + if (!skipChecks) + abort(); + } + } + } + if (Super2Prim[i] < 0) { + app_error() << "Supercell ion " << i + << " not found in the primitive cell" << std::endl; + if (!skipChecks) + abort(); + } + else { + // app_log() << "Supercell ion " << i << " mapped to primitive + // cell ion " << Super2Prim[i] << std::endl; + } + } + const int tiling_size = std::abs(det(TileMatrix)); + for (int i = 0; i < IonPos.size(); i++) + if 
(prim_atom_counts[i] != tiling_size) { + app_error() << "Primitive cell ion " << i << " was found only " + << prim_atom_counts[i] + << " times in the supercell rather than " + << tiling_size << std::endl; + if (!skipChecks) + abort(); + } + // construct AtomicCentersInfo + AtomicCentersInfo.resize(IonPos.size()); + for (int i = 0; i < IonPos.size(); i++) + AtomicCentersInfo.ion_pos[i] = IonPos[i]; + const auto& source_species = SourcePtcl->getSpeciesSet(); + int Zind = source_species.findAttribute("atomicnumber"); + const int table_id = SourcePtcl->addTable(*SourcePtcl); + const auto& ii_table = SourcePtcl->getDistTable(table_id); + SourcePtcl->update(true); + for (int i = 0; i < IonPos.size(); i++) { + AtomicCentersInfo.non_overlapping_radius[i] = + std::numeric_limits::max(); + // should only call get_first_neighbor to set non_overlapping_radius + // if there are more than one atom in the cell + if (Super2Prim.size() == 1) + continue; + for (int j = 0; j < Super2Prim.size(); j++) + if (Super2Prim[j] == i) { + // set GroupID for each ion in primitive cell + if ((Zind < 0) || + (source_species(Zind, SourcePtcl->GroupID[j]) == + IonTypes[i])) + AtomicCentersInfo.GroupID[i] = SourcePtcl->GroupID[j]; + else { + app_error() + << "Primitive cell ion " << i + << " vs supercell ion " << j + << " atomic number not matching: " << IonTypes[i] + << " vs " + << source_species(Zind, SourcePtcl->GroupID[j]) + << std::endl; + if (!skipChecks) + abort(); + } + // set non_overlapping_radius for each ion in primitive cell + RealType r(0); + PosType dr; + ii_table.get_first_neighbor(j, r, dr, false); + if (r < 1e-3) + APP_ABORT("EinsplineSetBuilder::ReadOrbitalInfo_ESHDF " + "too close ions <1e-3 bohr!"); + AtomicCentersInfo.non_overlapping_radius[i] = 0.5 * r; + break; + } + } + + // load cutoff_radius, spline_radius, spline_npoints, lmax if exists. 
+ const int inner_cutoff_ind = + source_species.findAttribute("inner_cutoff"); + const int cutoff_radius_ind = + source_species.findAttribute("cutoff_radius"); + const int spline_radius_ind = + source_species.findAttribute("spline_radius"); + const int spline_npoints_ind = + source_species.findAttribute("spline_npoints"); + const int lmax_ind = source_species.findAttribute("lmax"); + + for (int center_idx = 0; center_idx < AtomicCentersInfo.Ncenters; + center_idx++) { + const int my_GroupID = AtomicCentersInfo.GroupID[center_idx]; + if (inner_cutoff_ind >= 0) + AtomicCentersInfo.inner_cutoff[center_idx] = + source_species(inner_cutoff_ind, my_GroupID); + if (cutoff_radius_ind >= 0) + AtomicCentersInfo.cutoff[center_idx] = + source_species(cutoff_radius_ind, my_GroupID); + if (spline_radius_ind >= 0) + AtomicCentersInfo.spline_radius[center_idx] = + source_species(spline_radius_ind, my_GroupID); + if (spline_npoints_ind >= 0) + AtomicCentersInfo.spline_npoints[center_idx] = + source_species(spline_npoints_ind, my_GroupID); + if (lmax_ind >= 0) + AtomicCentersInfo.lmax[center_idx] = + source_species(lmax_ind, my_GroupID); + } + } + /////////////////////////// + // Read the twist angles // + /////////////////////////// + primcell_kpoints.resize(NumTwists); + for (int ti = 0; ti < NumTwists; ti++) { + std::ostringstream path; + path << "/electrons/kpoint_" << ti << "/reduced_k"; + TinyVector primcell_kpoints_DP; + H5File.read(primcell_kpoints_DP, path.str()); + primcell_kpoints[ti] = primcell_kpoints_DP; + } + if (qmc_common.use_density) { + ////////////////////////////////////////////////////////// + // Only if it is bulk: If the density has not been set in TargetPtcl, + // and // the density is available, read it in and save it // in + // TargetPtcl. 
// + ////////////////////////////////////////////////////////// + if (TargetPtcl.getLattice().SuperCellEnum == SUPERCELL_BULK) { + // FIXME: add support for more than one spin density + if (TargetPtcl.Density_G.empty()) { + Array Density_r_DP; + TinyVector mesh; + H5File.read(TargetPtcl.DensityReducedGvecs, + "/electrons/density/gvectors"); + int numG = TargetPtcl.DensityReducedGvecs.size(); +// Convert primitive G-vectors to supercell G-vectors +// Also, flip sign since ESHDF format uses opposite sign convention +#pragma omp parallel for + for (int iG = 0; iG < numG; iG++) + TargetPtcl.DensityReducedGvecs[iG] = -1 * + dot(TileMatrix, TargetPtcl.DensityReducedGvecs[iG]); + app_log() << " Read " << numG << " density G-vectors.\n"; + for (int ispin = 0; ispin < NumSpins; ispin++) { + std::ostringstream density_r_path, density_g_path; + density_r_path << "/electrons/density/spin_" << ispin + << "/density_r"; + density_g_path << "/electrons/density/spin_" << ispin + << "/density_g"; + H5File.readEntry(Density_r_DP, density_r_path.str()); + TargetPtcl.Density_r = Density_r_DP; + if (TargetPtcl.DensityReducedGvecs.size()) { + app_log() << " EinsplineSetBuilder found density in " + "the HDF5 file.\n"; + std::vector density_G; + std::vector> Density_G_DP; + H5File.read(Density_G_DP, density_g_path.str()); + density_G.assign( + Density_G_DP.begin(), Density_G_DP.end()); + if (!density_G.size()) { + app_error() << " Density reduced G-vectors " + "defined, but not the" + << " density.\n"; + abort(); + } + else { + if (ispin == 0) + TargetPtcl.Density_G = density_G; + else + for (int iG = 0; iG < density_G.size(); iG++) + TargetPtcl.Density_G[iG] += density_G[iG]; + } + } + } + } + ////////////////////////////////////////////////////////// + // If the density has not been set in TargetPtcl, and // + // the density is available, read it in and save it // + // in TargetPtcl. 
// + ////////////////////////////////////////////////////////// + // FIXME: add support for more than one spin potential + if (!TargetPtcl.VHXC_r[0].size()) { + TinyVector mesh; + H5File.readEntry( + TargetPtcl.VHXCReducedGvecs, "/electrons/VHXC/gvectors"); + int numG = TargetPtcl.VHXCReducedGvecs.size(); +// Convert primitive G-vectors to supercell G-vectors +// Also, flip sign since ESHDF format uses opposite sign convention +#pragma omp parallel for + for (int iG = 0; iG < numG; iG++) + TargetPtcl.VHXCReducedGvecs[iG] = + -1 * dot(TileMatrix, TargetPtcl.VHXCReducedGvecs[iG]); + app_log() << " Read " << numG << " VHXC G-vectors.\n"; + for (int ispin = 0; ispin < NumSpins; ispin++) { + Array VHXC_r_DP; + std::ostringstream VHXC_r_path, VHXC_g_path; + VHXC_r_path << "/electrons/VHXC/spin_" << ispin + << "/VHXC_r"; + VHXC_g_path << "/electrons/VHXC/spin_" << ispin + << "/VHXC_g"; + H5File.readEntry(VHXC_r_DP, VHXC_r_path.str()); + TargetPtcl.VHXC_r[ispin] = VHXC_r_DP; + if (TargetPtcl.VHXCReducedGvecs.size()) { + app_log() << " EinsplineSetBuilder found VHXC in the " + "HDF5 file.\n"; + std::vector> VHXC_G_DP; + std::vector VHXC_G; + H5File.read(VHXC_G_DP, VHXC_g_path.str()); + VHXC_G.assign(VHXC_G_DP.begin(), VHXC_G_DP.end()); + if (!VHXC_G.size()) { + app_error() << " VHXC reduced G-vectors defined, " + "but not the" + << " VHXC.\n"; + abort(); + } + else + TargetPtcl.VHXC_G[ispin] = VHXC_G; + } + } + } + } + } + else { + app_log() << " Skip initialization of the density" << std::endl; + } + return true; +} + +template +void +EinsplineSetBuilderT::OccupyBands_ESHDF(int spin, int sortBands, int numOrbs) +{ + if (this->myComm->rank() != 0) + return; + + std::vector& SortBands(*FullBands[spin]); + SortBands.clear(); //??? can exit if SortBands is already made? 
+ int maxOrbs(0); + for (int ti = 0; ti < DistinctTwists.size(); ti++) { + int tindex = DistinctTwists[ti]; + // First, read valence states + std::ostringstream ePath; + ePath << "/electrons/kpoint_" << tindex << "/spin_" << spin + << "/eigenvalues"; + std::vector eigvals; + H5File.read(eigvals, ePath.str()); + for (int bi = 0; bi < NumBands; bi++) { + BandInfo band; + band.TwistIndex = tindex; + band.BandIndex = bi; + band.MakeTwoCopies = MakeTwoCopies[ti]; + band.Energy = eigvals[bi]; + if (band.Energy > -1.0e100) + SortBands.push_back(band); + if (MakeTwoCopies[ti]) + maxOrbs += 2; + else + maxOrbs++; + } + } + + app_log() + << SortBands.size() + << " complex-valued orbitals supplied by h5 can be expanded up to " + << maxOrbs << " SPOs." << std::endl; + if (maxOrbs < numOrbs) + this->myComm->barrier_and_abort( + "EinsplineSetBuilder::OccupyBands_ESHDF user input requests " + "more orbitals than what the h5 file supplies."); + + // Now sort the bands by energy + if (sortBands == 2) { + app_log() << "Sorting the bands by index now:\n"; + sort(SortBands.begin(), SortBands.end(), sortByIndex); + } + else if (sortBands == 1) { + app_log() << "Sorting the bands now:\n"; + sort(SortBands.begin(), SortBands.end()); + } + + std::vector gsOcc(maxOrbs); + int N_gs_orbs = numOrbs; + int nocced(0); + for (int ti = 0; ti < SortBands.size(); ti++) { + if (nocced < N_gs_orbs) { + if (SortBands[ti].MakeTwoCopies && (N_gs_orbs - nocced > 1)) { + nocced += 2; + gsOcc[ti] = 2; + } + else if ((SortBands[ti].MakeTwoCopies && + (N_gs_orbs - nocced == 1)) || + !SortBands[ti].MakeTwoCopies) { + nocced += 1; + gsOcc[ti] = 1; + } + } + } + if (occ_format == "energy") { + app_log() << " Occupying bands based on energy in mode " + << (Occ.size() > 0 ? "\"excited\"" : "\"ground\"") + << std::endl; + // To get the occupations right. 
+ std::vector Removed(0, 0); + std::vector Added(0, 0); + for (int ien = 0; ien < Occ.size(); ien++) { + if (Occ[ien] < 0) + Removed.push_back(-Occ[ien]); + else if (Occ[ien] > 0) + Added.push_back(Occ[ien]); + } + if (Added.size() - Removed.size() != 0) { + app_log() << "need to add and remove same number of orbitals. " + << Added.size() << " " << Removed.size() << std::endl; + APP_ABORT("ChangedOccupations"); + } + std::vector DiffOcc(maxOrbs, 0); + // Probably a cleaner way to do this. + for (int i = 0; i < Removed.size(); i++) + DiffOcc[Removed[i] - 1] -= 1; + for (int i = 0; i < Added.size(); i++) + DiffOcc[Added[i] - 1] += 1; + std::vector SumOrb(SortBands.size(), 0); + int doi(0); + for (int i = 0; i < SumOrb.size(); i++) { + if (SortBands[i].MakeTwoCopies) { + SumOrb[i] = gsOcc[i] + DiffOcc[doi++]; + SumOrb[i] += DiffOcc[doi++]; + } + else + SumOrb[i] = gsOcc[i] + DiffOcc[doi++]; + } + std::vector ReOrderedBands; + std::vector RejectedBands; + for (int i = 0; i < SumOrb.size(); i++) { + if (SumOrb[i] == 2) { + SortBands[i].MakeTwoCopies = true; + ReOrderedBands.push_back(SortBands[i]); + } + else if (SumOrb[i] == 1) { + SortBands[i].MakeTwoCopies = false; + ReOrderedBands.push_back(SortBands[i]); + } + else if (SumOrb[i] == 0) { + SortBands[i].MakeTwoCopies = false; + RejectedBands.push_back(SortBands[i]); + } + else { + app_log() << " Trying to add the same orbital (" << i + << ") less than zero or more than 2 times." + << std::endl; + APP_ABORT("Sorting Excitation"); + } + } + ReOrderedBands.insert( + ReOrderedBands.end(), RejectedBands.begin(), RejectedBands.end()); + SortBands = ReOrderedBands; + } + else if (occ_format == "band") { + app_log() << " Occupying bands based on (ti,bi) data." << std::endl; + if (Occ.size() != particle_hole_pairs * 4) { + app_log() + << " Need Occ = pairs*4. Occ is (ti,bi) of removed, then added." 
+ << std::endl; + app_log() << Occ.size() << " " << particle_hole_pairs << std::endl; + APP_ABORT("ChangedOccupations"); + } + int cnt(0); + for (int ien = 0; ien < SortBands.size(); ien++) { + if ((Occ[cnt] == SortBands[ien].TwistIndex) && + (Occ[cnt + 1] == SortBands[ien].BandIndex)) { + if (cnt < particle_hole_pairs * 2) { + gsOcc[ien] -= 1; + cnt += 2; + app_log() << "removing orbital " << ien << std::endl; + } + else { + gsOcc[ien] += 1; + app_log() << "adding orbital " << ien << std::endl; + cnt += 2; + } + } + } + std::vector ReOrderedBands; + std::vector RejectedBands; + for (int i = 0; i < SortBands.size(); i++) { + if (gsOcc[i] == 2) { + SortBands[i].MakeTwoCopies = true; + ReOrderedBands.push_back(SortBands[i]); + } + else if (gsOcc[i] == 1) { + SortBands[i].MakeTwoCopies = false; + ReOrderedBands.push_back(SortBands[i]); + } + else if (gsOcc[i] == 0) { + SortBands[i].MakeTwoCopies = false; + RejectedBands.push_back(SortBands[i]); + } + else { + app_log() << " Trying to add the same orbital (" << i + << ") less than zero or more than 2 times." + << std::endl; + APP_ABORT("Sorting Excitation"); + } + } + ReOrderedBands.insert( + ReOrderedBands.end(), RejectedBands.begin(), RejectedBands.end()); + SortBands = ReOrderedBands; + } + // for(int sw=0;sw +void +EinsplineSetBuilderT::set_metadata(int numOrbs, int twist_num_inp, + const TinyVector& twist_inp, bool skipChecks) +{ + // 1. set a lot of internal parameters in the EinsplineSetBuilder class + // e.g. TileMatrix, use_real_splines_, DistinctTwists, MakeTwoCopies. + // 2. this is also where metadata for the orbitals are read from the + // wavefunction hdf5 file + // and broadcast to MPI groups. Variables broadcasted are listed in + // EinsplineSetBuilderCommon.cpp + // EinsplineSetBuilder::BroadcastOrbitalInfo() + // + + Timer orb_info_timer; + // The tiling can be set by a simple vector, (e.g. 2x2x2), or by a + // full 3x3 matrix of integers. If the tilematrix was not set in + // the input file... 
+ bool matrixNotSet = true; + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + matrixNotSet = matrixNotSet && (TileMatrix(i, j) == 0); + // then set the matrix to identity. + if (matrixNotSet) + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + TileMatrix(i, j) = (i == j) ? 1 : 0; + if (this->myComm->rank() == 0) { + std::array buff; + int length = std::snprintf(buff.data(), buff.size(), + " TileMatrix = \n [ %2d %2d %2d\n %2d %2d %2d\n %2d %2d %2d " + "]\n", + TileMatrix(0, 0), TileMatrix(0, 1), TileMatrix(0, 2), + TileMatrix(1, 0), TileMatrix(1, 1), TileMatrix(1, 2), + TileMatrix(2, 0), TileMatrix(2, 1), TileMatrix(2, 2)); + if (length < 0) + throw std::runtime_error("Error converting TileMatrix to a string"); + app_log() << std::string_view(buff.data(), length); + } + if (numOrbs == 0) + this->myComm->barrier_and_abort( + "EinsplineSetBuilder::createSPOSet You must specify the number of " + "orbitals in the input file."); + else + app_log() << " Reading " << numOrbs << " orbitals from HDF5 file.\n"; + + ///////////////////////////////////////////////////////////////// + // Read the basic orbital information, without reading all the // + // orbitals themselves. 
// + ///////////////////////////////////////////////////////////////// + orb_info_timer.restart(); + if (this->myComm->rank() == 0) + if (!ReadOrbitalInfo(skipChecks)) + throw std::runtime_error("EinsplineSetBuilder::set_metadata Error " + "reading orbital info from HDF5 file."); + app_log() << "TIMER EinsplineSetBuilder::ReadOrbitalInfo " + << orb_info_timer.elapsed() << std::endl; + this->myComm->barrier(); + + orb_info_timer.restart(); + BroadcastOrbitalInfo(); + app_log() << "TIMER EinsplineSetBuilder::BroadcastOrbitalInfo " + << orb_info_timer.elapsed() << std::endl; + app_log().flush(); + + // setup primitive cell and supercell + PrimCell.set(Lattice); + SuperCell.set(SuperLattice); + GGt = dot(transpose(PrimCell.G), PrimCell.G); + + // Now, analyze the k-point mesh to figure out the what k-points are needed + AnalyzeTwists2(twist_num_inp, twist_inp); +} + +template +std::unique_ptr> +EinsplineSetBuilderT::createSPOSetFromXML(xmlNodePtr cur) +{ + // use 2 bohr as the default when truncated orbitals are used based on the + // extend of the ions + int numOrbs = 0; + int sortBands(1); + int spinSet = 0; + bool skipChecks = false; + int twist_num_inp = TWISTNUM_NO_INPUT; + TinyVector twist_inp(TWIST_NO_INPUT); + + std::string sourceName; + std::string spo_prec("double"); + std::string truncate("no"); + std::string hybrid_rep("no"); + std::string skip_checks("no"); + std::string use_einspline_set_extended( + "no"); // use old spline library for high-order derivatives, e.g. 
needed + // for backflow optimization + std::string useGPU; + std::string GPUsharing = "no"; + std::string spo_object_name; + + ScopedTimer spo_timer_scope(createGlobalTimer( + "einspline::CreateSPOSetFromXML", timer_level_medium)); + + { + TinyVector TileFactor_do_not_use; + OhmmsAttributeSet a; + a.add(H5FileName, "href"); + a.add(TileFactor_do_not_use, "tile", {}, TagStatus::DELETED); + a.add(sortBands, "sort"); + a.add(TileMatrix, "tilematrix"); + a.add(twist_num_inp, "twistnum"); + a.add(twist_inp, "twist"); + a.add(sourceName, "source"); + a.add(MeshFactor, "meshfactor"); + a.add(hybrid_rep, "hybridrep"); + a.add(useGPU, "gpu", CPUOMPTargetSelector::candidate_values); + a.add(GPUsharing, + "gpusharing"); // split spline across GPUs visible per rank + a.add(spo_prec, "precision"); + a.add(truncate, "truncate"); + a.add(this->myName, "tag"); + a.add(skip_checks, "skip_checks"); + + a.put(XMLRoot); + a.add(numOrbs, "size"); + a.add(numOrbs, "norbs"); + a.add(spinSet, "spindataset"); + a.add(spinSet, "group"); + a.put(cur); + + if (this->myName.empty()) + this->myName = "einspline"; + } + + if (skip_checks == "yes") + skipChecks = true; + + auto pit(ParticleSets.find(sourceName)); + if (pit == ParticleSets.end()) + this->myComm->barrier_and_abort( + "Einspline needs the source particleset"); + else + SourcePtcl = pit->second.get(); + + /////////////////////////////////////////////// + // Read occupation information from XML file // + /////////////////////////////////////////////// + const std::vector last_occ(Occ); + Occ.resize(0, 0); // correspond to ground + bool NewOcc(false); + + { + OhmmsAttributeSet oAttrib; + oAttrib.add(spinSet, "spindataset"); + oAttrib.add(spo_object_name, "name"); + oAttrib.add(spo_object_name, "id"); + oAttrib.put(cur); + } + + xmlNodePtr spo_cur = cur; + cur = cur->children; + while (cur != NULL) { + std::string cname((const char*)(cur->name)); + if (cname == "occupation") { + std::string occ_mode("ground"); + occ_format = "energy"; 
+ particle_hole_pairs = 0; + OhmmsAttributeSet oAttrib; + oAttrib.add(occ_mode, "mode"); + oAttrib.add(spinSet, "spindataset"); + oAttrib.add(occ_format, "format"); + oAttrib.add(particle_hole_pairs, "pairs"); + oAttrib.put(cur); + if (occ_mode == "excited") + putContent(Occ, cur); + else if (occ_mode != "ground") + this->myComm->barrier_and_abort( + "EinsplineSetBuilder::createSPOSet Only ground state " + "occupation " + "currently supported in EinsplineSetBuilder."); + } + cur = cur->next; + } + if (Occ != last_occ) { + NewOcc = true; + } + else + NewOcc = false; +#if defined(MIXED_PRECISION) + app_log() << "\t MIXED_PRECISION=1 Overwriting the einspline storage to " + "single precision.\n"; + spo_prec = "single"; // overwrite +#endif + H5OrbSet aset(H5FileName, spinSet, numOrbs); + const auto iter = SPOSetMap.find(aset); + if ((iter != SPOSetMap.end()) && (!NewOcc)) + app_warning() + << "!!!!!!! Identical SPOSets are detected by EinsplineSetBuilder! " + "Implicit sharing one SPOSet for spin-up and spin-down " + "electrons has been removed. " + "Each determinant creates its own SPOSet with dedicated memory " + "for spline coefficients. " + "To avoid increasing the memory footprint of spline " + "coefficients, " + "create a single SPOset outside the determinantset using " + "'sposet_collection' " + "and reference it by name on the determinant line." 
+ << std::endl; + + if (FullBands[spinSet] == 0) + FullBands[spinSet] = std::make_unique>(); + + // Ensure the first SPO set must be spinSet==0 + // to correctly initialize key data of EinsplineSetBuilder + if (SPOSetMap.size() == 0 && spinSet != 0) + this->myComm->barrier_and_abort( + "The first SPO set must have spindataset=\"0\""); + + // set the internal parameters + if (spinSet == 0) + set_metadata(numOrbs, twist_num_inp, twist_inp, skipChecks); + + ////////////////////////////////// + // Create the OrbitalSet object + ////////////////////////////////// + Timer mytimer; + mytimer.restart(); + OccupyBands(spinSet, sortBands, numOrbs, skipChecks); + if (spinSet == 0) + TileIons(); + + bool use_single = (spo_prec == "single" || spo_prec == "float"); + + // safeguard for a removed feature + if (truncate == "yes") + this->myComm->barrier_and_abort( + "The 'truncate' feature of spline SPO has been removed. Please use " + "hybrid orbital representation."); + + createBsplineReader(use_single, hybrid_rep == "yes", useGPU); + + MixedSplineReader->setCommon(XMLRoot); + // temporary disable the following function call, Ye Luo + // RotateBands_ESHDF(spinSet, + // dynamic_cast >*>(OrbitalSet)); + bcastSortBands(spinSet, NumDistinctOrbitals, this->myComm->rank() == 0); + auto OrbitalSet = MixedSplineReader->create_spline_set(spinSet, spo_cur); + if (!OrbitalSet) + this->myComm->barrier_and_abort("Failed to create SPOSet*"); + app_log() << "Time spent in creating B-spline SPOs " << mytimer.elapsed() + << "sec" << std::endl; + OrbitalSet->finalizeConstruction(); + SPOSetMap[aset] = OrbitalSet.get(); + return OrbitalSet; +} + +template +void +EinsplineSetBuilderT::createBsplineReader( + bool useSingle, bool hybridRep, const std::string& useGPU) +{ + if (use_real_splines_) { + // if(TargetPtcl.Lattice.SuperCellEnum != SUPERCELL_BULK && + // truncate=="yes") + if (MixedSplineReader == 0) { + if (useSingle) + MixedSplineReader = + createBsplineRealSingleT(this, hybridRep, useGPU); 
+ else + MixedSplineReader = + createBsplineRealDoubleT(this, hybridRep, useGPU); + } + } + else { + if (MixedSplineReader == 0) { + if (useSingle) + MixedSplineReader = + createBsplineComplexSingleT(this, hybridRep, useGPU); + else + MixedSplineReader = + createBsplineComplexDoubleT(this, hybridRep, useGPU); + } + } +} + +template <> +void +EinsplineSetBuilderT>::createBsplineReader( + bool useSingle, bool hybridRep, const std::string& useGPU) +{ + if (MixedSplineReader == 0) { + if (useSingle) + MixedSplineReader = + createBsplineComplexSingleT(this, hybridRep, useGPU); + else + MixedSplineReader = + createBsplineComplexDoubleT(this, hybridRep, useGPU); + } +} + +template <> +void +EinsplineSetBuilderT>::createBsplineReader( + bool useSingle, bool hybridRep, const std::string& useGPU) +{ + if (MixedSplineReader == 0) { + if (useSingle) + MixedSplineReader = + createBsplineComplexSingleT(this, hybridRep, useGPU); + else + MixedSplineReader = + createBsplineComplexDoubleT(this, hybridRep, useGPU); + } +} + +template +std::unique_ptr> +EinsplineSetBuilderT::createSPOSet( + xmlNodePtr cur, SPOSetInputInfo& input_info) +{ + if (MixedSplineReader == 0) + this->myComm->barrier_and_abort( + "EinsplineSetExtended cannot create a SPOSet"); + + std::string aname; + int spinSet(0); + OhmmsAttributeSet a; + a.add(spinSet, "spindataset"); + a.add(spinSet, "group"); + a.put(cur); + + // allow only non-overlapping index sets and use the max index as the + // identifier + int norb = input_info.max_index(); + H5OrbSet aset(H5FileName, spinSet, norb); + + auto bspline_zd = + MixedSplineReader->create_spline_set(spinSet, cur, input_info); + if (bspline_zd) + SPOSetMap[aset] = bspline_zd.get(); + return bspline_zd; +} + +template +bool +EinsplineSetBuilderT::ReadOrbitalInfo(bool skipChecks) +{ + if (!H5File.open(H5FileName, H5F_ACC_RDONLY)) { + app_error() << "Could not open HDF5 file \"" << H5FileName + << "\" in EinsplineSetBuilder::ReadOrbitalInfo.\n"; + return false; + } + + // 
Read format + std::string format; + H5File.read(format, "/format"); + H5File.read(Version, "/version"); + app_log() << " HDF5 orbital file version " << Version[0] << "." + << Version[1] << "." << Version[2] << "\n"; + if (format.find("ES") < format.size()) { + Format = ESHDF; + return ReadOrbitalInfo_ESHDF(skipChecks); + } + + app_error() << "EinsplineSetBuilder::ReadOrbitalInfo too old h5 file which " + "is not in ESHDF format! Regenerate the h5 file"; + return false; +} + +template +bool +EinsplineSetBuilderT::ReadGvectors_ESHDF() +{ + bool root = this->myComm->rank() == 0; + // this is always ugly + MeshSize = 0; + int hasPsig = 1; + if (root) { + H5File.readEntry(MeshSize, "/electrons/psi_r_mesh"); + H5File.readEntry(MeshSize, "/electrons/mesh"); + } + this->myComm->bcast(MeshSize); + hasPsig = (MeshSize[0] == 0); + if (hasPsig) { + int nallowed = 257; + int allowed[] = {72, 75, 80, 81, 90, 96, 100, 108, 120, 125, 128, 135, + 144, 150, 160, 162, 180, 192, 200, 216, 225, 240, 243, 250, 256, + 270, 288, 300, 320, 324, 360, 375, 384, 400, 405, 432, 450, 480, + 486, 500, 512, 540, 576, 600, 625, 640, 648, 675, 720, 729, 750, + 768, 800, 810, 864, 900, 960, 972, 1000, 1024, 1080, 1125, 1152, + 1200, 1215, 1250, 1280, 1296, 1350, 1440, 1458, 1500, 1536, 1600, + 1620, 1728, 1800, 1875, 1920, 1944, 2000, 2025, 2048, 2160, 2187, + 2250, 2304, 2400, 2430, 2500, 2560, 2592, 2700, 2880, 2916, 3000, + 3072, 3125, 3200, 3240, 3375, 3456, 3600, 3645, 3750, 3840, 3888, + 4000, 4050, 4096, 4320, 4374, 4500, 4608, 4800, 4860, 5000, 5120, + 5184, 5400, 5625, 5760, 5832, 6000, 6075, 6144, 6250, 6400, 6480, + 6561, 6750, 6912, 7200, 7290, 7500, 7680, 7776, 8000, 8100, 8192, + 8640, 8748, 9000, 9216, 9375, 9600, 9720, 10000, 10125, 10240, + 10368, 10800, 10935, 11250, 11520, 11664, 12000, 12150, 12288, + 12500, 12800, 12960, 13122, 13500, 13824, 14400, 14580, 15000, + 15360, 15552, 15625, 16000, 16200, 16384, 16875, 17280, 17496, + 18000, 18225, 18432, 18750, 19200, 19440, 19683, 
20000, 20250, + 20480, 20736, 21600, 21870, 22500, 23040, 23328, 24000, 24300, + 24576, 25000, 25600, 25920, 26244, 27000, 27648, 28125, 28800, + 29160, 30000, 30375, 30720, 31104, 31250, 32000, 32400, 32768, + 32805, 33750, 34560, 34992, 36000, 36450, 36864, 37500, 38400, + 38880, 39366, 40000, 40500, 40960, 41472, 43200, 43740, 45000, + 46080, 46656, 46875, 48000, 48600, 49152, 50000, 50625, 51200, + 51840, 52488, 54000, 54675, 55296, 56250, 57600, 58320, 59049, + 60000, 60750, 61440, 62208, 62500, 64000, 64800, 65536}; + MaxNumGvecs = 0; + // std::set > Gset; + // Read k-points for all G-vectors and take the union + TinyVector maxIndex(0, 0, 0); + Gvecs.resize(NumTwists); + { + int numg = 0; + if (root) { + std::ostringstream Gpath; + Gpath << "/electrons/kpoint_0/gvectors"; + H5File.read(Gvecs[0], Gpath.str()); + numg = Gvecs[0].size(); + } + this->myComm->bcast(numg); + if (!root) + Gvecs[0].resize(numg); + this->myComm->bcast(Gvecs[0]); + MaxNumGvecs = Gvecs[0].size(); + for (int ig = 0; ig < Gvecs[0].size(); ig++) { + maxIndex[0] = std::max(maxIndex[0], std::abs(Gvecs[0][ig][0])); + maxIndex[1] = std::max(maxIndex[1], std::abs(Gvecs[0][ig][1])); + maxIndex[2] = std::max(maxIndex[2], std::abs(Gvecs[0][ig][2])); + } + // for (int ig=0; ig=2 up to 65536 + int* ix = std::lower_bound(allowed, allowed + nallowed, MeshSize[0]); + int* iy = std::lower_bound(allowed, allowed + nallowed, MeshSize[1]); + int* iz = std::lower_bound(allowed, allowed + nallowed, MeshSize[2]); + MeshSize[0] = + (MeshSize[0] > 128) ? *ix : (MeshSize[0] + MeshSize[0] % 2); + MeshSize[1] = + (MeshSize[1] > 128) ? *iy : (MeshSize[1] + MeshSize[1] % 2); + MeshSize[2] = + (MeshSize[2] > 128) ? *iz : (MeshSize[2] + MeshSize[2] % 2); + if (Version[0] < 2) { + // get the map for each twist, but use the MeshSize from kpoint_0 + app_log() << " ESHDF::Version " << Version << std::endl; + app_log() << " Assumes distinct Gvecs set for different twists. " + "Regenerate orbital files using updated QE." 
+ << std::endl; + for (int k = 0; k < DistinctTwists.size(); ++k) { + int ik = DistinctTwists[k]; + if (ik == 0) + continue; // already done + int numg = 0; + if (root) { + std::ostringstream Gpath; + Gpath << "/electrons/kpoint_" << ik << "/gvectors"; + H5File.read(Gvecs[ik], Gpath.str()); + numg = Gvecs[ik].size(); + } + this->myComm->bcast(numg); + if (numg == 0) { + // copy kpoint_0, default + Gvecs[ik] = Gvecs[0]; + } + else { + if (numg != MaxNumGvecs) { + std::ostringstream o; + o << "Twist " << ik + << ": The number of Gvecs is different from kpoint_0." + << " This is not supported anymore. Rerun " + "pw2qmcpack.x or equivalent"; + APP_ABORT(o.str()); + } + if (!root) + Gvecs[ik].resize(numg); + this->myComm->bcast(Gvecs[ik]); + } + } + } + } + app_log() << "B-spline mesh factor is " << MeshFactor << std::endl; + app_log() << "B-spline mesh size is (" << MeshSize[0] << ", " << MeshSize[1] + << ", " << MeshSize[2] << ")\n"; + app_log() << "Maxmimum number of Gvecs " << MaxNumGvecs << std::endl; + app_log().flush(); + return hasPsig; +} + +template class EinsplineSetBuilderT; +template class EinsplineSetBuilderT; +template class EinsplineSetBuilderT>; +template class EinsplineSetBuilderT>; + +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderT.h b/src/QMCWaveFunctions/EinsplineSetBuilderT.h new file mode 100644 index 0000000000..b7d6e3658e --- /dev/null +++ b/src/QMCWaveFunctions/EinsplineSetBuilderT.h @@ -0,0 +1,334 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at +// Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign Jaron T. 
Krogel, +// krogeljt@ornl.gov, Oak Ridge National Laboratory Jeongnim +// Kim, jeongnim.kim@gmail.com, University of Illinois at +// Urbana-Champaign Ye Luo, yeluo@anl.gov, Argonne National +// Laboratory Raymond Clay III, j.k.rofling@gmail.com, +// Lawrence Livermore National Laboratory Mark A. Berrill, +// berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Ken Esler, kpesler@gmail.com, University of Illinois at +// Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +/** @file EinsplineSetBuilder.h + * + * Builder class for einspline-based SPOSet objects. + */ +#ifndef QMCPLUSPLUS_EINSPLINE_SET_BUILDERT_H +#define QMCPLUSPLUS_EINSPLINE_SET_BUILDERT_H + +#include "QMCWaveFunctions/BandInfo.h" +#include "QMCWaveFunctions/EinsplineSetBuilder.h" +#include "QMCWaveFunctions/SPOSetBuilderT.h" + +#include +#include + +// #define PW_COEFF_NORM_TOLERANCE 1e-6 + +class Communicate; + +namespace qmcplusplus +{ +/// forward declaration of BsplineReaderBase +template +class BsplineReaderBaseT; + +// Helper needed for TwistMap +// struct Int3less +// { +// bool +// operator()(const TinyVector& a, const TinyVector& b) +// const +// { +// if (a[0] > b[0]) +// return false; +// if (a[0] < b[0]) +// return true; +// if (a[1] > b[1]) +// return false; +// if (a[1] < b[1]) +// return true; +// if (a[2] > b[2]) +// return false; +// if (a[2] < b[2]) +// return true; +// return false; +// } +// }; +// struct Int4less +// { +// bool +// operator()(const TinyVector& a, const TinyVector& b) +// const +// { +// for (int i = 0; i < 4; i++) { +// if (a[i] > b[i]) +// return false; +// if (a[i] < b[i]) +// return true; +// } +// return false; +// } +// }; + +/** construct a name for spline SPO set + */ +// struct H5OrbSet +// { +// /// index for the spin set +// int SpinSet; +// /// number of orbitals that belong to this set +// int NumOrbs; +// /// name of the HDF5 file +// std::filesystem::path FileName; +// /** 
true if a < b +// * +// * The ordering +// * - name +// * - spin set +// * - number of orbitals +// */ +// bool +// operator()(const H5OrbSet& a, const H5OrbSet& b) const +// { +// if (a.FileName == b.FileName) { +// if (a.SpinSet == b.SpinSet) +// return a.NumOrbs < b.NumOrbs; +// else +// return a.SpinSet < b.SpinSet; +// } +// else +// return a.FileName < b.FileName; +// } + +// H5OrbSet(std::filesystem::path name, int spinSet, int numOrbs) : +// SpinSet(spinSet), +// NumOrbs(numOrbs), +// FileName(std::move(name)) +// { +// } +// H5OrbSet() = default; +// }; + +/** EinsplineSet builder + */ +template +class EinsplineSetBuilderT : public SPOSetBuilderT +{ +public: + static constexpr auto DIM = ParticleSetT::DIM; + + using PSetMap = + std::map>>; + using UnitCellType = + CrystalLattice::Scalar_t, DIM>; + using RealType = typename SPOSetBuilderT::RealType; + using PosType = typename SPOSetBuilderT::PosType; + using ComplexType = typename SPOSetT::ComplexType; + + /// reference to the particleset pool + const PSetMap& ParticleSets; + /// quantum particle set + ParticleSetT& TargetPtcl; + /// ionic system + ParticleSetT* SourcePtcl; + + /** Helper vector for sorting bands + */ + std::vector>> FullBands; + + /// reader to use BsplineReaderBase + std::unique_ptr> MixedSplineReader; + + /// This is true if we have the orbital derivatives w.r.t. 
the ion positions + bool HaveOrbDerivs; + /// root XML node with href, sort, tilematrix, twistnum, source, + /// precision,truncate,version + xmlNodePtr XMLRoot; + + std::map*, H5OrbSet> SPOSetMap; + + /// constructor + EinsplineSetBuilderT(ParticleSetT& p, const PSetMap& psets, + Communicate* comm, xmlNodePtr cur); + + /// destructor + ~EinsplineSetBuilderT() override; + + /** initialize the Antisymmetric wave function for electrons + * @param cur the current xml node + */ + std::unique_ptr> + createSPOSetFromXML(xmlNodePtr cur) override; + + /** initialize with the existing SPOSet */ + std::unique_ptr> + createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input_info) override; + + ////////////////////////////////////// + // HDF5-related data and functions // + ////////////////////////////////////// + hdf_archive H5File; + std::filesystem::path H5FileName; + // HDF5 orbital file version + typedef enum + { + QMCPACK, + ESHDF + } FormatType; + FormatType Format; + TinyVector Version; + std::string parameterGroup, ionsGroup, eigenstatesGroup; + std::vector Occ; + bool + ReadOrbitalInfo(bool skipChecks = false); + bool + ReadOrbitalInfo_ESHDF(bool skipChecks = false); + void + BroadcastOrbitalInfo(); + bool + CheckLattice(); + + /** read gvectors for each twist + * @return true, if psi_g is found + */ + bool + ReadGvectors_ESHDF(); + + Tensor Lattice, RecipLattice, LatticeInv, SuperLattice, + GGt; + UnitCellType SuperCell, PrimCell, PrimCellInv; + int NumBands, NumElectrons, NumSpins, NumTwists; + int MaxNumGvecs; + double MeshFactor; + RealType MatchingTol; + TinyVector MeshSize; + std::vector>> Gvecs; + + Vector IonTypes; + Vector> IonPos; + // mapping the ions in the supercell to the primitive cell + std::vector Super2Prim; + + ///////////////////////////// + // Twist angle information // + ///////////////////////////// + // The "true" twist number after analyzing twistnum, twist XML input and h5 + int twist_num_; + // primitive cell k-points from DFT calculations + 
std::vector> primcell_kpoints; + // primitive cell to supercell tiling matrix + Tensor TileMatrix; + // This vector stores which twist indices will be used by this clone + std::vector> UseTwists; + std::vector IncludeTwists, DistinctTwists; + /// if false, splines are conceptually complex valued + bool use_real_splines_; + int NumDistinctOrbitals; + // This is true if the corresponding twist in DistinctTwists should + // should be used to generate two distinct orbitals from the real and + // imaginary parts. + std::vector MakeTwoCopies; + // This maps a 3-integer twist index into the twist number in the file + std::map, int, Int3less> TwistMap; + + bool + TwistPair(PosType a, PosType b) const; + void + TileIons(); + void + OccupyBands(int spin, int sortBands, int numOrbs, bool skipChecks = false); + void + OccupyBands_ESHDF(int spin, int sortBands, int numOrbs); + + //////////////////////////////// + // Atomic orbital information // + //////////////////////////////// + struct CenterInfo + { + std::vector lmax, spline_npoints, GroupID; + std::vector spline_radius, cutoff, inner_cutoff, + non_overlapping_radius; + std::vector> ion_pos; + int Ncenters; + + CenterInfo() : Ncenters(0){}; + + void + resize(int ncenters) + { + Ncenters = ncenters; + lmax.resize(ncenters, -1); + spline_npoints.resize(ncenters, -1); + GroupID.resize(ncenters, 0); + spline_radius.resize(ncenters, -1.0); + inner_cutoff.resize(ncenters, -1.0); + non_overlapping_radius.resize(ncenters, -1.0); + cutoff.resize(ncenters, -1.0); + ion_pos.resize(ncenters); + } + } AtomicCentersInfo; + + // This returns the path in the HDF5 file to the group for orbital + // with twist ti and band bi + std::string + OrbitalPath(int ti, int bi); + + ///////////////////////////////////////////////////////////// + // Information to avoid storing the same orbitals twice in // + // spin-restricted calculations. 
// + ///////////////////////////////////////////////////////////// + int LastSpinSet, NumOrbitalsRead; + + std::string occ_format; + int particle_hole_pairs; + bool makeRotations; + +protected: + /** broadcast SortBands + * @param N number of state + * @param root true if it is the i/o node + */ + void + bcastSortBands(int splin, int N, bool root); + + /** a specific but clean code path in createSPOSetFromXML, for PBC, double, + * ESHDF + * @param cur the current xml node + */ + void + set_metadata(int numOrbs, int twist_num_inp, + const TinyVector& twist_inp, + bool skipChecks = false); + + void + createBsplineReader( + bool useSingle, bool hybridRep, const std::string& useGPU); + + /** analyze twists of orbitals in h5 and determinine twist_num_ + * @param twist_num_inp twistnum XML input + * @param twist_inp twst XML input + */ + void + AnalyzeTwists2(const int twist_num_inp, + const TinyVector& twist_inp); + + /// twistnum_inp == -9999 to indicate no given input after parsing XML + static constexpr int TWISTNUM_NO_INPUT = -9999; + /// twist_inp[i] <= -9999 to indicate no given input after parsing XML + static constexpr double TWIST_NO_INPUT = -9999; +}; + +} // namespace qmcplusplus + +#endif diff --git a/src/QMCWaveFunctions/OrbitalSetTraits.h b/src/QMCWaveFunctions/OrbitalSetTraits.h index 7b35937067..881532fcef 100644 --- a/src/QMCWaveFunctions/OrbitalSetTraits.h +++ b/src/QMCWaveFunctions/OrbitalSetTraits.h @@ -54,6 +54,7 @@ struct OrbitalSetTraits //: public OrbitalTraits DIM = OHMMS_DIM }; using RealType = RealAlias; + using ComplexType = std::complex; using ValueType = T; using IndexType = int; using PosType = TinyVector; diff --git a/src/QMCWaveFunctions/SPOSetT.h b/src/QMCWaveFunctions/SPOSetT.h index f3fd993c5c..98c3743f7a 100644 --- a/src/QMCWaveFunctions/SPOSetT.h +++ b/src/QMCWaveFunctions/SPOSetT.h @@ -78,6 +78,7 @@ class SPOSetT : public QMCTraits Array>; // [walker, Orbs] using PosType = typename OrbitalSetTraits::PosType; using RealType = 
typename OrbitalSetTraits::RealType; + using ComplexType = typename OrbitalSetTraits::ComplexType; using ValueType = typename OrbitalSetTraits::ValueType; using FullRealType = typename OrbitalSetTraits::RealType; template diff --git a/src/QMCWaveFunctions/SpinorSetT.cpp b/src/QMCWaveFunctions/SpinorSetT.cpp index 1090397ad1..bac10a6ec8 100644 --- a/src/QMCWaveFunctions/SpinorSetT.cpp +++ b/src/QMCWaveFunctions/SpinorSetT.cpp @@ -183,7 +183,7 @@ SpinorSetT::mw_evaluateVGLWithSpin( const RefVector& psi_v_list, const RefVector& dpsi_v_list, const RefVector& d2psi_v_list, - OffloadMatrix& mw_dspin) const + OffloadMatrix& mw_dspin) const { auto& spo_leader = spo_list.template getCastedLeader>(); auto& P_leader = P_list.getLeader(); diff --git a/src/QMCWaveFunctions/SpinorSetT.h b/src/QMCWaveFunctions/SpinorSetT.h index 08990e350b..08d869b112 100644 --- a/src/QMCWaveFunctions/SpinorSetT.h +++ b/src/QMCWaveFunctions/SpinorSetT.h @@ -40,6 +40,7 @@ class SpinorSetT : public SPOSetT template using OffloadMatrix = typename SPOSetT::template OffloadMatrix
; using RealType = typename SPOSetT::RealType; + using ComplexType = typename SPOSetT::ComplexType; using IndexType = OHMMS_INDEXTYPE; /** constructor */ @@ -129,7 +130,7 @@ class SpinorSetT : public SPOSetT const RefVector& psi_v_list, const RefVector& dpsi_v_list, const RefVector& d2psi_v_list, - OffloadMatrix& mw_dspin) const override; + OffloadMatrix& mw_dspin) const override; /** evaluate the values, gradients and laplacians of this single-particle * orbital sets and determinant ratio and grads of multiple walkers. Device diff --git a/src/QMCWaveFunctions/tests/CMakeLists.txt b/src/QMCWaveFunctions/tests/CMakeLists.txt index b414f0158b..ee68f38a1e 100644 --- a/src/QMCWaveFunctions/tests/CMakeLists.txt +++ b/src/QMCWaveFunctions/tests/CMakeLists.txt @@ -112,6 +112,7 @@ set(SPOSET_SRC test_pw.cpp test_ConstantSPOSet.cpp test_ConstantSPOSetT.cpp + test_RotatedSPOsT.cpp ${MO_SRCS}) if(NiO_a16_H5_FOUND) set(SPOSET_SRC ${SPOSET_SRC} test_einset_NiO_a16.cpp) diff --git a/src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp b/src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp new file mode 100644 index 0000000000..24a5087f79 --- /dev/null +++ b/src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp @@ -0,0 +1,1024 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2022 QMCPACK developers. 
+// +// File developed by: Joshua Townsend, jptowns@sandia.gov, Sandia National +// Laboratories +// +// File created by: Joshua Townsend, jptowns@sandia.gov, Sandia National +// Laboratories +////////////////////////////////////////////////////////////////////////////////////// + +#include "FakeSPOT.h" +#include "OhmmsData/Libxml2Doc.h" +#include "OhmmsPETE/OhmmsMatrix.h" +#include "Particle/ParticleSetPoolT.h" +#include "Particle/ParticleSetT.h" +#include "QMCWaveFunctions/EinsplineSetBuilderT.h" +#include "QMCWaveFunctions/RotatedSPOsT.h" +#include "QMCWaveFunctions/WaveFunctionComponent.h" +#include "catch.hpp" +#include "checkMatrix.hpp" +#include "type_traits/ConvertToReal.h" +#include "type_traits/template_types.hpp" +#include +#include + +#include +#include + +using std::string; + +namespace qmcplusplus +{ +template +struct ValueApproxHelper +{ + using Type = Catch::Detail::Approx; +}; +template +struct ValueApproxHelper> +{ + using Type = Catch::Detail::ComplexApprox; +}; + +template +using ValueApprox = typename ValueApproxHelper::Type; + +namespace testing +{ +OptVariablesType& +getMyVars(SPOSetT& rot) +{ + return rot.myVars; +} +OptVariablesType& +getMyVars(SPOSetT& rot) +{ + return rot.myVars; +} +OptVariablesType& +getMyVarsFull(RotatedSPOsT& rot) +{ + return rot.myVarsFull; +} +OptVariablesType& +getMyVarsFull(RotatedSPOsT& rot) +{ + return rot.myVarsFull; +} +std::vector>& +getHistoryParams(RotatedSPOsT& rot) +{ + return rot.history_params_; +} + +std::vector>& +getHistoryParams(RotatedSPOsT& rot) +{ + return rot.history_params_; +} +} // namespace testing + +/* + JPT 04.01.2022: Adapted from test_einset.cpp + Test the spline rotated machinery for SplineR2R (extend to others later). +*/ +TEMPLATE_TEST_CASE( + "RotatedSPOs via SplineR2R", "[wavefunction][template]", double, float) +{ + using RealType = typename SPOSetT::RealType; + + /* + BEGIN Boilerplate stuff to make a simple SPOSet. 
Copied from + test_einset.cpp + */ + + Communicate* c = OHMMS::Controller; + + // We get a "Mismatched supercell lattices" error due to default ctor? + typename ParticleSetT::ParticleLayout lattice; + + // diamondC_1x1x1 + lattice.R = {3.37316115, 3.37316115, 0.0, 0.0, 3.37316115, 3.37316115, + 3.37316115, 0.0, 3.37316115}; + + ParticleSetPoolT ptcl = ParticleSetPoolT(c); + ptcl.setSimulationCell(lattice); + // LAttice seems fine after this point... + + auto ions_uptr = + std::make_unique>(ptcl.getSimulationCell()); + auto elec_uptr = + std::make_unique>(ptcl.getSimulationCell()); + ParticleSetT& ions_(*ions_uptr); + ParticleSetT& elec_(*elec_uptr); + + ions_.setName("ion"); + ptcl.addParticleSet(std::move(ions_uptr)); + ions_.create({2}); + ions_.R[0] = {0.0, 0.0, 0.0}; + ions_.R[1] = {1.68658058, 1.68658058, 1.68658058}; + elec_.setName("elec"); + ptcl.addParticleSet(std::move(elec_uptr)); + elec_.create({2}); + elec_.R[0] = {0.0, 0.0, 0.0}; + elec_.R[1] = {0.0, 1.0, 0.0}; + SpeciesSet& tspecies = elec_.getSpeciesSet(); + int upIdx = tspecies.addSpecies("u"); + int chargeIdx = tspecies.addAttribute("charge"); + tspecies(chargeIdx, upIdx) = -1; + + // diamondC_1x1x1 - 8 bands available + const char* particles = R"( + + +)"; + + Libxml2Document doc; + bool okay = doc.parseFromString(particles); + REQUIRE(okay); + + xmlNodePtr root = doc.getRoot(); + + xmlNodePtr ein1 = xmlFirstElementChild(root); + + EinsplineSetBuilderT einSet(elec_, ptcl.getPool(), c, ein1); + auto spo = einSet.createSPOSetFromXML(ein1); + REQUIRE(spo); + + /* + END Boilerplate stuff. Now we have a SplineR2R wavefunction + ready for rotation. What follows is the actual test. + */ + + // SplineR2R only for the moment, so skip if QMC_COMPLEX is set +#if !defined(QMC_COMPLEX) + + spo->storeParamsBeforeRotation(); + // 1.) Make a RotatedSPOs object so that we can use the rotation routines + auto rot_spo = std::make_unique>( + "one_rotated_set", std::move(spo)); + + // Sanity check for orbs. 
Expect 2 electrons, 8 orbitals, & 79507 coefs/orb. + const auto orbitalsetsize = rot_spo->getOrbitalSetSize(); + REQUIRE(orbitalsetsize == 8); + + // 2.) Get data for unrotated orbitals. Check that there's no rotation + rot_spo->buildOptVariables(elec_.R.size()); + typename SPOSetT::ValueMatrix psiM_bare( + elec_.R.size(), orbitalsetsize); + typename SPOSetT::GradMatrix dpsiM_bare( + elec_.R.size(), orbitalsetsize); + typename SPOSetT::ValueMatrix d2psiM_bare( + elec_.R.size(), orbitalsetsize); + rot_spo->evaluate_notranspose( + elec_, 0, elec_.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare); + + // This stuff checks that no rotation was applied. Copied from + // test_einset.cpp. value + CHECK(std::real(psiM_bare[1][0]) == ValueApprox(-0.8886948824)); + CHECK(std::real(psiM_bare[1][1]) == ValueApprox(1.4194120169)); + // grad + CHECK( + std::real(dpsiM_bare[1][0][0]) == ValueApprox(-0.0000183403)); + CHECK( + std::real(dpsiM_bare[1][0][1]) == ValueApprox(0.1655139178)); + CHECK( + std::real(dpsiM_bare[1][0][2]) == ValueApprox(-0.0000193077)); + CHECK( + std::real(dpsiM_bare[1][1][0]) == ValueApprox(-1.3131694794)); + CHECK( + std::real(dpsiM_bare[1][1][1]) == ValueApprox(-1.1174004078)); + CHECK( + std::real(dpsiM_bare[1][1][2]) == ValueApprox(-0.8462534547)); + // lapl + CHECK(std::real(d2psiM_bare[1][0]) == ValueApprox(1.3313053846)); + CHECK(std::real(d2psiM_bare[1][1]) == ValueApprox(-4.712583065)); + + /* + 3.) Apply a rotation to the orbitals + To do this, construct a params vector and call the + RotatedSPOs::apply_rotation(params) method. That should do the + right thing for this particular spline class. + + For 2 electrons in 8 orbs, we expect 2*(8-2) = 12 params. 
+ */ + const auto rot_size = rot_spo->m_act_rot_inds.size(); + REQUIRE(rot_size == 12); // = Nelec*(Norbs - Nelec) = 2*(8-2) = 12 + std::vector param(rot_size); + for (auto i = 0; i < rot_size; i++) { + param[i] = 0.01 * static_cast(i); + } + rot_spo->apply_rotation( + param, false); // Expect this to call SplineR2R::applyRotation() + + // 4.) Get data for rotated orbitals. + typename SPOSetT::ValueMatrix psiM_rot( + elec_.R.size(), orbitalsetsize); + typename SPOSetT::GradMatrix dpsiM_rot( + elec_.R.size(), orbitalsetsize); + typename SPOSetT::ValueMatrix d2psiM_rot( + elec_.R.size(), orbitalsetsize); + rot_spo->evaluate_notranspose( + elec_, 0, elec_.R.size(), psiM_rot, dpsiM_rot, d2psiM_rot); + + /* + Manually encode the unitary transformation. Ugly, but it works. + @TODO: Use the total rotation machinery when it's implemented + + NB: This is truncated to 5 sig-figs, so there is some slop here as + compared to what is done in the splines via apply_rotation(). + So below we reduce the threshold for comparison. This can + probably be ditched once we have a way to grab the actual + rotation matrix... 
+ */ + typename SPOSetT::ValueMatrix rot_mat( + orbitalsetsize, orbitalsetsize); + rot_mat[0][0] = 0.99726; + rot_mat[0][1] = -0.00722; + rot_mat[0][2] = 0.00014; + rot_mat[0][3] = -0.00982; + rot_mat[0][4] = -0.01979; + rot_mat[0][5] = -0.02976; + rot_mat[0][6] = -0.03972; + rot_mat[0][7] = -0.04969; + rot_mat[1][0] = -0.00722; + rot_mat[1][1] = 0.97754; + rot_mat[1][2] = -0.05955; + rot_mat[1][3] = -0.06945; + rot_mat[1][4] = -0.07935; + rot_mat[1][5] = -0.08925; + rot_mat[1][6] = -0.09915; + rot_mat[1][7] = -0.10905; + rot_mat[2][0] = -0.00014; + rot_mat[2][1] = 0.05955; + rot_mat[2][2] = 0.99821; + rot_mat[2][3] = -0.00209; + rot_mat[2][4] = -0.00239; + rot_mat[2][5] = -0.00269; + rot_mat[2][6] = -0.00299; + rot_mat[2][7] = -0.00329; + rot_mat[3][0] = 0.00982; + rot_mat[3][1] = 0.06945; + rot_mat[3][2] = -0.00209; + rot_mat[3][3] = 0.99751; + rot_mat[3][4] = -0.00289; + rot_mat[3][5] = -0.00329; + rot_mat[3][6] = -0.00368; + rot_mat[3][7] = -0.00408; + rot_mat[4][0] = 0.01979; + rot_mat[4][1] = 0.07935; + rot_mat[4][2] = -0.00239; + rot_mat[4][3] = -0.00289; + rot_mat[4][4] = 0.99661; + rot_mat[4][5] = -0.00388; + rot_mat[4][6] = -0.00438; + rot_mat[4][7] = -0.00488; + rot_mat[5][0] = 0.02976; + rot_mat[5][1] = 0.08925; + rot_mat[5][2] = -0.00269; + rot_mat[5][3] = -0.00329; + rot_mat[5][4] = -0.00388; + rot_mat[5][5] = 0.99552; + rot_mat[5][6] = -0.00508; + rot_mat[5][7] = -0.00568; + rot_mat[6][0] = 0.03972; + rot_mat[6][1] = 0.09915; + rot_mat[6][2] = -0.00299; + rot_mat[6][3] = -0.00368; + rot_mat[6][4] = -0.00438; + rot_mat[6][5] = -0.00508; + rot_mat[6][6] = 0.99422; + rot_mat[6][7] = -0.00647; + rot_mat[7][0] = 0.04969; + rot_mat[7][1] = 0.10905; + rot_mat[7][2] = -0.00329; + rot_mat[7][3] = -0.00408; + rot_mat[7][4] = -0.00488; + rot_mat[7][5] = -0.00568; + rot_mat[7][6] = -0.00647; + rot_mat[7][7] = 0.99273; + + // Now compute the expected values by hand using the transformation above + double val1 = 0.; + double val2 = 0.; + for (auto i = 0; i < 
rot_mat.size1(); i++) { + val1 += psiM_bare[0][i] * rot_mat[i][0]; + val2 += psiM_bare[1][i] * rot_mat[i][0]; + } + + // value + CHECK(std::real(psiM_rot[0][0]) == ValueApprox(val1)); + CHECK(std::real(psiM_rot[1][0]) == ValueApprox(val2)); + + std::vector grad1(3); + std::vector grad2(3); + for (auto j = 0; j < grad1.size(); j++) { + for (auto i = 0; i < rot_mat.size1(); i++) { + grad1[j] += dpsiM_bare[0][i][j] * rot_mat[i][0]; + grad2[j] += dpsiM_bare[1][i][j] * rot_mat[i][0]; + } + } + + // grad + CHECK( + dpsiM_rot[0][0][0] == ValueApprox(grad1[0]).epsilon(0.0001)); + CHECK( + dpsiM_rot[0][0][1] == ValueApprox(grad1[1]).epsilon(0.0001)); + CHECK( + dpsiM_rot[0][0][2] == ValueApprox(grad1[2]).epsilon(0.0001)); + CHECK( + dpsiM_rot[1][0][0] == ValueApprox(grad2[0]).epsilon(0.0001)); + CHECK( + dpsiM_rot[1][0][1] == ValueApprox(grad2[1]).epsilon(0.0001)); + CHECK( + dpsiM_rot[1][0][2] == ValueApprox(grad2[2]).epsilon(0.0001)); + + double lap1 = 0.; + double lap2 = 0.; + for (auto i = 0; i < rot_mat.size1(); i++) { + lap1 += d2psiM_bare[0][i] * rot_mat[i][0]; + lap2 += d2psiM_bare[1][i] * rot_mat[i][0]; + } + + // Lapl + CHECK(std::real(d2psiM_rot[0][0]) == + ValueApprox(lap1).epsilon(0.0001)); + CHECK(std::real(d2psiM_rot[1][0]) == + ValueApprox(lap2).epsilon(0.0001)); + +#endif +} + +TEMPLATE_TEST_CASE("RotatedSPOs createRotationIndices", + "[wavefunction][template]", double, float) +{ + // No active-active or virtual-virtual rotations + // Only active-virtual + typename RotatedSPOsT::RotationIndices rot_ind; + int nel = 1; + int nmo = 3; + RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind); + CHECK(rot_ind.size() == 2); + + // Full rotation contains all rotations + // Size should be number of pairs of orbitals: nmo*(nmo-1)/2 + typename RotatedSPOsT::RotationIndices full_rot_ind; + RotatedSPOsT::createRotationIndicesFull(nel, nmo, full_rot_ind); + CHECK(full_rot_ind.size() == 3); + + nel = 2; + typename RotatedSPOsT::RotationIndices rot_ind2; + 
RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind2); + CHECK(rot_ind2.size() == 2); + + typename RotatedSPOsT::RotationIndices full_rot_ind2; + RotatedSPOsT::createRotationIndicesFull(nel, nmo, full_rot_ind2); + CHECK(full_rot_ind2.size() == 3); + + nmo = 4; + typename RotatedSPOsT::RotationIndices rot_ind3; + RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind3); + CHECK(rot_ind3.size() == 4); + + typename RotatedSPOsT::RotationIndices full_rot_ind3; + RotatedSPOsT::createRotationIndicesFull(nel, nmo, full_rot_ind3); + CHECK(full_rot_ind3.size() == 6); +} + +TEMPLATE_TEST_CASE("RotatedSPOs constructAntiSymmetricMatrix", + "[wavefunction][template]", double, float) +{ + using ValueType = typename SPOSetT::ValueType; + using ValueMatrix = typename SPOSetT::ValueMatrix; + + typename RotatedSPOsT::RotationIndices rot_ind; + int nel = 1; + int nmo = 3; + RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind); + + ValueMatrix m3(nmo, nmo); + m3 = ValueType(0); + std::vector params = {0.1, 0.2}; + + RotatedSPOsT::constructAntiSymmetricMatrix(rot_ind, params, m3); + + // clang-format off + std::vector expected_data = { 0.0, -0.1, -0.2, + 0.1, 0.0, 0.0, + 0.2, 0.0, 0.0 }; + // clang-format on + + ValueMatrix expected_m3(expected_data.data(), 3, 3); + + CheckMatrixResult check_matrix_result = checkMatrix(m3, expected_m3, true); + CHECKED_ELSE(check_matrix_result.result) + { + FAIL(check_matrix_result.result_message); + } + + std::vector params_out(2); + RotatedSPOsT::extractParamsFromAntiSymmetricMatrix( + rot_ind, m3, params_out); + CHECK(params_out[0] == ValueApprox(0.1)); + CHECK(params_out[1] == ValueApprox(0.2)); +} + +// Expected values of the matrix exponential come from gen_matrix_ops.py +TEMPLATE_TEST_CASE("RotatedSPOs exponentiate matrix", + "[wavefunction][template]", double, float) +{ + using ValueType = typename SPOSetT::ValueType; + using ValueMatrix = typename SPOSetT::ValueMatrix; + + std::vector::ValueType> mat1_data = {0.0}; + typename 
SPOSetT::ValueMatrix m1(mat1_data.data(), 1, 1); + RotatedSPOsT::exponentiate_antisym_matrix(m1); + // Always return 1.0 (the only possible anti-symmetric 1x1 matrix is 0) + CHECK(m1(0, 0) == ValueApprox(1.0)); + + // clang-format off + std::vector::ValueType> mat2_data = { 0.0, -0.1, + 0.1, 0.0 }; + // clang-format on + + typename SPOSetT::ValueMatrix m2(mat2_data.data(), 2, 2); + RotatedSPOsT::exponentiate_antisym_matrix(m2); + + // clang-format off + std::vector expected_rot2 = { 0.995004165278026, -0.0998334166468282, + 0.0998334166468282, 0.995004165278026 }; + // clang-format on + + ValueMatrix expected_m2(expected_rot2.data(), 2, 2); + CheckMatrixResult check_matrix_result2 = checkMatrix(m2, expected_m2, true); + CHECKED_ELSE(check_matrix_result2.result) + { + FAIL(check_matrix_result2.result_message); + } + + // clang-format off + std::vector m3_input_data = { 0.0, -0.3, -0.1, + 0.3, 0.0, -0.2, + 0.1, 0.2, 0.0 }; + + + std::vector expected_rot3 = { 0.950580617906092, -0.302932713402637, -0.0680313164049401, + 0.283164960565074, 0.935754803277919, -0.210191705950743, + 0.127334574917630, 0.180540076694398, 0.975290308953046 }; + + // clang-format on + + ValueMatrix m3(m3_input_data.data(), 3, 3); + ValueMatrix expected_m3(expected_rot3.data(), 3, 3); + + RotatedSPOsT::exponentiate_antisym_matrix(m3); + + CheckMatrixResult check_matrix_result3 = checkMatrix(m3, expected_m3, true); + CHECKED_ELSE(check_matrix_result3.result) + { + FAIL(check_matrix_result3.result_message); + } +} + +TEMPLATE_TEST_CASE( + "RotatedSPOs log matrix", "[wavefunction][template]", double, float) +{ + using ValueType = typename SPOSetT::ValueType; + using ValueMatrix = typename SPOSetT::ValueMatrix; + + std::vector::ValueType> mat1_data = {1.0}; + typename SPOSetT::ValueMatrix m1(mat1_data.data(), 1, 1); + typename SPOSetT::ValueMatrix out_m1(1, 1); + RotatedSPOsT::log_antisym_matrix(m1, out_m1); + // Should always be 1.0 (the only possible anti-symmetric 1x1 matrix is 0) + 
CHECK(out_m1(0, 0) == ValueApprox(0.0)); + + // clang-format off + std::vector start_rot2 = { 0.995004165278026, -0.0998334166468282, + 0.0998334166468282, 0.995004165278026 }; + + std::vector::ValueType> mat2_data = { 0.0, -0.1, + 0.1, 0.0 }; + // clang-format on + + ValueMatrix rot_m2(start_rot2.data(), 2, 2); + ValueMatrix out_m2(2, 2); + RotatedSPOsT::log_antisym_matrix(rot_m2, out_m2); + + typename SPOSetT::ValueMatrix m2(mat2_data.data(), 2, 2); + CheckMatrixResult check_matrix_result2 = checkMatrix(m2, out_m2, true); + CHECKED_ELSE(check_matrix_result2.result) + { + FAIL(check_matrix_result2.result_message); + } + + // clang-format off + std::vector start_rot3 = { 0.950580617906092, -0.302932713402637, -0.0680313164049401, + 0.283164960565074, 0.935754803277919, -0.210191705950743, + 0.127334574917630, 0.180540076694398, 0.975290308953046 }; + + std::vector m3_input_data = { 0.0, -0.3, -0.1, + 0.3, 0.0, -0.2, + 0.1, 0.2, 0.0 }; + // clang-format on + ValueMatrix rot_m3(start_rot3.data(), 3, 3); + ValueMatrix out_m3(3, 3); + RotatedSPOsT::log_antisym_matrix(rot_m3, out_m3); + + typename SPOSetT::ValueMatrix m3(m3_input_data.data(), 3, 3); + CheckMatrixResult check_matrix_result3 = checkMatrix(m3, out_m3, true); + CHECKED_ELSE(check_matrix_result3.result) + { + FAIL(check_matrix_result3.result_message); + } +} + +// Test round trip A -> exp(A) -> log(exp(A)) +// The log is multi-valued so this test may fail if the rotation parameters are +// too large. 
The exponentials will be the same, though +// exp(log(exp(A))) == exp(A) +TEMPLATE_TEST_CASE( + "RotatedSPOs exp-log matrix", "[wavefunction][template]", double, float) +{ + using ValueType = typename SPOSetT::ValueType; + using ValueMatrix = typename SPOSetT::ValueMatrix; + + typename RotatedSPOsT::RotationIndices rot_ind; + int nel = 2; + int nmo = 4; + RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind); + + ValueMatrix rot_m4(nmo, nmo); + rot_m4 = ValueType(0); + + std::vector params4 = {-1.1, 1.5, 0.2, -0.15}; + + RotatedSPOsT::constructAntiSymmetricMatrix( + rot_ind, params4, rot_m4); + ValueMatrix orig_rot_m4 = rot_m4; + ValueMatrix out_m4(nmo, nmo); + + RotatedSPOsT::exponentiate_antisym_matrix(rot_m4); + + RotatedSPOsT::log_antisym_matrix(rot_m4, out_m4); + + CheckMatrixResult check_matrix_result4 = + checkMatrix(out_m4, orig_rot_m4, true); + CHECKED_ELSE(check_matrix_result4.result) + { + FAIL(check_matrix_result4.result_message); + } + + std::vector params4out(4); + RotatedSPOsT::extractParamsFromAntiSymmetricMatrix( + rot_ind, out_m4, params4out); + for (int i = 0; i < params4.size(); i++) { + CHECK(params4[i] == ValueApprox(params4out[i])); + } +} + +TEMPLATE_TEST_CASE( + "RotatedSPOs hcpBe", "[wavefunction][template]", double, float) +{ + using RealType = typename OrbitalSetTraits::RealType; + Communicate* c = OHMMS::Controller; + + typename ParticleSetT::ParticleLayout lattice; + lattice.R = {4.32747284, 0.00000000, 0.00000000, -2.16373642, 3.74770142, + 0.00000000, 0.00000000, 0.00000000, 6.78114995}; + + ParticleSetPoolT ptcl = ParticleSetPoolT(c); + ptcl.setSimulationCell(lattice); + auto ions_uptr = + std::make_unique>(ptcl.getSimulationCell()); + auto elec_uptr = + std::make_unique>(ptcl.getSimulationCell()); + ParticleSetT& ions(*ions_uptr); + ParticleSetT& elec(*elec_uptr); + + ions.setName("ion"); + ptcl.addParticleSet(std::move(ions_uptr)); + ions.create({1}); + ions.R[0] = {0.0, 0.0, 0.0}; + + elec.setName("elec"); + 
ptcl.addParticleSet(std::move(elec_uptr)); + elec.create({1}); + elec.R[0] = {0.0, 0.0, 0.0}; + + SpeciesSet& tspecies = elec.getSpeciesSet(); + int upIdx = tspecies.addSpecies("u"); + int chargeIdx = tspecies.addAttribute("charge"); + tspecies(chargeIdx, upIdx) = -1; + + // Add the attribute save_coefs="yes" to the sposet_builder tag to generate + // the spline file for use in eval_bspline_spo.py + + const char* particles = R"( + + + +)"; + + Libxml2Document doc; + bool okay = doc.parseFromString(particles); + REQUIRE(okay); + + xmlNodePtr root = doc.getRoot(); + + xmlNodePtr sposet_builder = xmlFirstElementChild(root); + xmlNodePtr sposet_ptr = xmlFirstElementChild(sposet_builder); + + EinsplineSetBuilderT einSet(elec, ptcl.getPool(), c, sposet_builder); + auto spo = einSet.createSPOSetFromXML(sposet_ptr); + REQUIRE(spo); + + spo->storeParamsBeforeRotation(); + auto rot_spo = std::make_unique>( + "one_rotated_set", std::move(spo)); + + // Sanity check for orbs. Expect 1 electron, 2 orbitals + const auto orbitalsetsize = rot_spo->getOrbitalSetSize(); + REQUIRE(orbitalsetsize == 2); + + rot_spo->buildOptVariables(elec.R.size()); + + typename SPOSetT::ValueMatrix psiM_bare( + elec.R.size(), orbitalsetsize); + typename SPOSetT::GradMatrix dpsiM_bare( + elec.R.size(), orbitalsetsize); + typename SPOSetT::ValueMatrix d2psiM_bare( + elec.R.size(), orbitalsetsize); + rot_spo->evaluate_notranspose( + elec, 0, elec.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare); + + // Values generated from eval_bspline_spo.py, the + // generate_point_values_hcpBe function + CHECK( + std::real(psiM_bare[0][0]) == ValueApprox(0.210221765375514)); + CHECK(std::real(psiM_bare[0][1]) == + ValueApprox(-2.984345024542937e-06)); + + CHECK(std::real(d2psiM_bare[0][0]) == + ValueApprox(5.303848362116568)); + + OptVariablesType opt_vars; + rot_spo->checkInVariablesExclusive(opt_vars); + opt_vars.resetIndex(); + rot_spo->checkOutVariables(opt_vars); + rot_spo->resetParametersExclusive(opt_vars); + + 
using ValueType = TestType; + Vector dlogpsi(1); + Vector dhpsioverpsi(1); + rot_spo->evaluateDerivatives(elec, opt_vars, dlogpsi, dhpsioverpsi, 0, 1); + + CHECK(dlogpsi[0] == ValueApprox(-1.41961753e-05)); + CHECK(dhpsioverpsi[0] == ValueApprox(-0.00060853)); + + std::vector params = {0.1}; + rot_spo->apply_rotation(params, false); + + rot_spo->evaluate_notranspose( + elec, 0, elec.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare); + CHECK(std::real(psiM_bare[0][0]) == + ValueApprox(0.20917123424337608)); + CHECK(std::real(psiM_bare[0][1]) == + ValueApprox(-0.02099012652669549)); + + CHECK(std::real(d2psiM_bare[0][0]) == + ValueApprox(5.277362065087747)); + + dlogpsi[0] = 0.0; + dhpsioverpsi[0] = 0.0; + + rot_spo->evaluateDerivatives(elec, opt_vars, dlogpsi, dhpsioverpsi, 0, 1); + CHECK(dlogpsi[0] == ValueApprox(-0.10034901119468914)); + CHECK(dhpsioverpsi[0] == ValueApprox(32.96939041498753)); +} + +// Test construction of delta rotation +TEMPLATE_TEST_CASE("RotatedSPOs construct delta matrix", + "[wavefunction][template]", double, float) +{ + using ValueType = typename SPOSetT::ValueType; + using ValueMatrix = typename SPOSetT::ValueMatrix; + + int nel = 2; + int nmo = 4; + typename RotatedSPOsT::RotationIndices rot_ind; + RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind); + typename RotatedSPOsT::RotationIndices full_rot_ind; + RotatedSPOsT::createRotationIndicesFull(nel, nmo, full_rot_ind); + // rot_ind size is 4 and full rot_ind size is 6 + + ValueMatrix rot_m4(nmo, nmo); + rot_m4 = ValueType(0); + + // When comparing with gen_matrix_ops.py, be aware of the order of indices + // in full_rot + // rot_ind is (0,2) (0,3) (1,2) (1,3) + // full_rot_ind is (0,2) (0,3) (1,2) (1,3) (0,1) (2,3) + // The extra indices go at the back + std::vector old_params = {1.5, 0.2, -0.15, 0.03, -1.1, 0.05}; + std::vector delta_params = {0.1, 0.3, 0.2, -0.1}; + std::vector new_params(6); + + RotatedSPOsT::constructDeltaRotation( + delta_params, old_params, rot_ind, full_rot_ind, 
new_params, rot_m4); + + // clang-format off + std::vector rot_data4 = + { -0.371126931484737, 0.491586564957393, -0.784780958819798, 0.0687480658200083, + -0.373372784561548, 0.66111547793048, 0.610450337985578, 0.225542620014052, + 0.751270334458895, 0.566737323353515, -0.0297901110611425, -0.336918744155143, + 0.398058348785074, 0.00881931472604944, -0.102867783149713, 0.911531672428406 }; + // clang-format on + + ValueMatrix new_rot_m4(rot_data4.data(), 4, 4); + + CheckMatrixResult check_matrix_result4 = + checkMatrix(rot_m4, new_rot_m4, true); + CHECKED_ELSE(check_matrix_result4.result) + { + FAIL(check_matrix_result4.result_message); + } + + // Reminder: Ordering! + std::vector expected_new_param = {1.6813965019790489, + 0.3623564254653294, -0.05486544454559908, -0.20574472941408453, + -0.9542513302873077, 0.27497788909911774}; + for (int i = 0; i < new_params.size(); i++) + CHECK(new_params[i] == ValueApprox(expected_new_param[i])); + + // Rotated back to original position + + std::vector new_params2(6); + std::vector reverse_delta_params = {-0.1, -0.3, -0.2, 0.1}; + RotatedSPOsT::constructDeltaRotation(reverse_delta_params, + new_params, rot_ind, full_rot_ind, new_params2, rot_m4); + for (int i = 0; i < new_params2.size(); i++) + CHECK(new_params2[i] == ValueApprox(old_params[i])); +} + +// Test using global rotation +TEMPLATE_TEST_CASE("RotatedSPOs read and write parameters", + "[wavefunction][template]", double, float) +{ + auto fake_spo = std::make_unique>(); + fake_spo->setOrbitalSetSize(4); + RotatedSPOsT rot("fake_rot", std::move(fake_spo)); + int nel = 2; + rot.buildOptVariables(nel); + + optimize::VariableSetT vs; + rot.checkInVariablesExclusive(vs); + vs[0] = 0.1; + vs[1] = 0.15; + vs[2] = 0.2; + vs[3] = 0.25; + rot.resetParametersExclusive(vs); + + { + hdf_archive hout; + vs.writeToHDF("rot_vp.h5", hout); + + rot.writeVariationalParameters(hout); + } + + auto fake_spo2 = std::make_unique>(); + fake_spo2->setOrbitalSetSize(4); + + RotatedSPOsT 
rot2("fake_rot", std::move(fake_spo2)); + rot2.buildOptVariables(nel); + + optimize::VariableSetT vs2; + rot2.checkInVariablesExclusive(vs2); + + hdf_archive hin; + vs2.readFromHDF("rot_vp.h5", hin); + rot2.readVariationalParameters(hin); + + auto& var = testing::getMyVars(rot2); + CHECK(var[0] == ValueApprox(vs[0])); + CHECK(var[1] == ValueApprox(vs[1])); + CHECK(var[2] == ValueApprox(vs[2])); + CHECK(var[3] == ValueApprox(vs[3])); + + auto& full_var = testing::getMyVarsFull(rot2); + CHECK(full_var[0] == ValueApprox(vs[0])); + CHECK(full_var[1] == ValueApprox(vs[1])); + CHECK(full_var[2] == ValueApprox(vs[2])); + CHECK(full_var[3] == ValueApprox(vs[3])); + CHECK(full_var[4] == ValueApprox(0.0)); + CHECK(full_var[5] == ValueApprox(0.0)); +} + +// Test using history list. +TEMPLATE_TEST_CASE("RotatedSPOs read and write parameters history", + "[wavefunction][template]", double, float) +{ + auto fake_spo = std::make_unique>(); + fake_spo->setOrbitalSetSize(4); + RotatedSPOsT rot("fake_rot", std::move(fake_spo)); + rot.set_use_global_rotation(false); + int nel = 2; + rot.buildOptVariables(nel); + + optimize::VariableSetT vs; + rot.checkInVariablesExclusive(vs); + vs[0] = 0.1; + vs[1] = 0.15; + vs[2] = 0.2; + vs[3] = 0.25; + rot.resetParametersExclusive(vs); + + { + hdf_archive hout; + vs.writeToHDF("rot_vp_hist.h5", hout); + + rot.writeVariationalParameters(hout); + } + + auto fake_spo2 = std::make_unique>(); + fake_spo2->setOrbitalSetSize(4); + + RotatedSPOsT rot2("fake_rot", std::move(fake_spo2)); + rot2.buildOptVariables(nel); + + optimize::VariableSetT vs2; + rot2.checkInVariablesExclusive(vs2); + + hdf_archive hin; + vs2.readFromHDF("rot_vp_hist.h5", hin); + rot2.readVariationalParameters(hin); + + auto& var = testing::getMyVars(rot2); + CHECK(var[0] == ValueApprox(vs[0])); + CHECK(var[1] == ValueApprox(vs[1])); + CHECK(var[2] == ValueApprox(vs[2])); + CHECK(var[3] == ValueApprox(vs[3])); + + auto hist = testing::getHistoryParams(rot2); + REQUIRE(hist.size() == 
1); + REQUIRE(hist[0].size() == 4); +} + +template +class DummySPOSetWithoutMWT : public SPOSetT +{ +public: + using ValueVector = typename SPOSetT::ValueVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradVector = typename SPOSetT::GradVector; + using GradMatrix = typename SPOSetT::GradMatrix; + + DummySPOSetWithoutMWT(const std::string& my_name) : SPOSetT(my_name) + { + } + void + setOrbitalSetSize(int norbs) override + { + } + void + evaluateValue(const ParticleSetT& P, int iat, + typename SPOSetT::ValueVector& psi) override + { + assert(psi.size() == 3); + psi[0] = 123; + psi[1] = 456; + psi[2] = 789; + } + void + evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, + GradVector& dpsi, ValueVector& d2psi) override + { + } + void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, + ValueMatrix& d2logdet) override + { + } + std::string + getClassName() const override + { + return this->my_name_; + } +}; + +template +class DummySPOSetWithMWT : public DummySPOSetWithoutMWT +{ +public: + using ValueVector = typename DummySPOSetWithoutMWT::ValueVector; + + DummySPOSetWithMWT(const std::string& my_name) : + DummySPOSetWithoutMWT(my_name) + { + } + void + mw_evaluateValue(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const RefVector& psi_v_list) const override + { + for (auto& psi : psi_v_list) { + assert(psi.get().size() == 3); + psi.get()[0] = 321; + psi.get()[1] = 654; + psi.get()[2] = 987; + } + } +}; + +TEMPLATE_TEST_CASE( + "RotatedSPOs mw_ APIs", "[wavefunction][template]", double, float) +{ + // checking that mw_ API works in RotatedSPOs and is not defaulting to + // SPOSet default implementation + { + // First check calling the mw_ APIs for RotatedSPOs, for which the + // underlying implementation just calls the underlying SPOSet mw_ API + // In the case that the underlying SPOSet doesn't specialize the mw_ + // API, the underlying 
SPOSet will fall back to the default SPOSet mw_, + // which is just a loop over the single walker API. + RotatedSPOsT rot_spo0("rotated0", + std::make_unique>("no mw 0")); + RotatedSPOsT rot_spo1("rotated1", + std::make_unique>("no mw 1")); + RefVectorWithLeader> spo_list( + rot_spo0, {rot_spo0, rot_spo1}); + + ResourceCollection spo_res("test_rot_res"); + rot_spo0.createResource(spo_res); + ResourceCollectionTeamLock> mw_sposet_lock( + spo_res, spo_list); + + const SimulationCellT simulation_cell; + ParticleSetT elec0(simulation_cell); + ParticleSetT elec1(simulation_cell); + RefVectorWithLeader> p_list( + elec0, {elec0, elec1}); + + typename SPOSetT::ValueVector psi0(3); + typename SPOSetT::ValueVector psi1(3); + RefVector::ValueVector> psi_v_list{ + psi0, psi1}; + + rot_spo0.mw_evaluateValue(spo_list, p_list, 0, psi_v_list); + for (int iw = 0; iw < spo_list.size(); iw++) { + CHECK(psi_v_list[iw].get()[0] == ValueApprox(123)); + CHECK(psi_v_list[iw].get()[1] == ValueApprox(456)); + CHECK(psi_v_list[iw].get()[2] == ValueApprox(789)); + } + } + { + // In the case that the underlying SPOSet DOES have mw_ specializations, + // we want to make sure that RotatedSPOs are triggering that + // appropriately This will mean that the underlying SPOSets will do the + // appropriate offloading To check this, DummySPOSetWithMW has an + // explicit mw_evaluateValue which sets different values than what gets + // set in evaluateValue. 
By doing this, we are ensuring that + // RotatedSPOs->mw_evaluaeValue is calling the specialization in the + // underlying SPO and not using the default SPOSet implementation which + // loops over single walker APIs (which have different values enforced + // in + // DummySPOSetWithoutMW + + RotatedSPOsT rot_spo0( + "rotated0", std::make_unique>("mw 0")); + RotatedSPOsT rot_spo1( + "rotated1", std::make_unique>("mw 1")); + RefVectorWithLeader> spo_list( + rot_spo0, {rot_spo0, rot_spo1}); + + ResourceCollection spo_res("test_rot_res"); + rot_spo0.createResource(spo_res); + ResourceCollectionTeamLock> mw_sposet_lock( + spo_res, spo_list); + + const SimulationCellT simulation_cell; + ParticleSetT elec0(simulation_cell); + ParticleSetT elec1(simulation_cell); + RefVectorWithLeader> p_list( + elec0, {elec0, elec1}); + + typename SPOSetT::ValueVector psi0(3); + typename SPOSetT::ValueVector psi1(3); + RefVector::ValueVector> psi_v_list{ + psi0, psi1}; + + rot_spo0.mw_evaluateValue(spo_list, p_list, 0, psi_v_list); + for (int iw = 0; iw < spo_list.size(); iw++) { + CHECK(psi_v_list[iw].get()[0] == ValueApprox(321)); + CHECK(psi_v_list[iw].get()[1] == ValueApprox(654)); + CHECK(psi_v_list[iw].get()[2] == ValueApprox(987)); + } + } +} + +} // namespace qmcplusplus diff --git a/src/mpi/mpi_datatype.h b/src/mpi/mpi_datatype.h index 3750fba976..8f3c58e994 100644 --- a/src/mpi/mpi_datatype.h +++ b/src/mpi/mpi_datatype.h @@ -13,6 +13,8 @@ #ifndef QMCPLUSPLUS_MPI_DATATYPEDEFINE_H #define QMCPLUSPLUS_MPI_DATATYPEDEFINE_H +#include "Message/Communicate.h" + #if defined(HAVE_MPI) #include #else From b91b2d694a2bf25b02e0323ac6140db75beaba30 Mon Sep 17 00:00:00 2001 From: Philip Fackler Date: Mon, 25 Sep 2023 15:54:54 -0400 Subject: [PATCH 2/3] Add new bits to RotatedSPOsT --- src/QMCWaveFunctions/RotatedSPOsT.cpp | 146 ++++++++++++++++++++++++++ src/QMCWaveFunctions/RotatedSPOsT.h | 70 ++++++++++++ 2 files changed, 216 insertions(+) diff --git a/src/QMCWaveFunctions/RotatedSPOsT.cpp 
b/src/QMCWaveFunctions/RotatedSPOsT.cpp index 128bca9798..dabdc282a9 100644 --- a/src/QMCWaveFunctions/RotatedSPOsT.cpp +++ b/src/QMCWaveFunctions/RotatedSPOsT.cpp @@ -1688,6 +1688,152 @@ RotatedSPOsT::makeClone() const return myclone; } +template +void +RotatedSPOsT::mw_evaluateDetRatios( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const +{ + auto phi_list = extractPhiRefList(spo_list); + auto& leader = phi_list.getLeader(); + leader.mw_evaluateDetRatios( + phi_list, vp_list, psi_list, invRow_ptr_list, ratios_list); +} + +template +void +RotatedSPOsT::mw_evaluateValue( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const RefVector& psi_v_list) const +{ + auto phi_list = extractPhiRefList(spo_list); + auto& leader = phi_list.getLeader(); + leader.mw_evaluateValue(phi_list, P_list, iat, psi_v_list); +} + +template +void +RotatedSPOsT::mw_evaluateVGL(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const +{ + auto phi_list = extractPhiRefList(spo_list); + auto& leader = phi_list.getLeader(); + leader.mw_evaluateVGL( + phi_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); +} + +template +void +RotatedSPOsT::mw_evaluateVGLWithSpin( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list, + OffloadMatrix& mw_dspin) const +{ + auto phi_list = extractPhiRefList(spo_list); + auto& leader = phi_list.getLeader(); + leader.mw_evaluateVGLWithSpin( + phi_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list, mw_dspin); +} + +template +void +RotatedSPOsT::mw_evaluateVGLandDetRatioGrads( + const RefVectorWithLeader>& spo_list, + const 
RefVectorWithLeader>& P_list, int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, + std::vector& grads) const +{ + auto phi_list = extractPhiRefList(spo_list); + auto& leader = phi_list.getLeader(); + leader.mw_evaluateVGLandDetRatioGrads( + phi_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads); +} + +template +void +RotatedSPOsT::mw_evaluateVGLandDetRatioGradsWithSpin( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, + std::vector& grads, std::vector& spingrads) const +{ + auto phi_list = extractPhiRefList(spo_list); + auto& leader = phi_list.getLeader(); + leader.mw_evaluateVGLandDetRatioGradsWithSpin(phi_list, P_list, iat, + invRow_ptr_list, phi_vgl_v, ratios, grads, spingrads); +} + +template +void +RotatedSPOsT::mw_evaluate_notranspose( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int first, int last, + const RefVector& logdet_list, + const RefVector& dlogdet_list, + const RefVector& d2logdet_list) const +{ + auto phi_list = extractPhiRefList(spo_list); + auto& leader = phi_list.getLeader(); + leader.mw_evaluate_notranspose(phi_list, P_list, first, last, logdet_list, + dlogdet_list, d2logdet_list); +} + +template +void +RotatedSPOsT::createResource(ResourceCollection& collection) const +{ + Phi->createResource(collection); +} + +template +void +RotatedSPOsT::acquireResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const +{ + auto phi_list = extractPhiRefList(spo_list); + auto& leader = phi_list.getLeader(); + leader.acquireResource(collection, phi_list); +} + +template +void +RotatedSPOsT::releaseResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const +{ + auto phi_list = extractPhiRefList(spo_list); + auto& leader = phi_list.getLeader(); + leader.releaseResource(collection, 
phi_list); +} + +template +RefVectorWithLeader> +RotatedSPOsT::extractPhiRefList( + const RefVectorWithLeader>& spo_list) +{ + auto& spo_leader = spo_list.template getCastedLeader(); + const auto nw = spo_list.size(); + RefVectorWithLeader> phi_list(*spo_leader.Phi); + phi_list.reserve(nw); + for (int iw = 0; iw < nw; iw++) { + RotatedSPOsT& rot = + spo_list.template getCastedElement(iw); + phi_list.emplace_back(*rot.Phi); + } + return phi_list; +} + // Class concrete types from ValueType template class RotatedSPOsT; template class RotatedSPOsT; diff --git a/src/QMCWaveFunctions/RotatedSPOsT.h b/src/QMCWaveFunctions/RotatedSPOsT.h index 971d2528b3..fa4778a6f4 100644 --- a/src/QMCWaveFunctions/RotatedSPOsT.h +++ b/src/QMCWaveFunctions/RotatedSPOsT.h @@ -40,6 +40,8 @@ class RotatedSPOsT : public SPOSetT, public OptimizableObjectT using IndexType = typename SPOSetT::IndexType; using RealType = typename SPOSetT::RealType; using ValueType = typename SPOSetT::ValueType; + using GradType = typename SPOSetT::GradType; + using ComplexType = typename SPOSetT::ComplexType; using FullRealType = typename SPOSetT::FullRealType; using ValueVector = typename SPOSetT::ValueVector; using ValueMatrix = typename SPOSetT::ValueMatrix; @@ -49,6 +51,9 @@ class RotatedSPOsT : public SPOSetT, public OptimizableObjectT using HessMatrix = typename SPOSetT::HessMatrix; using GGGVector = typename SPOSetT::GGGVector; using GGGMatrix = typename SPOSetT::GGGMatrix; + using OffloadMWVGLArray = typename SPOSetT::OffloadMWVGLArray; + template + using OffloadMatrix = Matrix>; // constructor RotatedSPOsT( @@ -399,6 +404,68 @@ class RotatedSPOsT : public SPOSetT, public OptimizableObjectT use_global_rot_ = use_global_rotation; } + void + mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const override; + + void + mw_evaluateValue(const RefVectorWithLeader>& 
spo_list, + const RefVectorWithLeader>& P_list, int iat, + const RefVector& psi_v_list) const override; + + void + mw_evaluateVGL(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const override; + + void + mw_evaluateVGLWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list, + OffloadMatrix& mw_dspin) const override; + + void + mw_evaluateVGLandDetRatioGrads( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, + std::vector& grads) const override; + + void + mw_evaluateVGLandDetRatioGradsWithSpin( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, + std::vector& grads, + std::vector& spingrads) const override; + + void + mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int first, int last, + const RefVector& logdet_list, + const RefVector& dlogdet_list, + const RefVector& d2logdet_list) const override; + + void + createResource(ResourceCollection& collection) const override; + + void + acquireResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const override; + + void + releaseResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const override; + private: /// true if SPO parameters (orbital rotation parameters) have been supplied /// by input @@ -415,6 +482,9 @@ class RotatedSPOsT : public SPOSetT, public OptimizableObjectT /// Use global rotation or history list bool use_global_rot_ = true; + static RefVectorWithLeader> + extractPhiRefList(const 
RefVectorWithLeader>& spo_list); + friend OptVariablesType& testing::getMyVarsFull(RotatedSPOsT& rot); friend OptVariablesType& From a0deb0047b2fb48c6a7bb95b69aa1dece14358de Mon Sep 17 00:00:00 2001 From: Philip Fackler Date: Wed, 27 Sep 2023 10:32:21 -0400 Subject: [PATCH 3/3] Bugfix: removed QMC_COMPLEX conditions where no longer needed --- src/Particle/ParticleSetT.h | 5 +- .../BsplineFactory/SplineR2RT.cpp | 31 ++-- .../BsplineFactory/SplineR2RT.h | 6 +- src/QMCWaveFunctions/CMakeLists.txt | 12 +- src/QMCWaveFunctions/EinsplineSetBuilderT.cpp | 156 +++++++++--------- src/QMCWaveFunctions/PlaneWave/PWOrbitalSet.h | 4 +- .../PlaneWave/PWOrbitalSetT.h | 4 +- src/QMCWaveFunctions/RotatedSPOsT.cpp | 37 ++--- .../tests/test_RotatedSPOsT.cpp | 3 +- 9 files changed, 138 insertions(+), 120 deletions(-) diff --git a/src/Particle/ParticleSetT.h b/src/Particle/ParticleSetT.h index 906e092adb..10b627696a 100644 --- a/src/Particle/ParticleSetT.h +++ b/src/Particle/ParticleSetT.h @@ -21,8 +21,6 @@ #ifndef QMCPLUSPLUS_PARTICLESETT_H #define QMCPLUSPLUS_PARTICLESETT_H -#include - #include "DTModes.h" #include "DynamicCoordinatesT.h" #include "MCCoordsT.hpp" @@ -38,6 +36,8 @@ #include "Walker.h" #include "type_traits/template_types.hpp" +#include + namespace qmcplusplus { /// forward declarations @@ -696,6 +696,7 @@ class ParticleSetT : public OhmmsElementBase { myTwist = t; } + inline const SingleParticlePos& getTwist() const { diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp index ce4bb5e8aa..2469d3c1d2 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp @@ -17,6 +17,7 @@ #include "SplineR2RT.h" +#include "CPU/BLAS.hpp" #include "Concurrency/OpenMP.h" #include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" #include "spline2/MultiBsplineEval.hpp" @@ -125,17 +126,27 @@ SplineR2RT::applyRotation( std::copy_n(spl_coefs, coefs_tot_size, 
coef_copy_->begin()); } - // Apply rotation the dumb way b/c I can't get BLAS::gemm to work... - for (auto i = 0; i < BasisSetSize; i++) { - for (auto j = 0; j < this->OrbitalSetSize; j++) { - const auto cur_elem = Nsplines * i + j; - auto newval{0.}; - for (auto k = 0; k < this->OrbitalSetSize; k++) { - const auto index = i * Nsplines + k; - newval += (*coef_copy_)[index] * rot_mat[k][j]; + if constexpr (std::is_same_v) { + // Here, ST should be equal to ValueType, which will be double for R2R. + // Using BLAS to make things faster + BLAS::gemm('N', 'N', this->OrbitalSetSize, BasisSetSize, + this->OrbitalSetSize, ST(1.0), rot_mat.data(), this->OrbitalSetSize, + coef_copy_->data(), Nsplines, ST(0.0), spl_coefs, Nsplines); + } + else { + // Here, ST is float but ValueType is double for R2R. Due to issues with + // type conversions, just doing naive matrix multiplication in this case + // to not lose precision on rot_mat + for (IndexType i = 0; i < BasisSetSize; i++) + for (IndexType j = 0; j < this->OrbitalSetSize; j++) { + const auto cur_elem = Nsplines * i + j; + FullPrecValueType newval{0.}; + for (IndexType k = 0; k < this->OrbitalSetSize; k++) { + const auto index = i * Nsplines + k; + newval += (*coef_copy_)[index] * rot_mat[k][j]; + } + spl_coefs[cur_elem] = newval; } - spl_coefs[cur_elem] = newval; - } } } diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h index ece156ac1a..1e2a841e13 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h @@ -40,8 +40,12 @@ class SplineR2RT : public BsplineSetT using SplineType = typename bspline_traits::SplineType; using BCType = typename bspline_traits::BCType; using DataType = ST; + using RealType = typename SPOSetT::RealType; + using IndexType = typename SPOSetT::IndexType; + using FullPrecValueType = double; using PointType = TinyVector; using SingleSplineType = UBspline_3d_d; + // types for 
evaluation results using TT = typename BsplineSetT::ValueType; using GGGVector = typename BsplineSetT::GGGVector; @@ -55,8 +59,6 @@ class SplineR2RT : public BsplineSetT using hContainer_type = VectorSoaContainer; using ghContainer_type = VectorSoaContainer; - using RealType = typename SPOSetT::RealType; - private: bool IsGamma; ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt index 05c1fe018b..78cfb90d62 100644 --- a/src/QMCWaveFunctions/CMakeLists.txt +++ b/src/QMCWaveFunctions/CMakeLists.txt @@ -150,11 +150,17 @@ if(OHMMS_DIM MATCHES 3) endif(HAVE_EINSPLINE) # plane wave SPO - set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWBasis.cpp PlaneWave/PWBasisT.cpp PlaneWave/PWParameterSet.cpp PlaneWave/PWOrbitalBuilder.cpp) + set(FERMION_SRCS ${FERMION_SRCS} + PlaneWave/PWBasis.cpp + PlaneWave/PWBasisT.cpp + PlaneWave/PWOrbitalSetT.cpp + PlaneWave/PWRealOrbitalSetT.cpp + PlaneWave/PWParameterSet.cpp + PlaneWave/PWOrbitalBuilder.cpp) if(QMC_COMPLEX) - set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWOrbitalSet.cpp PlaneWave/PWOrbitalSetT.cpp) + set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWOrbitalSet.cpp) else() - set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWRealOrbitalSet.cpp PlaneWave/PWRealOrbitalSetT.cpp) + set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWRealOrbitalSet.cpp) endif(QMC_COMPLEX) if(NOT QMC_COMPLEX) diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderT.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderT.cpp index f48ea6348a..46157f9b28 100644 --- a/src/QMCWaveFunctions/EinsplineSetBuilderT.cpp +++ b/src/QMCWaveFunctions/EinsplineSetBuilderT.cpp @@ -514,19 +514,19 @@ EinsplineSetBuilderT::AnalyzeTwists2( } TargetPtcl.setTwist(superFracs[twist_num_]); -#ifndef QMC_COMPLEX - // Check to see if supercell twist is okay to use with real wave - // functions - for (int dim = 0; dim < OHMMS_DIM; dim++) { - double t = 2.0 * superFracs[twist_num_][dim]; - if (std::abs(t - 
round(t)) > MatchingTol * 100) { - app_error() - << "Cannot use this super twist with real wavefunctions.\n" - << "Please recompile with QMC_COMPLEX=1.\n"; - APP_ABORT("EinsplineSetBuilder::AnalyzeTwists2"); + if constexpr (!IsComplex_t{}()) { + // Check to see if supercell twist is okay to use with real wave + // functions + for (int dim = 0; dim < OHMMS_DIM; dim++) { + double t = 2.0 * superFracs[twist_num_][dim]; + if (std::abs(t - round(t)) > MatchingTol * 100) { + app_error() + << "Cannot use this super twist with real wavefunctions.\n" + << "Please recompile with QMC_COMPLEX=1.\n"; + APP_ABORT("EinsplineSetBuilder::AnalyzeTwists2"); + } } } -#endif // Now check to see that each supercell twist has the right twists // to tile the primitive cell orbitals. const int numTwistsNeeded = std::abs(det(TileMatrix)); @@ -574,78 +574,80 @@ EinsplineSetBuilderT::AnalyzeTwists2( IncludeTwists.push_back(superSets[twist_num_][i]); // Now, find out which twists are distinct DistinctTwists.clear(); -#ifndef QMC_COMPLEX - std::vector copyTwists; - for (int i = 0; i < IncludeTwists.size(); i++) { - int ti = IncludeTwists[i]; - PosType twist_i = primcell_kpoints[ti]; - bool distinct = true; - for (int j = i + 1; j < IncludeTwists.size(); j++) { - int tj = IncludeTwists[j]; - PosType twist_j = primcell_kpoints[tj]; - PosType sum = twist_i + twist_j; - PosType diff = twist_i - twist_j; - if (TwistPair(twist_i, twist_j)) - distinct = false; + if constexpr (!IsComplex_t{}()) { + std::vector copyTwists; + for (int i = 0; i < IncludeTwists.size(); i++) { + int ti = IncludeTwists[i]; + PosType twist_i = primcell_kpoints[ti]; + bool distinct = true; + for (int j = i + 1; j < IncludeTwists.size(); j++) { + int tj = IncludeTwists[j]; + PosType twist_j = primcell_kpoints[tj]; + PosType sum = twist_i + twist_j; + PosType diff = twist_i - twist_j; + if (TwistPair(twist_i, twist_j)) + distinct = false; + } + if (distinct) + DistinctTwists.push_back(ti); + else + copyTwists.push_back(ti); } - 
if (distinct) - DistinctTwists.push_back(ti); - else - copyTwists.push_back(ti); - } - // Now determine which distinct twists require two copies - MakeTwoCopies.resize(DistinctTwists.size()); - for (int i = 0; i < DistinctTwists.size(); i++) { - MakeTwoCopies[i] = false; - int ti = DistinctTwists[i]; - PosType twist_i = primcell_kpoints[ti]; - for (int j = 0; j < copyTwists.size(); j++) { - int tj = copyTwists[j]; - PosType twist_j = primcell_kpoints[tj]; - if (TwistPair(twist_i, twist_j)) - MakeTwoCopies[i] = true; + // Now determine which distinct twists require two copies + MakeTwoCopies.resize(DistinctTwists.size()); + for (int i = 0; i < DistinctTwists.size(); i++) { + MakeTwoCopies[i] = false; + int ti = DistinctTwists[i]; + PosType twist_i = primcell_kpoints[ti]; + for (int j = 0; j < copyTwists.size(); j++) { + int tj = copyTwists[j]; + PosType twist_j = primcell_kpoints[tj]; + if (TwistPair(twist_i, twist_j)) + MakeTwoCopies[i] = true; + } + if (this->myComm->rank() == 0) { + std::array buf; + int length = std::snprintf(buf.data(), buf.size(), + "Using %d copies of twist angle [%6.3f, %6.3f, %6.3f]\n", + MakeTwoCopies[i] ? 2 : 1, twist_i[0], twist_i[1], + twist_i[2]); + if (length < 0) + throw std::runtime_error("Error generating string"); + app_log() << std::string_view(buf.data(), length); + app_log().flush(); + } } - if (this->myComm->rank() == 0) { - std::array buf; - int length = std::snprintf(buf.data(), buf.size(), - "Using %d copies of twist angle [%6.3f, %6.3f, %6.3f]\n", - MakeTwoCopies[i] ? 
2 : 1, twist_i[0], twist_i[1], twist_i[2]); - if (length < 0) - throw std::runtime_error("Error generating string"); - app_log() << std::string_view(buf.data(), length); - app_log().flush(); + // Find out if we can make real orbitals + use_real_splines_ = true; + for (int i = 0; i < DistinctTwists.size(); i++) { + int ti = DistinctTwists[i]; + PosType twist = primcell_kpoints[ti]; + for (int j = 0; j < OHMMS_DIM; j++) + if (std::abs(twist[j] - 0.0) > MatchingTol && + std::abs(twist[j] - 0.5) > MatchingTol && + std::abs(twist[j] + 0.5) > MatchingTol) + use_real_splines_ = false; } + if (use_real_splines_ && (DistinctTwists.size() > 1)) { + app_log() << "***** Use of real orbitals is possible, but not " + "currently implemented\n" + << " with more than one twist angle.\n"; + use_real_splines_ = false; + } + if (use_real_splines_) + app_log() << "Using real splines.\n"; + else + app_log() << "Using complex splines.\n"; } - // Find out if we can make real orbitals - use_real_splines_ = true; - for (int i = 0; i < DistinctTwists.size(); i++) { - int ti = DistinctTwists[i]; - PosType twist = primcell_kpoints[ti]; - for (int j = 0; j < OHMMS_DIM; j++) - if (std::abs(twist[j] - 0.0) > MatchingTol && - std::abs(twist[j] - 0.5) > MatchingTol && - std::abs(twist[j] + 0.5) > MatchingTol) - use_real_splines_ = false; - } - if (use_real_splines_ && (DistinctTwists.size() > 1)) { - app_log() << "***** Use of real orbitals is possible, but not " - "currently implemented\n" - << " with more than one twist angle.\n"; + else { + DistinctTwists.resize(IncludeTwists.size()); + MakeTwoCopies.resize(IncludeTwists.size()); + for (int i = 0; i < IncludeTwists.size(); i++) { + DistinctTwists[i] = IncludeTwists[i]; + MakeTwoCopies[i] = false; + } use_real_splines_ = false; } - if (use_real_splines_) - app_log() << "Using real splines.\n"; - else - app_log() << "Using complex splines.\n"; -#else - DistinctTwists.resize(IncludeTwists.size()); - MakeTwoCopies.resize(IncludeTwists.size()); - for 
(int i = 0; i < IncludeTwists.size(); i++) { - DistinctTwists[i] = IncludeTwists[i]; - MakeTwoCopies[i] = false; - } - use_real_splines_ = false; -#endif } template diff --git a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSet.h b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSet.h index 225033214b..5add827a86 100644 --- a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSet.h +++ b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSet.h @@ -15,8 +15,8 @@ /** @file PWOrbitalSet.h * @brief Definition of member functions of Plane-wave basis set */ -#ifndef QMCPLUSPLUS_PLANEWAVE_ORBITALSETT_BLAS_H -#define QMCPLUSPLUS_PLANEWAVE_ORBITALSETT_BLAS_H +#ifndef QMCPLUSPLUS_PLANEWAVE_ORBITALSET_BLAS_H +#define QMCPLUSPLUS_PLANEWAVE_ORBITALSET_BLAS_H #include "QMCWaveFunctions/PlaneWave/PWBasis.h" #include "QMCWaveFunctions/SPOSet.h" diff --git a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h index d4e13de966..9103a16ee2 100644 --- a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h +++ b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h @@ -18,8 +18,8 @@ /** @file PWOrbitalSetT.h * @brief Definition of member functions of Plane-wave basis set */ -#ifndef QMCPLUSPLUS_PLANEWAVE_ORBITALSET_BLAS_H -#define QMCPLUSPLUS_PLANEWAVE_ORBITALSET_BLAS_H +#ifndef QMCPLUSPLUS_PLANEWAVE_ORBITALSETT_BLAS_H +#define QMCPLUSPLUS_PLANEWAVE_ORBITALSETT_BLAS_H #include "CPU/BLAS.hpp" #include "QMCWaveFunctions/PlaneWave/PWBasisT.h" diff --git a/src/QMCWaveFunctions/RotatedSPOsT.cpp b/src/QMCWaveFunctions/RotatedSPOsT.cpp index dabdc282a9..1aa8af8ada 100644 --- a/src/QMCWaveFunctions/RotatedSPOsT.cpp +++ b/src/QMCWaveFunctions/RotatedSPOsT.cpp @@ -307,7 +307,6 @@ template void RotatedSPOsT::buildOptVariables(const size_t nel) { -#if !defined(QMC_COMPLEX) /* Only rebuild optimized variables if more after-rotation orbitals are * needed Consider ROHF, there is only one set of SPO for both spin up and * down Nup > Ndown. nel_major_ will be set Nup. 
@@ -332,7 +331,6 @@ RotatedSPOsT::buildOptVariables(const size_t nel) buildOptVariables(created_m_act_rot_inds, created_full_rot_inds); } -#endif } template @@ -340,7 +338,6 @@ void RotatedSPOsT::buildOptVariables( const RotationIndices& rotations, const RotationIndices& full_rotations) { -#if !defined(QMC_COMPLEX) const size_t nmo = Phi->getOrbitalSetSize(); // create active rotations @@ -419,7 +416,6 @@ RotatedSPOsT::buildOptVariables( param[i] = this->myVars[i]; apply_rotation(param, false); } -#endif } template @@ -858,33 +854,32 @@ RotatedSPOsT::evaluateDerivatives(ParticleSetT& P, // possibly replace wit BLAS calls for (int i = 0; i < nel; i++) for (int j = 0; j < nmo; j++) - Bbar(i, j) = d2psiM_all(i, j) + - 2.0 * dot(myG_J[i], dpsiM_all(i, j)) + + Bbar(i, j) = d2psiM_all(i, j) + 2 * dot(myG_J[i], dpsiM_all(i, j)) + myL_J[i] * psiM_all(i, j); //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2 - const T* const A(psiM_all.data()); - const T* const Ainv(psiM_inv.data()); - const T* const B(Bbar.data()); - ValueMatrix T_mat; + const ValueType* const A(psiM_all.data()); + const ValueType* const Ainv(psiM_inv.data()); + const ValueType* const B(Bbar.data()); + ValueMatrix t; ValueMatrix Y1; ValueMatrix Y2; ValueMatrix Y3; ValueMatrix Y4; - T_mat.resize(nel, nmo); + t.resize(nel, nmo); Y1.resize(nel, nel); Y2.resize(nel, nmo); Y3.resize(nel, nmo); Y4.resize(nel, nmo); - BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), - T_mat.data(), nmo); - BLAS::gemm('N', 'N', nel, nel, nel, T(1.0), B, nmo, Ainv, nel, T(0.0), - Y1.data(), nel); - BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), T_mat.data(), nmo, Y1.data(), - nel, T(0.0), Y2.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), B, nmo, Ainv, nel, T(0.0), - Y3.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), A, nmo, Ainv, nel, + ValueType(0.0), t.data(), nmo); + BLAS::gemm('N', 'N', nel, nel, nel, ValueType(1.0), B, nmo, Ainv, nel, + ValueType(0.0), Y1.data(), nel); + 
BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), t.data(), nmo, + Y1.data(), nel, ValueType(0.0), Y2.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), B, nmo, Ainv, nel, + ValueType(0.0), Y3.data(), nmo); // possibly replace with BLAS call Y4 = Y3 - Y2; @@ -894,8 +889,8 @@ RotatedSPOsT::evaluateDerivatives(ParticleSetT& P, if (kk >= 0) { const int p = m_act_rot_inds.at(i).first; const int q = m_act_rot_inds.at(i).second; - dlogpsi[kk] += T_mat(p, q); - dhpsioverpsi[kk] += T(-0.5) * Y4(p, q); + dlogpsi[kk] += t(p, q); + dhpsioverpsi[kk] += ValueType(-0.5) * Y4(p, q); } } } diff --git a/src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp b/src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp index 24a5087f79..e5c04d205f 100644 --- a/src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp +++ b/src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp @@ -633,7 +633,8 @@ TEMPLATE_TEST_CASE( xmlNodePtr sposet_builder = xmlFirstElementChild(root); xmlNodePtr sposet_ptr = xmlFirstElementChild(sposet_builder); - EinsplineSetBuilderT einSet(elec, ptcl.getPool(), c, sposet_builder); + EinsplineSetBuilderT einSet( + elec, ptcl.getPool(), c, sposet_builder); auto spo = einSet.createSPOSetFromXML(sposet_ptr); REQUIRE(spo);