From 9d7395b47eae8d0c1fab1a64576ea73eccb80884 Mon Sep 17 00:00:00 2001 From: Philip Fackler Date: Fri, 1 Sep 2023 11:19:01 -0400 Subject: [PATCH] Further template propagation to fix offload build --- src/Numerics/OneDimGridFactory.cpp | 9 +- src/Numerics/OneDimGridFactory.h | 6 +- src/Numerics/SoaCartesianTensor.h | 2 +- src/Numerics/SoaSphericalTensor.h | 2 + src/Particle/CMakeLists.txt | 9 + src/Particle/DistanceTableT.h | 529 ++++ src/Particle/DynamicCoordinatesT.cpp | 43 + src/Particle/DynamicCoordinatesT.h | 154 + src/Particle/Lattice/LRBreakupParameters.h | 2 +- src/Particle/LongRange/KContainerT.cpp | 272 ++ src/Particle/LongRange/KContainerT.h | 115 + src/Particle/LongRange/StructFactT.cpp | 249 ++ src/Particle/LongRange/StructFactT.h | 159 + src/Particle/MCCoordsT.cpp | 69 + src/Particle/MCCoordsT.hpp | 82 + src/Particle/ParticleSetT.cpp | 1200 +++++++ src/Particle/ParticleSetT.h | 980 ++++++ src/Particle/ParticleSetTraits.h | 85 + src/Particle/RealSpacePositionsT.h | 96 + src/Particle/RealSpacePositionsTOMPTarget.h | 328 ++ src/Particle/SimulationCellT.cpp | 74 + src/Particle/SimulationCellT.h | 71 + src/Particle/SoaDistanceTableAAT.h | 237 ++ src/Particle/SoaDistanceTableAATOMPTarget.h | 624 ++++ src/Particle/SoaDistanceTableABT.h | 170 + src/Particle/SoaDistanceTableABTOMPTarget.h | 513 +++ src/Particle/VirtualParticleSetT.cpp | 272 ++ src/Particle/VirtualParticleSetT.h | 175 ++ src/Particle/createDistanceTableT.cpp | 240 ++ src/Particle/createDistanceTableT.h | 89 + .../createDistanceTableTOMPTarget.cpp | 248 ++ src/QMCWaveFunctions/BasisSetBaseT.h | 222 ++ .../BsplineFactory/BsplineSetT.h | 412 +-- .../BsplineFactory/SplineC2CT.cpp | 10 +- .../BsplineFactory/SplineC2CT.h | 10 +- .../BsplineFactory/SplineR2RT.cpp | 901 +++--- .../BsplineFactory/SplineR2RT.h | 425 +-- src/QMCWaveFunctions/CMakeLists.txt | 24 +- src/QMCWaveFunctions/CompositeSPOSetT.cpp | 10 +- src/QMCWaveFunctions/CompositeSPOSetT.h | 10 +- .../ElectronGas/FreeOrbitalBuilderT.cpp | 6 +- 
.../ElectronGas/FreeOrbitalBuilderT.h | 4 +- .../ElectronGas/FreeOrbitalT.cpp | 1182 ++++--- .../ElectronGas/FreeOrbitalT.h | 131 +- .../HarmonicOscillator/SHOSetBuilderT.cpp | 333 +- .../HarmonicOscillator/SHOSetBuilderT.h | 67 +- .../HarmonicOscillator/SHOSetT.cpp | 874 +++--- .../HarmonicOscillator/SHOSetT.h | 283 +- src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp | 923 ++++++ src/QMCWaveFunctions/LCAO/AOBasisBuilderT.h | 75 + .../LCAO/CuspCorrectionConstructionT.cpp | 22 +- .../LCAO/CuspCorrectionConstructionT.h | 17 +- .../LCAO/LCAOSpinorBuilderT.cpp | 343 +- .../LCAO/LCAOSpinorBuilderT.h | 82 +- .../LCAO/LCAOrbitalBuilderT.cpp | 303 +- .../LCAO/LCAOrbitalBuilderT.h | 14 +- src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp | 1764 ++++++----- src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h | 44 +- .../LCAO/LCAOrbitalSetWithCorrectionT.cpp | 82 +- .../LCAO/LCAOrbitalSetWithCorrectionT.h | 74 +- .../LCAO/MultiFunctorAdapter.h | 56 + .../LCAO/RadialOrbitalSetBuilder.h | 2 +- .../LCAO/SoaAtomicBasisSetT.h | 775 +++++ .../LCAO/SoaCuspCorrectionT.cpp | 259 +- .../LCAO/SoaCuspCorrectionT.h | 186 +- .../LCAO/SoaLocalizedBasisSetT.cpp | 469 +++ .../LCAO/SoaLocalizedBasisSetT.h | 190 ++ src/QMCWaveFunctions/OptimizableObjectT.h | 151 + src/QMCWaveFunctions/PlaneWave/PWBasisT.h | 534 ++-- .../PlaneWave/PWOrbitalSetT.cpp | 209 +- .../PlaneWave/PWOrbitalSetT.h | 224 +- src/QMCWaveFunctions/RotatedSPOsT.cpp | 2767 +++++++++-------- src/QMCWaveFunctions/RotatedSPOsT.h | 781 ++--- .../SPOSetBuilderFactoryT.cpp | 50 +- src/QMCWaveFunctions/SPOSetBuilderFactoryT.h | 7 +- src/QMCWaveFunctions/SPOSetScannerT.h | 431 +-- src/QMCWaveFunctions/SPOSetT.cpp | 634 ++-- src/QMCWaveFunctions/SPOSetT.h | 1156 +++---- src/QMCWaveFunctions/SpinorSetT.cpp | 954 +++--- src/QMCWaveFunctions/SpinorSetT.h | 443 +-- src/QMCWaveFunctions/VariableSetT.cpp | 346 +++ src/QMCWaveFunctions/VariableSetT.h | 336 ++ .../tests/ConstantSPOSetT.cpp | 155 +- src/QMCWaveFunctions/tests/ConstantSPOSetT.h | 155 +- 
src/QMCWaveFunctions/tests/FakeSPOT.cpp | 221 +- src/QMCWaveFunctions/tests/FakeSPOT.h | 65 +- .../tests/test_ConstantSPOSetT.cpp | 4 +- src/type_traits/complex_help.hpp | 3 + 88 files changed, 19254 insertions(+), 8060 deletions(-) create mode 100644 src/Particle/DistanceTableT.h create mode 100644 src/Particle/DynamicCoordinatesT.cpp create mode 100644 src/Particle/DynamicCoordinatesT.h create mode 100644 src/Particle/LongRange/KContainerT.cpp create mode 100644 src/Particle/LongRange/KContainerT.h create mode 100644 src/Particle/LongRange/StructFactT.cpp create mode 100644 src/Particle/LongRange/StructFactT.h create mode 100644 src/Particle/MCCoordsT.cpp create mode 100644 src/Particle/MCCoordsT.hpp create mode 100644 src/Particle/ParticleSetT.cpp create mode 100644 src/Particle/ParticleSetT.h create mode 100644 src/Particle/ParticleSetTraits.h create mode 100644 src/Particle/RealSpacePositionsT.h create mode 100644 src/Particle/RealSpacePositionsTOMPTarget.h create mode 100644 src/Particle/SimulationCellT.cpp create mode 100644 src/Particle/SimulationCellT.h create mode 100644 src/Particle/SoaDistanceTableAAT.h create mode 100644 src/Particle/SoaDistanceTableAATOMPTarget.h create mode 100644 src/Particle/SoaDistanceTableABT.h create mode 100644 src/Particle/SoaDistanceTableABTOMPTarget.h create mode 100644 src/Particle/VirtualParticleSetT.cpp create mode 100644 src/Particle/VirtualParticleSetT.h create mode 100644 src/Particle/createDistanceTableT.cpp create mode 100644 src/Particle/createDistanceTableT.h create mode 100644 src/Particle/createDistanceTableTOMPTarget.cpp create mode 100644 src/QMCWaveFunctions/BasisSetBaseT.h create mode 100644 src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp create mode 100644 src/QMCWaveFunctions/LCAO/AOBasisBuilderT.h create mode 100644 src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h create mode 100644 src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp create mode 100644 src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h create 
mode 100644 src/QMCWaveFunctions/OptimizableObjectT.h create mode 100644 src/QMCWaveFunctions/VariableSetT.cpp create mode 100644 src/QMCWaveFunctions/VariableSetT.h diff --git a/src/Numerics/OneDimGridFactory.cpp b/src/Numerics/OneDimGridFactory.cpp index 16a17ec9b3..bc90a9f505 100644 --- a/src/Numerics/OneDimGridFactory.cpp +++ b/src/Numerics/OneDimGridFactory.cpp @@ -13,19 +13,21 @@ #include "OneDimGridFactory.h" +#include "Configuration.h" #include "OhmmsData/AttributeSet.h" #include "Message/UniformCommunicateError.h" namespace qmcplusplus { -std::unique_ptr OneDimGridFactory::createGrid(xmlNodePtr cur) +template +std::unique_ptr::GridType> OneDimGridFactory::createGrid(xmlNodePtr cur) { std::unique_ptr agrid; RealType ri = 1e-5; RealType rf = 100.0; RealType ascale = -1.0e0; RealType astep = 1.25e-2; - IndexType npts = 1001; + QMCTraits::IndexType npts = 1001; std::string gridType("log"); std::string gridID("invalid"); OhmmsAttributeSet radAttrib; @@ -74,4 +76,7 @@ std::unique_ptr OneDimGridFactory::createGrid(xmlNo } return agrid; } + +template struct OneDimGridFactory; +template struct OneDimGridFactory; } // namespace qmcplusplus diff --git a/src/Numerics/OneDimGridFactory.h b/src/Numerics/OneDimGridFactory.h index 6365db25aa..d27b1fb904 100644 --- a/src/Numerics/OneDimGridFactory.h +++ b/src/Numerics/OneDimGridFactory.h @@ -14,15 +14,17 @@ #ifndef QMCPLUSPLUS_ONEDIMGRIDFACTORY_H #define QMCPLUSPLUS_ONEDIMGRIDFACTORY_H -#include "Configuration.h" #include "Numerics/OneDimGridFunctor.h" +#include "Numerics/LibxmlNumericIO.h" namespace qmcplusplus { /** Factory class using Singleton pattern */ -struct OneDimGridFactory : public QMCTraits +template +struct OneDimGridFactory { + using RealType = T; ///typedef of the one-dimensional grid using GridType = OneDimGridBase; diff --git a/src/Numerics/SoaCartesianTensor.h b/src/Numerics/SoaCartesianTensor.h index 21fa7f52bf..540ab826b0 100644 --- a/src/Numerics/SoaCartesianTensor.h +++ 
b/src/Numerics/SoaCartesianTensor.h @@ -37,7 +37,7 @@ namespace qmcplusplus template struct SoaCartesianTensor { - using value_type = T; + using ValueType = T; using ggg_type = TinyVector, 3>; ///maximum angular momentum diff --git a/src/Numerics/SoaSphericalTensor.h b/src/Numerics/SoaSphericalTensor.h index 56c638b42e..c5e4f3e1ae 100644 --- a/src/Numerics/SoaSphericalTensor.h +++ b/src/Numerics/SoaSphericalTensor.h @@ -37,6 +37,8 @@ namespace qmcplusplus template struct SoaSphericalTensor { + using ValueType = T; + ///maximum angular momentum for the center int Lmax; /// Normalization factors diff --git a/src/Particle/CMakeLists.txt b/src/Particle/CMakeLists.txt index 42f036d057..b6517626c1 100644 --- a/src/Particle/CMakeLists.txt +++ b/src/Particle/CMakeLists.txt @@ -15,22 +15,30 @@ set(PARTICLE InitMolecularSystem.cpp SimulationCell.cpp + SimulationCellT.cpp ParticleSetPool.cpp ParticleSet.cpp + ParticleSetT.cpp PSdispatcher.cpp VirtualParticleSet.cpp + VirtualParticleSetT.cpp ParticleSet.BC.cpp DynamicCoordinatesBuilder.cpp + DynamicCoordinatesT.cpp MCCoords.cpp + MCCoordsT.cpp MCWalkerConfiguration.cpp WalkerConfigurations.cpp SpeciesSet.cpp SampleStack.cpp createDistanceTableAA.cpp createDistanceTableAB.cpp + createDistanceTableT.cpp HDFWalkerInputManager.cpp LongRange/KContainer.cpp + LongRange/KContainerT.cpp LongRange/StructFact.cpp + LongRange/StructFactT.cpp LongRange/LPQHIBasis.cpp LongRange/LPQHISRCoulombBasis.cpp LongRange/EwaldHandlerQuasi2D.cpp @@ -51,6 +59,7 @@ target_include_directories(qmcparticle PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}") target_link_libraries(qmcparticle PRIVATE platform_cpu_LA) target_link_libraries(qmcparticle PUBLIC qmcnumerics qmcutil platform_runtime) set(PARTICLE_OMPTARGET_SRCS + createDistanceTableTOMPTarget.cpp createDistanceTableAAOMPTarget.cpp createDistanceTableABOMPTarget.cpp) diff --git a/src/Particle/DistanceTableT.h b/src/Particle/DistanceTableT.h new file mode 100644 index 0000000000..5eaba1bd44 --- /dev/null +++ 
b/src/Particle/DistanceTableT.h @@ -0,0 +1,529 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of +// Illinois at Urbana-Champaign Jaron T. Krogel, +// krogeljt@ornl.gov, Oak Ridge National Laboratory Mark A. +// Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_DISTANCETABLEDATAIMPLT_H +#define QMCPLUSPLUS_DISTANCETABLEDATAIMPLT_H + +#include + +#include "CPU/SIMD/aligned_allocator.hpp" +#include "DTModes.h" +#include "OhmmsPETE/OhmmsMatrix.h" +#include "OhmmsPETE/OhmmsVector.h" +#include "OhmmsSoA/VectorSoaContainer.h" +#include "Particle/ParticleSetT.h" +#include "Particle/ParticleSetTraits.h" + +namespace qmcplusplus +{ +class ResourceCollection; + +/** @ingroup nnlist + * @brief Abstract class to manage operations on pair data between two + * ParticleSets. + * + * Each DistanceTable object is defined by Source and Target of ParticleSet + * types. This base class doesn't contain storage. It is intended for + * update/compute invoked by ParticleSet. Derived AA/AB classes handle the + * actual storage and data access. 
+ */ +template +class DistanceTableT +{ +public: + static constexpr unsigned DIM = OHMMS_DIM; + + using IndexType = typename ParticleSetTraits::IndexType; + using RealType = typename ParticleSetTraits::RealType; + using PosType = typename ParticleSetTraits::PosType; + using DistRow = Vector>; + using DisplRow = VectorSoaContainer; + +protected: + // FIXME. once DT takes only DynamicCoordinates, change this type as well. + const ParticleSetT& origin_; + + const size_t num_sources_; + const size_t num_targets_; + + /// name of the table + const std::string name_; + + /// operation modes defined by DTModes + DTModes modes_; + +public: + /// constructor using source and target ParticleSet + DistanceTableT(const ParticleSetT& source, const ParticleSetT& target, + DTModes modes) : + origin_(source), + num_sources_(source.getTotalNum()), + num_targets_(target.getTotalNum()), + name_(source.getName() + "_" + target.getName()), + modes_(modes) + { + } + + /// copy constructor. deleted + DistanceTableT(const DistanceTableT&) = delete; + + /// virtual destructor + virtual ~DistanceTableT() = default; + + /// get modes + inline DTModes + getModes() const + { + return modes_; + } + + /// set modes + inline void + setModes(DTModes modes) + { + modes_ = modes; + } + + /// return the name of table + inline const std::string& + getName() const + { + return name_; + } + + /// returns the reference to the origin particleset + const ParticleSetT& + get_origin() const + { + return origin_; + } + + /// returns the number of centers + inline size_t + centers() const + { + return origin_.getTotalNum(); + } + + /// returns the number of target particles + inline size_t + targets() const + { + return num_targets_; + } + + /// returns the number of source particles + inline size_t + sources() const + { + return num_sources_; + } + + /** evaluate the full Distance Table + * @param P the target particle set + */ + virtual void + evaluate(ParticleSetT& P) = 0; + virtual void + mw_evaluate(const 
RefVectorWithLeader& dt_list, + const RefVectorWithLeader>& p_list) const + { + for (int iw = 0; iw < dt_list.size(); iw++) + dt_list[iw].evaluate(p_list[iw]); + } + + /** recompute multi walker internal data, recompute + * @param dt_list the distance table batch + * @param p_list the target particle set batch + * @param recompute if true, must recompute. Otherwise, implementation + * dependent. + */ + virtual void + mw_recompute(const RefVectorWithLeader& dt_list, + const RefVectorWithLeader>& p_list, + const std::vector& recompute) const + { + for (int iw = 0; iw < dt_list.size(); iw++) + if (recompute[iw]) + dt_list[iw].evaluate(p_list[iw]); + } + + /** evaluate the temporary pair relations when a move is proposed + * @param P the target particle set + * @param rnew proposed new position + * @param iat the particle to be moved + * @param prepare_old if true, prepare (temporary) old distances and + * displacements for using getOldDists and getOldDispls functions in + * acceptMove. + * + * Note: some distance table consumers (WaveFunctionComponent) have + * optimized code paths which require prepare_old = true for accepting a + * move. Drivers/Hamiltonians know whether moves will be accepted or not and + * manage this flag when calling ParticleSet::makeMoveXXX functions. + */ + virtual void + move(const ParticleSetT& P, const PosType& rnew, const IndexType iat, + bool prepare_old = true) = 0; + + /** walker batched version of move. this function may be implemented + * asynchronously. Additional synchronization for collecting results should + * be handled by the caller. If DTModes::NEED_TEMP_DATA_ON_HOST, host data + * will be updated. If no consumer requests data on the host, the transfer + * is skipped. 
+ */ + virtual void + mw_move(const RefVectorWithLeader& dt_list, + const RefVectorWithLeader>& p_list, + const std::vector& rnew_list, const IndexType iat, + bool prepare_old = true) const + { + for (int iw = 0; iw < dt_list.size(); iw++) + dt_list[iw].move(p_list[iw], rnew_list[iw], iat, prepare_old); + } + + /** update the distance table by the pair relations from the temporal + * position. Used when a move is accepted in regular mode + * @param jat the particle with an accepted move + */ + virtual void + update(IndexType jat) = 0; + + /** fill partially the distance table by the pair relations from the + * temporary or old particle position. Used in forward mode when a move is + * rejected + * @param jat the particle with an accepted move + * @param from_temp if true, copy from temp. if false, copy from old + */ + virtual void + updatePartial(IndexType jat, bool from_temp) + { + if (from_temp) + update(jat); + } + + /** walker batched version of updatePartial. + * If not DTModes::NEED_TEMP_DATA_ON_HOST, host data is not up-to-date and + * host distance table will not be updated. + */ + virtual void + mw_updatePartial(const RefVectorWithLeader& dt_list, + IndexType jat, const std::vector& from_temp) + { + for (int iw = 0; iw < dt_list.size(); iw++) + dt_list[iw].updatePartial(jat, from_temp[iw]); + } + + /** finalize distance table calculation after particle-by-particle moves + * if update() doesn't make the table up-to-date during p-by-p moves + * finalizePbyP takes action to bring the table up-to-date + */ + virtual void + finalizePbyP(const ParticleSetT& P) + { + } + + /** walker batched version of finalizePbyP + * If not DTModes::NEED_TEMP_DATA_ON_HOST, host distance table data is not + * updated at all during p-by-p. Thus, a recompute is necessary to update the + * whole host distance table for consumers like the Coulomb potential. 
+ */ + virtual void + mw_finalizePbyP(const RefVectorWithLeader& dt_list, + const RefVectorWithLeader>& p_list) const + { + for (int iw = 0; iw < dt_list.size(); iw++) + dt_list[iw].finalizePbyP(p_list[iw]); + } + + /** find the first nearest neighbor + * @param iat source particle id + * @param r distance + * @param dr displacement + * @param newpos if true, use the data in temp_r_ and temp_dr_ for the + * proposed move. if false, use the data in distance_[iat] and + * displacements_[iat] + * @return the id of the nearest particle, -1 not found + */ + virtual int + get_first_neighbor( + IndexType iat, RealType& r, PosType& dr, bool newpos) const = 0; + + [[noreturn]] inline void + print(std::ostream& os) + { + throw std::runtime_error("DistanceTable::print is not supported"); + } + + /// initialize a shared resource and hand it to a collection + virtual void + createResource(ResourceCollection& collection) const + { + } + + /// acquire a shared resource from a collection + virtual void + acquireResource(ResourceCollection& collection, + const RefVectorWithLeader& dt_list) const + { + } + + /// return a shared resource to a collection + virtual void + releaseResource(ResourceCollection& collection, + const RefVectorWithLeader& dt_list) const + { + } +}; + +/** AA type of DistanceTable containing storage */ +template +class DistanceTableAAT : public DistanceTableT +{ +public: + using DistRow = typename DistanceTableT::DistRow; + using DisplRow = typename DistanceTableT::DisplRow; + using RealType = typename DistanceTableT::RealType; + +protected: + /** distances_[num_targets_][num_sources_], [i][3][j] = |r_A2[j] - r_A1[i]| + * Note: Derived classes decide if it is a memory view or the actual + * storage For only the lower triangle (j=i terms as the nature of + * operator[]. When the storage of the table is allocated as a single memory + * segment, out-of-bound access is still within the segment and thus doesn't + * trigger an alarm by the address sanitizer. 
+ */ + std::vector distances_; + + /** displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - + * r_A1[i] Note: Derived classes decide if it is a memory view or the actual + * storage only the lower triangle (j displacements_; + + /// temp_r + DistRow temp_r_; + + /// temp_dr + DisplRow temp_dr_; + + /// old distances + DistRow old_r_; + + /// old displacements + DisplRow old_dr_; + +public: + /// constructor using source and target ParticleSet + DistanceTableAAT(const ParticleSetT& target, DTModes modes) : + DistanceTableT(target, target, modes) + { + } + + /** return full table distances + */ + const std::vector& + getDistances() const + { + return distances_; + } + + /** return full table displacements + */ + const std::vector& + getDisplacements() const + { + return displacements_; + } + + /** return a row of distances for a given target particle + */ + const DistRow& + getDistRow(int iel) const + { + return distances_[iel]; + } + + /** return a row of displacements for a given target particle + */ + const DisplRow& + getDisplRow(int iel) const + { + return displacements_[iel]; + } + + /** return the temporary distances when a move is proposed + */ + const DistRow& + getTempDists() const + { + return temp_r_; + } + + /** return the temporary displacements when a move is proposed + */ + const DisplRow& + getTempDispls() const + { + return temp_dr_; + } + + /** return old distances set up by move() for optimized distance table + * consumers + */ + const DistRow& + getOldDists() const + { + return old_r_; + } + + /** return old displacements set up by move() for optimized distance table + * consumers + */ + const DisplRow& + getOldDispls() const + { + return old_dr_; + } + + virtual size_t + get_num_particls_stored() const + { + return 0; + } + + /// return multi walker temporary pair distance table data pointer + [[noreturn]] virtual const RealType* + getMultiWalkerTempDataPtr() const + { + throw std::runtime_error( + this->name_ + " multi walker 
data pointer for temp not supported"); + } + + virtual const RealType* + mw_evalDistsInRange(const RefVectorWithLeader>& dt_list, + const RefVectorWithLeader>& p_list, size_t range_begin, + size_t range_end) const + { + return nullptr; + } +}; + +/** AB type of DistanceTable containing storage */ +template +class DistanceTableABT : public DistanceTableT +{ +public: + using DistRow = typename DistanceTableT::DistRow; + using DisplRow = typename DistanceTableT::DisplRow; + using RealType = typename DistanceTableT::RealType; + +protected: + /** distances_[num_targets_][num_sources_], [i][3][j] = |r_A2[j] - r_A1[i]| + * Note: Derived classes decide if it is a memory view or the actual + * storage + */ + std::vector distances_; + + /** displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - + * r_A1[i] Note: Derived classes decide if it is a memory view or the actual + * storage + */ + std::vector displacements_; + + /// temp_r + DistRow temp_r_; + + /// temp_dr + DisplRow temp_dr_; + +public: + /// constructor using source and target ParticleSet + DistanceTableABT(const ParticleSetT& source, + const ParticleSetT& target, DTModes modes) : + DistanceTableT(source, target, modes) + { + } + + /** return full table distances + */ + const std::vector& + getDistances() const + { + return distances_; + } + + /** return full table displacements + */ + const std::vector& + getDisplacements() const + { + return displacements_; + } + + /** return a row of distances for a given target particle + */ + const DistRow& + getDistRow(int iel) const + { + return distances_[iel]; + } + + /** return a row of displacements for a given target particle + */ + const DisplRow& + getDisplRow(int iel) const + { + return displacements_[iel]; + } + + /** return the temporary distances when a move is proposed + */ + const DistRow& + getTempDists() const + { + return temp_r_; + } + + /** return the temporary displacements when a move is proposed + */ + const DisplRow& + getTempDispls() 
const + { + return temp_dr_; + } + + /// return multi-walker full (all pairs) distance table data pointer + [[noreturn]] virtual const RealType* + getMultiWalkerDataPtr() const + { + throw std::runtime_error( + this->name_ + " multi walker data pointer not supported"); + } + + /// return stride of per target pctl data. full table data = stride * num of + /// target particles + [[noreturn]] virtual size_t + getPerTargetPctlStrideSize() const + { + throw std::runtime_error( + this->name_ + " getPerTargetPctlStrideSize not supported"); + } +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/DynamicCoordinatesT.cpp b/src/Particle/DynamicCoordinatesT.cpp new file mode 100644 index 0000000000..b563d264c1 --- /dev/null +++ b/src/Particle/DynamicCoordinatesT.cpp @@ -0,0 +1,43 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. 
+// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#include "Particle/DynamicCoordinatesT.h" + +#include "Particle/RealSpacePositionsT.h" +#include "Particle/RealSpacePositionsTOMPTarget.h" + +namespace qmcplusplus +{ +/** create DynamicCoordinates based on kind + */ +template +std::unique_ptr> +createDynamicCoordinatesT(const DynamicCoordinateKind kind) +{ + if (kind == DynamicCoordinateKind::DC_POS) + return std::make_unique>(); + else if (kind == DynamicCoordinateKind::DC_POS_OFFLOAD) + return std::make_unique>(); + // dummy return + return std::unique_ptr>(); +} + +template std::unique_ptr> +createDynamicCoordinatesT(const DynamicCoordinateKind kind); +template std::unique_ptr> +createDynamicCoordinatesT(const DynamicCoordinateKind kind); +template std::unique_ptr>> +createDynamicCoordinatesT>( + const DynamicCoordinateKind kind); +template std::unique_ptr>> +createDynamicCoordinatesT>( + const DynamicCoordinateKind kind); +} // namespace qmcplusplus diff --git a/src/Particle/DynamicCoordinatesT.h b/src/Particle/DynamicCoordinatesT.h new file mode 100644 index 0000000000..d7fc1994fa --- /dev/null +++ b/src/Particle/DynamicCoordinatesT.h @@ -0,0 +1,154 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. 
+// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +/** @file DynamicCoordinatesT.h + */ +#ifndef QMCPLUSPLUS_DYNAMICCOORDINATEST_H +#define QMCPLUSPLUS_DYNAMICCOORDINATEST_H + +#include + +#include "OhmmsSoA/VectorSoaContainer.h" +#include "ParticleSetTraits.h" +#include "type_traits/template_types.hpp" +#include "DynamicCoordinates.h" + +namespace qmcplusplus +{ +class ResourceCollection; + +/** enumerator for DynamicCoordinates kinds + */ +// enum class DynamicCoordinateKind +// { +// DC_POS, // SoA positions +// DC_POS_OFFLOAD, // SoA positions with OpenMP offload +// }; + +/** quantum variables of all the particles + */ +template +class DynamicCoordinatesT +{ +public: + using RealType = typename ParticleSetTraits::RealType; + using PosType = typename ParticleSetTraits::PosType; + using ParticlePos = typename LatticeParticleTraits::ParticlePos; + using PosVectorSoa = + VectorSoaContainer::DIM>; + + DynamicCoordinatesT(const DynamicCoordinateKind kind_in) : + variable_kind_(kind_in) + { + } + + DynamicCoordinatesT(const DynamicCoordinatesT&) = default; + DynamicCoordinatesT& + operator=(const DynamicCoordinatesT&) = delete; + + DynamicCoordinateKind + getKind() const + { + return variable_kind_; + } + + virtual ~DynamicCoordinatesT() = default; + + virtual std::unique_ptr + makeClone() = 0; + + /** resize internal storages based on the number of particles + * @param n the number of particles + */ + virtual void + resize(size_t n) = 0; + /// return the number of particles + virtual size_t + size() const = 0; + + /// overwrite the positions of all the particles. + virtual void + setAllParticlePos(const ParticlePos& R) = 0; + /// overwrite the position of one the particle. 
+ virtual void + setOneParticlePos(const PosType& pos, size_t iat) = 0; + /** copy the active positions of particles with a uniform id in all the + * walkers to a single internal buffer. + * @param coords_list a batch of DynamicCoordinates + * @param iat particle id, uniform across coords_list + * @param new_positions proposed positions + */ + virtual void + mw_copyActivePos( + const RefVectorWithLeader& coords_list, size_t iat, + const std::vector& new_positions) const + { + assert(this == &coords_list.getLeader()); + } + + /** overwrite the positions of particles with a uniform id in all the + * walkers upon acceptance. + * @param coords_list a batch of DynamicCoordinates + * @param iat particle id, uniform across coords_list + * @param new_positions proposed positions + * @param isAccepted accept/reject info + */ + virtual void + mw_acceptParticlePos( + const RefVectorWithLeader& coords_list, size_t iat, + const std::vector& new_positions, + const std::vector& isAccepted) const = 0; + + /// all particle position accessor + virtual const PosVectorSoa& + getAllParticlePos() const = 0; + /// one particle position accessor + virtual PosType + getOneParticlePos(size_t iat) const = 0; + + /// secure internal data consistency after p-by-p moves + virtual void + donePbyP() + { + } + + /// initialize a shared resource and hand it to a collection + virtual void + createResource(ResourceCollection& collection) const + { + } + + /// acquire a shared resource from a collection + virtual void + acquireResource(ResourceCollection& collection, + const RefVectorWithLeader& coords_list) const + { + } + + /// return a shared resource to a collection + virtual void + releaseResource(ResourceCollection& collection, + const RefVectorWithLeader& coords_list) const + { + } + +protected: + /// type of dynamic coordinates + const DynamicCoordinateKind variable_kind_; +}; + +/** create DynamicCoordinates based on kind + */ +template +std::unique_ptr> createDynamicCoordinatesT( + const 
DynamicCoordinateKind kind = DynamicCoordinateKind::DC_POS); +} // namespace qmcplusplus +#endif diff --git a/src/Particle/Lattice/LRBreakupParameters.h b/src/Particle/Lattice/LRBreakupParameters.h index da44f6fc40..4096bf0e42 100644 --- a/src/Particle/Lattice/LRBreakupParameters.h +++ b/src/Particle/Lattice/LRBreakupParameters.h @@ -57,7 +57,7 @@ class LRBreakupParameters T beta2 = (dot(v1, v1) * dot(c, v2) - dot(v1, v2) * dot(c, v1)) / (dot(v1, v1) * dot(v2, v2) - dot(v1, v2) * dot(v1, v2)); TinyVector p = beta1 * v1 + beta2 * v2; - T dist = sqrt(dot(p - c, p - c)); + T dist = std::sqrt(dot(p - c, p - c)); LR_rc = std::min(LR_rc, dist); } //Set KC for structure-factor and LRbreakups. diff --git a/src/Particle/LongRange/KContainerT.cpp b/src/Particle/LongRange/KContainerT.cpp new file mode 100644 index 0000000000..eee850387d --- /dev/null +++ b/src/Particle/LongRange/KContainerT.cpp @@ -0,0 +1,272 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of +// Illinois at Urbana-Champaign Mark A. 
Berrill, +// berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#include "KContainerT.h" + +#include "LRCoulombSingleton.h" +#include "Message/Communicate.h" +#include "Utilities/qmc_common.h" + +#include +#include + +namespace qmcplusplus +{ +template +void +KContainerT::updateKLists(const ParticleLayout& lattice, RealType kc, + unsigned ndim, const PosType& twist, bool useSphere) +{ + kcutoff = kc; + if (kcutoff <= 0.0) { + APP_ABORT(" Illegal cutoff for KContainer"); + } + findApproxMMax(lattice, ndim); + BuildKLists(lattice, twist, useSphere); + + app_log() << " KContainer initialised with cutoff " << kcutoff + << std::endl; + app_log() << " # of K-shell = " << kshell.size() << std::endl; + app_log() << " # of K points = " << kpts.size() << std::endl; + app_log() << std::endl; +} + +template +void +KContainerT::findApproxMMax(const ParticleLayout& lattice, unsigned ndim) +{ + // Estimate the size of the parallelepiped that encompasses a sphere of + // kcutoff. mmax is stored as integer translations of the reciprocal cell + // vectors. Does not require an orthorhombic cell. + /* Old method. 
+ //2pi is not included in lattice.b + Matrix mmat; + mmat.resize(3,3); + for(int j=0;j<3;j++) + for(int i=0;i<3;i++){ + mmat[i][j] = 0.0; + for(int k=0;k<3;k++) + mmat[i][j] = mmat[i][j] + 4.0*M_PI*M_PI*lattice.b(k)[i]*lattice.b(j)[k]; + } + + TinyVector x,temp; + RealType tempr; + for(int idim=0;idim<3;idim++){ + int i = ((idim)%3); + int j = ((idim+1)%3); + int k = ((idim+2)%3); + + x[i] = 1.0; + x[j] = (mmat[j][k]*mmat[k][i] - mmat[k][k]*mmat[i][j]); + x[j]/= (mmat[j][j]*mmat[k][k] - mmat[j][k]*mmat[j][k]); + x[k] = -(mmat[k][i] + mmat[j][k]*x[j])/mmat[k][k]; + + for(i=0;i<3;i++){ + temp[i] = 0.0; + for(j=0;j<3;j++) + temp[i] += mmat[i][j]*x[j]; + } + + tempr = dot(x,temp); + mmax[idim] = static_cast(sqrt(4.0*kcut2/tempr)) + 1; + } + */ + // see rmm, Electronic Structure, p. 85 for details + for (int i = 0; i < DIM; i++) + mmax[i] = static_cast( + std::floor(std::sqrt(dot(lattice.a(i), lattice.a(i))) * + kcutoff / (2 * M_PI))) + + 1; + + mmax[DIM] = mmax[0]; + for (int i = 1; i < DIM; ++i) + mmax[DIM] = std::max(mmax[i], mmax[DIM]); + + // overwrite the non-periodic directon to be zero + if (LRCoulombSingleton::isQuasi2D()) { + app_log() << " No kspace sum perpendicular to slab " << std::endl; + mmax[2] = 0; + } + if (ndim < 3) { + app_log() << " No kspace sum along z " << std::endl; + mmax[2] = 0; + } + if (ndim < 2) + mmax[1] = 0; +} + +template +void +KContainerT::BuildKLists( + const ParticleLayout& lattice, const PosType& twist, bool useSphere) +{ + TinyVector TempActualMax; + TinyVector kvec; + TinyVector kvec_cart; + RealType modk2; + std::vector> kpts_tmp; + std::vector kpts_cart_tmp; + std::vector ksq_tmp; + // reserve the space for memory efficiency + if (useSphere) { + const RealType kcut2 = kcutoff * kcutoff; + // Loop over guesses for valid k-points. 
+ for (int i = -mmax[0]; i <= mmax[0]; i++) { + kvec[0] = i; + for (int j = -mmax[1]; j <= mmax[1]; j++) { + kvec[1] = j; + for (int k = -mmax[2]; k <= mmax[2]; k++) { + kvec[2] = k; + // Do not include k=0 in evaluations. + if (i == 0 && j == 0 && k == 0) + continue; + // Convert kvec to Cartesian + kvec_cart = lattice.k_cart(kvec + twist); + // Find modk + modk2 = dot(kvec_cart, kvec_cart); + if (modk2 > kcut2) + continue; // Inside cutoff? + // This k-point should be added to the list + kpts_tmp.push_back(kvec); + kpts_cart_tmp.push_back(kvec_cart); + ksq_tmp.push_back(modk2); + // Update record of the allowed maximum translation. + for (int idim = 0; idim < 3; idim++) + if (std::abs(kvec[idim]) > TempActualMax[idim]) + TempActualMax[idim] = std::abs(kvec[idim]); + } + } + } + } + else { + // Loop over all k-points in the parallelpiped and add them to + // kcontainer note layout is for interfacing with fft, so for each + // dimension, the positive indexes come first then the negative indexes + // backwards e.g. 0, 1, .... mmax, -mmax+1, -mmax+2, ... 
-1 + const int idimsize = mmax[0] * 2; + const int jdimsize = mmax[1] * 2; + const int kdimsize = mmax[2] * 2; + for (int i = 0; i < idimsize; i++) { + kvec[0] = i; + if (kvec[0] > mmax[0]) + kvec[0] -= idimsize; + for (int j = 0; j < jdimsize; j++) { + kvec[1] = j; + if (kvec[1] > mmax[1]) + kvec[1] -= jdimsize; + for (int k = 0; k < kdimsize; k++) { + kvec[2] = k; + if (kvec[2] > mmax[2]) + kvec[2] -= kdimsize; + // get cartesian location and modk2 + kvec_cart = lattice.k_cart(kvec); + modk2 = dot(kvec_cart, kvec_cart); + // add k-point to lists + kpts_tmp.push_back(kvec); + kpts_cart_tmp.push_back(kvec_cart); + ksq_tmp.push_back(modk2); + } + } + } + // set allowed maximum translation + TempActualMax[0] = mmax[0]; + TempActualMax[1] = mmax[1]; + TempActualMax[2] = mmax[2]; + } + + // Update a record of the number of k vectors + numk = kpts_tmp.size(); + std::map*> kpts_sorted; + // create the map: use simple integer with resolution of 0.00000001 in ksq + for (int ik = 0; ik < numk; ik++) { + // This is a workaround for ewald bug (Issue #2105). Basically, 1e-7 is + // the resolution of |k|^2 for doubles, so we jack up the tolerance to + // match that. 
+ const int64_t k_ind = static_cast(ksq_tmp[ik] * 10000000); + auto it(kpts_sorted.find(k_ind)); + if (it == kpts_sorted.end()) { + std::vector* newSet = new std::vector; + kpts_sorted[k_ind] = newSet; + newSet->push_back(ik); + } + else { + (*it).second->push_back(ik); + } + } + std::map*>::iterator it(kpts_sorted.begin()); + kpts.resize(numk); + kpts_cart.resize(numk); + kpts_cart_soa_.resize(numk); + ksq.resize(numk); + kshell.resize(kpts_sorted.size() + 1, 0); + int ok = 0, ish = 0; + while (it != kpts_sorted.end()) { + std::vector::iterator vit((*it).second->begin()); + while (vit != (*it).second->end()) { + int ik = (*vit); + kpts[ok] = kpts_tmp[ik]; + kpts_cart[ok] = kpts_cart_tmp[ik]; + kpts_cart_soa_(ok) = kpts_cart_tmp[ik]; + ksq[ok] = ksq_tmp[ik]; + ++vit; + ++ok; + } + kshell[ish + 1] = kshell[ish] + (*it).second->size(); + ++it; + ++ish; + } + kpts_cart_soa_.updateTo(); + it = kpts_sorted.begin(); + std::map*>::iterator e_it(kpts_sorted.end()); + while (it != e_it) { + delete it->second; + it++; + } + // Finished searching k-points. Copy list of maximum translations. + mmax[DIM] = 0; + for (int idim = 0; idim < DIM; idim++) { + mmax[idim] = TempActualMax[idim]; + mmax[DIM] = std::max(mmax[idim], mmax[DIM]); + // if(mmax[idim] > mmax[DIM]) mmax[DIM] = mmax[idim]; + } + // Now fill the array that returns the index of -k when given the index of + // k. + minusk.resize(numk); + + // Assigns a unique hash value to each kpoint. 
+ auto getHashOfVec = [](const auto& inpv, int hashparam) -> int64_t { + int64_t hash = 0; // this will cause integral promotion below + for (int i = 0; i < inpv.Size; ++i) + hash += inpv[i] + hash * hashparam; + return hash; + }; + + // Create a map from the hash value for each k vector to the index + std::map hashToIndex; + for (int ki = 0; ki < numk; ki++) { + hashToIndex[getHashOfVec(kpts[ki], numk)] = ki; + } + // Use the map to find the index of -k from the index of k + for (int ki = 0; ki < numk; ki++) { + minusk[ki] = hashToIndex[getHashOfVec(-1 * kpts[ki], numk)]; + } +} + +template class KContainerT; +template class KContainerT; +template class KContainerT>; +template class KContainerT>; +} // namespace qmcplusplus diff --git a/src/Particle/LongRange/KContainerT.h b/src/Particle/LongRange/KContainerT.h new file mode 100644 index 0000000000..2f975569cc --- /dev/null +++ b/src/Particle/LongRange/KContainerT.h @@ -0,0 +1,115 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of +// Illinois at Urbana-Champaign Mark A. 
Berrill, +// berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_KCONTAINERT_H +#define QMCPLUSPLUS_KCONTAINERT_H + +#include "OMPTarget/OffloadAlignedAllocators.hpp" +#include "OhmmsSoA/VectorSoaContainer.h" +#include "ParticleSetTraits.h" + +namespace qmcplusplus +{ +/** Container for k-points + * + * It generates a set of k-points that are unit-translations of the + * reciprocal-space cell. K-points are generated within a spherical cutoff set + * by the supercell + */ +template +class KContainerT +{ +public: + static constexpr auto DIM = ParticleSetTraits::DIM; + using RealType = typename ParticleSetTraits::RealType; + using PosType = typename ParticleSetTraits::PosType; + +private: + /// The cutoff up to which k-vectors are generated. + RealType kcutoff; + +public: + // Typedef for the lattice-type + using ParticleLayout = typename LatticeParticleTraits::ParticleLayout; + + /// number of k-points + int numk; + + /** maximum integer translations of reciprocal cell within kc. + * + * Last index is max. of first dimension+1 + */ + TinyVector mmax; + + /** K-vector in reduced coordinates + */ + std::vector> kpts; + /** K-vector in Cartesian coordinates + */ + std::vector kpts_cart; + /** squre of kpts in Cartesian coordniates + */ + std::vector ksq; + /** Given a k index, return index to -k + */ + std::vector minusk; + /** kpts which belong to the ith-shell [kshell[i], kshell[i+1]) */ + std::vector kshell; + + /** k points sorted by the |k| excluding |k|=0 + * + * The first for |k| + * The second for a map to the full index. The size of the second is the + * degeneracy. 
+ */ + // std::map*> kpts_sorted; + + /** update k-vectors + * @param sc supercell + * @param kc cutoff radius in the K + * @param twist shifts the center of the grid of k-vectors + * @param useSphere if true, use the |K| + */ + void + updateKLists(const ParticleLayout& lattice, RealType kc, unsigned ndim, + const PosType& twist = PosType(), bool useSphere = true); + + const auto& + get_kpts_cart_soa() const + { + return kpts_cart_soa_; + } + +private: + /** compute approximate parallelpiped that surrounds kc + * @param lattice supercell + */ + void + findApproxMMax(const ParticleLayout& lattice, unsigned ndim); + /** construct the container for k-vectors */ + void + BuildKLists( + const ParticleLayout& lattice, const PosType& twist, bool useSphere); + + /** K-vector in Cartesian coordinates in SoA layout + */ + VectorSoaContainer> + kpts_cart_soa_; +}; + +} // namespace qmcplusplus + +#endif diff --git a/src/Particle/LongRange/StructFactT.cpp b/src/Particle/LongRange/StructFactT.cpp new file mode 100644 index 0000000000..6f1dae8a9e --- /dev/null +++ b/src/Particle/LongRange/StructFactT.cpp @@ -0,0 +1,249 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Bryan Clark, bclark@Princeton.edu, Princeton University +// Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign Jeongnim Kim, +// jeongnim.kim@gmail.com, University of Illinois at +// Urbana-Champaign Mark A. 
Berrill, berrillma@ornl.gov, Oak +// Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#include "StructFactT.h" + +#include "CPU/BLAS.hpp" +#include "CPU/SIMD/vmath.hpp" +#include "CPU/e2iphi.h" +#include "CPU/math.hpp" +#include "LRCoulombSingleton.h" +#include "OMPTarget/OMPTargetMath.hpp" +#include "RealSpacePositionsTOMPTarget.h" +#include "Utilities/qmc_common.h" +#include "ParticleSetT.h" + +namespace qmcplusplus +{ +// Constructor - pass arguments to k_lists_' constructor +template +StructFactT::StructFactT( + const ParticleLayout& lattice, const KContainer& k_lists) : + SuperCellEnum(SUPERCELL_BULK), + k_lists_(k_lists), + StorePerParticle(false), + update_all_timer_( + createGlobalTimer("StructFact::update_all_part", timer_level_fine)) +{ + if (LRCoulombSingleton::isQuasi2D()) { + app_log() << " Setting StructFact::SuperCellEnum=SUPERCELL_SLAB " + << std::endl; + SuperCellEnum = SUPERCELL_SLAB; + } +} + +// Destructor +template +StructFactT::~StructFactT() = default; + +template +void +StructFactT::resize(int nkpts, int num_species, int num_ptcls) +{ + rhok_r.resize(num_species, nkpts); + rhok_i.resize(num_species, nkpts); + if (StorePerParticle) { + eikr_r.resize(num_ptcls, nkpts); + eikr_i.resize(num_ptcls, nkpts); + } +} + +template +void +StructFactT::updateAllPart(const ParticleSetT& P) +{ + ScopedTimer local(update_all_timer_); + computeRhok(P); +} + +template +void +StructFactT::mw_updateAllPart( + const RefVectorWithLeader& sk_list, + const RefVectorWithLeader>& p_list, + SKMultiWalkerMemT& mw_mem) +{ + auto& sk_leader = sk_list.getLeader(); + auto& p_leader = p_list.getLeader(); + ScopedTimer local(sk_leader.update_all_timer_); + if (p_leader.getCoordinates().getKind() != + DynamicCoordinateKind::DC_POS_OFFLOAD || + sk_leader.StorePerParticle) + for (int iw = 0; iw < 
sk_list.size(); iw++) + sk_list[iw].computeRhok(p_list[iw]); + else { + const size_t nw = p_list.size(); + const size_t num_species = p_leader.groups(); + const auto& kpts_cart = sk_leader.k_lists_.get_kpts_cart_soa(); + const size_t nk = sk_leader.k_lists_.numk; + const size_t nk_padded = kpts_cart.capacity(); + + auto& coordinates_leader = + static_cast&>( + p_leader.getCoordinates()); + auto& mw_rsoa_dev_ptrs = + coordinates_leader.getMultiWalkerRSoADevicePtrs(); + const size_t np_padded = + p_leader.getCoordinates().getAllParticlePos().capacity(); + + constexpr size_t cplx_stride = 2; + mw_mem.nw_rhok.resize(nw * num_species * cplx_stride, nk_padded); + + // make the compute over nk by blocks + constexpr size_t kblock_size = 512; + const size_t num_kblocks = (nk + kblock_size) / kblock_size; + + auto* mw_rsoa_ptr = mw_rsoa_dev_ptrs.data(); + auto* kpts_cart_ptr = kpts_cart.data(); + auto* mw_rhok_ptr = mw_mem.nw_rhok.data(); + auto* group_offsets = p_leader.get_group_offsets().data(); + + PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \ + map(always, from : mw_rhok_ptr[:mw_mem.nw_rhok.size()])") + for (int iw = 0; iw < nw; iw++) + for (int ib = 0; ib < num_kblocks; ib++) { + const size_t offset = ib * kblock_size; + const size_t this_block_size = + omptarget::min(kblock_size, nk - offset); + const auto* rsoa_ptr = mw_rsoa_ptr[iw]; + + PRAGMA_OFFLOAD("omp parallel for") + for (int ik = 0; ik < this_block_size; ik++) + for (int is = 0; is < num_species; is++) { + RealType rhok_r(0), rhok_i(0); + + for (int ip = group_offsets[is]; + ip < group_offsets[is + 1]; ip++) { + RealType s, c, phase(0); + for (int idim = 0; idim < DIM; idim++) + phase += kpts_cart_ptr[ik + offset + + nk_padded * idim] * + rsoa_ptr[ip + idim * np_padded]; + omptarget::sincos(phase, &s, &c); + rhok_r += c; + rhok_i += s; + } + + mw_rhok_ptr[(iw * num_species + is) * cplx_stride * + nk_padded + + offset + ik] = rhok_r; + mw_rhok_ptr[(iw * num_species + is) * cplx_stride * + nk_padded 
+ + nk_padded + offset + ik] = rhok_i; + } + } + + for (int iw = 0; iw < nw; iw++) + for (int is = 0; is < num_species; is++) { + std::copy_n( + mw_mem.nw_rhok[(iw * num_species + is) * cplx_stride], nk, + sk_list[iw].rhok_r[is]); + std::copy_n( + mw_mem.nw_rhok[(iw * num_species + is) * cplx_stride + 1], + nk, sk_list[iw].rhok_i[is]); + } + } +} + +/** evaluate rok per species, eikr per particle + */ +template +void +StructFactT::computeRhok(const ParticleSetT& P) +{ + const size_t num_ptcls = P.getTotalNum(); + const size_t num_species = P.groups(); + const size_t nk = k_lists_.numk; + resize(nk, num_species, num_ptcls); + + rhok_r = 0.0; + rhok_i = 0.0; + if (StorePerParticle) { + // save per particle and species value + for (int i = 0; i < num_ptcls; ++i) { + const auto& pos = P.R[i]; + auto* restrict eikr_r_ptr = eikr_r[i]; + auto* restrict eikr_i_ptr = eikr_i[i]; + auto* restrict rhok_r_ptr = rhok_r[P.getGroupID(i)]; + auto* restrict rhok_i_ptr = rhok_i[P.getGroupID(i)]; +#pragma omp simd + for (int ki = 0; ki < nk; ki++) { + qmcplusplus::sincos(dot(k_lists_.kpts_cart[ki], pos), + &eikr_i_ptr[ki], &eikr_r_ptr[ki]); + rhok_r_ptr[ki] += eikr_r_ptr[ki]; + rhok_i_ptr[ki] += eikr_i_ptr[ki]; + } + } + } + else { + // save per species value + for (int i = 0; i < num_ptcls; ++i) { + const auto& pos = P.R[i]; + auto* restrict rhok_r_ptr = rhok_r[P.getGroupID(i)]; + auto* restrict rhok_i_ptr = rhok_i[P.getGroupID(i)]; +#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER) +#pragma omp simd + for (int ki = 0; ki < nk; ki++) { + RealType s, c; + qmcplusplus::sincos(dot(k_lists_.kpts_cart[ki], pos), &s, &c); + rhok_r_ptr[ki] += c; + rhok_i_ptr[ki] += s; + } +#else + // make the compute over nk by blocks + constexpr size_t kblock_size = 512; + const size_t num_kblocks = (nk + kblock_size) / kblock_size; + RealType phiV[kblock_size], eikr_r_temp[kblock_size], + eikr_i_temp[kblock_size]; + + for (int ib = 0; ib < num_kblocks; ib++) { + const size_t offset = ib * 
kblock_size; + const size_t this_block_size = + std::min(kblock_size, nk - offset); + for (int ki = 0; ki < this_block_size; ki++) + phiV[ki] = dot(k_lists_.kpts_cart[ki + offset], pos); + eval_e2iphi(this_block_size, phiV, eikr_r_temp, eikr_i_temp); + for (int ki = 0; ki < this_block_size; ki++) { + rhok_r_ptr[ki + offset] += eikr_r_temp[ki]; + rhok_i_ptr[ki + offset] += eikr_i_temp[ki]; + } + } +#endif + } + } +} + +template +void +StructFactT::turnOnStorePerParticle(const ParticleSetT& P) +{ + if (!StorePerParticle) { + StorePerParticle = true; + computeRhok(P); + } +} + +template class StructFactT; +template class StructFactT; +template class StructFactT>; +template class StructFactT>; + +template struct SKMultiWalkerMemT; +template struct SKMultiWalkerMemT; +template struct SKMultiWalkerMemT>; +template struct SKMultiWalkerMemT>; +} // namespace qmcplusplus diff --git a/src/Particle/LongRange/StructFactT.h b/src/Particle/LongRange/StructFactT.h new file mode 100644 index 0000000000..218b3adf31 --- /dev/null +++ b/src/Particle/LongRange/StructFactT.h @@ -0,0 +1,159 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. 
+// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of +// Illinois at Urbana-Champaign +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_STRUCTFACTT_H +#define QMCPLUSPLUS_STRUCTFACTT_H + +#include "OhmmsPETE/OhmmsMatrix.h" +#include "OhmmsPETE/OhmmsVector.h" +#include "Particle/ParticleSetTraits.h" +#include +#include +#include +#include + +namespace qmcplusplus +{ +template +class ParticleSetT; +class KContainer; +template +struct SKMultiWalkerMemT; + +/** @ingroup longrange + *\brief Calculates the structure-factor for a particle set + * + * Structure factor per species + * Rhok[alpha][k] \f$ \equiv \rho_{k}^{\alpha} = \sum_{i} e^{i{\bf k}\cdot{\bf + *r_i}}\f$ Structure factor per particle eikr[i][k] + */ +template +class StructFactT +{ +public: + // Typedef for the lattice-type + using ParticleLayout = typename LatticeParticleTraits::ParticleLayout; + using RealType = typename ParticleSetTraits::RealType; + + static constexpr auto DIM = ParticleSetTraits::DIM; + + /** enumeration for the methods to handle mixed bconds + * + * Allow overwriting lattice::SuperCellEnum to use D-dim k-point sets with + * mixed BC + */ + int SuperCellEnum; + /// 2-D container for the phase + Matrix rhok_r, rhok_i; + Matrix eikr_r, eikr_i; + /** Constructor - copy ParticleSet and init. k-shells + * @param lattice long range box + * @param kc cutoff for k + * + * At least in the batched version Structure factor is _NOT_ valid + * after construction. 
+ */ + StructFactT(const ParticleLayout& lattice, const KContainer& k_lists); + /// desructor + ~StructFactT(); + + /** Update Rhok if all particles moved + */ + void + updateAllPart(const ParticleSetT& P); + + /** Update RhoK for all particles for multiple walkers particles. + * + * In batched context until this is called StructFact is invalid and will + * cause a crash if any Hamiltonian using StructFact indirectly through + * ParticleSet is evaluated. + */ + static void + mw_updateAllPart(const RefVectorWithLeader& sk_list, + const RefVectorWithLeader>& p_list, + SKMultiWalkerMemT& mw_mem); + + /** @brief switch on the storage per particle + * if StorePerParticle was false, this function allocates memory and + * precompute data if StorePerParticle was true, this function is no-op + */ + void + turnOnStorePerParticle(const ParticleSetT& P); + + /// accessor of StorePerParticle + bool + isStorePerParticle() const + { + return StorePerParticle; + } + + /// accessor of k_lists_ + const KContainer& + getKLists() const + { + return k_lists_; + } + +private: + /// Compute all rhok elements from the start + void + computeRhok(const ParticleSetT& P); + /** resize the internal data + * @param nkpts + * @param num_species number of species + * @param num_ptcls number of particles + */ + void + resize(int nkpts, int num_species, int num_ptcls); + + /// K-Vector List. + const KContainer& k_lists_; + /** Whether intermediate data is stored per particle. default false + * storing data per particle needs significant amount of memory but some + * calculation may request it. 
storing data per particle specie is more + * cost-effective + */ + bool StorePerParticle; + /// timer for updateAllPart + NewTimer& update_all_timer_; +}; + +/// multi walker shared memory buffer +template +struct SKMultiWalkerMemT : public Resource +{ + using RealType = typename StructFactT::RealType; + + /// dist displ for temporary and old pairs + Matrix> nw_rhok; + + SKMultiWalkerMemT() : Resource("SKMultiWalkerMem") + { + } + + SKMultiWalkerMemT(const SKMultiWalkerMemT&) : SKMultiWalkerMemT() + { + } + + std::unique_ptr + makeClone() const override + { + return std::make_unique(*this); + } +}; + +} // namespace qmcplusplus + +#endif diff --git a/src/Particle/MCCoordsT.cpp b/src/Particle/MCCoordsT.cpp new file mode 100644 index 0000000000..fd63c84a6c --- /dev/null +++ b/src/Particle/MCCoordsT.cpp @@ -0,0 +1,69 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2022 QMCPACK developers. 
+// +// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#include "MCCoordsT.hpp" + +namespace qmcplusplus +{ +template +void +MCCoordsT::getSubset(const std::size_t offset, + const std::size_t size, MCCoordsT& out) const +{ + std::copy_n(positions.begin() + offset, size, out.positions.begin()); +} + +template +MCCoordsT& +MCCoordsT::operator+=( + const MCCoordsT& rhs) +{ + assert(positions.size() == rhs.positions.size()); + std::transform(positions.begin(), positions.end(), rhs.positions.begin(), + positions.begin(), + [](const PosType& x, const PosType& y) { return x + y; }); + return *this; +} + +template +void +MCCoordsT::getSubset(const std::size_t offset, + const std::size_t size, MCCoordsT& out) const +{ + std::copy_n(positions.begin() + offset, size, out.positions.begin()); + std::copy_n(spins.begin() + offset, size, out.spins.begin()); +} + +template +MCCoordsT& +MCCoordsT::operator+=( + const MCCoordsT& rhs) +{ + assert(positions.size() == rhs.positions.size()); + std::transform(positions.begin(), positions.end(), rhs.positions.begin(), + positions.begin(), + [](const PosType& x, const PosType& y) { return x + y; }); + std::transform(spins.begin(), spins.end(), rhs.spins.begin(), spins.begin(), + [](const FullPrecRealType& x, const FullPrecRealType& y) { + return x + y; + }); + return *this; +} + +template struct MCCoordsT; +template struct MCCoordsT; +template struct MCCoordsT; +template struct MCCoordsT; +template struct MCCoordsT, CoordsType::POS>; +template struct MCCoordsT, CoordsType::POS_SPIN>; +template struct MCCoordsT, CoordsType::POS>; +template struct MCCoordsT, CoordsType::POS_SPIN>; +} // namespace qmcplusplus diff --git a/src/Particle/MCCoordsT.hpp b/src/Particle/MCCoordsT.hpp new file mode 100644 index 0000000000..50b419178f --- /dev/null 
+++ b/src/Particle/MCCoordsT.hpp @@ -0,0 +1,82 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2022 QMCPACK developers. +// +// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory +// Cody A. Melton, cmelton@sandia.gov, Sandia National +// Laboratories +// +// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_MCCOORDST_HPP +#define QMCPLUSPLUS_MCCOORDST_HPP + +#include "MCCoords.hpp" +#include "ParticleSetTraits.h" +#include "type_traits/complex_help.hpp" + +#include +#include + +namespace qmcplusplus +{ +// enum class CoordsType +// { +// POS, +// POS_SPIN +// }; + +template +struct MCCoordsT; + +template +struct MCCoordsT +{ + using PosType = typename ParticleSetTraits::PosType; + + MCCoordsT(const std::size_t size) : positions(size) + { + } + + MCCoordsT& + operator+=(const MCCoordsT& rhs); + + /** get subset of MCCoordsT + * [param,out] out + */ + void + getSubset(const std::size_t offset, const std::size_t size, + MCCoordsT& out) const; + + std::vector positions; +}; + +template +struct MCCoordsT +{ + using PosType = typename ParticleSetTraits::PosType; + using FullPrecRealType = typename ParticleSetTraits::FullPrecRealType; + + MCCoordsT(const std::size_t size) : positions(size), spins(size) + { + } + + MCCoordsT& + operator+=(const MCCoordsT& rhs); + + /** get subset of MCCoordsT + * [param,out] out + */ + void + getSubset(const std::size_t offset, const std::size_t size, + MCCoordsT& out) const; + + std::vector positions; + std::vector spins; +}; +} // namespace qmcplusplus + +#endif diff --git a/src/Particle/ParticleSetT.cpp b/src/Particle/ParticleSetT.cpp new file mode 100644 index 
0000000000..5b78bed54e --- /dev/null +++ b/src/Particle/ParticleSetT.cpp @@ -0,0 +1,1200 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. +// +// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at +// Urbana-Champaign +// Luke Shulenburger, lshulen@sandia.gov, Sandia National +// Laboratories Jeremy McMinnis, jmcminis@gmail.com, +// University of Illinois at Urbana-Champaign Jeongnim Kim, +// jeongnim.kim@gmail.com, University of Illinois at +// Urbana-Champaign Jaron T. Krogel, krogeljt@ornl.gov, Oak +// Ridge National Laboratory Ye Luo, yeluo@anl.gov, Argonne +// National Laboratory Mark A. Berrill, berrillma@ornl.gov, +// Oak Ridge National Laboratory Mark Dewing, +// markdewing@gmail.com, University of Illinois at +// Urbana-Champaign +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#include "ParticleSetT.h" + +#include "Particle/DistanceTableT.h" +#include "Particle/DynamicCoordinatesBuilder.h" +#include "Particle/LongRange/StructFactT.h" +#include "Particle/createDistanceTableT.h" +#include "ParticleBase/RandomSeqGeneratorGlobal.h" +#include "ResourceCollection.h" +#include "Utilities/IteratorUtility.h" +#include "Utilities/RandomGenerator.h" + +#include +#include + +namespace qmcplusplus +{ +using WP = WalkerProperties::Indexes; + +enum PSetTimers +{ + PS_newpos, + PS_donePbyP, + PS_accept, + PS_loadWalker, + PS_update, + PS_dt_move, + PS_mw_copy +}; + +static const TimerNameList_t +generatePSetTimerNames(std::string& obj_name) +{ + return {{PS_newpos, "ParticleSet:" + obj_name + "::computeNewPosDT"}, + {PS_donePbyP, "ParticleSet:" + obj_name + "::donePbyP"}, + 
{PS_accept, "ParticleSet:" + obj_name + "::acceptMove"}, + {PS_loadWalker, "ParticleSet:" + obj_name + "::loadWalker"}, + {PS_update, "ParticleSet:" + obj_name + "::update"}, + {PS_dt_move, "ParticleSet:" + obj_name + "::dt_move"}, + {PS_mw_copy, "ParticleSet:" + obj_name + "::mw_copy"}}; +} + +template +ParticleSetT::ParticleSetT(const SimulationCellT& simulation_cell, + const DynamicCoordinateKind kind) : + quantum_domain(classical), + Properties(0, 0, 1, WP::MAXPROPERTIES), + simulation_cell_(simulation_cell), + same_mass_(true), + is_spinor_(false), + active_ptcl_(-1), + active_spin_val_(0.0), + myTimers(getGlobalTimerManager(), generatePSetTimerNames(myName), + timer_level_medium), + myTwist(0.0), + ParentName("0"), + TotalNum(0), + group_offsets_(std::make_shared>>()), + coordinates_(createDynamicCoordinatesT(kind)) +{ + initPropertyList(); +} + +template +ParticleSetT::ParticleSetT(const ParticleSetT& p) : + Properties(p.Properties), + simulation_cell_(p.simulation_cell_), + same_mass_(true), + is_spinor_(false), + active_ptcl_(-1), + active_spin_val_(0.0), + my_species_(p.getSpeciesSet()), + myTimers(getGlobalTimerManager(), generatePSetTimerNames(myName), + timer_level_medium), + myTwist(0.0), + ParentName(p.parentName()), + group_offsets_(p.group_offsets_), + coordinates_(p.coordinates_->makeClone()) +{ + setQuantumDomain(p.quantum_domain); + + resize(p.getTotalNum()); + R.InUnit = p.R.InUnit; + R = p.R; + spins = p.spins; + GroupID = p.GroupID; + is_spinor_ = p.is_spinor_; + + // need explicit copy: + Mass = p.Mass; + Z = p.Z; + // std::ostringstream o; + // o<setName(o.str()); + // app_log() << " Copying a particle set " << p.getName() << " to " << + // this->getName() << " groups=" << groups() << std::endl; + myName = p.getName(); + PropertyList.Names = p.PropertyList.Names; + PropertyList.Values = p.PropertyList.Values; + PropertyHistory = p.PropertyHistory; + Collectables = p.Collectables; + // construct the distance tables with the same order + for 
(int i = 0; i < p.DistTables.size(); ++i) + addTable(p.DistTables[i]->get_origin(), p.DistTables[i]->getModes()); + + if (p.structure_factor_) + structure_factor_ = + std::make_unique>(*p.structure_factor_); + myTwist = p.myTwist; + + G = p.G; + L = p.L; +} + +template +ParticleSetT::~ParticleSetT() = default; + +template +void +ParticleSetT::create(const std::vector& agroup) +{ + auto& group_offsets(*group_offsets_); + group_offsets.resize(agroup.size() + 1); + group_offsets[0] = 0; + for (int is = 0; is < agroup.size(); is++) + group_offsets[is + 1] = group_offsets[is] + agroup[is]; + group_offsets.updateTo(); + const size_t nsum = group_offsets[agroup.size()]; + resize(nsum); + TotalNum = nsum; + int loc = 0; + for (int i = 0; i < agroup.size(); i++) + for (int j = 0; j < agroup[i]; j++, loc++) + GroupID[loc] = i; +} + +template +void +ParticleSetT::setQuantumDomain(quantum_domains qdomain) +{ + if (quantumDomainValid(qdomain)) + quantum_domain = qdomain; + else + throw std::runtime_error("ParticleSet::setQuantumDomain\n input " + "quantum domain is not valid for particles"); +} + +template +void +ParticleSetT::resetGroups() +{ + const int nspecies = my_species_.getTotalNum(); + // Usually an empty ParticleSet indicates an error in the input file, + // but in some cases it is useful. Allow an empty ParticleSet if it + // has the special name "empty". + if (nspecies == 0 && getName() != "empty") { + throw std::runtime_error( + "ParticleSet::resetGroups() Failed. 
No species exisits"); + } + int natt = my_species_.numAttributes(); + int qind = my_species_.addAttribute("charge"); + if (natt == qind) { + app_log() << " Missing charge attribute of the SpeciesSet " << myName + << " particleset" << std::endl; + app_log() << " Assume neutral particles Z=0.0 " << std::endl; + for (int ig = 0; ig < nspecies; ig++) + my_species_(qind, ig) = 0.0; + } + for (int iat = 0; iat < Z.size(); iat++) + Z[iat] = my_species_(qind, GroupID[iat]); + natt = my_species_.numAttributes(); + int massind = my_species_.addAttribute("mass"); + if (massind == natt) { + for (int ig = 0; ig < nspecies; ig++) + my_species_(massind, ig) = 1.0; + } + same_mass_ = true; + double m0 = my_species_(massind, 0); + for (int ig = 1; ig < nspecies; ig++) + same_mass_ &= (my_species_(massind, ig) == m0); + if (same_mass_) + app_log() << " All the species have the same mass " << m0 << std::endl; + else + app_log() << " Distinctive masses for each species " << std::endl; + for (int iat = 0; iat < Mass.size(); iat++) + Mass[iat] = my_species_(massind, GroupID[iat]); + + int membersize = my_species_.addAttribute("membersize"); + for (int ig = 0; ig < nspecies; ++ig) + my_species_(membersize, ig) = groupsize(ig); + + for (int iat = 0; iat < GroupID.size(); iat++) + assert(GroupID[iat] < nspecies); +} + +template +void +ParticleSetT::randomizeFromSource(ParticleSetT& src) +{ + SpeciesSet& srcSpSet(src.getSpeciesSet()); + SpeciesSet& spSet(getSpeciesSet()); + int srcChargeIndx = srcSpSet.addAttribute("charge"); + int srcMemberIndx = srcSpSet.addAttribute("membersize"); + int ChargeIndex = spSet.addAttribute("charge"); + int MemberIndx = spSet.addAttribute("membersize"); + int Nsrc = src.getTotalNum(); + int Nptcl = getTotalNum(); + int NumSpecies = spSet.TotalNum; + int NumSrcSpecies = srcSpSet.TotalNum; + // Store information about charges and number of each species + std::vector Zat, Zspec, NofSpecies, NofSrcSpecies, CurElec; + Zat.resize(Nsrc); + 
Zspec.resize(NumSrcSpecies); + NofSpecies.resize(NumSpecies); + CurElec.resize(NumSpecies); + NofSrcSpecies.resize(NumSrcSpecies); + for (int spec = 0; spec < NumSrcSpecies; spec++) { + Zspec[spec] = (int)round(srcSpSet(srcChargeIndx, spec)); + NofSrcSpecies[spec] = (int)round(srcSpSet(srcMemberIndx, spec)); + } + for (int spec = 0; spec < NumSpecies; spec++) { + NofSpecies[spec] = (int)round(spSet(MemberIndx, spec)); + CurElec[spec] = first(spec); + } + int totQ = 0; + for (int iat = 0; iat < Nsrc; iat++) + totQ += Zat[iat] = Zspec[src.GroupID[iat]]; + app_log() << " Total ion charge = " << totQ << std::endl; + totQ -= Nptcl; + app_log() << " Total system charge = " << totQ << std::endl; + // Now, loop over ions, attaching electrons to them to neutralize + // charge + int spToken = 0; + // This is decremented when we run out of electrons in each species + int spLeft = NumSpecies; + std::vector gaussRand(Nptcl); + makeGaussRandom(gaussRand); + for (int iat = 0; iat < Nsrc; iat++) { + // Loop over electrons to add, selecting round-robin from the + // electron species + int z = Zat[iat]; + while (z > 0 && spLeft) { + int sp = spToken++ % NumSpecies; + if (NofSpecies[sp]) { + NofSpecies[sp]--; + z--; + int elec = CurElec[sp]++; + app_log() << " Assigning " << (sp ? 
"down" : "up ") + << " electron " << elec << " to ion " << iat + << " with charge " << z << std::endl; + double radius = 0.5 * std::sqrt((double)Zat[iat]); + R[elec] = src.R[iat] + radius * gaussRand[elec]; + } + else + spLeft--; + } + } + // Assign remaining electrons + int ion = 0; + for (int sp = 0; sp < NumSpecies; sp++) { + for (int ie = 0; ie < NofSpecies[sp]; ie++) { + int iat = ion++ % Nsrc; + double radius = std::sqrt((double)Zat[iat]); + int elec = CurElec[sp]++; + R[elec] = src.R[iat] + radius * gaussRand[elec]; + } + } +} + +template +void +ParticleSetT::print(std::ostream& os, const size_t maxParticlesToPrint) const +{ + os << " ParticleSet '" << getName() << "' contains " << TotalNum + << " particles : "; + if (auto& group_offsets(*group_offsets_); group_offsets.size() > 0) + for (int i = 0; i < group_offsets.size() - 1; i++) + os << " " << my_species_.speciesName[i] << "(" + << group_offsets[i + 1] - group_offsets[i] << ")"; + os << std::endl << std::endl; + + const size_t numToPrint = maxParticlesToPrint == 0 ? + TotalNum : + std::min(TotalNum, maxParticlesToPrint); + + for (int i = 0; i < numToPrint; i++) { + os << " " << my_species_.speciesName[GroupID[i]] << R[i] + << std::endl; + } + if (numToPrint < TotalNum) { + os << " (... 
and " << (TotalNum - numToPrint) + << " more particle positions ...)" << std::endl; + } + os << std::endl; + + for (const std::string& description : distTableDescriptions) + os << description; + os << std::endl; +} + +template +bool +ParticleSetT::get(std::ostream& is) const +{ + return true; +} + +template +bool +ParticleSetT::put(std::istream& is) +{ + return true; +} + +template +void +ParticleSetT::reset() +{ + app_log() << "<<<< going to set properties >>>> " << std::endl; +} + +/// read the particleset +template +bool +ParticleSetT::put(xmlNodePtr cur) +{ + return true; +} + +template +int +ParticleSetT::addTable(const ParticleSetT& psrc, DTModes modes) +{ + if (myName == "none" || psrc.getName() == "none") + throw std::runtime_error("ParticleSet::addTable needs proper names for " + "both source and target particle sets."); + + int tid; + std::map::iterator tit( + myDistTableMap.find(psrc.getName())); + if (tit == myDistTableMap.end()) { + std::ostringstream description; + tid = DistTables.size(); + if (myName == psrc.getName()) + DistTables.push_back(createDistanceTableT(*this, description)); + else + DistTables.push_back( + createDistanceTableT(psrc, *this, description)); + distTableDescriptions.push_back(description.str()); + myDistTableMap[psrc.getName()] = tid; + app_debug() << " ... ParticleSet::addTable Create Table #" << tid + << " " << DistTables[tid]->getName() << std::endl; + } + else { + tid = (*tit).second; + app_debug() << " ... 
ParticleSet::addTable Reuse Table #" << tid << " " + << DistTables[tid]->getName() << std::endl; + } + + DistTables[tid]->setModes(DistTables[tid]->getModes() | modes); + + app_log().flush(); + return tid; +} + +template +const DistanceTableAAT& +ParticleSetT::getDistTableAA(int table_ID) const +{ + return dynamic_cast&>(*DistTables[table_ID]); +} + +template +const DistanceTableABT& +ParticleSetT::getDistTableAB(int table_ID) const +{ + return dynamic_cast&>(*DistTables[table_ID]); +} + +template +void +ParticleSetT::update(bool skipSK) +{ + ScopedTimer update_scope(myTimers[PS_update]); + + coordinates_->setAllParticlePos(R); + for (int i = 0; i < DistTables.size(); i++) + DistTables[i]->evaluate(*this); + if (!skipSK && structure_factor_) + structure_factor_->updateAllPart(*this); + + active_ptcl_ = -1; +} + +template +void +ParticleSetT::mw_update( + const RefVectorWithLeader& p_list, bool skipSK) +{ + auto& p_leader = p_list.getLeader(); + ScopedTimer update_scope(p_leader.myTimers[PS_update]); + + for (ParticleSetT& pset : p_list) + pset.coordinates_->setAllParticlePos(pset.R); + + auto& dts = p_leader.DistTables; + for (int i = 0; i < dts.size(); ++i) { + const auto dt_list(extractDTRefList(p_list, i)); + dts[i]->mw_evaluate(dt_list, p_list); + } + + if (!skipSK && p_leader.structure_factor_) + for (int iw = 0; iw < p_list.size(); iw++) + p_list[iw].structure_factor_->updateAllPart(p_list[iw]); +} + +template +void +ParticleSetT::makeMove( + Index_t iat, const SingleParticlePos& displ, bool maybe_accept) +{ + active_ptcl_ = iat; + active_pos_ = R[iat] + displ; + active_spin_val_ = spins[iat]; + computeNewPosDistTables(iat, active_pos_, maybe_accept); +} + +template +void +ParticleSetT::makeMoveWithSpin( + Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl) +{ + makeMove(iat, displ); + active_spin_val_ += sdispl; +} + +template +template +void +ParticleSetT::mw_makeMove(const RefVectorWithLeader& p_list, + Index_t iat, const MCCoordsT& 
displs) +{ + mw_makeMove(p_list, iat, displs.positions); + if constexpr (CT == CoordsType::POS_SPIN) + mw_makeSpinMove(p_list, iat, displs.spins); +} + +template +void +ParticleSetT::mw_makeMove(const RefVectorWithLeader& p_list, + Index_t iat, const std::vector& displs) +{ + std::vector new_positions; + new_positions.reserve(displs.size()); + + for (int iw = 0; iw < p_list.size(); iw++) { + p_list[iw].active_ptcl_ = iat; + p_list[iw].active_pos_ = p_list[iw].R[iat] + displs[iw]; + new_positions.push_back(p_list[iw].active_pos_); + } + + mw_computeNewPosDistTables(p_list, iat, new_positions); +} + +template +void +ParticleSetT::mw_makeSpinMove( + const RefVectorWithLeader& p_list, Index_t iat, + const std::vector& sdispls) +{ + for (int iw = 0; iw < p_list.size(); iw++) + p_list[iw].active_spin_val_ = p_list[iw].spins[iat] + sdispls[iw]; +} + +template +bool +ParticleSetT::makeMoveAndCheck(Index_t iat, const SingleParticlePos& displ) +{ + active_ptcl_ = iat; + active_pos_ = R[iat] + displ; + active_spin_val_ = spins[iat]; + bool is_valid = true; + auto& Lattice = simulation_cell_.getLattice(); + if (Lattice.explicitly_defined) { + if (Lattice.outOfBound(Lattice.toUnit(displ))) + is_valid = false; + else { + SingleParticlePos newRedPos = Lattice.toUnit(active_pos_); + if (!Lattice.isValid(newRedPos)) + is_valid = false; + } + } + computeNewPosDistTables(iat, active_pos_, true); + return is_valid; +} + +template +bool +ParticleSetT::makeMoveAndCheckWithSpin( + Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl) +{ + bool is_valid = makeMoveAndCheck(iat, displ); + active_spin_val_ += sdispl; + return is_valid; +} + +template +void +ParticleSetT::computeNewPosDistTables( + Index_t iat, const SingleParticlePos& newpos, bool maybe_accept) +{ + ScopedTimer compute_newpos_scope(myTimers[PS_newpos]); + + for (int i = 0; i < DistTables.size(); ++i) + DistTables[i]->move(*this, newpos, iat, maybe_accept); +} + +template +void 
+ParticleSetT::mw_computeNewPosDistTables( + const RefVectorWithLeader& p_list, Index_t iat, + const std::vector& new_positions, bool maybe_accept) +{ + ParticleSetT& p_leader = p_list.getLeader(); + ScopedTimer compute_newpos_scope(p_leader.myTimers[PS_newpos]); + + { + ScopedTimer copy_scope(p_leader.myTimers[PS_mw_copy]); + const auto coords_list(extractCoordsRefList(p_list)); + p_leader.coordinates_->mw_copyActivePos( + coords_list, iat, new_positions); + } + + { + ScopedTimer dt_scope(p_leader.myTimers[PS_dt_move]); + const int dist_tables_size = p_leader.DistTables.size(); + for (int i = 0; i < dist_tables_size; ++i) { + const auto dt_list(extractDTRefList(p_list, i)); + p_leader.DistTables[i]->mw_move( + dt_list, p_list, new_positions, iat, maybe_accept); + } + + // DistTables mw_move calls are asynchronous. Wait for them before + // return. + PRAGMA_OFFLOAD("omp taskwait") + } +} + +template +bool +ParticleSetT::makeMoveAllParticles( + const Walker_t& awalker, const ParticlePos& deltaR, RealType dt) +{ + active_ptcl_ = -1; + auto& Lattice = simulation_cell_.getLattice(); + if (Lattice.explicitly_defined) { + for (int iat = 0; iat < deltaR.size(); ++iat) { + SingleParticlePos displ(dt * deltaR[iat]); + if (Lattice.outOfBound(Lattice.toUnit(displ))) + return false; + SingleParticlePos newpos(awalker.R[iat] + displ); + if (!Lattice.isValid(Lattice.toUnit(newpos))) + return false; + R[iat] = newpos; + } + } + else { + for (int iat = 0; iat < deltaR.size(); ++iat) + R[iat] = awalker.R[iat] + dt * deltaR[iat]; + } + coordinates_->setAllParticlePos(R); + for (int i = 0; i < DistTables.size(); i++) + DistTables[i]->evaluate(*this); + if (structure_factor_) + structure_factor_->updateAllPart(*this); + // every move is valid + return true; +} + +template +bool +ParticleSetT::makeMoveAllParticles(const Walker_t& awalker, + const ParticlePos& deltaR, const std::vector& dt) +{ + active_ptcl_ = -1; + auto& Lattice = simulation_cell_.getLattice(); + if 
(Lattice.explicitly_defined) { + for (int iat = 0; iat < deltaR.size(); ++iat) { + SingleParticlePos displ(dt[iat] * deltaR[iat]); + if (Lattice.outOfBound(Lattice.toUnit(displ))) + return false; + SingleParticlePos newpos(awalker.R[iat] + displ); + if (!Lattice.isValid(Lattice.toUnit(newpos))) + return false; + R[iat] = newpos; + } + } + else { + for (int iat = 0; iat < deltaR.size(); ++iat) + R[iat] = awalker.R[iat] + dt[iat] * deltaR[iat]; + } + coordinates_->setAllParticlePos(R); + for (int i = 0; i < DistTables.size(); i++) + DistTables[i]->evaluate(*this); + if (structure_factor_) + structure_factor_->updateAllPart(*this); + // every move is valid + return true; +} + +/** move a walker by dt*deltaR + drift + * @param awalker initial walker configuration + * @param drift drift vector + * @param deltaR random displacement + * @param dt timestep + * @return true, if all the particle moves are legal under the boundary + * conditions + */ +template +bool +ParticleSetT::makeMoveAllParticlesWithDrift(const Walker_t& awalker, + const ParticlePos& drift, const ParticlePos& deltaR, RealType dt) +{ + active_ptcl_ = -1; + auto& Lattice = simulation_cell_.getLattice(); + if (Lattice.explicitly_defined) { + for (int iat = 0; iat < deltaR.size(); ++iat) { + SingleParticlePos displ(dt * deltaR[iat] + drift[iat]); + if (Lattice.outOfBound(Lattice.toUnit(displ))) + return false; + SingleParticlePos newpos(awalker.R[iat] + displ); + if (!Lattice.isValid(Lattice.toUnit(newpos))) + return false; + R[iat] = newpos; + } + } + else { + for (int iat = 0; iat < deltaR.size(); ++iat) + R[iat] = awalker.R[iat] + dt * deltaR[iat] + drift[iat]; + } + coordinates_->setAllParticlePos(R); + for (int i = 0; i < DistTables.size(); i++) + DistTables[i]->evaluate(*this); + if (structure_factor_) + structure_factor_->updateAllPart(*this); + // every move is valid + return true; +} + +template +bool +ParticleSetT::makeMoveAllParticlesWithDrift(const Walker_t& awalker, + const ParticlePos& drift, 
const ParticlePos& deltaR, + const std::vector& dt) +{ + active_ptcl_ = -1; + auto& Lattice = simulation_cell_.getLattice(); + if (Lattice.explicitly_defined) { + for (int iat = 0; iat < deltaR.size(); ++iat) { + SingleParticlePos displ(dt[iat] * deltaR[iat] + drift[iat]); + if (Lattice.outOfBound(Lattice.toUnit(displ))) + return false; + SingleParticlePos newpos(awalker.R[iat] + displ); + if (!Lattice.isValid(Lattice.toUnit(newpos))) + return false; + R[iat] = newpos; + } + } + else { + for (int iat = 0; iat < deltaR.size(); ++iat) + R[iat] = awalker.R[iat] + dt[iat] * deltaR[iat] + drift[iat]; + } + coordinates_->setAllParticlePos(R); + + for (int i = 0; i < DistTables.size(); i++) + DistTables[i]->evaluate(*this); + if (structure_factor_) + structure_factor_->updateAllPart(*this); + // every move is valid + return true; +} + +/** update the particle attribute by the proposed move + * + * When the active_ptcl_ is equal to iat, overwrite the position and update the + * content of the distance tables. + */ +template +void +ParticleSetT::acceptMove(Index_t iat) +{ +#ifndef NDEBUG + if (iat != active_ptcl_) + throw std::runtime_error( + "Bug detected by acceptMove! 
Request electron is not active!"); +#endif + ScopedTimer update_scope(myTimers[PS_accept]); + // Update position + distance-table + coordinates_->setOneParticlePos(active_pos_, iat); + for (int i = 0; i < DistTables.size(); i++) + DistTables[i]->update(iat); + + R[iat] = active_pos_; + spins[iat] = active_spin_val_; + active_ptcl_ = -1; +} + +template +void +ParticleSetT::acceptMoveForwardMode(Index_t iat) +{ + assert(iat == active_ptcl_); + ScopedTimer update_scope(myTimers[PS_accept]); + // Update position + distance-table + coordinates_->setOneParticlePos(active_pos_, iat); + for (int i = 0; i < DistTables.size(); i++) + DistTables[i]->updatePartial(iat, true); + + R[iat] = active_pos_; + spins[iat] = active_spin_val_; + active_ptcl_ = -1; +} + +template +void +ParticleSetT::accept_rejectMove( + Index_t iat, bool accepted, bool forward_mode) +{ + if (forward_mode) + if (accepted) + acceptMoveForwardMode(iat); + else + rejectMoveForwardMode(iat); + else if (accepted) + acceptMove(iat); + else + rejectMove(iat); +} + +template +void +ParticleSetT::rejectMove(Index_t iat) +{ +#ifndef NDEBUG + if (iat != active_ptcl_) + throw std::runtime_error( + "Bug detected by rejectMove! 
Request electron is not active!"); +#endif + active_ptcl_ = -1; +} + +template +void +ParticleSetT::rejectMoveForwardMode(Index_t iat) +{ + assert(iat == active_ptcl_); + // Update distance-table + for (int i = 0; i < DistTables.size(); i++) + DistTables[i]->updatePartial(iat, false); + active_ptcl_ = -1; +} + +template +template +void +ParticleSetT::mw_accept_rejectMoveT( + const RefVectorWithLeader& p_list, Index_t iat, + const std::vector& isAccepted, bool forward_mode) +{ + if constexpr (CT == CoordsType::POS_SPIN) + mw_accept_rejectSpinMove(p_list, iat, isAccepted); + mw_accept_rejectMove(p_list, iat, isAccepted, forward_mode); +} + +template +void +ParticleSetT::mw_accept_rejectMove( + const RefVectorWithLeader& p_list, Index_t iat, + const std::vector& isAccepted, bool forward_mode) +{ + if (forward_mode) { + ParticleSetT& p_leader = p_list.getLeader(); + ScopedTimer update_scope(p_leader.myTimers[PS_accept]); + + const auto coords_list(extractCoordsRefList(p_list)); + std::vector new_positions; + new_positions.reserve(p_list.size()); + for (const ParticleSetT& pset : p_list) + new_positions.push_back(pset.active_pos_); + p_leader.coordinates_->mw_acceptParticlePos( + coords_list, iat, new_positions, isAccepted); + + auto& dts = p_leader.DistTables; + for (int i = 0; i < dts.size(); ++i) { + const auto dt_list(extractDTRefList(p_list, i)); + dts[i]->mw_updatePartial(dt_list, iat, isAccepted); + } + + for (int iw = 0; iw < p_list.size(); iw++) { + assert(iat == p_list[iw].active_ptcl_); + if (isAccepted[iw]) + p_list[iw].R[iat] = p_list[iw].active_pos_; + p_list[iw].active_ptcl_ = -1; + assert(p_list[iw].R[iat] == + p_list[iw].coordinates_->getAllParticlePos()[iat]); + } + } + else { + // loop over single walker acceptMove/rejectMove doesn't work safely. 
+ // need to code carefully for both coordinate and distance table updates + // disable non-forward mode cases + if (!forward_mode) + throw std::runtime_error( + "BUG calling mw_accept_rejectMove in non-forward mode"); + } +} + +template +void +ParticleSetT::mw_accept_rejectSpinMove( + const RefVectorWithLeader& p_list, Index_t iat, + const std::vector& isAccepted) +{ + for (int iw = 0; iw < p_list.size(); iw++) { + assert(iat == p_list[iw].active_ptcl_); + if (isAccepted[iw]) + p_list[iw].spins[iat] = p_list[iw].active_spin_val_; + } +} + +template +void +ParticleSetT::donePbyP(bool skipSK) +{ + ScopedTimer donePbyP_scope(myTimers[PS_donePbyP]); + coordinates_->donePbyP(); + if (!skipSK && structure_factor_) + structure_factor_->updateAllPart(*this); + for (size_t i = 0; i < DistTables.size(); ++i) + DistTables[i]->finalizePbyP(*this); + active_ptcl_ = -1; +} + +template +void +ParticleSetT::mw_donePbyP( + const RefVectorWithLeader& p_list, bool skipSK) +{ + ParticleSetT& p_leader = p_list.getLeader(); + ScopedTimer donePbyP_scope(p_leader.myTimers[PS_donePbyP]); + + for (ParticleSetT& pset : p_list) { + pset.coordinates_->donePbyP(); + pset.active_ptcl_ = -1; + } + + if (!skipSK && p_leader.structure_factor_) { + auto sk_list = extractSKRefList(p_list); + StructFactT::mw_updateAllPart( + sk_list, p_list, p_leader.mw_structure_factor_data_handle_); + } + + auto& dts = p_leader.DistTables; + for (int i = 0; i < dts.size(); ++i) { + const auto dt_list(extractDTRefList(p_list, i)); + dts[i]->mw_finalizePbyP(dt_list, p_list); + } +} + +template +void +ParticleSetT::makeVirtualMoves(const SingleParticlePos& newpos) +{ + active_ptcl_ = -1; + active_pos_ = newpos; + for (size_t i = 0; i < DistTables.size(); ++i) + DistTables[i]->move(*this, newpos, active_ptcl_, false); +} + +template +void +ParticleSetT::loadWalker(Walker_t& awalker, bool pbyp) +{ + ScopedTimer update_scope(myTimers[PS_loadWalker]); + R = awalker.R; + spins = awalker.spins; + 
coordinates_->setAllParticlePos(R); +#if !defined(SOA_MEMORY_OPTIMIZED) + G = awalker.G; + L = awalker.L; +#endif + if (pbyp) { + // in certain cases, full tables must be ready + for (int i = 0; i < DistTables.size(); i++) + if (DistTables[i]->getModes() & DTModes::NEED_FULL_TABLE_ANYTIME) + DistTables[i]->evaluate(*this); + } + + active_ptcl_ = -1; +} + +template +void +ParticleSetT::mw_loadWalker(const RefVectorWithLeader& p_list, + const RefVector& walkers, const std::vector& recompute, + bool pbyp) +{ + auto& p_leader = p_list.getLeader(); + ScopedTimer load_scope(p_leader.myTimers[PS_loadWalker]); + + auto loadWalkerConfig = [](ParticleSetT& pset, Walker_t& awalker) { + pset.R = awalker.R; + pset.spins = awalker.spins; + pset.coordinates_->setAllParticlePos(pset.R); + }; + for (int iw = 0; iw < p_list.size(); ++iw) + if (recompute[iw]) + loadWalkerConfig(p_list[iw], walkers[iw]); + + if (pbyp) { + auto& dts = p_leader.DistTables; + for (int i = 0; i < dts.size(); ++i) { + const auto dt_list(extractDTRefList(p_list, i)); + dts[i]->mw_recompute(dt_list, p_list, recompute); + } + } +} + +template +void +ParticleSetT::saveWalker(Walker_t& awalker) +{ + awalker.R = R; + awalker.spins = spins; +#if !defined(SOA_MEMORY_OPTIMIZED) + awalker.G = G; + awalker.L = L; +#endif +} + +template +void +ParticleSetT::mw_saveWalker(const RefVectorWithLeader& psets, + const RefVector& walkers) +{ + for (int iw = 0; iw < psets.size(); ++iw) + psets[iw].saveWalker(walkers[iw]); +} + +template +void +ParticleSetT::initPropertyList() +{ + PropertyList.clear(); + // Need to add the default Properties according to the enumeration + PropertyList.add("LogPsi"); + PropertyList.add("SignPsi"); + PropertyList.add("UmbrellaWeight"); + PropertyList.add("R2Accepted"); + PropertyList.add("R2Proposed"); + PropertyList.add("DriftScale"); + PropertyList.add("AltEnergy"); + PropertyList.add("LocalEnergy"); + PropertyList.add("LocalPotential"); + + // There is no point in checking this, its quickly 
not consistent as other + // objects update property list. if (PropertyList.size() != + // WP::NUMPROPERTIES) + // { + // app_error() << "The number of default properties for walkers is not + // consistent." << std::endl; app_error() << "NUMPROPERTIES " << + // WP::NUMPROPERTIES << " size of PropertyList " << PropertyList.size() << + // std::endl; throw std::runtime_error("ParticleSet::initPropertyList"); + // } +} + +template +int +ParticleSetT::addPropertyHistory(int leng) +{ + int newL = PropertyHistory.size(); + PropertyHistory.push_back(std::vector(leng, 0.0)); + PHindex.push_back(0); + return newL; +} + +// void ParticleSet::resetPropertyHistory( ) +// { +// for(int i=0;i +void +ParticleSetT::createResource(ResourceCollection& collection) const +{ + coordinates_->createResource(collection); + for (int i = 0; i < DistTables.size(); i++) + DistTables[i]->createResource(collection); + if (structure_factor_) + collection.addResource(std::make_unique>()); +} + +template +void +ParticleSetT::acquireResource(ResourceCollection& collection, + const RefVectorWithLeader& p_list) +{ + auto& ps_leader = p_list.getLeader(); + ps_leader.coordinates_->acquireResource( + collection, extractCoordsRefList(p_list)); + for (int i = 0; i < ps_leader.DistTables.size(); i++) + ps_leader.DistTables[i]->acquireResource( + collection, extractDTRefList(p_list, i)); + + if (ps_leader.structure_factor_) + p_list.getLeader().mw_structure_factor_data_handle_ = + collection.lendResource>(); +} + +template +void +ParticleSetT::releaseResource(ResourceCollection& collection, + const RefVectorWithLeader& p_list) +{ + auto& ps_leader = p_list.getLeader(); + ps_leader.coordinates_->releaseResource( + collection, extractCoordsRefList(p_list)); + for (int i = 0; i < ps_leader.DistTables.size(); i++) + ps_leader.DistTables[i]->releaseResource( + collection, extractDTRefList(p_list, i)); + + if (ps_leader.structure_factor_) + collection.takebackResource( + 
p_list.getLeader().mw_structure_factor_data_handle_); +} + +template +RefVectorWithLeader> +ParticleSetT::extractDTRefList( + const RefVectorWithLeader& p_list, int id) +{ + RefVectorWithLeader> dt_list( + *p_list.getLeader().DistTables[id]); + dt_list.reserve(p_list.size()); + for (ParticleSetT& p : p_list) + dt_list.push_back(*p.DistTables[id]); + return dt_list; +} + +template +RefVectorWithLeader> +ParticleSetT::extractCoordsRefList( + const RefVectorWithLeader& p_list) +{ + RefVectorWithLeader> coords_list( + *p_list.getLeader().coordinates_); + coords_list.reserve(p_list.size()); + for (ParticleSetT& p : p_list) + coords_list.push_back(*p.coordinates_); + return coords_list; +} + +template +RefVectorWithLeader> +ParticleSetT::extractSKRefList( + const RefVectorWithLeader& p_list) +{ + RefVectorWithLeader> sk_list( + *p_list.getLeader().structure_factor_); + sk_list.reserve(p_list.size()); + for (ParticleSetT& p : p_list) + sk_list.push_back(*p.structure_factor_); + return sk_list; +} + +// explicit instantiations +template class ParticleSetT; +template class ParticleSetT; +template class ParticleSetT>; +template class ParticleSetT>; + +template void +ParticleSetT::mw_makeMove( + const RefVectorWithLeader& p_list, Index_t iat, + const MCCoordsT& displs); +template void +ParticleSetT::mw_makeMove( + const RefVectorWithLeader& p_list, Index_t iat, + const MCCoordsT& displs); +template void +ParticleSetT::mw_accept_rejectMoveT( + const RefVectorWithLeader& p_list, Index_t iat, + const std::vector& isAccepted, bool forward_mode); +template void +ParticleSetT::mw_accept_rejectMoveT( + const RefVectorWithLeader& p_list, Index_t iat, + const std::vector& isAccepted, bool forward_mode); + +template void +ParticleSetT::mw_makeMove( + const RefVectorWithLeader& p_list, Index_t iat, + const MCCoordsT& displs); +template void +ParticleSetT::mw_makeMove( + const RefVectorWithLeader& p_list, Index_t iat, + const MCCoordsT& displs); +template void 
+ParticleSetT::mw_accept_rejectMoveT( + const RefVectorWithLeader& p_list, Index_t iat, + const std::vector& isAccepted, bool forward_mode); +template void +ParticleSetT::mw_accept_rejectMoveT( + const RefVectorWithLeader& p_list, Index_t iat, + const std::vector& isAccepted, bool forward_mode); + +template void +ParticleSetT>::mw_makeMove( + const RefVectorWithLeader& p_list, Index_t iat, + const MCCoordsT, CoordsType::POS>& displs); +template void +ParticleSetT>::mw_makeMove( + const RefVectorWithLeader& p_list, Index_t iat, + const MCCoordsT, CoordsType::POS_SPIN>& displs); +template void +ParticleSetT>::mw_accept_rejectMoveT( + const RefVectorWithLeader& p_list, Index_t iat, + const std::vector& isAccepted, bool forward_mode); +template void +ParticleSetT>::mw_accept_rejectMoveT( + const RefVectorWithLeader& p_list, Index_t iat, + const std::vector& isAccepted, bool forward_mode); + +template void +ParticleSetT>::mw_makeMove( + const RefVectorWithLeader& p_list, Index_t iat, + const MCCoordsT, CoordsType::POS>& displs); +template void +ParticleSetT>::mw_makeMove( + const RefVectorWithLeader& p_list, Index_t iat, + const MCCoordsT, CoordsType::POS_SPIN>& displs); +template void +ParticleSetT>::mw_accept_rejectMoveT( + const RefVectorWithLeader& p_list, Index_t iat, + const std::vector& isAccepted, bool forward_mode); +template void +ParticleSetT>::mw_accept_rejectMoveT( + const RefVectorWithLeader& p_list, Index_t iat, + const std::vector& isAccepted, bool forward_mode); +} // namespace qmcplusplus diff --git a/src/Particle/ParticleSetT.h b/src/Particle/ParticleSetT.h new file mode 100644 index 0000000000..138b352616 --- /dev/null +++ b/src/Particle/ParticleSetT.h @@ -0,0 +1,980 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. 
+// +// File developed by: D. Das, University of Illinois at Urbana-Champaign +// Bryan Clark, bclark@Princeton.edu, Princeton University +// Ken Esler, kpesler@gmail.com, University of Illinois at +// Urbana-Champaign Jeremy McMinnis, jmcminis@gmail.com, +// University of Illinois at Urbana-Champaign Jeongnim Kim, +// jeongnim.kim@gmail.com, University of Illinois at +// Urbana-Champaign Jaron T. Krogel, krogeljt@ornl.gov, Oak +// Ridge National Laboratory Mark A. Berrill, +// berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_PARTICLESETT_H +#define QMCPLUSPLUS_PARTICLESETT_H + +#include + +#include "DTModes.h" +#include "DynamicCoordinatesT.h" +#include "MCCoordsT.hpp" +#include "OhmmsPETE/OhmmsArray.h" +#include "OhmmsSoA/VectorSoaContainer.h" +#include "Particle/ParticleSetTraits.h" +#include "ParticleTags.h" +#include "Pools/PooledData.h" +#include "ResourceHandle.h" +#include "SimulationCellT.h" +#include "SpeciesSet.h" +#include "Utilities/TimerManager.h" +#include "Walker.h" +#include "type_traits/template_types.hpp" + +namespace qmcplusplus +{ +/// forward declarations +template +class DistanceTableT; +template +class DistanceTableAAT; +template +class DistanceTableABT; +class ResourceCollection; +template +class StructFactT; +template +struct SKMultiWalkerMemT; + +/** Specialized paritlce class for atomistic simulations + * + * Derived from QMCTraits, ParticleBase and + * OhmmsElementBase. The ParticleLayout class represents a supercell + * with/without periodic boundary conditions. The ParticleLayout class also + * takes care of spatial decompositions for efficient evaluations for the + * interactions with a finite cutoff. 
+ */ +template +class ParticleSetT : public OhmmsElementBase +{ +public: + using RealType = typename ParticleSetTraits::RealType; + using FullPrecRealType = typename ParticleSetTraits::FullPrecRealType; + using ComplexType = typename ParticleSetTraits::ComplexType; + using PosType = typename ParticleSetTraits::PosType; + + using PropertySetType = typename ParticleSetTraits::PropertySetType; + + using Index_t = typename LatticeParticleTraits::Index_t; + using Scalar_t = typename LatticeParticleTraits::Scalar_t; + using ParticleLayout = typename LatticeParticleTraits::ParticleLayout; + using SingleParticlePos = + typename LatticeParticleTraits::SingleParticlePos; + using ParticleIndex = typename LatticeParticleTraits::ParticleIndex; + using ParticlePos = typename LatticeParticleTraits::ParticlePos; + using ParticleScalar = typename LatticeParticleTraits::ParticleScalar; + using ParticleGradient = + typename LatticeParticleTraits::ParticleGradient; + using ParticleLaplacian = + typename LatticeParticleTraits::ParticleLaplacian; + + /// walker type + using Walker_t = Walker, LatticeParticleTraits>; + /// container type to store the property + using PropertyContainer_t = typename Walker_t::PropertyContainer_t; + /// buffer type for a serialized buffer + using Buffer_t = PooledData; + + enum quantum_domains + { + no_quantum_domain = 0, + classical, + quantum + }; + + /// quantum_domain of the particles, default = classical + quantum_domains quantum_domain; + + //@{ public data members + /// Species ID + ParticleIndex GroupID; + /// Position + ParticlePos R; + /// internal spin variables for dynamical spin calculations + ParticleScalar spins; + /// gradients of the particles + ParticleGradient G; + /// laplacians of the particles + ParticleLaplacian L; + /// mass of each particle + ParticleScalar Mass; + /// charge of each particle + ParticleScalar Z; + + /// the index of the active bead for particle-by-particle moves + Index_t activeBead; + /// the direction reptile 
traveling + Index_t direction; + + /// Particle density in G-space for MPC interaction + std::vector> DensityReducedGvecs; + std::vector Density_G; + Array Density_r; + + /// DFT potential + std::vector> VHXCReducedGvecs; + std::vector VHXC_G[2]; + Array VHXC_r[2]; + + /** name-value map of Walker Properties + * + * PropertyMap is used to keep the name-value mapping of + * Walker_t::Properties. PropertyList::Values are not + * necessarily updated during the simulations. + */ + PropertySetType PropertyList; + + /** properties of the current walker + * + * The internal order is identical to PropertyList, which holds + * the matching names. + */ + PropertyContainer_t Properties; + + /** observables in addition to those registered in Properties/PropertyList + * + * Such observables as density, gofr, sk are not stored per walker but + * collected during QMC iterations. + */ + Buffer_t Collectables; + + /// Property history vector + std::vector> PropertyHistory; + std::vector PHindex; + ///@} + + /// current MC step + int current_step; + + /// default constructor + ParticleSetT(const SimulationCellT& simulation_cell, + const DynamicCoordinateKind kind = DynamicCoordinateKind::DC_POS); + + /// copy constructor + ParticleSetT(const ParticleSetT& p); + + /// default destructor + ~ParticleSetT() override; + + /** create grouped particles + * @param agroup number of particles per group + */ + void + create(const std::vector& agroup); + + /** print particle coordinates to a std::ostream + * @param os output stream + * @param maxParticlesToPrint maximal number of particles to print. Pass 0 + * to print all. + */ + void + print(std::ostream& os, const size_t maxParticlesToPrint = 0) const; + + /// dummy. For satisfying OhmmsElementBase. + bool + get(std::ostream& os) const override; + /// dummy. For satisfying OhmmsElementBase. + bool + put(std::istream&) override; + /// dummy. For satisfying OhmmsElementBase. 
+ void + reset() override; + + /// initialize ParticleSet from xmlNode + bool + put(xmlNodePtr cur) override; + + /// specify quantum_domain of particles + void + setQuantumDomain(quantum_domains qdomain); + + void + set_quantum() + { + quantum_domain = quantum; + } + + inline bool + is_classical() const + { + return quantum_domain == classical; + } + + inline bool + is_quantum() const + { + return quantum_domain == quantum; + } + + /// check whether quantum domain is valid for particles + inline bool + quantumDomainValid(quantum_domains qdomain) const + { + return qdomain != no_quantum_domain; + } + + /// check whether quantum domain is valid for particles + inline bool + quantumDomainValid() const + { + return quantumDomainValid(quantum_domain); + } + + /** add a distance table + * @param psrc source particle set + * @param modes bitmask DistanceTable::DTModes + * + * if this->myName == psrc.getName(), AA type. Otherwise, AB type. + */ + int + addTable(const ParticleSetT& psrc, DTModes modes = DTModes::ALL_OFF); + + /// get a distance table by table_ID + inline auto& + getDistTable(int table_ID) const + { + return *DistTables[table_ID]; + } + /// get a distance table by table_ID and dyanmic_cast to DistanceTableAA + const DistanceTableAAT& + getDistTableAA(int table_ID) const; + /// get a distance table by table_ID and dyanmic_cast to DistanceTableAB + const DistanceTableABT& + getDistTableAB(int table_ID) const; + + /** reset all the collectable quantities during a MC iteration + */ + inline void + resetCollectables() + { + std::fill(Collectables.begin(), Collectables.end(), 0.0); + } + + /** update the internal data + *@param skip SK update if skipSK is true + */ + void + update(bool skipSK = false); + + /// batched version of update + static void + mw_update( + const RefVectorWithLeader& p_list, bool skipSK = false); + + /** create Structure Factor with PBCs + */ + void + createSK(); + + bool + hasSK() const + { + return bool(structure_factor_); + } + + /** 
return Structure Factor + */ + const StructFactT& + getSK() const + { + assert(structure_factor_); + return *structure_factor_; + }; + + /** Turn on per particle storage in Structure Factor + */ + void + turnOnPerParticleSK(); + + /** Get state (on/off) of per particle storage in Structure Factor + */ + bool + getPerParticleSKState() const; + + /// return the SpeciesSet of this particle set + inline SpeciesSet& + getSpeciesSet() + { + return my_species_; + } + /// return the const SpeciesSet of this particle set + inline const SpeciesSet& + getSpeciesSet() const + { + return my_species_; + } + + /// return parent's name + inline const std::string& + parentName() const + { + return ParentName; + } + inline void + setName(const std::string& aname) + { + myName = aname; + if (ParentName == "0") { + ParentName = aname; + } + } + + inline const DynamicCoordinatesT& + getCoordinates() const + { + return *coordinates_; + } + + void + resetGroups(); + + const auto& + getSimulationCell() const + { + return simulation_cell_; + } + const auto& + getLattice() const + { + return simulation_cell_.getLattice(); + } + auto& + getPrimitiveLattice() const + { + return const_cast(simulation_cell_.getPrimLattice()); + } + const auto& + getLRBox() const + { + return simulation_cell_.getLRBox(); + } + + inline bool + isSameMass() const + { + return same_mass_; + } + inline bool + isSpinor() const + { + return is_spinor_; + } + inline void + setSpinor(bool is_spinor) + { + is_spinor_ = is_spinor; + } + + /// return active particle id + inline Index_t + getActivePtcl() const + { + return active_ptcl_; + } + inline const PosType& + getActivePos() const + { + return active_pos_; + } + inline Scalar_t + getActiveSpinVal() const + { + return active_spin_val_; + } + + /// return the active position if the particle is active or return the + /// current position if not + inline const PosType& + activeR(int iat) const + { + // When active_ptcl_ == iat, a move has been proposed.
+ return (active_ptcl_ == iat) ? active_pos_ : R[iat]; + } + + /// return the active spin value if the particle is active or return the + /// current spin value if not + inline const Scalar_t& + activeSpin(int iat) const + { + // When active_ptcl_ == iat, a move has been proposed. + return (active_ptcl_ == iat) ? active_spin_val_ : spins[iat]; + } + + /** move the iat-th particle to active_pos_ + * @param iat the index of the particle to be moved + * @param displ the displacement of the iat-th particle position + * @param maybe_accept if false, the caller guarantees that the proposed + * move will not be accepted. + * + * Update active_ptcl_ index and active_pos_ position (R[iat]+displ) for a + * proposed move. Evaluate the related distance table data + * DistanceTable::Temp. If maybe_accept = false, certain operations for + * accepting moves will be skipped for optimal performance. + */ + void + makeMove( + Index_t iat, const SingleParticlePos& displ, bool maybe_accept = true); + /// makeMove, but now includes an update to the spin variable + void + makeMoveWithSpin( + Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl); + + /// batched version of makeMove + template + static void + mw_makeMove(const RefVectorWithLeader>& p_list, Index_t iat, + const MCCoordsT& displs); + + static void + mw_makeMove(const RefVectorWithLeader& p_list, Index_t iat, + const std::vector& displs); + + /// batched version makeMove for spin variable only + static void + mw_makeSpinMove(const RefVectorWithLeader& p_list, + Index_t iat, const std::vector& sdispls); + + /** move the iat-th particle to active_pos_ + * @param iat the index of the particle to be moved + * @param displ random displacement of the iat-th particle + * @return true, if the move is valid + * + * Update active_ptcl_ index and active_pos_ position (R[iat]+displ) for a + * proposed move. Evaluate the related distance table data + * DistanceTable::Temp. 
+ * + * When a Lattice is defined, passing two checks makes a move valid. + * outOfBound(displ): invalid move, if displ is larger than half, currently, + * of the box in any direction isValid(Lattice.toUnit(active_pos_)): invalid + * move, if active_pos_ goes out of the Lattice in any direction marked with + * open BC. Note: active_pos_ and distances tables are always evaluated no + * matter the move is valid or not. + */ + bool + makeMoveAndCheck(Index_t iat, const SingleParticlePos& displ); + /// makeMoveAndCheck, but now includes an update to the spin variable + bool + makeMoveAndCheckWithSpin( + Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl); + + /** Handles virtual moves for all the particles to a single newpos. + * + * The state active_ptcl_ remains -1 and rejectMove is not needed. + * acceptMove can not be used. + * See QMCHamiltonians::MomentumEstimator as an example + */ + void + makeVirtualMoves(const SingleParticlePos& newpos); + + /** move all the particles of a walker + * @param awalker the walker to operate + * @param deltaR proposed displacement + * @param dt factor of deltaR + * @return true if all the moves are legal. + * + * If big displacements or illegal positions are detected, return false. 
+ * If all good, R = awalker.R + dt* deltaR + */ + bool + makeMoveAllParticles( + const Walker_t& awalker, const ParticlePos& deltaR, RealType dt); + + bool + makeMoveAllParticles(const Walker_t& awalker, const ParticlePos& deltaR, + const std::vector& dt); + + /** move all the particles including the drift + * + * Otherwise, everything is the same as makeMove for a walker + */ + bool + makeMoveAllParticlesWithDrift(const Walker_t& awalker, + const ParticlePos& drift, const ParticlePos& deltaR, RealType dt); + + bool + makeMoveAllParticlesWithDrift(const Walker_t& awalker, + const ParticlePos& drift, const ParticlePos& deltaR, + const std::vector& dt); + + /** accept or reject a proposed move + * Two operation modes: + * The using and updating distance tables via `ParticleSet` operate in two + * modes, regular and forward modes. + * + * Regular mode + * The regular mode can only be used when the distance tables for particle + * pairs are fully up-to-date. This is the case after calling + * `ParticleSet::update()` in a unit test or after p-by-p moves in a QMC + * driver. In this mode, the distance tables remain up-to-date after calling + * `ParticleSet::acceptMove` and calling `ParticleSet::rejectMove` is not + * mandatory. + * + * Forward mode + * The forward mode assumes that distance table is not fully up-to-date + * until every particle is accepted or rejected to move once in order. This + * is the mode used in the p-by-p part of drivers. In this mode, calling + * `ParticleSet::accept_rejectMove` is required to handle accept/reject + * rather than calling individual `ParticleSet::acceptMove` and + * `ParticleSet::reject`. `ParticleSet::accept_rejectMove(iel)` ensures the + * distance tables (jel < iel) part is fully up-to-date regardless a move is + * accepted or rejected. For this reason, the rejecting operation inside + * `ParticleSet::accept_rejectMove` involves writing the distances with + * respect to the old particle position. 
+ */ + void + accept_rejectMove(Index_t iat, bool accepted, bool forward_mode = true); + + /** accept the move and update the particle attribute by the proposed move + *in regular mode + *@param iat the index of the particle whose position and other attributes + *to be updated + */ + void + acceptMove(Index_t iat); + + /** reject a proposed move in regular mode + * @param iat the electron whose proposed move gets rejected. + */ + void + rejectMove(Index_t iat); + + /// batched version of acceptMove and rejectMove fused, templated on + /// CoordsType + template + static void + mw_accept_rejectMoveT(const RefVectorWithLeader& p_list, + Index_t iat, const std::vector& isAccepted, + bool forward_mode = true); + + /// batched version of acceptMove and rejectMove fused + static void + mw_accept_rejectMove(const RefVectorWithLeader& p_list, + Index_t iat, const std::vector& isAccepted, + bool forward_mode = true); + + /** batched version of acceptMove and reject Move fused, but only for spins + * + * note: should be called BEFORE mw_accept_rejectMove since the active_ptcl_ + * gets reset to -1 This would cause the assertion that we have the right + * particle index to fail if done in the wrong order + */ + static void + mw_accept_rejectSpinMove(const RefVectorWithLeader& p_list, + Index_t iat, const std::vector& isAccepted); + + void + initPropertyList(); + inline int + addProperty(const std::string& pname) + { + return PropertyList.add(pname.c_str()); + } + + int + addPropertyHistory(int leng); + // void rejectedMove(); + // void resetPropertyHistory( ); + // void addPropertyHistoryPoint(int index, RealType data); + + void + convert(const ParticlePos& pin, ParticlePos& pout); + void + convert2Unit(const ParticlePos& pin, ParticlePos& pout); + void + convert2Cart(const ParticlePos& pin, ParticlePos& pout); + void + convert2Unit(ParticlePos& pout); + void + convert2Cart(ParticlePos& pout); + void + convert2UnitInBox(const ParticlePos& pint, ParticlePos& pout); + void + 
convert2CartInBox(const ParticlePos& pint, ParticlePos& pout); + + void + applyBC(const ParticlePos& pin, ParticlePos& pout); + void + applyBC(ParticlePos& pos); + void + applyBC(const ParticlePos& pin, ParticlePos& pout, int first, int last); + void + applyMinimumImage(ParticlePos& pinout); + + /** load a Walker_t to the current ParticleSet + * @param awalker the reference to the walker to be loaded + * @param pbyp true if it is used by PbyP update + * + * PbyP requires the distance tables and Sk with awalker.R + */ + void + loadWalker(Walker_t& awalker, bool pbyp); + /** batched version of loadWalker */ + static void + mw_loadWalker(const RefVectorWithLeader& p_list, + const RefVector& walkers, const std::vector& recompute, + bool pbyp); + + /** save this to awalker + * + * just the R, G, and L + * More duplicate data that makes code difficult to reason about should be + * removed. + */ + void + saveWalker(Walker_t& awalker); + + /** batched version of saveWalker + * + * just the R, G, and L + */ + static void + mw_saveWalker(const RefVectorWithLeader& psets, + const RefVector& walkers); + + /** update structure factor and unmark active_ptcl_ + *@param skip SK update if skipSK is true + * + * The Coulomb interaction evaluation needs the structure factor. + * For these reason, call donePbyP after the loop of single + * electron moves before evaluating the Hamiltonian. Unmark + * active_ptcl_ is more of a safety measure probably not needed. 
+ */ + void + donePbyP(bool skipSK = false); + /// batched version of donePbyP + static void + mw_donePbyP( + const RefVectorWithLeader& p_list, bool skipSK = false); + + /// return the address of the values of Hamiltonian terms + inline FullPrecRealType* restrict getPropertyBase() + { + return Properties.data(); + } + + /// return the address of the values of Hamiltonian terms + inline const FullPrecRealType* restrict getPropertyBase() const + { + return Properties.data(); + } + + /// return the address of the i-th properties + inline FullPrecRealType* restrict getPropertyBase(int i) + { + return Properties[i]; + } + + /// return the address of the i-th properties + inline const FullPrecRealType* restrict getPropertyBase(int i) const + { + return Properties[i]; + } + + inline void + setTwist(const SingleParticlePos& t) + { + myTwist = t; + } + inline const SingleParticlePos& + getTwist() const + { + return myTwist; + } + + /** Initialize particles around another ParticleSet + * Used to initialize an electron ParticleSet by an ion ParticleSet + */ + void + randomizeFromSource(ParticleSetT& src); + + /** get species name of particle i + */ + inline const std::string& + species_from_index(int i) + { + return my_species_.speciesName[GroupID[i]]; + } + + inline size_t + getTotalNum() const + { + return TotalNum; + } + + inline void + clear() + { + TotalNum = 0; + + R.clear(); + spins.clear(); + GroupID.clear(); + G.clear(); + L.clear(); + Mass.clear(); + Z.clear(); + + coordinates_->resize(0); + } + + /// return the number of groups + inline int + groups() const + { + return group_offsets_->size() - 1; + } + + /// return the first index of a group i + inline int + first(int igroup) const + { + return (*group_offsets_)[igroup]; + } + + /// return the last index of a group i + inline int + last(int igroup) const + { + return (*group_offsets_)[igroup + 1]; + } + + /// return the group id of a given particle in the particle set. 
+ inline int + getGroupID(int iat) const + { + assert(iat >= 0 && iat < TotalNum); + return GroupID[iat]; + } + + /// return the size of a group + inline int + groupsize(int igroup) const + { + return (*group_offsets_)[igroup + 1] - (*group_offsets_)[igroup]; + } + + /// add attributes to list for IO + template + inline void + createAttributeList(ATList& AttribList) + { + R.setTypeName(ParticleTags::postype_tag); + R.setObjName(ParticleTags::position_tag); + spins.setTypeName(ParticleTags::scalartype_tag); + spins.setObjName(ParticleTags::spins_tag); + GroupID.setTypeName(ParticleTags::indextype_tag); + GroupID.setObjName(ParticleTags::ionid_tag); + // add basic attributes + AttribList.add(R); + AttribList.add(spins); + AttribList.add(GroupID); + + G.setTypeName(ParticleTags::gradtype_tag); + L.setTypeName(ParticleTags::laptype_tag); + + G.setObjName("grad"); + L.setObjName("lap"); + + AttribList.add(G); + AttribList.add(L); + + // more particle attributes + Mass.setTypeName(ParticleTags::scalartype_tag); + Mass.setObjName("mass"); + AttribList.add(Mass); + + Z.setTypeName(ParticleTags::scalartype_tag); + Z.setObjName("charge"); + AttribList.add(Z); + } + + inline void + setMapStorageToInput(const std::vector& mapping) + { + map_storage_to_input_ = mapping; + } + inline const std::vector& + get_map_storage_to_input() const + { + return map_storage_to_input_; + } + + inline int + getNumDistTables() const + { + return DistTables.size(); + } + + inline auto& + get_group_offsets() const + { + return *group_offsets_; + } + + /// initialize a shared resource and hand it to a collection + void + createResource(ResourceCollection& collection) const; + /** acquire external resource and assocaite it with the list of ParticleSet + * Note: use RAII ResourceCollectionTeamLock whenever possible + */ + static void + acquireResource(ResourceCollection& collection, + const RefVectorWithLeader& p_list); + /** release external resource + * Note: use RAII ResourceCollectionTeamLock 
whenever possible + */ + static void + releaseResource(ResourceCollection& collection, + const RefVectorWithLeader& p_list); + + static RefVectorWithLeader> + extractDTRefList(const RefVectorWithLeader& p_list, int id); + static RefVectorWithLeader> + extractCoordsRefList(const RefVectorWithLeader& p_list); + static RefVectorWithLeader> + extractSKRefList(const RefVectorWithLeader& p_list); + +protected: + /// reference to global simulation cell + const SimulationCellT& simulation_cell_; + + /// true if the particles have the same mass + bool same_mass_; + /// true is a dynamic spin calculation + bool is_spinor_; + /** the index of the active particle during particle-by-particle moves + * + * when a single particle move is proposed, the particle id is assigned to + * active_ptcl_ No matter the move is accepted or rejected, active_ptcl_ is + * marked back to -1. This state flag is used for picking coordinates and + * distances for SPO evaluation. + */ + Index_t active_ptcl_; + /// the proposed position of active_ptcl_ during particle-by-particle moves + SingleParticlePos active_pos_; + /// the proposed spin of active_ptcl_ during particle-by-particle moves + Scalar_t active_spin_val_; + + /** Map storage index to the input index. + * If not empty, particles were reordered by groups when being loaded from + * XML input. When other input data are affected by reordering, its builder + * should query this mapping. 
map_storage_to_input_[5] = 2 means the index + * 5(6th) particle in this ParticleSet was read from the index 2(3th) + * particle in the XML input + */ + std::vector map_storage_to_input_; + + /// SpeciesSet of particles + SpeciesSet my_species_; + + /// Structure factor + std::unique_ptr> structure_factor_; + + /// multi walker structure factor data + ResourceHandle> mw_structure_factor_data_handle_; + + /** map to handle distance tables + * + * myDistTableMap[source-particle-tag]= locator in the distance table + * myDistTableMap[ObjectTag] === 0 + */ + std::map myDistTableMap; + + /// distance tables that need to be updated by moving this ParticleSet + std::vector>> DistTables; + + /// Descriptions from distance table creation. Same order as DistTables. + std::vector distTableDescriptions; + + TimerList_t myTimers; + + SingleParticlePos myTwist; + + std::string ParentName; + + /// total number of particles + size_t TotalNum; + + /// array to handle a group of distinct particles per species + std::shared_ptr>> group_offsets_; + + /// internal representation of R. It can be an SoA copy of R + std::unique_ptr> coordinates_; + + /** compute temporal DistTables and SK for a new particle position + * + * @param iat the particle that is moved on a sphere + * @param newpos a new particle position + * @param maybe_accept if false, the caller guarantees that the proposed + * move will not be accepted. + */ + void + computeNewPosDistTables( + Index_t iat, const SingleParticlePos& newpos, bool maybe_accept = true); + + /** compute temporal DistTables and SK for a new particle position for each + * walker in a batch + * + * @param p_list the list of wrapped ParticleSet references in a walker + * batch + * @param iat the particle that is moved on a sphere + * @param new_positions new particle positions + * @param maybe_accept if false, the caller guarantees that the proposed + * move will not be accepted. 
+ */ + static void + mw_computeNewPosDistTables(const RefVectorWithLeader& p_list, + Index_t iat, const std::vector& new_positions, + bool maybe_accept = true); + + /** actual implementation for accepting a proposed move in forward mode + * + * @param iat the index of the particle whose position and other attributes + * to be updated + */ + void + acceptMoveForwardMode(Index_t iat); + + /** reject a proposed move in forward mode + * @param iat the electron whose proposed move gets rejected. + */ + void + rejectMoveForwardMode(Index_t iat); + + /// resize internal storage + inline void + resize(size_t numPtcl) + { + TotalNum = numPtcl; + + R.resize(numPtcl); + spins.resize(numPtcl); + GroupID.resize(numPtcl); + G.resize(numPtcl); + L.resize(numPtcl); + Mass.resize(numPtcl); + Z.resize(numPtcl); + + coordinates_->resize(numPtcl); + } +}; + +} // namespace qmcplusplus +#endif diff --git a/src/Particle/ParticleSetTraits.h b/src/Particle/ParticleSetTraits.h new file mode 100644 index 0000000000..3ea028b54f --- /dev/null +++ b/src/Particle/ParticleSetTraits.h @@ -0,0 +1,85 @@ +#ifndef QMCPLUSPLUS_PARTICLESETTRAITS_H +#define QMCPLUSPLUS_PARTICLESETTRAITS_H + +#include + +#include "OhmmsData/RecordProperty.h" +#include "OhmmsPETE/Tensor.h" +#include "OhmmsPETE/TinyVector.h" +#include "Particle/Lattice/CrystalLattice.h" +#include "Particle/ParticleBase/ParticleAttrib.h" +#include "type_traits/complex_help.hpp" + +namespace qmcplusplus +{ +template +struct ParticleSetTraits +{ + enum + { + DIM = OHMMS_DIM + }; + using RealType = RealAlias; + using ComplexType = std::complex; + using ValueType = T; + using IndexType = int; + using PosType = TinyVector; + using GradType = TinyVector; + // using HessType = Tensor; + // using TensorType = Tensor; + // using GradHessType = TinyVector, DIM>; + // using IndexVector = Vector; + // using ValueVector = Vector; + // using ValueMatrix = Matrix; + // using GradVector = Vector; + // using GradMatrix = Matrix; + // using HessVector =
Vector; + // using HessMatrix = Matrix; + // using GradHessVector = Vector; + // using GradHessMatrix = Matrix; + // using VGLVector = VectorSoaContainer; + + using FullPrecRealType = double; + using FullPrecComplexType = std::complex; + using FullPrecValueType = std::conditional_t::value, + FullPrecComplexType, FullPrecRealType>; + + using PropertySetType = RecordNamedProperty; +}; + +template +struct LatticeParticleTraits +{ + enum + { + DIM = OHMMS_DIM + }; + using RealType = typename ParticleSetTraits::RealType; + + using ParticleLayout = CrystalLattice; + using SingleParticleIndex = typename ParticleLayout::SingleParticleIndex; + using SingleParticlePos = typename ParticleLayout::SingleParticlePos; + using ParticleTensorType = typename ParticleLayout::Tensor_t; + + using FullPrecRealType = typename ParticleSetTraits::FullPrecRealType; + using FullPrecComplexType = + typename ParticleSetTraits::FullPrecComplexType; + using FullPrecValueType = typename ParticleSetTraits::FullPrecValueType; + + using FullPrecGradType = TinyVector; + + using Index_t = int; + using Scalar_t = FullPrecRealType; + using Complex_t = FullPrecComplexType; + + using ParticleIndex = ParticleAttrib; + using ParticleScalar = ParticleAttrib; + using ParticlePos = ParticleAttrib; + using ParticleTensor = ParticleAttrib; + + using ParticleGradient = ParticleAttrib; + using ParticleLaplacian = ParticleAttrib; + using SingleParticleValue = FullPrecValueType; +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/RealSpacePositionsT.h b/src/Particle/RealSpacePositionsT.h new file mode 100644 index 0000000000..7cd81723b6 --- /dev/null +++ b/src/Particle/RealSpacePositionsT.h @@ -0,0 +1,96 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. 
+// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +/** @file RealSpacePositionsT.h + */ +#ifndef QMCPLUSPLUS_REALSPACE_POSITIONST_H +#define QMCPLUSPLUS_REALSPACE_POSITIONST_H + +#include "OhmmsSoA/VectorSoaContainer.h" +#include "Particle/DynamicCoordinatesT.h" + +namespace qmcplusplus +{ +/** Introduced to handle virtual moves and ratio computations, e.g. for + * non-local PP evaluations. + */ +template +class RealSpacePositionsT : public DynamicCoordinatesT +{ +public: + using ParticlePos = typename LatticeParticleTraits::ParticlePos; + using RealType = typename DynamicCoordinatesT::RealType; + using PosType = typename DynamicCoordinatesT::PosType; + using PosVectorSoa = typename DynamicCoordinatesT::PosVectorSoa; + + RealSpacePositionsT() : + DynamicCoordinatesT(DynamicCoordinateKind::DC_POS) + { + } + + std::unique_ptr> + makeClone() override + { + return std::make_unique(*this); + } + + void + resize(size_t n) override + { + RSoA.resize(n); + } + size_t + size() const override + { + return RSoA.size(); + } + + void + setAllParticlePos(const ParticlePos& R) override + { + resize(R.size()); + RSoA.copyIn(R); + } + void + setOneParticlePos(const PosType& pos, size_t iat) override + { + RSoA(iat) = pos; + } + + void + mw_acceptParticlePos( + const RefVectorWithLeader>& coords_list, + size_t iat, const std::vector& new_positions, + const std::vector& isAccepted) const override + { + assert(this == &coords_list.getLeader()); + for (size_t iw = 0; iw < isAccepted.size(); iw++) + if (isAccepted[iw]) + coords_list[iw].setOneParticlePos(new_positions[iw], iat); + } + + const PosVectorSoa& + getAllParticlePos() const override + { + return RSoA; + } + PosType + getOneParticlePos(size_t iat) const override + { + return RSoA[iat]; + } + +private: + /// particle positions in SoA layout
+ PosVectorSoa RSoA; +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/RealSpacePositionsTOMPTarget.h b/src/Particle/RealSpacePositionsTOMPTarget.h new file mode 100644 index 0000000000..57a81f6c85 --- /dev/null +++ b/src/Particle/RealSpacePositionsTOMPTarget.h @@ -0,0 +1,328 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. +// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +/** @file RealSpacePositionsTOMPTarget.h + */ +#ifndef QMCPLUSPLUS_REALSPACE_POSITIONST_OMPTARGET_H +#define QMCPLUSPLUS_REALSPACE_POSITIONST_OMPTARGET_H + +#include "OMPTarget/OMPallocator.hpp" +#include "OhmmsSoA/VectorSoaContainer.h" +#include "Particle/DynamicCoordinatesT.h" +#include "Platforms/PinnedAllocator.h" +#include "ResourceCollection.h" + +namespace qmcplusplus +{ +/** Introduced to handle virtual moves and ratio computations, e.g. for + * non-local PP evaluations.
+ */ +template +class RealSpacePositionsTOMPTarget : public DynamicCoordinatesT +{ +public: + using ParticlePos = typename LatticeParticleTraits::ParticlePos; + using RealType = typename DynamicCoordinatesT::RealType; + using PosType = typename DynamicCoordinatesT::PosType; + using PosVectorSoa = typename DynamicCoordinatesT::PosVectorSoa; + static constexpr auto DIM = ParticleSetTraits::DIM; + + RealSpacePositionsTOMPTarget() : + DynamicCoordinatesT(DynamicCoordinateKind::DC_POS_OFFLOAD), + is_host_position_changed_(false) + { + } + RealSpacePositionsTOMPTarget(const RealSpacePositionsTOMPTarget& in) : + DynamicCoordinatesT(DynamicCoordinateKind::DC_POS_OFFLOAD), + RSoA(in.RSoA) + { + RSoA_hostview.attachReference( + RSoA.size(), RSoA.capacity(), RSoA.data()); + updateH2D(); + } + + std::unique_ptr> + makeClone() override + { + return std::make_unique(*this); + } + + void + resize(size_t n) override + { + if (RSoA.size() != n) { + RSoA.resize(n); + RSoA_hostview.attachReference( + RSoA.size(), RSoA.capacity(), RSoA.data()); + } + } + + size_t + size() const override + { + return RSoA_hostview.size(); + } + + void + setAllParticlePos(const ParticlePos& R) override + { + resize(R.size()); + RSoA_hostview.copyIn(R); + updateH2D(); + is_nw_new_pos_prepared = false; + } + + void + setOneParticlePos(const PosType& pos, size_t iat) override + { + RSoA_hostview(iat) = pos; + is_host_position_changed_ = true; + /* This was too slow due to overhead. 
+ RealType x = pos[0]; + RealType y = pos[1]; + RealType z = pos[2]; + RealType* data = RSoA.data(); + size_t offset = RSoA.capacity(); + + PRAGMA_OFFLOAD("omp target map(to : x, y, z, iat)") + { + data[iat] = x; + data[iat + offset] = y; + data[iat + offset * 2] = z; + } + */ + } + + void + mw_copyActivePos( + const RefVectorWithLeader>& coords_list, + size_t iat, const std::vector& new_positions) const override + { + assert(this == &coords_list.getLeader()); + auto& coords_leader = + coords_list + .template getCastedLeader(); + + const auto nw = coords_list.size(); + auto& mw_new_pos = + coords_leader.mw_mem_handle_.getResource().mw_new_pos; + mw_new_pos.resize(nw); + + for (int iw = 0; iw < nw; iw++) + mw_new_pos(iw) = new_positions[iw]; + + auto* mw_pos_ptr = mw_new_pos.data(); + PRAGMA_OFFLOAD("omp target update to(\ + mw_pos_ptr[DIM * mw_new_pos.capacity()])") + + coords_leader.is_nw_new_pos_prepared = true; + } + + void + mw_acceptParticlePos( + const RefVectorWithLeader>& coords_list, + size_t iat, const std::vector& new_positions, + const std::vector& isAccepted) const override + { + assert(this == &coords_list.getLeader()); + const size_t nw = coords_list.size(); + auto& coords_leader = + coords_list + .template getCastedLeader(); + MultiWalkerMem& mw_mem = coords_leader.mw_mem_handle_; + auto& mw_new_pos = mw_mem.mw_new_pos; + auto& mw_rsoa_ptrs = mw_mem.mw_rsoa_ptrs; + auto& mw_accept_indices = mw_mem.mw_accept_indices; + + if (!is_nw_new_pos_prepared) { + mw_copyActivePos(coords_list, iat, new_positions); + app_warning() << "This message only appear in unit tests. Report a " + "bug if seen in production code." 
+ << std::endl; + } + + coords_leader.is_nw_new_pos_prepared = false; + + mw_accept_indices.resize(nw); + auto* restrict id_array = mw_accept_indices.data(); + + size_t num_accepted = 0; + for (int iw = 0; iw < nw; iw++) + if (isAccepted[iw]) { + auto& coords = coords_list.template getCastedElement< + RealSpacePositionsTOMPTarget>(iw); + id_array[num_accepted] = iw; + // save new coordinates on host copy + coords.RSoA_hostview(iat) = mw_new_pos[iw]; + num_accepted++; + } + + // early return to avoid OpenMP runtime mishandling of size 0 in + // transfer/compute. + if (num_accepted == 0) + return; + + // offload to GPU + auto* restrict mw_pos_ptr = mw_new_pos.data(); + auto* restrict mw_rosa_ptr = mw_rsoa_ptrs.data(); + const size_t rsoa_stride = RSoA.capacity(); + const size_t mw_pos_stride = mw_new_pos.capacity(); + + PRAGMA_OFFLOAD("omp target teams distribute parallel for \ + map(always, to : id_array[:num_accepted])") + for (int i = 0; i < num_accepted; i++) { + const int iw = id_array[i]; + RealType* RSoA_dev_ptr = mw_rosa_ptr[iw]; + for (int id = 0; id < QMCTraits::DIM; id++) + RSoA_dev_ptr[iat + rsoa_stride * id] = + mw_pos_ptr[iw + mw_pos_stride * id]; + } + } + + const PosVectorSoa& + getAllParticlePos() const override + { + return RSoA_hostview; + } + PosType + getOneParticlePos(size_t iat) const override + { + return RSoA_hostview[iat]; + } + + void + donePbyP() override + { + is_nw_new_pos_prepared = false; + if (is_host_position_changed_) { + updateH2D(); + is_host_position_changed_ = false; + } + } + + const RealType* + getDevicePtr() const + { + return RSoA.device_data(); + } + + const auto& + getFusedNewPosBuffer() const + { + return mw_mem_handle_.getResource().mw_new_pos; + } + + void + createResource(ResourceCollection& collection) const override + { + auto resource_index = + collection.addResource(std::make_unique()); + } + + void + acquireResource(ResourceCollection& collection, + const RefVectorWithLeader>& coords_list) + const override + { + 
MultiWalkerMem& mw_mem = + coords_list.template getCastedLeader() + .mw_mem_handle_ = collection.lendResource(); + + auto& mw_rsoa_ptrs(mw_mem.mw_rsoa_ptrs); + const auto nw = coords_list.size(); + mw_rsoa_ptrs.resize(nw); + for (int iw = 0; iw < nw; iw++) { + auto& coords = + coords_list + .template getCastedElement( + iw); + mw_rsoa_ptrs[iw] = coords.RSoA.device_data(); + } + mw_rsoa_ptrs.updateTo(); + } + + void + releaseResource(ResourceCollection& collection, + const RefVectorWithLeader>& coords_list) + const override + { + collection.takebackResource( + coords_list.template getCastedLeader() + .mw_mem_handle_); + } + + const auto& + getMultiWalkerRSoADevicePtrs() const + { + return mw_mem_handle_.getResource().mw_rsoa_ptrs; + } + +private: + /// particle positions in SoA layout + VectorSoaContainer>> + RSoA; + + /// multi walker shared memory buffer + struct MultiWalkerMem : public Resource + { + /// one particle new/old positions in SoA layout + VectorSoaContainer>> + mw_new_pos; + + /// accept list + Vector>> + mw_accept_indices; + + /// RSoA device ptr list + Vector>> + mw_rsoa_ptrs; + + MultiWalkerMem() : Resource("MultiWalkerMem") + { + } + + MultiWalkerMem(const MultiWalkerMem&) : MultiWalkerMem() + { + } + + std::unique_ptr + makeClone() const override + { + return std::make_unique(*this); + } + }; + + ResourceHandle mw_mem_handle_; + + /// host view of RSoA + PosVectorSoa RSoA_hostview; + + /// if true, host position has been changed while the device copy has not + /// been updated. + bool is_host_position_changed_; + + /// if true, mw_new_pos has been updated with active positions. 
+ bool is_nw_new_pos_prepared = false; // default-initialized: no constructor sets it, yet mw_acceptParticlePos reads it + + void + updateH2D() + { + RealType* data = RSoA.data(); + PRAGMA_OFFLOAD( + "omp target update to(data[0:RSoA.capacity()*QMCTraits::DIM])") + is_host_position_changed_ = false; + } +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/SimulationCellT.cpp b/src/Particle/SimulationCellT.cpp new file mode 100644 index 0000000000..8ad7295bb6 --- /dev/null +++ b/src/Particle/SimulationCellT.cpp @@ -0,0 +1,74 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2021 QMCPACK developers. +// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#include "SimulationCellT.h" +#include "Platforms/Host/OutputManager.h" + +namespace qmcplusplus +{ +template +SimulationCellT::SimulationCellT() = default; + +template +SimulationCellT::SimulationCellT(const Lattice& lattice) : lattice_(lattice) +{ + resetLRBox(); +} + +template +void +SimulationCellT::resetLRBox() +{ + if (lattice_.SuperCellEnum != SUPERCELL_OPEN) { + lattice_.SetLRCutoffs(lattice_.Rv); + LRBox_ = lattice_; + bool changed = false; + if (lattice_.SuperCellEnum == SUPERCELL_SLAB && + lattice_.VacuumScale != 1.0) { + LRBox_.R(2, 0) *= lattice_.VacuumScale; + LRBox_.R(2, 1) *= lattice_.VacuumScale; + LRBox_.R(2, 2) *= lattice_.VacuumScale; + changed = true; + } + else if (lattice_.SuperCellEnum == SUPERCELL_WIRE && + lattice_.VacuumScale != 1.0) { + LRBox_.R(1, 0) *= lattice_.VacuumScale; + LRBox_.R(1, 1) *= lattice_.VacuumScale; + LRBox_.R(1, 2) *= lattice_.VacuumScale; + LRBox_.R(2, 0) *= lattice_.VacuumScale; + LRBox_.R(2, 1) *= lattice_.VacuumScale; + LRBox_.R(2, 2) *= lattice_.VacuumScale;
+ changed = true; + } + LRBox_.reset(); + LRBox_.SetLRCutoffs(LRBox_.Rv); + LRBox_.printCutoffs(app_log()); + + if (changed) { + app_summary() + << " Simulation box changed by vacuum supercell conditions" + << std::endl; + app_log() << "--------------------------------------- " + << std::endl; + LRBox_.print(app_log()); + app_log() << "--------------------------------------- " + << std::endl; + } + + k_lists_.updateKLists(LRBox_, LRBox_.LR_kc, LRBox_.ndim); + } +} + +template class SimulationCellT; +template class SimulationCellT; +template class SimulationCellT>; +template class SimulationCellT>; +} // namespace qmcplusplus diff --git a/src/Particle/SimulationCellT.h b/src/Particle/SimulationCellT.h new file mode 100644 index 0000000000..ff8240325a --- /dev/null +++ b/src/Particle/SimulationCellT.h @@ -0,0 +1,71 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2021 QMCPACK developers. 
+// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_SIMULATIONCELLT_H +#define QMCPLUSPLUS_SIMULATIONCELLT_H + +#include "LongRange/KContainerT.h" +#include "ParticleSetTraits.h" + +namespace qmcplusplus +{ +class ParticleSetPool; + +template +class SimulationCellT +{ +public: + using Lattice = typename LatticeParticleTraits::ParticleLayout; + + SimulationCellT(); + SimulationCellT(const Lattice& lattice); + + const Lattice& + getLattice() const + { + return lattice_; + } + const Lattice& + getPrimLattice() const + { + return primative_lattice_; + } + const Lattice& + getLRBox() const + { + return LRBox_; + } + + void + resetLRBox(); + + /// access k_lists_ read only + const KContainerT& + getKLists() const + { + return k_lists_; + } + +private: + /// simulation cell lattice + Lattice lattice_; + /// Primitive cell lattice + Lattice primative_lattice_; + /// long-range box + Lattice LRBox_; + + /// K-Vector List. + KContainerT k_lists_; + + friend class ParticleSetPool; +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/SoaDistanceTableAAT.h b/src/Particle/SoaDistanceTableAAT.h new file mode 100644 index 0000000000..289bcc22a8 --- /dev/null +++ b/src/Particle/SoaDistanceTableAAT.h @@ -0,0 +1,237 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +// Amrita Mathuriya, amrita.mathuriya@intel.com, Intel Corp. +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. 
+////////////////////////////////////////////////////////////////////////////////////// +// -*- C++ -*- +#ifndef QMCPLUSPLUS_DTDIMPL_AAT_H +#define QMCPLUSPLUS_DTDIMPL_AAT_H + +#include "CPU/SIMD/algorithm.hpp" +#include "Lattice/ParticleBConds3DSoa.h" +#include "Particle/DistanceTableT.h" + +namespace qmcplusplus +{ +/**@ingroup nnlist + * @brief A derived class from DistanceTableAAT, specialized for dense case + */ +template +struct SoaDistanceTableAAT : + public DTD_BConds::RealType, D, SC>, + public DistanceTableAAT +{ + using RealType = typename DistanceTableAAT::RealType; + using PosType = typename DistanceTableAAT::PosType; + using IndexType = typename DistanceTableAAT::IndexType; + + /// actual memory for dist and displacements_ + aligned_vector memory_pool_; + + SoaDistanceTableAAT(ParticleSetT& target) : + DTD_BConds(target.getLattice()), + DistanceTableAAT(target, DTModes::ALL_OFF), + num_targets_padded_(getAlignedSize(this->num_targets_)), +#if !defined(NDEBUG) + old_prepared_elec_id_(-1), +#endif + evaluate_timer_(createGlobalTimer(std::string("DTAA::evaluate_") + + target.getName() + "_" + target.getName(), + timer_level_fine)), + move_timer_(createGlobalTimer(std::string("DTAA::move_") + + target.getName() + "_" + target.getName(), + timer_level_fine)), + update_timer_(createGlobalTimer(std::string("DTAA::update_") + + target.getName() + "_" + target.getName(), + timer_level_fine)) + { + resize(); + } + + SoaDistanceTableAAT() = delete; + SoaDistanceTableAAT(const SoaDistanceTableAAT&) = delete; + ~SoaDistanceTableAAT() override + { + } + + size_t + compute_size(int N) const + { + const size_t num_padded = getAlignedSize(N); + const size_t Alignment = getAlignment(); + return (num_padded * (2 * N - num_padded + 1) + + (Alignment - 1) * num_padded) / + 2; + } + + void + resize() + { + // initialize memory containers and views + const size_t total_size = compute_size(this->num_targets_); + memory_pool_.resize(total_size * (1 + D)); + 
this->distances_.resize(this->num_targets_); + this->displacements_.resize(this->num_targets_); + for (int i = 0; i < this->num_targets_; ++i) { + this->distances_[i].attachReference( + memory_pool_.data() + compute_size(i), i); + this->displacements_[i].attachReference(i, total_size, + memory_pool_.data() + total_size + compute_size(i)); + } + + this->old_r_.resize(this->num_targets_); + this->old_dr_.resize(this->num_targets_); + this->temp_r_.resize(this->num_targets_); + this->temp_dr_.resize(this->num_targets_); + } + + inline void + evaluate(ParticleSetT& P) override + { + ScopedTimer local_timer(evaluate_timer_); + constexpr RealType BigR = std::numeric_limits::max(); + for (int iat = 1; iat < this->num_targets_; ++iat) + DTD_BConds::computeDistances(P.R[iat], + P.getCoordinates().getAllParticlePos(), + this->distances_[iat].data(), this->displacements_[iat], 0, iat, + iat); + } + + /// evaluate the temporary pair relations + inline void + move(const ParticleSetT& P, const PosType& rnew, const IndexType iat, + bool prepare_old) override + { + ScopedTimer local_timer(move_timer_); + +#if !defined(NDEBUG) + old_prepared_elec_id_ = prepare_old ? iat : -1; +#endif + DTD_BConds::computeDistances(rnew, + P.getCoordinates().getAllParticlePos(), this->temp_r_.data(), + this->temp_dr_, 0, this->num_targets_, iat); + // set up old_r_ and old_dr_ for moves that may get accepted. 
+ if (prepare_old) { + // recompute from scratch + DTD_BConds::computeDistances(P.R[iat], + P.getCoordinates().getAllParticlePos(), this->old_r_.data(), + this->old_dr_, 0, this->num_targets_, iat); + this->old_r_[iat] = + std::numeric_limits::max(); // assign a big number + } + } + + int + get_first_neighbor( + IndexType iat, RealType& r, PosType& dr, bool newpos) const override + { + // ensure there are neighbors + assert(this->num_targets_ > 1); + RealType min_dist = std::numeric_limits::max(); + int index = -1; + if (newpos) { + for (int jat = 0; jat < this->num_targets_; ++jat) + if (this->temp_r_[jat] < min_dist && jat != iat) { + min_dist = this->temp_r_[jat]; + index = jat; + } + assert(index >= 0); + dr = this->temp_dr_[index]; + } + else { + for (int jat = 0; jat < iat; ++jat) + if (this->distances_[iat][jat] < min_dist) { + min_dist = this->distances_[iat][jat]; + index = jat; + } + for (int jat = iat + 1; jat < this->num_targets_; ++jat) + if (this->distances_[jat][iat] < min_dist) { + min_dist = this->distances_[jat][iat]; + index = jat; + } + assert(index != iat && index >= 0); + if (index < iat) + dr = this->displacements_[iat][index]; + else + dr = this->displacements_[index][iat]; + } + r = min_dist; + return index; + } + + /** After accepting the iat-th particle, update the iat-th row of distances_ + * and displacements_. 
Upper triangle is not needed in the later computation + * and thus not updated + */ + inline void + update(IndexType iat) override + { + ScopedTimer local_timer(update_timer_); + // update [0, iat) + const int nupdate = iat; + // copy row + assert(nupdate <= this->temp_r_.size()); + std::copy_n( + this->temp_r_.data(), nupdate, this->distances_[iat].data()); + for (int idim = 0; idim < D; ++idim) + std::copy_n(this->temp_dr_.data(idim), nupdate, + this->displacements_[iat].data(idim)); + // copy column + for (size_t i = iat + 1; i < this->num_targets_; ++i) { + this->distances_[i][iat] = this->temp_r_[i]; + this->displacements_[i](iat) = -this->temp_dr_[i]; + } + } + + void + updatePartial(IndexType jat, bool from_temp) override + { + ScopedTimer local_timer(update_timer_); + // update [0, jat) + const int nupdate = jat; + if (from_temp) { + // copy row + assert(nupdate <= this->temp_r_.size()); + std::copy_n( + this->temp_r_.data(), nupdate, this->distances_[jat].data()); + for (int idim = 0; idim < D; ++idim) + std::copy_n(this->temp_dr_.data(idim), nupdate, + this->displacements_[jat].data(idim)); + } + else { + assert(old_prepared_elec_id_ == jat); + // copy row + assert(nupdate <= this->old_r_.size()); + std::copy_n( + this->old_r_.data(), nupdate, this->distances_[jat].data()); + for (int idim = 0; idim < D; ++idim) + std::copy_n(this->old_dr_.data(idim), nupdate, + this->displacements_[jat].data(idim)); + } + } + +private: + /// number of targets with padding + const size_t num_targets_padded_; +#if !defined(NDEBUG) + /** set to particle id after move() with prepare_old = true. -1 means not + * prepared. It is intended only for safety checks, not for codepath + * selection. 
+ */ + int old_prepared_elec_id_; +#endif + /// timer for evaluate() + NewTimer& evaluate_timer_; + /// timer for move() + NewTimer& move_timer_; + /// timer for update() + NewTimer& update_timer_; +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/SoaDistanceTableAATOMPTarget.h b/src/Particle/SoaDistanceTableAATOMPTarget.h new file mode 100644 index 0000000000..e9a453fcd3 --- /dev/null +++ b/src/Particle/SoaDistanceTableAATOMPTarget.h @@ -0,0 +1,624 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2021 QMCPACK developers. +// +// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +// Amrita Mathuriya, amrita.mathuriya@intel.com, Intel Corp. +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// +// -*- C++ -*- +#ifndef QMCPLUSPLUS_DTDIMPL_AAT_OMPTARGET_H +#define QMCPLUSPLUS_DTDIMPL_AAT_OMPTARGET_H + +#include "CPU/SIMD/algorithm.hpp" +#include "DistanceTableT.h" +#include "Lattice/ParticleBConds3DSoa.h" +#include "OMPTarget/OMPTargetMath.hpp" +#include "OMPTarget/OMPallocator.hpp" +#include "Particle/RealSpacePositionsTOMPTarget.h" +#include "Platforms/PinnedAllocator.h" +#include "ResourceCollection.h" + +namespace qmcplusplus +{ +/**@ingroup nnlist + * @brief A derived class from DistanceTableAAT, specialized for dense case + */ +template +struct SoaDistanceTableAATOMPTarget : + public DTD_BConds::RealType, D, SC>, + public DistanceTableAAT +{ + using RealType = typename DistanceTableAAT::RealType; + using PosType = typename DistanceTableAAT::PosType; + using IndexType = typename DistanceTableAAT::IndexType; + using DistRow = typename DistanceTableAAT::DistRow; + 
using DisplRow = typename DistanceTableAAT::DisplRow; + + /// actual memory for dist and displacements_ + aligned_vector memory_pool_; + + /// actual memory for temp_r_ + DistRow temp_r_mem_; + /// actual memory for temp_dr_ + DisplRow temp_dr_mem_; + /// actual memory for old_r_ + DistRow old_r_mem_; + /// actual memory for old_dr_ + DisplRow old_dr_mem_; + + /// multi walker shared memory buffer + struct DTAAMultiWalkerMem : public Resource + { + /// dist displ for temporary and old pairs + Vector>> + mw_new_old_dist_displ; + + /** distances from a range of indices to the source. + * for original particle index i (row) and source particle id j (col) + * j < i, the element data is dist(r_i - r_j) + * j > i, the element data is dist(r_(n - 1 - i) - r_(n - 1 - j)) + */ + Vector>> + mw_distances_subset; + + DTAAMultiWalkerMem() : Resource("DTAAMultiWalkerMem") + { + } + + DTAAMultiWalkerMem(const DTAAMultiWalkerMem&) : DTAAMultiWalkerMem() + { + } + + std::unique_ptr + makeClone() const override + { + return std::make_unique(*this); + } + }; + + ResourceHandle mw_mem_handle_; + + SoaDistanceTableAATOMPTarget(ParticleSetT& target) : + DTD_BConds(target.getLattice()), + DistanceTableAAT(target, DTModes::ALL_OFF), + num_targets_padded_(getAlignedSize(this->num_targets_)), +#if !defined(NDEBUG) + old_prepared_elec_id_(-1), +#endif + offload_timer_(createGlobalTimer( + std::string("DTAAOMPTarget::offload_") + this->name_, + timer_level_fine)), + evaluate_timer_(createGlobalTimer( + std::string("DTAAOMPTarget::evaluate_") + this->name_, + timer_level_fine)), + move_timer_( + createGlobalTimer(std::string("DTAAOMPTarget::move_") + this->name_, + timer_level_fine)), + update_timer_(createGlobalTimer( + std::string("DTAAOMPTarget::update_") + this->name_, + timer_level_fine)) + + { + auto* coordinates_soa = + dynamic_cast*>( + &target.getCoordinates()); + if (!coordinates_soa) + throw std::runtime_error("Source particle set doesn't have OpenMP " + "offload. 
Contact developers!"); + resize(); + PRAGMA_OFFLOAD("omp target enter data map(to : this[:1])") + } + + SoaDistanceTableAATOMPTarget() = delete; + SoaDistanceTableAATOMPTarget(const SoaDistanceTableAATOMPTarget&) = delete; + ~SoaDistanceTableAATOMPTarget(){ + PRAGMA_OFFLOAD("omp target exit data map(delete : this[:1])")} + + size_t compute_size(int N) const + { + const size_t num_padded = getAlignedSize(N); + const size_t Alignment = getAlignment(); + return (num_padded * (2 * N - num_padded + 1) + + (Alignment - 1) * num_padded) / + 2; + } + + void + resize() + { + // initialize memory containers and views + const size_t total_size = compute_size(this->num_targets_); + memory_pool_.resize(total_size * (1 + D)); + this->distances_.resize(this->num_targets_); + this->displacements_.resize(this->num_targets_); + for (int i = 0; i < this->num_targets_; ++i) { + this->distances_[i].attachReference( + memory_pool_.data() + compute_size(i), i); + this->displacements_[i].attachReference(i, total_size, + memory_pool_.data() + total_size + compute_size(i)); + } + + old_r_mem_.resize(this->num_targets_); + old_dr_mem_.resize(this->num_targets_); + temp_r_mem_.resize(this->num_targets_); + temp_dr_mem_.resize(this->num_targets_); + } + + const RealType* + getMultiWalkerTempDataPtr() const override + { + return mw_mem_handle_.getResource().mw_new_old_dist_displ.data(); + } + + void + createResource(ResourceCollection& collection) const override + { + auto resource_index = + collection.addResource(std::make_unique()); + } + + void + acquireResource(ResourceCollection& collection, + const RefVectorWithLeader>& dt_list) const override + { + assert(this == &dt_list.getLeader()); + auto& dt_leader = + dt_list.template getCastedLeader(); + dt_leader.mw_mem_handle_ = + collection.lendResource(); + const size_t nw = dt_list.size(); + const size_t stride_size = num_targets_padded_ * (D + 1); + + for (int iw = 0; iw < nw; iw++) { + auto& dt = + dt_list.template getCastedElement( + iw); 
+ dt.temp_r_.free(); + dt.temp_dr_.free(); + dt.old_r_.free(); + dt.old_dr_.free(); + } + + auto& mw_new_old_dist_displ = + dt_leader.mw_mem_handle_.getResource().mw_new_old_dist_displ; + mw_new_old_dist_displ.resize(nw * 2 * stride_size); + for (int iw = 0; iw < nw; iw++) { + auto& dt = + dt_list.template getCastedElement( + iw); + dt.temp_r_.attachReference( + mw_new_old_dist_displ.data() + stride_size * iw, + num_targets_padded_); + dt.temp_dr_.attachReference(this->num_targets_, num_targets_padded_, + mw_new_old_dist_displ.data() + stride_size * iw + + num_targets_padded_); + dt.old_r_.attachReference( + mw_new_old_dist_displ.data() + stride_size * (iw + nw), + num_targets_padded_); + dt.old_dr_.attachReference(this->num_targets_, num_targets_padded_, + mw_new_old_dist_displ.data() + stride_size * (iw + nw) + + num_targets_padded_); + } + } + + void + releaseResource(ResourceCollection& collection, + const RefVectorWithLeader>& dt_list) const override + { + collection.takebackResource( + dt_list.template getCastedLeader() + .mw_mem_handle_); + const size_t nw = dt_list.size(); + for (int iw = 0; iw < nw; iw++) { + auto& dt = + dt_list.template getCastedElement( + iw); + dt.temp_r_.free(); + dt.temp_dr_.free(); + dt.old_r_.free(); + dt.old_dr_.free(); + } + } + + inline void + evaluate(ParticleSetT& P) override + { + ScopedTimer local_timer(evaluate_timer_); + + constexpr T BigR = std::numeric_limits::max(); + for (int iat = 1; iat < this->num_targets_; ++iat) + DTD_BConds::computeDistances(P.R[iat], + P.getCoordinates().getAllParticlePos(), + this->distances_[iat].data(), this->displacements_[iat], 0, iat, + iat); + } + + /** compute distances from particles in [range_begin, range_end) to all the + * particles. Although [range_begin, range_end) can be any particle [0, + * num_sources), it is only necessary to compute half of the table due to + * the symmetry of AA table. 
See note of the output data object + * mw_distances_subset. To keep resident memory minimal on the device, + * range_end - range_begin <= num_particls_stored is required. + */ + const RealType* + mw_evalDistsInRange(const RefVectorWithLeader>& dt_list, + const RefVectorWithLeader>& p_list, size_t range_begin, + size_t range_end) const override + { + auto& dt_leader = + dt_list.template getCastedLeader(); + const size_t subset_size = range_end - range_begin; + if (subset_size > dt_leader.num_particls_stored) + throw std::runtime_error("not enough internal buffer"); + + ScopedTimer local_timer(dt_leader.evaluate_timer_); + + DTAAMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_; + auto& pset_leader = p_list.getLeader(); + + const size_t nw = dt_list.size(); + const auto num_sources_local = dt_leader.num_targets_; + const auto num_padded = dt_leader.num_targets_padded_; + mw_mem.mw_distances_subset.resize(nw * subset_size * num_padded); + + const int ChunkSizePerTeam = 512; + const size_t num_teams = + (num_sources_local + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + + auto& coordinates_leader = + static_cast&>( + pset_leader.getCoordinates()); + + auto* rsoa_dev_list_ptr = + coordinates_leader.getMultiWalkerRSoADevicePtrs().data(); + auto* dist_ranged = mw_mem.mw_distances_subset.data(); + { + ScopedTimer offload(dt_leader.offload_timer_); + PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \ + num_teams(nw * num_teams)") + for (int iw = 0; iw < nw; ++iw) + for (int team_id = 0; team_id < num_teams; team_id++) { + auto* source_pos_ptr = rsoa_dev_list_ptr[iw]; + const size_t first = ChunkSizePerTeam * team_id; + const size_t last = omptarget::min( + first + ChunkSizePerTeam, num_sources_local); + + PRAGMA_OFFLOAD("omp parallel for") + for (int iel = first; iel < last; iel++) { + for (int irow = 0; irow < subset_size; irow++) { + RealType* dist = dist_ranged + + (irow + subset_size * iw) * num_padded; + size_t id_target = irow + range_begin; + + RealType dx, dy, dz; 
+ if (id_target < iel) { + dx = source_pos_ptr[id_target] - + source_pos_ptr[iel]; + dy = source_pos_ptr[id_target + num_padded] - + source_pos_ptr[iel + num_padded]; + dz = + source_pos_ptr[id_target + num_padded * 2] - + source_pos_ptr[iel + num_padded * 2]; + } + else { + const size_t id_target_reverse = + num_sources_local - 1 - id_target; + const size_t iel_reverse = + num_sources_local - 1 - iel; + dx = source_pos_ptr[id_target_reverse] - + source_pos_ptr[iel_reverse]; + dy = source_pos_ptr[id_target_reverse + + num_padded] - + source_pos_ptr[iel_reverse + num_padded]; + dz = source_pos_ptr[id_target_reverse + + num_padded * 2] - + source_pos_ptr[iel_reverse + + num_padded * 2]; + } + + dist[iel] = + DTD_BConds::computeDist( + dx, dy, dz); + } + } + } + } + return mw_mem.mw_distances_subset.data(); + } + + /// evaluate the temporary pair relations + inline void + move(const ParticleSetT& P, const PosType& rnew, const IndexType iat, + bool prepare_old) override + { + ScopedTimer local_timer(move_timer_); + +#if !defined(NDEBUG) + old_prepared_elec_id_ = prepare_old ? iat : -1; +#endif + this->temp_r_.attachReference(temp_r_mem_.data(), temp_r_mem_.size()); + this->temp_dr_.attachReference( + temp_dr_mem_.size(), temp_dr_mem_.capacity(), temp_dr_mem_.data()); + + assert((prepare_old && iat >= 0 && iat < this->num_targets_) || + !prepare_old); + DTD_BConds::computeDistances(rnew, + P.getCoordinates().getAllParticlePos(), this->temp_r_.data(), + this->temp_dr_, 0, this->num_targets_, iat); + // set up old_r_ and old_dr_ for moves that may get accepted. 
+ if (prepare_old) { + this->old_r_.attachReference(old_r_mem_.data(), old_r_mem_.size()); + this->old_dr_.attachReference( + old_dr_mem_.size(), old_dr_mem_.capacity(), old_dr_mem_.data()); + // recompute from scratch + DTD_BConds::computeDistances(P.R[iat], + P.getCoordinates().getAllParticlePos(), this->old_r_.data(), + this->old_dr_, 0, this->num_targets_, iat); + this->old_r_[iat] = + std::numeric_limits::max(); // assign a big number + } + } + + /** evaluate the temporary pair relations when a move is proposed + * this implementation is asynchronous and the synchronization is managed at + * ParticleSet. Transferring results to host depends on + * DTModes::NEED_TEMP_DATA_ON_HOST. If the temporary pair distance are + * consumed on the device directly, the device to host data transfer can be + * skipped as an optimization. + */ + void + mw_move(const RefVectorWithLeader>& dt_list, + const RefVectorWithLeader>& p_list, + const std::vector& rnew_list, const IndexType iat, + bool prepare_old = true) const override + { + assert(this == &dt_list.getLeader()); + auto& dt_leader = + dt_list.template getCastedLeader(); + DTAAMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_; + auto& pset_leader = p_list.getLeader(); + + ScopedTimer local_timer(move_timer_); + const size_t nw = dt_list.size(); + const size_t stride_size = num_targets_padded_ * (D + 1); + + auto& mw_new_old_dist_displ = mw_mem.mw_new_old_dist_displ; + + for (int iw = 0; iw < nw; iw++) { + auto& dt = + dt_list.template getCastedElement( + iw); +#if !defined(NDEBUG) + dt.old_prepared_elec_id_ = prepare_old ? 
iat : -1; +#endif + auto& coordinates_soa = + static_cast&>( + p_list[iw].getCoordinates()); + } + + const int ChunkSizePerTeam = 512; + const size_t num_teams = + (this->num_targets_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + + auto& coordinates_leader = + static_cast&>( + pset_leader.getCoordinates()); + + const auto num_sources_local = this->num_targets_; + const auto num_padded = num_targets_padded_; + auto* rsoa_dev_list_ptr = + coordinates_leader.getMultiWalkerRSoADevicePtrs().data(); + auto* r_dr_ptr = mw_new_old_dist_displ.data(); + auto* new_pos_ptr = coordinates_leader.getFusedNewPosBuffer().data(); + const size_t new_pos_stride = + coordinates_leader.getFusedNewPosBuffer().capacity(); + + { + ScopedTimer offload(offload_timer_); + PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \ + num_teams(nw * num_teams) nowait \ + depend(out: r_dr_ptr[:mw_new_old_dist_displ.size()])") + for (int iw = 0; iw < nw; ++iw) + for (int team_id = 0; team_id < num_teams; team_id++) { + auto* source_pos_ptr = rsoa_dev_list_ptr[iw]; + const size_t first = ChunkSizePerTeam * team_id; + const size_t last = omptarget::min( + first + ChunkSizePerTeam, num_sources_local); + + { // temp + auto* r_iw_ptr = r_dr_ptr + iw * stride_size; + auto* dr_iw_ptr = + r_dr_ptr + iw * stride_size + num_padded; + + RealType pos[D]; + for (int idim = 0; idim < D; idim++) + pos[idim] = new_pos_ptr[idim * new_pos_stride + iw]; + + PRAGMA_OFFLOAD("omp parallel for") + for (int iel = first; iel < last; iel++) + DTD_BConds::computeDistancesOffload(pos, + source_pos_ptr, num_padded, r_iw_ptr, dr_iw_ptr, + num_padded, iel, iat); + } + + if (prepare_old) { // old + auto* r_iw_ptr = r_dr_ptr + (iw + nw) * stride_size; + auto* dr_iw_ptr = + r_dr_ptr + (iw + nw) * stride_size + num_padded; + + RealType pos[D]; + for (int idim = 0; idim < D; idim++) + pos[idim] = source_pos_ptr[idim * num_padded + iat]; + + PRAGMA_OFFLOAD("omp parallel for") + for (int iel = first; iel < last; iel++) + 
DTD_BConds::computeDistancesOffload(pos, + source_pos_ptr, num_padded, r_iw_ptr, dr_iw_ptr, + num_padded, iel, iat); + r_iw_ptr[iat] = + std::numeric_limits::max(); // assign a + // big number + } + } + } + + if (this->modes_ & DTModes::NEED_TEMP_DATA_ON_HOST) { + PRAGMA_OFFLOAD("omp target update nowait \ + depend(inout: r_dr_ptr[:mw_new_old_dist_displ.size()]) \ + from(r_dr_ptr[:mw_new_old_dist_displ.size()])") + } + } + + int + get_first_neighbor( + IndexType iat, RealType& r, PosType& dr, bool newpos) const override + { + // ensure there are neighbors + assert(this->num_targets_ > 1); + RealType min_dist = std::numeric_limits::max(); + int index = -1; + if (newpos) { + for (int jat = 0; jat < this->num_targets_; ++jat) + if (this->temp_r_[jat] < min_dist && jat != iat) { + min_dist = this->temp_r_[jat]; + index = jat; + } + assert(index >= 0); + dr = this->temp_dr_[index]; + } + else { + for (int jat = 0; jat < iat; ++jat) + if (this->distances_[iat][jat] < min_dist) { + min_dist = this->distances_[iat][jat]; + index = jat; + } + for (int jat = iat + 1; jat < this->num_targets_; ++jat) + if (this->distances_[jat][iat] < min_dist) { + min_dist = this->distances_[jat][iat]; + index = jat; + } + assert(index != iat && index >= 0); + if (index < iat) + dr = this->displacements_[iat][index]; + else + dr = this->displacements_[index][iat]; + } + r = min_dist; + return index; + } + + /** After accepting the iat-th particle, update the iat-th row of distances_ + * and displacements_. 
Upper triangle is not needed in the later computation + * and thus not updated + */ + inline void + update(IndexType iat) override + { + ScopedTimer local_timer(update_timer_); + // update [0, iat) columns + const int nupdate = iat; + // copy row + assert(nupdate <= this->temp_r_.size()); + std::copy_n( + this->temp_r_.data(), nupdate, this->distances_[iat].data()); + for (int idim = 0; idim < D; ++idim) + std::copy_n(this->temp_dr_.data(idim), nupdate, + this->displacements_[iat].data(idim)); + // copy column + for (size_t i = iat + 1; i < this->num_targets_; ++i) { + this->distances_[i][iat] = this->temp_r_[i]; + this->displacements_[i](iat) = -this->temp_dr_[i]; + } + } + + void + updatePartial(IndexType jat, bool from_temp) override + { + ScopedTimer local_timer(update_timer_); + + // update [0, jat) + const int nupdate = jat; + if (from_temp) { + // copy row + assert(nupdate <= this->temp_r_.size()); + std::copy_n( + this->temp_r_.data(), nupdate, this->distances_[jat].data()); + for (int idim = 0; idim < D; ++idim) + std::copy_n(this->temp_dr_.data(idim), nupdate, + this->displacements_[jat].data(idim)); + } + else { + assert(old_prepared_elec_id_ == jat); + // copy row + assert(nupdate <= this->old_r_.size()); + std::copy_n( + this->old_r_.data(), nupdate, this->distances_[jat].data()); + for (int idim = 0; idim < D; ++idim) + std::copy_n(this->old_dr_.data(idim), nupdate, + this->displacements_[jat].data(idim)); + } + } + + void + mw_updatePartial(const RefVectorWithLeader>& dt_list, + IndexType jat, const std::vector& from_temp) override + { + // if temp data on host is not updated by mw_move during p-by-p moves, + // there is no need to update distance table + if (!(this->modes_ & DTModes::NEED_TEMP_DATA_ON_HOST)) + return; + + for (int iw = 0; iw < dt_list.size(); iw++) + dt_list[iw].updatePartial(jat, from_temp[iw]); + } + + void + mw_finalizePbyP(const RefVectorWithLeader>& dt_list, + const RefVectorWithLeader>& p_list) const override + { + // if the 
distance table is not updated by mw_move during p-by-p, needs + // to recompute the whole table before being used by Hamiltonian if + // requested + if (!(this->modes_ & DTModes::NEED_TEMP_DATA_ON_HOST) && + (this->modes_ & DTModes::NEED_FULL_TABLE_ON_HOST_AFTER_DONEPBYP)) + this->mw_evaluate(dt_list, p_list); + } + + size_t + get_num_particls_stored() const override + { + return num_particls_stored; + } + +private: + /// number of targets with padding + const size_t num_targets_padded_; +#if !defined(NDEBUG) + /** set to particle id after move() with prepare_old = true. -1 means not + * prepared. It is intended only for safety checks, not for codepath + * selection. + */ + int old_prepared_elec_id_; +#endif + /// timer for offload portion + NewTimer& offload_timer_; + /// timer for evaluate() + NewTimer& evaluate_timer_; + /// timer for move() + NewTimer& move_timer_; + /// timer for update() + NewTimer& update_timer_; + /// the particle count of the internal stored distances. + const size_t num_particls_stored = 64; +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/SoaDistanceTableABT.h b/src/Particle/SoaDistanceTableABT.h new file mode 100644 index 0000000000..e2eb2709bf --- /dev/null +++ b/src/Particle/SoaDistanceTableABT.h @@ -0,0 +1,170 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +// Amrita Mathuriya, amrita.mathuriya@intel.com, Intel Corp. +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. 
+////////////////////////////////////////////////////////////////////////////////////// +// -*- C++ -*- +#ifndef QMCPLUSPLUS_DTDIMPL_ABT_H +#define QMCPLUSPLUS_DTDIMPL_ABT_H + +#include "Concurrency/OpenMP.h" +#include "Lattice/ParticleBConds3DSoa.h" +#include "Particle/DistanceTableT.h" +#include "Utilities/FairDivide.h" + +namespace qmcplusplus +{ +/**@ingroup nnlist + * @brief A derived class from DistanceTableABT, specialized for AB using a + * transposed form + */ +template +struct SoaDistanceTableABT : + public DTD_BConds::RealType, D, SC>, + public DistanceTableABT +{ + using RealType = typename DistanceTableABT::RealType; + using PosType = typename DistanceTableABT::PosType; + using IndexType = typename DistanceTableABT::IndexType; + + SoaDistanceTableABT( + const ParticleSetT& source, ParticleSetT& target) : + DTD_BConds(source.getLattice()), + DistanceTableABT(source, target, DTModes::ALL_OFF), + evaluate_timer_(createGlobalTimer(std::string("DTAB::evaluate_") + + target.getName() + "_" + source.getName(), + timer_level_fine)), + move_timer_(createGlobalTimer(std::string("DTAB::move_") + + target.getName() + "_" + source.getName(), + timer_level_fine)), + update_timer_(createGlobalTimer(std::string("DTAB::update_") + + target.getName() + "_" + source.getName(), + timer_level_fine)) + { + resize(); + } + + void + resize() + { + if (this->num_sources_ * this->num_targets_ == 0) + return; + + // initialize memory containers and views + const int num_sources_padded = getAlignedSize(this->num_sources_); + this->distances_.resize(this->num_targets_); + this->displacements_.resize(this->num_targets_); + for (int i = 0; i < this->num_targets_; ++i) { + this->distances_[i].resize(num_sources_padded); + this->displacements_[i].resize(num_sources_padded); + } + + // The padding of temp_r_ and temp_dr_ is necessary for the memory copy + // in the update function. temp_r_ is padded explicitly while temp_dr_ is + // padded internally + 
this->temp_r_.resize(num_sources_padded); + this->temp_dr_.resize(this->num_sources_); + } + + SoaDistanceTableABT() = delete; + SoaDistanceTableABT(const SoaDistanceTableABT&) = delete; + + /** evaluate the full table */ + inline void + evaluate(ParticleSetT& P) override + { + ScopedTimer local_timer(evaluate_timer_); +#pragma omp parallel + { + int first, last; + FairDivideAligned(this->num_sources_, getAlignment(), + omp_get_num_threads(), omp_get_thread_num(), first, last); + + // be aware of the sign of Displacement + for (int iat = 0; iat < this->num_targets_; ++iat) + DTD_BConds::computeDistances(P.R[iat], + this->origin_.getCoordinates().getAllParticlePos(), + this->distances_[iat].data(), this->displacements_[iat], + first, last); + } + } + + /// evaluate the temporary pair relations + inline void + move(const ParticleSetT& P, const PosType& rnew, const IndexType iat, + bool prepare_old) override + { + ScopedTimer local_timer(move_timer_); + DTD_BConds::computeDistances(rnew, + this->origin_.getCoordinates().getAllParticlePos(), this->temp_r_.data(), + this->temp_dr_, 0, this->num_sources_); + // If the full table is not ready all the time, overwrite the current + // value. If this step is missing, DT values can be undefined in case a + // move is rejected. 
+ if (!(this->modes_ & DTModes::NEED_FULL_TABLE_ANYTIME) && prepare_old) + DTD_BConds::computeDistances(P.R[iat], + this->origin_.getCoordinates().getAllParticlePos(), + this->distances_[iat].data(), this->displacements_[iat], 0, + this->num_sources_); + } + + /// update the stripe for jat-th particle + inline void + update(IndexType iat) override + { + ScopedTimer local_timer(update_timer_); + std::copy_n(this->temp_r_.data(), this->num_sources_, + this->distances_[iat].data()); + for (int idim = 0; idim < D; ++idim) + std::copy_n(this->temp_dr_.data(idim), this->num_sources_, + this->displacements_[iat].data(idim)); + } + + int + get_first_neighbor( + IndexType iat, RealType& r, PosType& dr, bool newpos) const override + { + RealType min_dist = std::numeric_limits::max(); + int index = -1; + if (newpos) { + for (int jat = 0; jat < this->num_sources_; ++jat) + if (this->temp_r_[jat] < min_dist) { + min_dist = this->temp_r_[jat]; + index = jat; + } + if (index >= 0) { + r = min_dist; + dr = this->temp_dr_[index]; + } + } + else { + for (int jat = 0; jat < this->num_sources_; ++jat) + if (this->distances_[iat][jat] < min_dist) { + min_dist = this->distances_[iat][jat]; + index = jat; + } + if (index >= 0) { + r = min_dist; + dr = this->displacements_[iat][index]; + } + } + assert(index >= 0 && index < this->num_sources_); + return index; + } + +private: + /// timer for evaluate() + NewTimer& evaluate_timer_; + /// timer for move() + NewTimer& move_timer_; + /// timer for update() + NewTimer& update_timer_; +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/SoaDistanceTableABTOMPTarget.h b/src/Particle/SoaDistanceTableABTOMPTarget.h new file mode 100644 index 0000000000..452100cb25 --- /dev/null +++ b/src/Particle/SoaDistanceTableABTOMPTarget.h @@ -0,0 +1,513 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. 
See LICENSE file in top directory for details. +// +// Copyright (c) 2021 QMCPACK developers. +// +// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +// Amrita Mathuriya, amrita.mathuriya@intel.com, Intel Corp. +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// +// -*- C++ -*- +#ifndef QMCPLUSPLUS_DTDIMPL_ABT_OMPTARGET_H +#define QMCPLUSPLUS_DTDIMPL_ABT_OMPTARGET_H + +#include "DistanceTableT.h" +#include "Lattice/ParticleBConds3DSoa.h" +#include "OMPTarget/OMPTargetMath.hpp" +#include "OMPTarget/OMPallocator.hpp" +#include "Particle/RealSpacePositionsTOMPTarget.h" +#include "Platforms/PinnedAllocator.h" +#include "ResourceCollection.h" + +namespace qmcplusplus +{ +/**@ingroup nnlist + * @brief A class derived from DistanceTableABT, specialized for AB using a + * transposed form; the full-table evaluation is issued through OpenMP target pragmas + */ +template +class SoaDistanceTableABTOMPTarget : + public DTD_BConds::RealType, D, SC>, + public DistanceTableABT +{ +private: + template + using OffloadPinnedVector = + Vector>>; + + using RealType = typename DistanceTableABT::RealType; + using PosType = typename DistanceTableABT::PosType; + using IndexType = typename DistanceTableABT::IndexType; + + /// accelerator output buffer for r and dr + OffloadPinnedVector r_dr_memorypool_; + /// accelerator input array for a list of target particle positions, + /// num_targets_ x D + OffloadPinnedVector target_pos; + + /// multi walker shared memory buffer + struct DTABMultiWalkerMem : public Resource + { + /// accelerator output array for multiple walkers, + /// [total_targets][1+D][num_padded] (per target: distances, then displacements) + OffloadPinnedVector mw_r_dr; + /// accelerator input buffer for multiple data set + OffloadPinnedVector offload_input; + + DTABMultiWalkerMem() : Resource("DTABMultiWalkerMem") + { + } + + DTABMultiWalkerMem(const DTABMultiWalkerMem&) : 
DTABMultiWalkerMem() + { + } + + std::unique_ptr + makeClone() const override + { + return std::make_unique(*this); + } + }; + + ResourceHandle mw_mem_handle_; + + void + resize() + { + if (this->num_sources_ * this->num_targets_ == 0) + return; + if (this->distances_.size()) + return; + + // initialize memory containers and views + const size_t num_padded = getAlignedSize(this->num_sources_); + const size_t stride_size = getPerTargetPctlStrideSize(); + r_dr_memorypool_.resize(stride_size * this->num_targets_); + + this->distances_.resize(this->num_targets_); + this->displacements_.resize(this->num_targets_); + for (int i = 0; i < this->num_targets_; ++i) { + this->distances_[i].attachReference( + r_dr_memorypool_.data() + i * stride_size, this->num_sources_); + this->displacements_[i].attachReference(this->num_sources_, + num_padded, + r_dr_memorypool_.data() + i * stride_size + num_padded); + } + } + + static void + associateResource(const RefVectorWithLeader>& dt_list) + { + auto& dt_leader = + dt_list.template getCastedLeader(); + + // initialize memory containers and views + size_t count_targets = 0; + for (size_t iw = 0; iw < dt_list.size(); iw++) { + auto& dt = + dt_list.template getCastedElement( + iw); + count_targets += dt.targets(); + dt.r_dr_memorypool_.free(); + } + + const size_t num_sources = dt_leader.num_sources_; + const size_t num_padded = + getAlignedSize(dt_leader.num_sources_); + const size_t stride_size = num_padded * (D + 1); + const size_t total_targets = count_targets; + auto& mw_r_dr = dt_leader.mw_mem_handle_.getResource().mw_r_dr; + mw_r_dr.resize(total_targets * stride_size); + + count_targets = 0; + for (size_t iw = 0; iw < dt_list.size(); iw++) { + auto& dt = + dt_list.template getCastedElement( + iw); + assert(num_sources == dt.num_sources_); + + dt.distances_.resize(dt.targets()); + dt.displacements_.resize(dt.targets()); + + for (int i = 0; i < dt.targets(); ++i) { + dt.distances_[i].attachReference( + mw_r_dr.data() + (i + 
count_targets) * stride_size, + num_sources); + dt.displacements_[i].attachReference(num_sources, num_padded, + mw_r_dr.data() + (i + count_targets) * stride_size + + num_padded); + } + count_targets += dt.targets(); + } + } + +public: + SoaDistanceTableABTOMPTarget( + const ParticleSetT& source, ParticleSetT& target) : + DTD_BConds(source.getLattice()), + DistanceTableABT(source, target, DTModes::ALL_OFF), + offload_timer_(createGlobalTimer( + std::string("DTABOMPTarget::offload_") + this->name_, + timer_level_fine)), + evaluate_timer_(createGlobalTimer( + std::string("DTABOMPTarget::evaluate_") + this->name_, + timer_level_fine)), + move_timer_( + createGlobalTimer(std::string("DTABOMPTarget::move_") + this->name_, + timer_level_fine)), + update_timer_(createGlobalTimer( + std::string("DTABOMPTarget::update_") + this->name_, + timer_level_fine)) + + { + auto* coordinates_soa = + dynamic_cast*>( + &source.getCoordinates()); + if (!coordinates_soa) + throw std::runtime_error("Source particle set doesn't have OpenMP " + "offload. 
Contact developers!"); + PRAGMA_OFFLOAD("omp target enter data map(to : this[:1])") + + // The padding of temp_r_ and temp_dr_ is necessary for the memory copy + // in the update function. temp_r_ is padded explicitly while temp_dr_ is + // padded internally + const int num_padded = getAlignedSize(this->num_sources_); + this->temp_r_.resize(num_padded); + this->temp_dr_.resize(this->num_sources_); + } + + SoaDistanceTableABTOMPTarget() = delete; + SoaDistanceTableABTOMPTarget(const SoaDistanceTableABTOMPTarget&) = delete; + + ~SoaDistanceTableABTOMPTarget() + { + PRAGMA_OFFLOAD("omp target exit data map(delete : this[:1])") + } + + void + createResource(ResourceCollection& collection) const override + { + /* the index returned by addResource is not needed here */ + collection.addResource(std::make_unique()); + } + + void + acquireResource(ResourceCollection& collection, + const RefVectorWithLeader>& dt_list) const override + { + auto& dt_leader = + dt_list.template getCastedLeader(); + dt_leader.mw_mem_handle_ = + collection.lendResource(); + associateResource(dt_list); + } + + void + releaseResource(ResourceCollection& collection, + const RefVectorWithLeader>& dt_list) const override + { + collection.takebackResource( + dt_list.template getCastedLeader() + .mw_mem_handle_); + for (size_t iw = 0; iw < dt_list.size(); iw++) { + auto& dt = + dt_list.template getCastedElement( + iw); + dt.distances_.clear(); + dt.displacements_.clear(); + } + } + + const RealType* + getMultiWalkerDataPtr() const override + { + return mw_mem_handle_.getResource().mw_r_dr.data(); + } + + size_t + getPerTargetPctlStrideSize() const override + { + return getAlignedSize(this->num_sources_) * (D + 1); + } + + /** evaluate the full table on the offload device and map the result back to the host */ + inline void + evaluate(ParticleSetT& P) override + { + resize(); + + ScopedTimer local_timer(evaluate_timer_); + // be aware of the sign of Displacement + const int num_targets_local = this->num_targets_; + const int num_sources_local = this->num_sources_; + const int num_padded = 
getAlignedSize(this->num_sources_); + + target_pos.resize(this->num_targets_ * D); + for (size_t iat = 0; iat < this->num_targets_; iat++) + for (size_t idim = 0; idim < D; idim++) + target_pos[iat * D + idim] = P.R[iat][idim]; + + auto* target_pos_ptr = target_pos.data(); + auto* source_pos_ptr = + this->origin_.getCoordinates().getAllParticlePos().data(); + auto* r_dr_ptr = this->distances_[0].data(); + assert(this->distances_[0].data() + num_padded == + this->displacements_[0].data()); + + // To maximize thread usage, the loop over electrons is chunked. Each + // chunk is sent to an OpenMP offload thread team. + const int ChunkSizePerTeam = 512; + const size_t num_teams = + (this->num_sources_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + const size_t stride_size = getPerTargetPctlStrideSize(); + + { + ScopedTimer offload(offload_timer_); + PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \ + num_teams(this->num_targets_*num_teams) \ + map(to: source_pos_ptr[:num_padded*D]) \ + map(always, to: target_pos_ptr[:this->num_targets_*D]) \ + map(always, from: r_dr_ptr[:this->num_targets_*stride_size])") + for (int iat = 0; iat < num_targets_local; ++iat) + for (int team_id = 0; team_id < num_teams; team_id++) { + const int first = ChunkSizePerTeam * team_id; + const int last = omptarget::min( + first + ChunkSizePerTeam, num_sources_local); + + RealType pos[D]; + for (int idim = 0; idim < D; idim++) + pos[idim] = target_pos_ptr[iat * D + idim]; + + auto* r_iat_ptr = r_dr_ptr + iat * stride_size; + auto* dr_iat_ptr = r_iat_ptr + num_padded; + + PRAGMA_OFFLOAD("omp parallel for") + for (int iel = first; iel < last; iel++) + DTD_BConds::computeDistancesOffload( + pos, source_pos_ptr, num_padded, r_iat_ptr, + dr_iat_ptr, num_padded, iel); + } + } + } + + inline void + mw_evaluate(const RefVectorWithLeader>& dt_list, + const RefVectorWithLeader>& p_list) const override + { + assert(this == &dt_list.getLeader()); + auto& dt_leader = + dt_list.template 
getCastedLeader(); + + ScopedTimer local_timer(evaluate_timer_); + + const size_t nw = dt_list.size(); + DTABMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_; + auto& mw_r_dr = mw_mem.mw_r_dr; + + size_t count_targets = 0; + for (ParticleSetT& p : p_list) + count_targets += p.getTotalNum(); + const size_t total_targets = count_targets; + + const int num_padded = getAlignedSize(this->num_sources_); + +#ifndef NDEBUG + const int stride_size = getPerTargetPctlStrideSize(); + count_targets = 0; + for (size_t iw = 0; iw < dt_list.size(); iw++) { + auto& dt = + dt_list.template getCastedElement( + iw); + + for (int i = 0; i < dt.targets(); ++i) { + assert(dt.distances_[i].data() == + mw_r_dr.data() + (i + count_targets) * stride_size); + assert(dt.displacements_[i].data() == + mw_r_dr.data() + (i + count_targets) * stride_size + + num_padded); + } + count_targets += dt.targets(); + } +#endif + + // This is a horrible optimization: different data types share a + // single buffer ([nw source pointers][target positions][walker ids]), which allows a single H2D transfer + const size_t realtype_size = sizeof(RealType); + const size_t int_size = sizeof(int); + const size_t ptr_size = sizeof(RealType*); + auto& offload_input = mw_mem.offload_input; + offload_input.resize(total_targets * D * realtype_size + + total_targets * int_size + nw * ptr_size); + auto source_ptrs = reinterpret_cast(offload_input.data()); + auto target_positions = + reinterpret_cast(offload_input.data() + ptr_size * nw); + auto walker_id_ptr = reinterpret_cast(offload_input.data() + + ptr_size * nw + total_targets * D * realtype_size); + + count_targets = 0; + for (size_t iw = 0; iw < nw; iw++) { + auto& dt = + dt_list.template getCastedElement( + iw); + ParticleSetT& pset(p_list[iw]); + + assert(dt.targets() == pset.getTotalNum()); + assert(this->num_sources_ == dt.num_sources_); + + auto& RSoA_OMPTarget = + static_cast&>( + dt.origin_.getCoordinates()); + source_ptrs[iw] = + const_cast(RSoA_OMPTarget.getDevicePtr()); + + for (size_t iat = 0; iat < 
pset.getTotalNum(); + ++iat, ++count_targets) { + walker_id_ptr[count_targets] = iw; + for (size_t idim = 0; idim < D; idim++) + target_positions[count_targets * D + idim] = + pset.R[iat][idim]; + } + } + + // To maximize thread usage, the loop over electrons is chunked. Each + // chunk is sent to an OpenMP offload thread team. + const int ChunkSizePerTeam = 512; + const size_t num_teams = + (this->num_sources_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + + auto* r_dr_ptr = mw_r_dr.data(); + auto* input_ptr = offload_input.data(); + const int num_sources_local = this->num_sources_; + + { + ScopedTimer offload(dt_leader.offload_timer_); + PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \ + num_teams(total_targets*num_teams) \ + map(always, to: input_ptr[:offload_input.size()]) \ + depend(out:r_dr_ptr[:mw_r_dr.size()]) nowait") + for (int iat = 0; iat < total_targets; ++iat) + for (int team_id = 0; team_id < num_teams; team_id++) { + auto* target_pos_ptr = + reinterpret_cast(input_ptr + ptr_size * nw); + const int walker_id = reinterpret_cast(input_ptr + + ptr_size * nw + total_targets * D * realtype_size)[iat]; + auto* source_pos_ptr = + reinterpret_cast(input_ptr)[walker_id]; + auto* r_iat_ptr = r_dr_ptr + iat * num_padded * (D + 1); + auto* dr_iat_ptr = + r_dr_ptr + iat * num_padded * (D + 1) + num_padded; + + const int first = ChunkSizePerTeam * team_id; + const int last = omptarget::min( + first + ChunkSizePerTeam, num_sources_local); + + RealType pos[D]; + for (int idim = 0; idim < D; idim++) + pos[idim] = target_pos_ptr[iat * D + idim]; + + PRAGMA_OFFLOAD("omp parallel for") + for (int iel = first; iel < last; iel++) + DTD_BConds::computeDistancesOffload( + pos, source_pos_ptr, num_padded, r_iat_ptr, + dr_iat_ptr, num_padded, iel); + } + + if (!(this->modes_ & + DTModes::MW_EVALUATE_RESULT_NO_TRANSFER_TO_HOST)) { + PRAGMA_OFFLOAD( + "omp target update from(r_dr_ptr[:mw_r_dr.size()]) \ + depend(inout:r_dr_ptr[:mw_r_dr.size()]) nowait") + } + // wait 
for computing and (optional) transferring back to host. + // It can potentially be moved to ParticleSet to fuse multiple + // similar taskwait + PRAGMA_OFFLOAD("omp taskwait") + } + } + + inline void + mw_recompute(const RefVectorWithLeader>& dt_list, + const RefVectorWithLeader>& p_list, + const std::vector& recompute) const override + { + mw_evaluate(dt_list, p_list); /* the recompute flags are ignored: every walker is re-evaluated */ + } + + /// evaluate the temporary pair relations (temp_r_, temp_dr_) between the proposed position rnew and all sources + inline void + move(const ParticleSetT& P, const PosType& rnew, const IndexType iat, + bool prepare_old) override + { + ScopedTimer local_timer(move_timer_); + DTD_BConds::computeDistances(rnew, + this->origin_.getCoordinates().getAllParticlePos(), + this->temp_r_.data(), this->temp_dr_, 0, this->num_sources_); + // If the full table is not ready all the time, overwrite the current + // value. If this step is missing, DT values can be undefined in case a + // move is rejected. + if (!(this->modes_ & DTModes::NEED_FULL_TABLE_ANYTIME) && prepare_old) + DTD_BConds::computeDistances(P.R[iat], + this->origin_.getCoordinates().getAllParticlePos(), + this->distances_[iat].data(), this->displacements_[iat], 0, + this->num_sources_); + } + + /// update the stripe for the iat-th particle by copying temp_r_ and temp_dr_ into the table + inline void + update(IndexType iat) override + { + ScopedTimer local_timer(update_timer_); + std::copy_n(this->temp_r_.data(), this->num_sources_, + this->distances_[iat].data()); + for (int idim = 0; idim < D; ++idim) + std::copy_n(this->temp_dr_.data(idim), this->num_sources_, + this->displacements_[iat].data(idim)); + } + + int + get_first_neighbor( + IndexType iat, RealType& r, PosType& dr, bool newpos) const override + { + RealType min_dist = std::numeric_limits::max(); + int index = -1; + if (newpos) { + for (int jat = 0; jat < this->num_sources_; ++jat) + if (this->temp_r_[jat] < min_dist) { + min_dist = this->temp_r_[jat]; + index = jat; + } + if (index >= 0) { + r = min_dist; + dr = this->temp_dr_[index]; + } + } + else { + for (int jat = 0; jat < this->num_sources_; 
++jat) + if (this->distances_[iat][jat] < min_dist) { + min_dist = this->distances_[iat][jat]; + index = jat; + } + if (index >= 0) { + r = min_dist; + dr = this->displacements_[iat][index]; + } + } + assert(index >= 0 && index < this->num_sources_); + return index; + } + +private: + /// timer for offload portion + NewTimer& offload_timer_; + /// timer for evaluate() + NewTimer& evaluate_timer_; + /// timer for move() + NewTimer& move_timer_; + /// timer for update() + NewTimer& update_timer_; +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/VirtualParticleSetT.cpp b/src/Particle/VirtualParticleSetT.cpp new file mode 100644 index 0000000000..1f896405fc --- /dev/null +++ b/src/Particle/VirtualParticleSetT.cpp @@ -0,0 +1,272 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2021 QMCPACK developers. 
+// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// Jeongnim Kim, jeongnim.kim@gmail.com, University of +// Illinois at Urbana-Champaign +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +/** @file VirtualParticleSetT.cpp + * A proxy class to the quantum ParticleSet + */ + +#include "VirtualParticleSetT.h" + +#include "Particle/DistanceTableT.h" +#include "Particle/createDistanceTableT.h" +#include "QMCHamiltonians/NLPPJob.h" +#include "ResourceCollection.h" + +namespace qmcplusplus +{ + +struct VPMultiWalkerMem : public Resource +{ + /// multi walker reference particle + Vector> mw_refPctls; + + VPMultiWalkerMem() : Resource("VPMultiWalkerMem") + { + } + + VPMultiWalkerMem(const VPMultiWalkerMem&) : VPMultiWalkerMem() + { + } + + std::unique_ptr + makeClone() const override + { + return std::make_unique(*this); + } +}; + +template +VirtualParticleSetT::VirtualParticleSetT( + const ParticleSetT& p, int nptcl, size_t dt_count_limit) : + ParticleSetT(p.getSimulationCell()) +{ + this->setName("virtual"); + + // initialize local data structure + this->setSpinor(p.isSpinor()); + this->TotalNum = nptcl; + this->R.resize(nptcl); + if (this->isSpinor()) + this->spins.resize(nptcl); + this->coordinates_->resize(nptcl); + + // create distancetables; dt_count_limit == 0 means all tables of p + assert(dt_count_limit <= p.getNumDistTables()); + if (dt_count_limit == 0) + dt_count_limit = p.getNumDistTables(); + for (int i = 0; i < dt_count_limit; ++i) + if (p.getDistTable(i).getModes() & DTModes::NEED_VP_FULL_TABLE_ON_HOST) + this->addTable(p.getDistTable(i).get_origin()); + else + this->addTable(p.getDistTable(i).get_origin(), + DTModes::MW_EVALUATE_RESULT_NO_TRANSFER_TO_HOST); +} + +template +VirtualParticleSetT::~VirtualParticleSetT() = default; + +template +Vector>& +VirtualParticleSetT::getMultiWalkerRefPctls() +{ + return 
mw_mem_handle_.getResource().mw_refPctls; +} + +template +const Vector>& +VirtualParticleSetT::getMultiWalkerRefPctls() const +{ + return mw_mem_handle_.getResource().mw_refPctls; +} + +template +void +VirtualParticleSetT::createResource(ResourceCollection& collection) const +{ + collection.addResource(std::make_unique()); + ParticleSetT::createResource(collection); +} + +template +void +VirtualParticleSetT::acquireResource(ResourceCollection& collection, + const RefVectorWithLeader& vp_list) +{ + auto& vp_leader = vp_list.getLeader(); + vp_leader.mw_mem_handle_ = collection.lendResource(); + + auto p_list = RefVectorWithLeaderParticleSet(vp_list); + ParticleSetT::acquireResource(collection, p_list); +} + +template +void +VirtualParticleSetT::releaseResource(ResourceCollection& collection, + const RefVectorWithLeader& vp_list) +{ + collection.takebackResource(vp_list.getLeader().mw_mem_handle_); + auto p_list = RefVectorWithLeaderParticleSet(vp_list); + ParticleSetT::releaseResource(collection, p_list); +} + +/// move virtual particles to new positions and update distance tables +template +void +VirtualParticleSetT::makeMoves(const ParticleSetT& refp, int jel, + const std::vector& deltaV, bool sphere, int iat) +{ + if (sphere && iat < 0) + throw std::runtime_error( + "VirtualParticleSet::makeMoves is invoked incorrectly, the flag " + "sphere=true requires iat specified!"); + onSphere = sphere; + refPS = refp; + refPtcl = jel; + refSourcePtcl = iat; + assert(this->R.size() == deltaV.size()); + for (size_t ivp = 0; ivp < this->R.size(); ivp++) + this->R[ivp] = refp.R[jel] + deltaV[ivp]; + if (refp.isSpinor()) + for (size_t ivp = 0; ivp < this->R.size(); ivp++) + this->spins[ivp] = refp.spins[jel]; // no spin deltas in this API + this->update(); +} + +/// move virtual particles to new positions and spins, then update distance tables +template +void +VirtualParticleSetT::makeMovesWithSpin(const ParticleSetT& refp, int jel, + const std::vector& deltaV, const std::vector& deltaS, + 
bool sphere, int iat) +{ + assert(refp.isSpinor()); + if (sphere && iat < 0) + throw std::runtime_error( + "VirtualParticleSet::makeMovesWithSpin is invoked incorrectly, the " + "flag sphere=true requires iat specified!"); + onSphere = sphere; + refPS = refp; + refPtcl = jel; + refSourcePtcl = iat; + assert(this->R.size() == deltaV.size()); + assert(this->spins.size() == deltaS.size()); + for (size_t ivp = 0; ivp < this->R.size(); ivp++) { + this->R[ivp] = refp.R[jel] + deltaV[ivp]; + this->spins[ivp] = refp.spins[jel] + deltaS[ivp]; + } + this->update(); +} + +template +void +VirtualParticleSetT::mw_makeMoves( + const RefVectorWithLeader& vp_list, + const RefVectorWithLeader>& refp_list, + const RefVector>& deltaV_list, + const RefVector>& joblist, bool sphere) +{ + auto& vp_leader = vp_list.getLeader(); + vp_leader.onSphere = sphere; + vp_leader.refPS = refp_list.getLeader(); + + const size_t nVPs = countVPs(vp_list); + auto& mw_refPctls = vp_leader.getMultiWalkerRefPctls(); + mw_refPctls.resize(nVPs); + + RefVectorWithLeader> p_list(vp_leader); + p_list.reserve(vp_list.size()); + + size_t ivp = 0; + for (int iw = 0; iw < vp_list.size(); iw++) { + VirtualParticleSetT& vp(vp_list[iw]); + const std::vector& deltaV(deltaV_list[iw]); + const NLPPJob& job(joblist[iw]); + + vp.onSphere = sphere; + vp.refPS = refp_list[iw]; + vp.refPtcl = job.electron_id; + vp.refSourcePtcl = job.ion_id; + assert(vp.R.size() == deltaV.size()); + for (size_t k = 0; k < vp.R.size(); k++, ivp++) { + vp.R[k] = refp_list[iw].R[vp.refPtcl] + deltaV[k]; + if (vp_leader.isSpinor()) + vp.spins[k] = + refp_list[iw] + .spins[vp.refPtcl]; // no spin deltas in this API + mw_refPctls[ivp] = vp.refPtcl; + } + p_list.push_back(vp); + } + assert(ivp == nVPs); + + mw_refPctls.updateTo(); + ParticleSetT::mw_update(p_list); +} + +template +void +VirtualParticleSetT::mw_makeMovesWithSpin( + const RefVectorWithLeader& vp_list, + const RefVectorWithLeader>& refp_list, + const RefVector>& deltaV_list, + const 
RefVector>& deltaS_list, + const RefVector>& joblist, bool sphere) +{ + auto& vp_leader = vp_list.getLeader(); + if (!vp_leader.isSpinor()) + throw std::runtime_error( + "VirtualParticleSet::mw_makeMovesWithSpin should not be called if " + "particle sets aren't spionor types"); + vp_leader.onSphere = sphere; + vp_leader.refPS = refp_list.getLeader(); + + const size_t nVPs = countVPs(vp_list); + auto& mw_refPctls = vp_leader.getMultiWalkerRefPctls(); + mw_refPctls.resize(nVPs); + + RefVectorWithLeader> p_list(vp_leader); + p_list.reserve(vp_list.size()); + + size_t ivp = 0; + for (int iw = 0; iw < vp_list.size(); iw++) { + VirtualParticleSetT& vp(vp_list[iw]); + const std::vector& deltaV(deltaV_list[iw]); + const std::vector& deltaS(deltaS_list[iw]); + const NLPPJob& job(joblist[iw]); + + vp.onSphere = sphere; + vp.refPS = refp_list[iw]; + vp.refPtcl = job.electron_id; + vp.refSourcePtcl = job.ion_id; + assert(vp.R.size() == deltaV.size()); + assert(vp.spins.size() == deltaS.size()); + assert(vp.R.size() == vp.spins.size()); + for (size_t k = 0; k < vp.R.size(); k++, ivp++) { + vp.R[k] = refp_list[iw].R[vp.refPtcl] + deltaV[k]; + vp.spins[k] = refp_list[iw].spins[vp.refPtcl] + deltaS[k]; + mw_refPctls[ivp] = vp.refPtcl; + } + p_list.push_back(vp); + } + assert(ivp == nVPs); + + mw_refPctls.updateTo(); + ParticleSetT::mw_update(p_list); +} + +template class VirtualParticleSetT; +template class VirtualParticleSetT; +template class VirtualParticleSetT>; +template class VirtualParticleSetT>; +} // namespace qmcplusplus diff --git a/src/Particle/VirtualParticleSetT.h b/src/Particle/VirtualParticleSetT.h new file mode 100644 index 0000000000..83e4d5aa57 --- /dev/null +++ b/src/Particle/VirtualParticleSetT.h @@ -0,0 +1,175 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. 
+// +// Copyright (c) 2021 QMCPACK developers. +// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// Jeongnim Kim, jeongnim.kim@gmail.com, University of +// Illinois at Urbana-Champaign +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_VIRTUAL_PARTICLESETT_H +#define QMCPLUSPLUS_VIRTUAL_PARTICLESETT_H + +#include "OMPTarget/OffloadAlignedAllocators.hpp" +#include "Particle/ParticleSetT.h" +#include + +namespace qmcplusplus +{ +// forward declaration. +class NonLocalECPComponent; +template +struct NLPPJob; +struct VPMultiWalkerMem; + +/** A ParticleSet that handles virtual moves of a selected particle of a given + * physical ParticleSet. Virtual moves are defined as moves that are proposed but + * will never be accepted. VirtualParticleSet is introduced to avoid changing + * any internal states of the physical ParticleSet. For this reason, the + * physical ParticleSet is always marked const. It is heavily used by non-local + * PP evaluations. 
+ */ +template +class VirtualParticleSetT : public ParticleSetT +{ +public: + using RealType = typename ParticleSetT::RealType; + using PosType = typename ParticleSetT::PosType; + +private: + /// true, if virtual particles are on a sphere for NLPP + bool onSphere; + /// multi walker resource + ResourceHandle mw_mem_handle_; + + Vector>& + getMultiWalkerRefPctls(); + + /// ParticleSet this object refers to after makeMoves + std::optional>> refPS; + +public: + /// Reference particle + int refPtcl; + /// Reference source particle, used when onSphere=true + int refSourcePtcl; + + /// ParticleSet this object refers to; only valid after one of the makeMoves calls has set refPS + const ParticleSetT& + getRefPS() const + { + return refPS.value(); + } + + inline bool + isOnSphere() const + { + return onSphere; + } + + const Vector>& + getMultiWalkerRefPctls() const; + + /** constructor + * @param p ParticleSet whose virtual moves are handled by this object + * @param nptcl number of virtual particles + * @param dt_count_limit distance tables corresponding to [0, dt_count_limit) + * of the reference particle set are created + */ + VirtualParticleSetT( + const ParticleSetT& p, int nptcl, size_t dt_count_limit = 0); + + ~VirtualParticleSetT(); + + /// initialize a shared resource and hand it to a collection + void + createResource(ResourceCollection& collection) const; + /** acquire external resource and associate it with the list of ParticleSet + * Note: use RAII ResourceCollectionTeamLock whenever possible + */ + static void + acquireResource(ResourceCollection& collection, + const RefVectorWithLeader& vp_list); + /** release external resource + * Note: use RAII ResourceCollectionTeamLock whenever possible + */ + static void + releaseResource(ResourceCollection& collection, + const RefVectorWithLeader& vp_list); + + /** move virtual particles to new positions and update distance tables + * @param refp reference particle set + * @param jel reference particle that all the VP moves from + * @param deltaV Position delta for virtual moves. 
+ * @param sphere set true if VP are on a sphere around the reference source + * particle + * @param iat reference source particle + */ + void + makeMoves(const ParticleSetT& refp, int jel, + const std::vector& deltaV, bool sphere = false, int iat = -1); + + /** move virtual particles to new positions and update distance tables + * @param refp reference particle set + * @param jel reference particle that all the VP moves from + * @param deltaV Position delta for virtual moves. + * @param deltaS Spin delta for virtual moves. + * @param sphere set true if VP are on a sphere around the reference source + * particle + * @param iat reference source particle + */ + void + makeMovesWithSpin(const ParticleSetT& refp, int jel, + const std::vector& deltaV, const std::vector& deltaS, + bool sphere = false, int iat = -1); + + static void + mw_makeMoves(const RefVectorWithLeader& vp_list, + const RefVectorWithLeader>& p_list, + const RefVector>& deltaV_list, + const RefVector>& joblist, bool sphere); + + static void + mw_makeMovesWithSpin( + const RefVectorWithLeader& vp_list, + const RefVectorWithLeader>& p_list, + const RefVector>& deltaV_list, + const RefVector>& deltaS_list, + const RefVector>& joblist, bool sphere); + + static RefVectorWithLeader> + RefVectorWithLeaderParticleSet( + const RefVectorWithLeader& vp_list) + { + RefVectorWithLeader> ref_list(vp_list.getLeader()); + ref_list.reserve(vp_list.size()); /* room for every walker of vp_list; ref_list itself is still empty here */ + for (VirtualParticleSetT& vp : vp_list) + ref_list.push_back(vp); + return ref_list; + } + + static size_t + countVPs(const RefVectorWithLeader& vp_list) + { + size_t nVPs = 0; + for (const VirtualParticleSetT& vp : vp_list) + nVPs += vp.getTotalNum(); + return nVPs; + } + + static size_t + countVPs(const RefVectorWithLeader& vp_list) + { + size_t nVPs = 0; + for (const VirtualParticleSetT& vp : vp_list) + nVPs += vp.getTotalNum(); + return nVPs; + } +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/createDistanceTableT.cpp 
b/src/Particle/createDistanceTableT.cpp new file mode 100644 index 0000000000..1905aef3d8 --- /dev/null +++ b/src/Particle/createDistanceTableT.cpp @@ -0,0 +1,240 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at +// Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign Jeongnim Kim, +// jeongnim.kim@gmail.com, University of Illinois at +// Urbana-Champaign Jaron T. Krogel, krogeljt@ornl.gov, Oak +// Ridge National Laboratory Mark A. Berrill, +// berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#include "Particle/createDistanceTableT.h" + +#include "CPU/SIMD/algorithm.hpp" +#include "Particle/DistanceTableT.h" +#include "Particle/SoaDistanceTableAAT.h" +#include "Particle/SoaDistanceTableAATOMPTarget.h" +#include "Particle/SoaDistanceTableABT.h" +#include "Particle/SoaDistanceTableABTOMPTarget.h" + +namespace qmcplusplus +{ +/** Adding SymmetricDTD to the list, e.g., el-el distance table + *\param s source/target particle set + *\return index of the distance table with the name + */ +template +std::unique_ptr> +createDistanceTableAAT(ParticleSetT& s, std::ostream& description) +{ + using RealType = typename ParticleSetT::RealType; + enum + { + DIM = OHMMS_DIM + }; + const int sc = s.getLattice().SuperCellEnum; + std::unique_ptr> dt; + std::ostringstream o; + o << " Distance table for similar particles (A-A):" << std::endl; + o << " source/target: " << s.getName() << std::endl; + o << " Using 
structure-of-arrays (SoA) data layout" << std::endl; + + if (sc == SUPERCELL_BULK) { + if (s.getLattice().DiagonalOnly) { + o << " Distance computations use orthorhombic periodic cell in " + "3D." + << std::endl; + dt = std::make_unique< + SoaDistanceTableAAT>(s); + } + else { + if (s.getLattice().WignerSeitzRadius > + s.getLattice().SimulationCellRadius) { + o << " Distance computations use general periodic cell in " + "3D with corner image checks." + << std::endl; + dt = std::make_unique< + SoaDistanceTableAAT>(s); + } + else { + o << " Distance computations use general periodic cell in " + "3D without corner image checks." + << std::endl; + dt = std::make_unique< + SoaDistanceTableAAT>(s); + } + } + } + else if (sc == SUPERCELL_SLAB) { + if (s.getLattice().DiagonalOnly) { + o << " Distance computations use orthorhombic code for periodic " + "cell in 2D." + << std::endl; + dt = std::make_unique< + SoaDistanceTableAAT>(s); + } + else { + if (s.getLattice().WignerSeitzRadius > + s.getLattice().SimulationCellRadius) { + o << " Distance computations use general periodic cell in " + "2D with corner image checks." + << std::endl; + dt = std::make_unique< + SoaDistanceTableAAT>(s); + } + else { + o << " Distance computations use general periodic cell in " + "2D without corner image checks." + << std::endl; + dt = std::make_unique< + SoaDistanceTableAAT>(s); + } + } + } + else if (sc == SUPERCELL_WIRE) { + o << " Distance computations use periodic cell in one dimension." + << std::endl; + dt = std::make_unique< + SoaDistanceTableAAT>(s); + } + else // open boundary condition + { + o << " Distance computations use open boundary conditions in 3D." 
+ << std::endl; + dt = std::make_unique< + SoaDistanceTableAAT>(s); + } + + description << o.str() << std::endl; + return dt; +} + +template std::unique_ptr> +createDistanceTableAAT( + ParticleSetT& t, std::ostream& description); +template std::unique_ptr> +createDistanceTableAAT( + ParticleSetT& t, std::ostream& description); +template std::unique_ptr>> +createDistanceTableAAT>( + ParticleSetT>& t, std::ostream& description); +template std::unique_ptr>> +createDistanceTableAAT>( + ParticleSetT>& t, std::ostream& description); + +/** Create an asymmetric (A-B) distance table for dissimilar particle sets + *\param s source particle set (t is the target particle set) + *\return owning pointer to the table; a summary is written to description + */ +template +std::unique_ptr> +createDistanceTableABT( + const ParticleSetT& s, ParticleSetT& t, std::ostream& description) +{ + using RealType = typename ParticleSetT::RealType; + enum + { + DIM = OHMMS_DIM + }; + const int sc = t.getLattice().SuperCellEnum; + std::unique_ptr> dt; + std::ostringstream o; + o << " Distance table for dissimilar particles (A-B):" << std::endl; + o << " source: " << s.getName() << " target: " << t.getName() + << std::endl; + o << " Using structure-of-arrays (SoA) data layout" << std::endl; + + if (sc == SUPERCELL_BULK) { + if (s.getLattice().DiagonalOnly) { + o << " Distance computations use orthorhombic periodic cell in " + "3D." + << std::endl; + dt = std::make_unique< + SoaDistanceTableABT>(s, t); + } + else { + if (s.getLattice().WignerSeitzRadius > + s.getLattice().SimulationCellRadius) { + o << " Distance computations use general periodic cell in " + "3D with corner image checks." + << std::endl; + dt = std::make_unique< + SoaDistanceTableABT>(s, t); + } + else { + o << " Distance computations use general periodic cell in " + "3D without corner image checks."
+ << std::endl; + dt = std::make_unique< + SoaDistanceTableABT>(s, t); + } + } + } + else if (sc == SUPERCELL_SLAB) { + if (s.getLattice().DiagonalOnly) { + o << " Distance computations use orthorhombic code for periodic " + "cell in 2D." + << std::endl; + dt = std::make_unique< + SoaDistanceTableABT>(s, t); + } + else { + if (s.getLattice().WignerSeitzRadius > + s.getLattice().SimulationCellRadius) { + o << " Distance computations use general periodic cell in " + "2D with corner image checks." + << std::endl; + dt = std::make_unique< + SoaDistanceTableABT>(s, t); + } + else { + o << " Distance computations use general periodic cell in " + "2D without corner image checks." + << std::endl; + dt = std::make_unique< + SoaDistanceTableABT>(s, t); + } + } + } + else if (sc == SUPERCELL_WIRE) { + o << " Distance computations use periodic cell in one dimension." + << std::endl; + dt = std::make_unique< + SoaDistanceTableABT>(s, t); + } + else // open boundary condition + { + o << " Distance computations use open boundary conditions in 3D." 
+ << std::endl; + dt = std::make_unique< + SoaDistanceTableABT>(s, t); + } + + description << o.str() << std::endl; + return dt; +} + +template std::unique_ptr> +createDistanceTableABT(const ParticleSetT& s, + ParticleSetT& t, std::ostream& description); +template std::unique_ptr> +createDistanceTableABT(const ParticleSetT& s, + ParticleSetT& t, std::ostream& description); +template std::unique_ptr>> +createDistanceTableABT>( + const ParticleSetT>& s, + ParticleSetT>& t, std::ostream& description); +template std::unique_ptr>> +createDistanceTableABT>( + const ParticleSetT>& s, + ParticleSetT>& t, std::ostream& description); +} // namespace qmcplusplus diff --git a/src/Particle/createDistanceTableT.h b/src/Particle/createDistanceTableT.h new file mode 100644 index 0000000000..64b81aae1e --- /dev/null +++ b/src/Particle/createDistanceTableT.h @@ -0,0 +1,89 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of +// Illinois at Urbana-Champaign +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_DISTANCETABLET_H +#define QMCPLUSPLUS_DISTANCETABLET_H + +#include "Particle/ParticleSetT.h" + +namespace qmcplusplus +{ +/** Class to manage multiple DistanceTable objects. + * + * \date 2008-09-19 + * static data members are removed. DistanceTable::add functions + * are kept for compatibility only. 
New code should use a member function + * of ParticleSet to add a distance table + * int ParticleSet::addTable(const ParticleSet& source) + * + * \deprecated There is only one instance of the data members of + * DistanceTable in an application and the data are shared by many objects. + * Note that static data members and functions are used + * (based on singleton and factory patterns). + *\todo DistanceTable should work as a factory, as well, to instantiate + *DistanceTable subject to different boundary conditions. + * Lattice/CrystalLattice.h and Lattice/CrystalLattice.cpp can be owned by + *DistanceTable to generically control the crystalline structure. + */ + +/// free function to create a distance table of s-s +template +std::unique_ptr> +createDistanceTableAAT(ParticleSetT& s, std::ostream& description); + +template +std::unique_ptr> +createDistanceTableAATOMPTarget(ParticleSetT& s, std::ostream& description); + +template +inline std::unique_ptr> +createDistanceTableT(ParticleSetT& s, std::ostream& description) +{ + // during P-by-P move, the cost of single particle evaluation of distance + // tables is determined by the number of source particles. Thus the + // implementation selection is determined by the source particle set.
+ if (s.getCoordinates().getKind() == DynamicCoordinateKind::DC_POS_OFFLOAD) + return createDistanceTableAATOMPTarget(s, description); + else + return createDistanceTableAAT(s, description); +} + +/// free function to create a distance table of s-t +template +std::unique_ptr> +createDistanceTableABT( + const ParticleSetT& s, ParticleSetT& t, std::ostream& description); + +template +std::unique_ptr> +createDistanceTableABTOMPTarget( + const ParticleSetT& s, ParticleSetT& t, std::ostream& description); + +template +inline std::unique_ptr> +createDistanceTableT( + const ParticleSetT& s, ParticleSetT& t, std::ostream& description) +{ + // during P-by-P move, the cost of single particle evaluation of distance + // tables is determined by the number of source particles. Thus the + // implementation selection is determined by the source particle set. + if (s.getCoordinates().getKind() == DynamicCoordinateKind::DC_POS_OFFLOAD) + return createDistanceTableABTOMPTarget(s, t, description); + else + return createDistanceTableABT(s, t, description); +} + +} // namespace qmcplusplus +#endif diff --git a/src/Particle/createDistanceTableTOMPTarget.cpp b/src/Particle/createDistanceTableTOMPTarget.cpp new file mode 100644 index 0000000000..afb4653184 --- /dev/null +++ b/src/Particle/createDistanceTableTOMPTarget.cpp @@ -0,0 +1,248 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at +// Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign Jeongnim Kim, +// jeongnim.kim@gmail.com, University of Illinois at +// Urbana-Champaign Jaron T. Krogel, krogeljt@ornl.gov, Oak +// Ridge National Laboratory Mark A.
Berrill, +// berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#include "Particle/createDistanceTableT.h" + +#include "CPU/SIMD/algorithm.hpp" +#include "Particle/DistanceTableT.h" +#include "Particle/SoaDistanceTableAATOMPTarget.h" +#include "Particle/SoaDistanceTableABTOMPTarget.h" + +namespace qmcplusplus +{ +/** Create a symmetric (A-A) distance table, e.g., el-el distance table + *\param s source/target particle set + *\return owning pointer to the table; a summary is written to description + */ +template +std::unique_ptr> +createDistanceTableAATOMPTarget(ParticleSetT& s, std::ostream& description) +{ + using RealType = typename ParticleSetT::RealType; + enum + { + DIM = OHMMS_DIM + }; + const int sc = s.getLattice().SuperCellEnum; + std::unique_ptr> dt; + std::ostringstream o; + o << " Distance table for similar particles (A-A):" << std::endl; + o << " source/target: " << s.getName() << std::endl; + o << " Using structure-of-arrays (SoA) data layout and OpenMP offload" + << std::endl; + + if (sc == SUPERCELL_BULK) { + if (s.getLattice().DiagonalOnly) { + o << " Distance computations use orthorhombic periodic cell in " + "3D." + << std::endl; + dt = std::make_unique< + SoaDistanceTableAATOMPTarget>(s); + } + else { + if (s.getLattice().WignerSeitzRadius > + s.getLattice().SimulationCellRadius) { + o << " Distance computations use general periodic cell in " + "3D with corner image checks." + << std::endl; + dt = std::make_unique< + SoaDistanceTableAATOMPTarget>(s); + } + else { + o << " Distance computations use general periodic cell in " + "3D without corner image checks."
+ << std::endl; + dt = std::make_unique< + SoaDistanceTableAATOMPTarget>(s); + } + } + } + else if (sc == SUPERCELL_SLAB) { + if (s.getLattice().DiagonalOnly) { + o << " Distance computations use orthorhombic code for periodic " + "cell in 2D." + << std::endl; + dt = std::make_unique< + SoaDistanceTableAATOMPTarget>(s); + } + else { + if (s.getLattice().WignerSeitzRadius > + s.getLattice().SimulationCellRadius) { + o << " Distance computations use general periodic cell in " + "2D with corner image checks." + << std::endl; + dt = std::make_unique< + SoaDistanceTableAATOMPTarget>(s); + } + else { + o << " Distance computations use general periodic cell in " + "2D without corner image checks." + << std::endl; + dt = std::make_unique< + SoaDistanceTableAATOMPTarget>(s); + } + } + } + else if (sc == SUPERCELL_WIRE) { + o << " Distance computations use periodic cell in one dimension." + << std::endl; + dt = std::make_unique< + SoaDistanceTableAATOMPTarget>( + s); + } + else // open boundary condition + { + o << " Distance computations use open boundary conditions in 3D." 
+ << std::endl; + dt = std::make_unique< + SoaDistanceTableAATOMPTarget>( + s); + } + + description << o.str() << std::endl; + return dt; +} + +template std::unique_ptr> +createDistanceTableAATOMPTarget( + ParticleSetT& t, std::ostream& description); +template std::unique_ptr> +createDistanceTableAATOMPTarget( + ParticleSetT& t, std::ostream& description); +template std::unique_ptr>> +createDistanceTableAATOMPTarget>( + ParticleSetT>& t, std::ostream& description); +template std::unique_ptr>> +createDistanceTableAATOMPTarget>( + ParticleSetT>& t, std::ostream& description); + +/** Create an asymmetric (A-B) distance table for dissimilar particle sets + *\param s source particle set (t is the target particle set) + *\return owning pointer to the table; a summary is written to description + */ +template +std::unique_ptr> +createDistanceTableABTOMPTarget( + const ParticleSetT& s, ParticleSetT& t, std::ostream& description) +{ + using RealType = typename ParticleSetT::RealType; + enum + { + DIM = OHMMS_DIM + }; + const int sc = t.getLattice().SuperCellEnum; + std::unique_ptr> dt; + std::ostringstream o; + o << " Distance table for dissimilar particles (A-B):" << std::endl; + o << " source: " << s.getName() << " target: " << t.getName() + << std::endl; + o << " Using structure-of-arrays (SoA) data layout and OpenMP offload" + << std::endl; + + if (sc == SUPERCELL_BULK) { + if (s.getLattice().DiagonalOnly) { + o << " Distance computations use orthorhombic periodic cell in " + "3D." + << std::endl; + dt = std::make_unique< + SoaDistanceTableABTOMPTarget>(s, t); + } + else { + if (s.getLattice().WignerSeitzRadius > + s.getLattice().SimulationCellRadius) { + o << " Distance computations use general periodic cell in " + "3D with corner image checks." + << std::endl; + dt = std::make_unique< + SoaDistanceTableABTOMPTarget>( + s, t); + } + else { + o << " Distance computations use general periodic cell in " + "3D without corner image checks."
+ << std::endl; + dt = std::make_unique< + SoaDistanceTableABTOMPTarget>( + s, t); + } + } + } + else if (sc == SUPERCELL_SLAB) { + if (s.getLattice().DiagonalOnly) { + o << " Distance computations use orthorhombic code for periodic " + "cell in 2D." + << std::endl; + dt = std::make_unique< + SoaDistanceTableABTOMPTarget>(s, t); + } + else { + if (s.getLattice().WignerSeitzRadius > + s.getLattice().SimulationCellRadius) { + o << " Distance computations use general periodic cell in " + "2D with corner image checks." + << std::endl; + dt = std::make_unique< + SoaDistanceTableABTOMPTarget>( + s, t); + } + else { + o << " Distance computations use general periodic cell in " + "2D without corner image checks." + << std::endl; + dt = std::make_unique< + SoaDistanceTableABTOMPTarget>( + s, t); + } + } + } + else if (sc == SUPERCELL_WIRE) { + o << " Distance computations use periodic cell in one dimension." + << std::endl; + dt = std::make_unique< + SoaDistanceTableABTOMPTarget>( + s, t); + } + else // open boundary condition + { + o << " Distance computations use open boundary conditions in 3D." 
+ << std::endl; + dt = std::make_unique< + SoaDistanceTableABTOMPTarget>( + s, t); + } + + description << o.str() << std::endl; + return dt; +} + +template std::unique_ptr> +createDistanceTableABTOMPTarget(const ParticleSetT& s, + ParticleSetT& t, std::ostream& description); +template std::unique_ptr> +createDistanceTableABTOMPTarget(const ParticleSetT& s, + ParticleSetT& t, std::ostream& description); +template std::unique_ptr>> +createDistanceTableABTOMPTarget>( + const ParticleSetT>& s, + ParticleSetT>& t, std::ostream& description); +template std::unique_ptr>> +createDistanceTableABTOMPTarget>( + const ParticleSetT>& s, + ParticleSetT>& t, std::ostream& description); +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BasisSetBaseT.h b/src/QMCWaveFunctions/BasisSetBaseT.h new file mode 100644 index 0000000000..e6c8bd9e99 --- /dev/null +++ b/src/QMCWaveFunctions/BasisSetBaseT.h @@ -0,0 +1,222 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at +// Urbana-Champaign +// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore +// National Laboratory Jeremy McMinnis, jmcminis@gmail.com, +// University of Illinois at Urbana-Champaign Jaron T. +// Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Jeongnim Kim, jeongnim.kim@gmail.com, University of +// Illinois at Urbana-Champaign Mark A. 
Berrill, +// berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_BASISSETBASET_H +#define QMCPLUSPLUS_BASISSETBASET_H + +#include "OMPTarget/OffloadAlignedAllocators.hpp" +#include "Particle/ParticleSetT.h" +#include "QMCWaveFunctions/OrbitalSetTraits.h" + +namespace qmcplusplus +{ +/** base class for a basis set + * + * Define a common storage for the derived classes and + * provides a minimal set of interfaces to get/set BasisSetSize. + */ +template +struct BasisSetBaseT : public OrbitalSetTraits +{ + enum + { + MAXINDEX = 2 + OHMMS_DIM + }; + using RealType = typename OrbitalSetTraits::RealType; + using ValueType = typename OrbitalSetTraits::ValueType; + using IndexType = typename OrbitalSetTraits::IndexType; + using HessType = typename OrbitalSetTraits::HessType; + using IndexVector = typename OrbitalSetTraits::IndexVector; + using ValueVector = typename OrbitalSetTraits::ValueVector; + using ValueMatrix = typename OrbitalSetTraits::ValueMatrix; + using GradVector = typename OrbitalSetTraits::GradVector; + using GradMatrix = typename OrbitalSetTraits::GradMatrix; + using HessVector = typename OrbitalSetTraits::HessVector; + using HessMatrix = typename OrbitalSetTraits::HessMatrix; + using GGGType = TinyVector; + using GGGVector = Vector; + using GGGMatrix = Matrix; + + /// size of the basis set + IndexType BasisSetSize; + /// index of the particle + IndexType ActivePtcl; + /// counter to keep track + unsigned long Counter; + /// phi[i] the value of the i-th basis set + ValueVector Phi; + /// dphi[i] the gradient of the i-th basis set + GradVector dPhi; + /// d2phi[i] the laplacian of the i-th basis set + ValueVector d2Phi; + /// grad_grad_Phi[i] the full hessian of the i-th basis set + HessVector grad_grad_Phi; + /// grad_grad_grad_Phi the full 
hessian of the i-th basis set + GGGVector grad_grad_grad_Phi; + /// container to store value, laplacian and gradient + ValueMatrix Temp; + + ValueMatrix Y; + GradMatrix dY; + ValueMatrix d2Y; + + /// default constructor + BasisSetBaseT() : BasisSetSize(0), ActivePtcl(-1), Counter(0) + { + } + /// virtual destructor + virtual ~BasisSetBaseT() + { + } + /** resize the container */ + void + resize(int ntargets) + { + if (BasisSetSize) { + Phi.resize(BasisSetSize); + dPhi.resize(BasisSetSize); + d2Phi.resize(BasisSetSize); + grad_grad_Phi.resize(BasisSetSize); + grad_grad_grad_Phi.resize(BasisSetSize); + Temp.resize(BasisSetSize, MAXINDEX); + Y.resize(ntargets, BasisSetSize); + dY.resize(ntargets, BasisSetSize); + d2Y.resize(ntargets, BasisSetSize); + } + else { + app_error() << " BasisSetBase::BasisSetSize == 0" << std::endl; + } + } + + /// clone the basis set + virtual BasisSetBaseT* + makeClone() const = 0; + /** return the basis set size */ + inline IndexType + getBasisSetSize() const + { + return BasisSetSize; + } + + /// resize the basis set + virtual void + setBasisSetSize(int nbs) = 0; + + virtual void + evaluateWithHessian(const ParticleSetT& P, int iat) = 0; + virtual void + evaluateWithThirdDeriv(const ParticleSetT& P, int iat) = 0; + virtual void + evaluateThirdDerivOnly(const ParticleSetT& P, int iat) = 0; + virtual void + evaluateForWalkerMove(const ParticleSetT& P) = 0; + virtual void + evaluateForWalkerMove(const ParticleSetT& P, int iat) = 0; + virtual void + evaluateForPtclMove(const ParticleSetT& P, int iat) = 0; + virtual void + evaluateAllForPtclMove(const ParticleSetT& P, int iat) = 0; + virtual void + evaluateForPtclMoveWithHessian(const ParticleSetT& P, int iat) = 0; +}; + +/** Base for real basis set + * + * Equivalent to BasisSetBase with minimum requirements + * Used by LCAO + */ +template +struct SoaBasisSetBaseT +{ + using value_type = T; + using vgl_type = VectorSoaContainer; + using vgh_type = VectorSoaContainer; + using vghgh_type = 
VectorSoaContainer; + using OffloadMWVGLArray = + Array>; // [VGL, walker, Orbs] + using OffloadMWVArray = + Array>; // [walker, Orbs] + + /// size of the basis set + int BasisSetSize; + + virtual ~SoaBasisSetBaseT() = default; + inline int + getBasisSetSize() + { + return BasisSetSize; + } + + virtual SoaBasisSetBaseT* + makeClone() const = 0; + virtual void + setBasisSetSize(int nbs) = 0; + + // Evaluates value, gradient, and laplacian for electron "iat". Parks them + // into a temporary data structure "vgl". + virtual void + evaluateVGL(const ParticleSetT& P, int iat, vgl_type& vgl) = 0; + // Evaluates value, gradient, and laplacian for electron "iat". places them + // in a offload array for batched code. + virtual void + mw_evaluateVGL(const RefVectorWithLeader>& P_list, int iat, + OffloadMWVGLArray& vgl) = 0; + // Evaluates value for electron "iat". places it in a offload array for + // batched code. + virtual void + mw_evaluateValue(const RefVectorWithLeader>& P_list, + int iat, OffloadMWVArray& v) = 0; + // Evaluates value, gradient, and Hessian for electron "iat". Parks them + // into a temporary data structure "vgh". + virtual void + evaluateVGH(const ParticleSetT& P, int iat, vgh_type& vgh) = 0; + // Evaluates value, gradient, and Hessian, and Gradient Hessian for electron + // "iat". Parks them into a temporary data structure "vghgh". + virtual void + evaluateVGHGH(const ParticleSetT& P, int iat, vghgh_type& vghgh) = 0; + // Evaluates the x,y, and z components of ionic gradient associated with + // "jion" of value. Parks the raw data into "vgl" container. + virtual void + evaluateGradSourceV(const ParticleSetT& P, int iat, + const ParticleSetT& ions, int jion, vgl_type& vgl) = 0; + // Evaluates the x,y, and z components of ionic gradient associated with + // "jion" value, gradient, and laplacian. + // Parks the raw data into "vghgh" container. 
+ virtual void + evaluateGradSourceVGL(const ParticleSetT& P, int iat, + const ParticleSetT& ions, int jion, vghgh_type& vghgh) = 0; + virtual void + evaluateV(const ParticleSetT& P, int iat, value_type* restrict vals) = 0; + virtual bool + is_S_orbital(int mo_idx, int ao_idx) + { + return false; + } + + /// Determine which orbitals are S-type. Used for cusp correction. + virtual void + queryOrbitalsForSType(const std::vector& corrCenter, + std::vector& is_s_orbital) const + { + } +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h b/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h index 35f1580d16..7d5d19b323 100644 --- a/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h +++ b/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h @@ -1,22 +1,25 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2019 QMCPACK developers. // -// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign +// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National +// Laboratory Jeongnim Kim, jeongnim.kim@gmail.com, +// University of Illinois at Urbana-Champaign Mark A. 
+// Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory // Ye Luo, yeluo@anl.gov, Argonne National Laboratory // -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// - /** @file BsplineSetT.h * - * BsplineSet is a SPOSet derived class and serves as a base class for B-spline SPO C2C/C2R/R2R implementation + * BsplineSet is a SPOSet derived class and serves as a base class for B-spline + * SPO C2C/C2R/R2R implementation */ #ifndef QMCPLUSPLUS_BSPLINESETT_H #define QMCPLUSPLUS_BSPLINESETT_H @@ -28,221 +31,226 @@ namespace qmcplusplus { /** BsplineSet is the base class for SplineC2C, SplineC2R, SplineR2R. - * Its derived template classes manage the storage and evaluation at given precision. - * BsplineSet also implements a few fallback routines in case optimized implementation is not necessary in the derived class. + * Its derived template classes manage the storage and evaluation at given + * precision. BsplineSet also implements a few fallback routines in case + * optimized implementation is not necessary in the derived class. 
*/ -template +template class BsplineSetT : public SPOSetT { public: - using PosType = typename SPOSetT::PosType; - using ValueVector = typename SPOSetT::ValueVector; - using GradVector = typename SPOSetT::GradVector; - using HessVector = typename SPOSetT::HessVector; - using GGGVector = typename SPOSetT::GGGVector; - using ValueMatrix = typename SPOSetT::ValueMatrix; - using GradMatrix = typename SPOSetT::GradMatrix; - using HessMatrix = typename SPOSetT::HessMatrix; - using GGGMatrix = typename SPOSetT::GGGMatrix; - - using value_type = typename SPOSetT::ValueMatrix::value_type; - using grad_type = typename SPOSetT::GradMatrix::value_type; - - // used in derived classes - using RealType = typename SPOSetT::RealType; - using ValueType = typename SPOSetT::ValueType; - - BsplineSetT(const std::string& my_name) : SPOSetT(my_name), MyIndex(0), first_spo(0), last_spo(0) {} - - virtual bool isComplex() const = 0; - virtual std::string getKeyword() const = 0; - - auto& getHalfG() const { return HalfG; } - - inline void init_base(int n) - { - kPoints.resize(n); - MakeTwoCopies.resize(n); - BandIndexMap.resize(n); - for (int i = 0; i < n; i++) - BandIndexMap[i] = i; - } - - ///remap kpoints to group general kpoints & special kpoints - int remap_kpoints() - { - std::vector k_copy(kPoints); - const int nk = kPoints.size(); - int nCB = 0; - //two pass - for (int i = 0; i < nk; ++i) + using PosType = typename SPOSetT::PosType; + using ValueVector = typename SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; + using HessVector = typename SPOSetT::HessVector; + using GGGVector = typename SPOSetT::GGGVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + using HessMatrix = typename SPOSetT::HessMatrix; + using GGGMatrix = typename SPOSetT::GGGMatrix; + + using value_type = typename SPOSetT::ValueMatrix::value_type; + using grad_type = typename SPOSetT::GradMatrix::value_type; + + // used in derived 
classes + using RealType = typename SPOSetT::RealType; + using ValueType = typename SPOSetT::ValueType; + + BsplineSetT(const std::string& my_name) : + SPOSetT(my_name), + MyIndex(0), + first_spo(0), + last_spo(0) + { + } + + virtual bool + isComplex() const = 0; + virtual std::string + getKeyword() const = 0; + + auto& + getHalfG() const + { + return HalfG; + } + + inline void + init_base(int n) + { + kPoints.resize(n); + MakeTwoCopies.resize(n); + BandIndexMap.resize(n); + for (int i = 0; i < n; i++) + BandIndexMap[i] = i; + } + + /// remap kpoints to group general kpoints & special kpoints + int + remap_kpoints() { - if (MakeTwoCopies[i]) - { - kPoints[nCB] = k_copy[i]; - BandIndexMap[nCB++] = i; - } + std::vector k_copy(kPoints); + const int nk = kPoints.size(); + int nCB = 0; + // two pass + for (int i = 0; i < nk; ++i) { + if (MakeTwoCopies[i]) { + kPoints[nCB] = k_copy[i]; + BandIndexMap[nCB++] = i; + } + } + int nRealBands = nCB; + for (int i = 0; i < nk; ++i) { + if (!MakeTwoCopies[i]) { + kPoints[nRealBands] = k_copy[i]; + BandIndexMap[nRealBands++] = i; + } + } + return nCB; // return the number of complex bands } - int nRealBands = nCB; - for (int i = 0; i < nk; ++i) + + std::unique_ptr> + makeClone() const override = 0; + + void + setOrbitalSetSize(int norbs) override { - if (!MakeTwoCopies[i]) - { - kPoints[nRealBands] = k_copy[i]; - BandIndexMap[nRealBands++] = i; - } + this->OrbitalSetSize = norbs; } - return nCB; //return the number of complex bands - } - std::unique_ptr> makeClone() const override = 0; + void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, + ValueMatrix& d2logdet) override + { + for (int iat = first, i = 0; iat < last; ++iat, ++i) { + ValueVector v(logdet[i], logdet.cols()); + GradVector g(dlogdet[i], dlogdet.cols()); + ValueVector l(d2logdet[i], d2logdet.cols()); + this->evaluateVGL(P, iat, v, g, l); + } + } + + void + mw_evaluate_notranspose(const 
RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int first, int last, + const RefVector& logdet_list, + const RefVector& dlogdet_list, + const RefVector& d2logdet_list) const override + { + assert(this == &spo_list.getLeader()); + const size_t nw = spo_list.size(); + std::vector mw_psi_v; + std::vector mw_dpsi_v; + std::vector mw_d2psi_v; + RefVector psi_v_list; + RefVector dpsi_v_list; + RefVector d2psi_v_list; + mw_psi_v.reserve(nw); + mw_dpsi_v.reserve(nw); + mw_d2psi_v.reserve(nw); + psi_v_list.reserve(nw); + dpsi_v_list.reserve(nw); + d2psi_v_list.reserve(nw); - void setOrbitalSetSize(int norbs) override { this->OrbitalSetSize = norbs; } + for (int iat = first, i = 0; iat < last; ++iat, ++i) { + mw_psi_v.clear(); + mw_dpsi_v.clear(); + mw_d2psi_v.clear(); + psi_v_list.clear(); + dpsi_v_list.clear(); + d2psi_v_list.clear(); - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override - { - for (int iat = first, i = 0; iat < last; ++iat, ++i) + for (int iw = 0; iw < nw; iw++) { + mw_psi_v.emplace_back( + logdet_list[iw].get()[i], logdet_list[iw].get().cols()); + mw_dpsi_v.emplace_back( + dlogdet_list[iw].get()[i], dlogdet_list[iw].get().cols()); + mw_d2psi_v.emplace_back( + d2logdet_list[iw].get()[i], d2logdet_list[iw].get().cols()); + psi_v_list.push_back(mw_psi_v.back()); + dpsi_v_list.push_back(mw_dpsi_v.back()); + d2psi_v_list.push_back(mw_d2psi_v.back()); + } + + this->mw_evaluateVGL( + spo_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); + } + } + + void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet) override { - ValueVector v(logdet[i], logdet.cols()); - GradVector g(dlogdet[i], dlogdet.cols()); - ValueVector l(d2logdet[i], d2logdet.cols()); - this->evaluateVGL(P, iat, v, g, l); + for (int iat = first, i = 0; iat < last; ++iat, ++i) { + 
ValueVector v(logdet[i], logdet.cols()); + GradVector g(dlogdet[i], dlogdet.cols()); + HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols()); + this->evaluateVGH(P, iat, v, g, h); + } } - } - - void mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int first, - int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const override - { - assert(this == &spo_list.getLeader()); - const size_t nw = spo_list.size(); - std::vector mw_psi_v; - std::vector mw_dpsi_v; - std::vector mw_d2psi_v; - RefVector psi_v_list; - RefVector dpsi_v_list; - RefVector d2psi_v_list; - mw_psi_v.reserve(nw); - mw_dpsi_v.reserve(nw); - mw_d2psi_v.reserve(nw); - psi_v_list.reserve(nw); - dpsi_v_list.reserve(nw); - d2psi_v_list.reserve(nw); - - for (int iat = first, i = 0; iat < last; ++iat, ++i) + + void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet, + GGGMatrix& grad_grad_grad_logdet) override { - mw_psi_v.clear(); - mw_dpsi_v.clear(); - mw_d2psi_v.clear(); - psi_v_list.clear(); - dpsi_v_list.clear(); - d2psi_v_list.clear(); - - for (int iw = 0; iw < nw; iw++) - { - mw_psi_v.emplace_back(logdet_list[iw].get()[i], logdet_list[iw].get().cols()); - mw_dpsi_v.emplace_back(dlogdet_list[iw].get()[i], dlogdet_list[iw].get().cols()); - mw_d2psi_v.emplace_back(d2logdet_list[iw].get()[i], d2logdet_list[iw].get().cols()); - psi_v_list.push_back(mw_psi_v.back()); - dpsi_v_list.push_back(mw_dpsi_v.back()); - d2psi_v_list.push_back(mw_d2psi_v.back()); - } - - this->mw_evaluateVGL(spo_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); + for (int iat = first, i = 0; iat < last; ++iat, ++i) { + ValueVector v(logdet[i], logdet.cols()); + GradVector g(dlogdet[i], dlogdet.cols()); + HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols()); + GGGVector gh( + grad_grad_grad_logdet[i], 
grad_grad_grad_logdet.cols()); + this->evaluateVGHGH(P, iat, v, g, h, gh); + } } - } - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) override - { - for (int iat = first, i = 0; iat < last; ++iat, ++i) + + void + evaluateGradSource(const ParticleSetT& P, int first, int last, + const ParticleSetT& source, int iat_src, + GradMatrix& gradphi) override { - ValueVector v(logdet[i], logdet.cols()); - GradVector g(dlogdet[i], dlogdet.cols()); - HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols()); - this->evaluateVGH(P, iat, v, g, h); + // Do nothing, since Einsplines don't explicitly depend on ion + // positions. } - } - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) override - { - for (int iat = first, i = 0; iat < last; ++iat, ++i) + + void + evaluateGradSource(const ParticleSetT& P, int first, int last, + const ParticleSetT& source, int iat_src, GradMatrix& grad_phi, + HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi) override { - ValueVector v(logdet[i], logdet.cols()); - GradVector g(dlogdet[i], dlogdet.cols()); - HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols()); - GGGVector gh(grad_grad_grad_logdet[i], grad_grad_grad_logdet.cols()); - this->evaluateVGHGH(P, iat, v, g, h, gh); + // Do nothing, since Einsplines don't explicitly depend on ion + // positions. } - } - - void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi) override - { - //Do nothing, since Einsplines don't explicitly depend on ion positions. 
- } - - void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, - GradMatrix& grad_lapl_phi) override - { - //Do nothing, since Einsplines don't explicitly depend on ion positions. - } - - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; + template + friend struct SplineSetReader; + friend struct BsplineReaderBase; protected: - static const int D = QMCTraits::DIM; - ///Index of this adoptor, when multiple adoptors are used for NUMA or distributed cases - size_t MyIndex; - ///first index of the SPOs this Spline handles - size_t first_spo; - ///last index of the SPOs this Spline handles - size_t last_spo; - ///sign bits at the G/2 boundaries - TinyVector HalfG; - ///flags to unpack sin/cos - std::vector MakeTwoCopies; - /** kpoints for each unique orbitals. - * Note: for historic reason, this sign is opposite to what was used in DFT when orbitals were generated. - * Changing the sign requires updating all the evaluation code. - */ - std::vector kPoints; - ///remap splines to orbitals - aligned_vector BandIndexMap; - ///band offsets used for communication - std::vector offset; + static const int D = QMCTraits::DIM; + /// Index of this adoptor, when multiple adoptors are used for NUMA or + /// distributed cases + size_t MyIndex; + /// first index of the SPOs this Spline handles + size_t first_spo; + /// last index of the SPOs this Spline handles + size_t last_spo; + /// sign bits at the G/2 boundaries + TinyVector HalfG; + /// flags to unpack sin/cos + std::vector MakeTwoCopies; + /** kpoints for each unique orbitals. + * Note: for historic reason, this sign is opposite to what was used in DFT + * when orbitals were generated. Changing the sign requires updating all the + * evaluation code. 
+ */ + std::vector kPoints; + /// remap splines to orbitals + aligned_vector BandIndexMap; + /// band offsets used for communication + std::vector offset; }; diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp index 155dd8a220..e6b05e4cd3 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp @@ -170,7 +170,7 @@ inline void SplineC2CT::assign_v(const PointType& r, } template -void SplineC2CT::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) +void SplineC2CT::evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) { const PointType& r = P.activeR(iat); PointType ru(PrimLattice.toUnit_floor(r)); @@ -187,7 +187,7 @@ void SplineC2CT::evaluateValue(const ParticleSet& P, const int iat, ValueVect } template -void SplineC2CT::evaluateDetRatios(const VirtualParticleSet& VP, +void SplineC2CT::evaluateDetRatios(const VirtualParticleSetT& VP, ValueVector& psi, const ValueVector& psiinv, std::vector& ratios) @@ -376,7 +376,7 @@ inline void SplineC2CT::assign_vgl_from_l(const PointType& r, ValueVector& ps } template -void SplineC2CT::evaluateVGL(const ParticleSet& P, +void SplineC2CT::evaluateVGL(const ParticleSetT& P, const int iat, ValueVector& psi, GradVector& dpsi, @@ -517,7 +517,7 @@ void SplineC2CT::assign_vgh(const PointType& r, } template -void SplineC2CT::evaluateVGH(const ParticleSet& P, +void SplineC2CT::evaluateVGH(const ParticleSetT& P, const int iat, ValueVector& psi, GradVector& dpsi, @@ -774,7 +774,7 @@ void SplineC2CT::assign_vghgh(const PointType& r, } template -void SplineC2CT::evaluateVGHGH(const ParticleSet& P, +void SplineC2CT::evaluateVGHGH(const ParticleSetT& P, const int iat, ValueVector& psi, GradVector& dpsi, diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h index fd55fcd9f2..a7ba99e272 100644 --- 
a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h @@ -149,9 +149,9 @@ class SplineC2CT : public BsplineSetT void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const; - void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override; + void evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) override; - void evaluateDetRatios(const VirtualParticleSet& VP, + void evaluateDetRatios(const VirtualParticleSetT& VP, ValueVector& psi, const ValueVector& psiinv, std::vector& ratios) override; @@ -165,7 +165,7 @@ class SplineC2CT : public BsplineSetT */ void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - void evaluateVGL(const ParticleSet& P, + void evaluateVGL(const ParticleSetT& P, const int iat, ValueVector& psi, GradVector& dpsi, @@ -178,7 +178,7 @@ class SplineC2CT : public BsplineSetT int first, int last) const; - void evaluateVGH(const ParticleSet& P, + void evaluateVGH(const ParticleSetT& P, const int iat, ValueVector& psi, GradVector& dpsi, @@ -192,7 +192,7 @@ class SplineC2CT : public BsplineSetT int first = 0, int last = -1) const; - void evaluateVGHGH(const ParticleSet& P, + void evaluateVGHGH(const ParticleSetT& P, const int iat, ValueVector& psi, GradVector& dpsi, diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp index e4695e6c11..176cb5dee8 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp @@ -1,64 +1,68 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. 
See LICENSE file in top directory for details. // // Copyright (c) 2019 QMCPACK developers. // -// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of +// Illinois at Urbana-Champaign Mark A. Berrill, +// berrillma@ornl.gov, Oak Ridge National Laboratory Ye Luo, +// yeluo@anl.gov, Argonne National Laboratory // -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// +#include "SplineR2RT.h" #include "Concurrency/OpenMP.h" -#include "SplineR2RT.h" -#include "spline2/MultiBsplineEval.hpp" #include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" +#include "spline2/MultiBsplineEval.hpp" namespace qmcplusplus { -template +template SplineR2RT::SplineR2RT(const SplineR2RT& in) = default; -template -inline void SplineR2RT::set_spline(SingleSplineType* spline_r, - SingleSplineType* spline_i, - int twist, - int ispline, - int level) +template +inline void +SplineR2RT::set_spline(SingleSplineType* spline_r, + SingleSplineType* spline_i, int twist, int ispline, int level) { - SplineInst->copy_spline(spline_r, ispline); + SplineInst->copy_spline(spline_r, ispline); } -template -bool SplineR2RT::read_splines(hdf_archive& h5f) +template +bool +SplineR2RT::read_splines(hdf_archive& h5f) { - std::ostringstream o; - o << "spline_" << this->MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return 
h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); + std::ostringstream o; + o << "spline_" << this->MyIndex; + einspline_engine bigtable(SplineInst->getSplinePtr()); + return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); } -template -bool SplineR2RT::write_splines(hdf_archive& h5f) +template +bool +SplineR2RT::write_splines(hdf_archive& h5f) { - std::ostringstream o; - o << "spline_" << this->MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); + std::ostringstream o; + o << "spline_" << this->MyIndex; + einspline_engine bigtable(SplineInst->getSplinePtr()); + return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); } -template -void SplineR2RT::storeParamsBeforeRotation() +template +void +SplineR2RT::storeParamsBeforeRotation() { - const auto spline_ptr = SplineInst->getSplinePtr(); - const auto coefs_tot_size = spline_ptr->coefs_size; - coef_copy_ = std::make_shared>(coefs_tot_size); + const auto spline_ptr = SplineInst->getSplinePtr(); + const auto coefs_tot_size = spline_ptr->coefs_size; + coef_copy_ = std::make_shared>(coefs_tot_size); - std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin()); + std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin()); } /* @@ -100,458 +104,497 @@ void SplineR2RT::storeParamsBeforeRotation() NB: For splines (typically) BasisSetSize >> OrbitalSetSize, so the spl_coefs "matrix" is very tall and skinny. */ -template -void SplineR2RT::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) +template +void +SplineR2RT::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) { - // SplineInst is a MultiBspline. 
See src/spline2/MultiBspline.hpp - const auto spline_ptr = SplineInst->getSplinePtr(); - assert(spline_ptr != nullptr); - const auto spl_coefs = spline_ptr->coefs; - const auto Nsplines = spline_ptr->num_splines; // May include padding - const auto coefs_tot_size = spline_ptr->coefs_size; - const auto BasisSetSize = coefs_tot_size / Nsplines; - const auto TrueNOrbs = rot_mat.size1(); // == Nsplines - padding - assert(this->OrbitalSetSize == rot_mat.rows()); - assert(this->OrbitalSetSize == rot_mat.cols()); - - if (!use_stored_copy) - { - assert(coef_copy_ != nullptr); - std::copy_n(spl_coefs, coefs_tot_size, coef_copy_->begin()); - } - - // Apply rotation the dumb way b/c I can't get BLAS::gemm to work... - for (auto i = 0; i < BasisSetSize; i++) - { - for (auto j = 0; j < this->OrbitalSetSize; j++) - { - const auto cur_elem = Nsplines * i + j; - auto newval{0.}; - for (auto k = 0; k < this->OrbitalSetSize; k++) - { - const auto index = i * Nsplines + k; - newval += (*coef_copy_)[index] * rot_mat[k][j]; - } - spl_coefs[cur_elem] = newval; + // SplineInst is a MultiBspline. See src/spline2/MultiBspline.hpp + const auto spline_ptr = SplineInst->getSplinePtr(); + assert(spline_ptr != nullptr); + const auto spl_coefs = spline_ptr->coefs; + const auto Nsplines = spline_ptr->num_splines; // May include padding + const auto coefs_tot_size = spline_ptr->coefs_size; + const auto BasisSetSize = coefs_tot_size / Nsplines; + const auto TrueNOrbs = rot_mat.size1(); // == Nsplines - padding + assert(this->OrbitalSetSize == rot_mat.rows()); + assert(this->OrbitalSetSize == rot_mat.cols()); + + if (!use_stored_copy) { + assert(coef_copy_ != nullptr); + std::copy_n(spl_coefs, coefs_tot_size, coef_copy_->begin()); } - } -} + // Apply rotation the dumb way b/c I can't get BLAS::gemm to work... 
+ for (auto i = 0; i < BasisSetSize; i++) { + for (auto j = 0; j < this->OrbitalSetSize; j++) { + const auto cur_elem = Nsplines * i + j; + auto newval{0.}; + for (auto k = 0; k < this->OrbitalSetSize; k++) { + const auto index = i * Nsplines + k; + newval += (*coef_copy_)[index] * rot_mat[k][j]; + } + spl_coefs[cur_elem] = newval; + } + } +} -template -inline void SplineR2RT::assign_v(int bc_sign, const vContainer_type& myV, ValueVector& psi, int first, int last) - const +template +inline void +SplineR2RT::assign_v(int bc_sign, const vContainer_type& myV, + ValueVector& psi, int first, int last) const { - // protect last - last = last > this->kPoints.size() ? this->kPoints.size() : last; + // protect last + last = last > this->kPoints.size() ? this->kPoints.size() : last; - const ST signed_one = (bc_sign & 1) ? -1 : 1; + const ST signed_one = (bc_sign & 1) ? -1 : 1; #pragma omp simd - for (size_t j = first; j < last; ++j) - psi[this->first_spo + j] = signed_one * myV[j]; + for (size_t j = first; j < last; ++j) + psi[this->first_spo + j] = signed_one * myV[j]; } -template -void SplineR2RT::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) +template +void +SplineR2RT::evaluateValue( + const ParticleSetT& P, const int iat, ValueVector& psi) { - const PointType& r = P.activeR(iat); - PointType ru; - int bc_sign = convertPos(r, ru); + const PointType& r = P.activeR(iat); + PointType ru; + int bc_sign = convertPos(r, ru); #pragma omp parallel - { - int first, last; - FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); + { + int first, last; + FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), + omp_get_thread_num(), first, last); - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(bc_sign, myV, psi, first, last); - } + spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); + assign_v(bc_sign, myV, psi, first, last); + } } -template 
-void SplineR2RT::evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) +template +void +SplineR2RT::evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, const ValueVector& psiinv, std::vector& ratios) { - const bool need_resize = ratios_private.rows() < VP.getTotalNum(); + const bool need_resize = ratios_private.rows() < VP.getTotalNum(); #pragma omp parallel - { - int tid = omp_get_thread_num(); - // initialize thread private ratios - if (need_resize) { - if (tid == 0) // just like #pragma omp master, but one fewer call to the runtime - ratios_private.resize(VP.getTotalNum(), omp_get_num_threads()); + int tid = omp_get_thread_num(); + // initialize thread private ratios + if (need_resize) { + if (tid == 0) // just like #pragma omp master, but one fewer call to + // the runtime + ratios_private.resize(VP.getTotalNum(), omp_get_num_threads()); #pragma omp barrier + } + int first, last; + FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), + tid, first, last); + const int last_real = + this->kPoints.size() < last ? this->kPoints.size() : last; + + for (int iat = 0; iat < VP.getTotalNum(); ++iat) { + const PointType& r = VP.activeR(iat); + PointType ru; + int bc_sign = convertPos(r, ru); + + spline2::evaluate3d( + SplineInst->getSplinePtr(), ru, myV, first, last); + assign_v(bc_sign, myV, psi, first, last_real); + ratios_private[iat][tid] = simd::dot( + psi.data() + first, psiinv.data() + first, last_real - first); + } } - int first, last; - FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), tid, first, last); - const int last_real = this->kPoints.size() < last ? 
this->kPoints.size() : last; - - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - const PointType& r = VP.activeR(iat); - PointType ru; - int bc_sign = convertPos(r, ru); - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(bc_sign, myV, psi, first, last_real); - ratios_private[iat][tid] = simd::dot(psi.data() + first, psiinv.data() + first, last_real - first); + // do the reduction manually + for (int iat = 0; iat < VP.getTotalNum(); ++iat) { + ratios[iat] = TT(0); + for (int tid = 0; tid < ratios_private.cols(); tid++) + ratios[iat] += ratios_private[iat][tid]; } - } - - // do the reduction manually - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - ratios[iat] = TT(0); - for (int tid = 0; tid < ratios_private.cols(); tid++) - ratios[iat] += ratios_private[iat][tid]; - } } -template -inline void SplineR2RT::assign_vgl(int bc_sign, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi, - int first, - int last) const +template +inline void +SplineR2RT::assign_vgl(int bc_sign, ValueVector& psi, GradVector& dpsi, + ValueVector& d2psi, int first, int last) const { - // protect last - last = last > this->kPoints.size() ? this->kPoints.size() : last; - - const ST signed_one = (bc_sign & 1) ? 
-1 : 1; - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST symGG[6] = {GGt[0], GGt[1] + GGt[3], GGt[2] + GGt[6], GGt[4], GGt[5] + GGt[7], GGt[8]}; - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); + // protect last + last = last > this->kPoints.size() ? this->kPoints.size() : last; + + const ST signed_one = (bc_sign & 1) ? -1 : 1; + const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), + g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), + g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), + g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), + g22 = PrimLattice.G(8); + const ST symGG[6] = {GGt[0], GGt[1] + GGt[3], GGt[2] + GGt[6], GGt[4], + GGt[5] + GGt[7], GGt[8]}; + + const ST* restrict g0 = myG.data(0); + const ST* restrict g1 = myG.data(1); + const ST* restrict g2 = myG.data(2); + const ST* restrict h00 = myH.data(0); + const ST* restrict h01 = myH.data(1); + const ST* restrict h02 = myH.data(2); + const ST* restrict h11 = myH.data(3); + const ST* restrict h12 = myH.data(4); + const ST* restrict h22 = myH.data(5); #pragma omp simd - for (size_t j = first; j < last; ++j) - { - const size_t psiIndex = this->first_spo + j; - psi[psiIndex] = signed_one * myV[j]; - dpsi[psiIndex][0] = signed_one * (g00 * g0[j] + g01 * g1[j] + g02 * g2[j]); - dpsi[psiIndex][1] = signed_one * (g10 * g0[j] + g11 * g1[j] + g12 * g2[j]); - dpsi[psiIndex][2] = signed_one * (g20 * g0[j] + g21 * g1[j] + g22 * g2[j]); - d2psi[psiIndex] = signed_one * SymTrace(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], 
symGG); - } + for (size_t j = first; j < last; ++j) { + const size_t psiIndex = this->first_spo + j; + psi[psiIndex] = signed_one * myV[j]; + dpsi[psiIndex][0] = + signed_one * (g00 * g0[j] + g01 * g1[j] + g02 * g2[j]); + dpsi[psiIndex][1] = + signed_one * (g10 * g0[j] + g11 * g1[j] + g12 * g2[j]); + dpsi[psiIndex][2] = + signed_one * (g20 * g0[j] + g21 * g1[j] + g22 * g2[j]); + d2psi[psiIndex] = signed_one * + SymTrace(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], symGG); + } } -/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ -template -inline void SplineR2RT::assign_vgl_from_l(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in + * cartesian + */ +template +inline void +SplineR2RT::assign_vgl_from_l( + int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { - const ST signed_one = (bc_sign & 1) ? -1 : 1; - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); + const ST signed_one = (bc_sign & 1) ? 
-1 : 1; + const ST* restrict g0 = myG.data(0); + const ST* restrict g1 = myG.data(1); + const ST* restrict g2 = myG.data(2); #pragma omp simd - for (int psiIndex = this->first_spo; psiIndex < this->last_spo; ++psiIndex) - { - const size_t j = psiIndex - this->first_spo; - psi[psiIndex] = signed_one * myV[j]; - dpsi[psiIndex][0] = signed_one * g0[j]; - dpsi[psiIndex][1] = signed_one * g1[j]; - dpsi[psiIndex][2] = signed_one * g2[j]; - d2psi[psiIndex] = signed_one * myL[j]; - } + for (int psiIndex = this->first_spo; psiIndex < this->last_spo; + ++psiIndex) { + const size_t j = psiIndex - this->first_spo; + psi[psiIndex] = signed_one * myV[j]; + dpsi[psiIndex][0] = signed_one * g0[j]; + dpsi[psiIndex][1] = signed_one * g1[j]; + dpsi[psiIndex][2] = signed_one * g2[j]; + d2psi[psiIndex] = signed_one * myL[j]; + } } -template -void SplineR2RT::evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) +template +void +SplineR2RT::evaluateVGL(const ParticleSetT& P, const int iat, + ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { - const PointType& r = P.activeR(iat); - PointType ru; - int bc_sign = convertPos(r, ru); + const PointType& r = P.activeR(iat); + PointType ru; + int bc_sign = convertPos(r, ru); #pragma omp parallel - { - int first, last; - FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); + { + int first, last; + FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), + omp_get_thread_num(), first, last); - spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgl(bc_sign, psi, dpsi, d2psi, first, last); - } + spline2::evaluate3d_vgh( + SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); + assign_vgl(bc_sign, psi, dpsi, d2psi, first, last); + } } -template -void SplineR2RT::assign_vgh(int bc_sign, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int 
last) const +template +void +SplineR2RT::assign_vgh(int bc_sign, ValueVector& psi, GradVector& dpsi, + HessVector& grad_grad_psi, int first, int last) const { - // protect last - last = last > this->kPoints.size() ? this->kPoints.size() : last; - - const ST signed_one = (bc_sign & 1) ? -1 : 1; - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); + // protect last + last = last > this->kPoints.size() ? this->kPoints.size() : last; + + const ST signed_one = (bc_sign & 1) ? 
-1 : 1; + const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), + g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), + g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), + g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), + g22 = PrimLattice.G(8); + + const ST* restrict g0 = myG.data(0); + const ST* restrict g1 = myG.data(1); + const ST* restrict g2 = myG.data(2); + const ST* restrict h00 = myH.data(0); + const ST* restrict h01 = myH.data(1); + const ST* restrict h02 = myH.data(2); + const ST* restrict h11 = myH.data(3); + const ST* restrict h12 = myH.data(4); + const ST* restrict h22 = myH.data(5); #pragma omp simd - for (size_t j = first; j < last; ++j) - { - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[j] + g01 * g1[j] + g02 * g2[j]; - const ST dY_r = g10 * g0[j] + g11 * g1[j] + g12 * g2[j]; - const ST dZ_r = g20 * g0[j] + g21 * g1[j] + g22 * g2[j]; - - const size_t psiIndex = j + this->first_spo; - psi[psiIndex] = signed_one * myV[j]; - dpsi[psiIndex][0] = signed_one * dX_r; - dpsi[psiIndex][1] = signed_one * dY_r; - dpsi[psiIndex][2] = signed_one * dZ_r; - - const ST h_xx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g00, g01, g02); - const ST h_xy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g10, g11, g12); - const ST h_xz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g20, g21, g22); - const ST h_yx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g00, g01, g02); - const ST h_yy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g10, g11, g12); - const ST h_yz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g20, g21, g22); - const ST h_zx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g20, g21, g22, g00, g01, g02); - const ST h_zy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g20, g21, g22, g10, g11, g12); - const ST h_zz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], 
g20, g21, g22, g20, g21, g22); - - grad_grad_psi[psiIndex][0] = signed_one * h_xx_r; - grad_grad_psi[psiIndex][1] = signed_one * h_xy_r; - grad_grad_psi[psiIndex][2] = signed_one * h_xz_r; - grad_grad_psi[psiIndex][3] = signed_one * h_yx_r; - grad_grad_psi[psiIndex][4] = signed_one * h_yy_r; - grad_grad_psi[psiIndex][5] = signed_one * h_yz_r; - grad_grad_psi[psiIndex][6] = signed_one * h_zx_r; - grad_grad_psi[psiIndex][7] = signed_one * h_zy_r; - grad_grad_psi[psiIndex][8] = signed_one * h_zz_r; - } + for (size_t j = first; j < last; ++j) { + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g00 * g0[j] + g01 * g1[j] + g02 * g2[j]; + const ST dY_r = g10 * g0[j] + g11 * g1[j] + g12 * g2[j]; + const ST dZ_r = g20 * g0[j] + g21 * g1[j] + g22 * g2[j]; + + const size_t psiIndex = j + this->first_spo; + psi[psiIndex] = signed_one * myV[j]; + dpsi[psiIndex][0] = signed_one * dX_r; + dpsi[psiIndex][1] = signed_one * dY_r; + dpsi[psiIndex][2] = signed_one * dZ_r; + + const ST h_xx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], + g00, g01, g02, g00, g01, g02); + const ST h_xy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], + g00, g01, g02, g10, g11, g12); + const ST h_xz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], + g00, g01, g02, g20, g21, g22); + const ST h_yx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], + g10, g11, g12, g00, g01, g02); + const ST h_yy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], + g10, g11, g12, g10, g11, g12); + const ST h_yz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], + g10, g11, g12, g20, g21, g22); + const ST h_zx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], + g20, g21, g22, g00, g01, g02); + const ST h_zy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], + g20, g21, g22, g10, g11, g12); + const ST h_zz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], + g20, g21, g22, g20, g21, g22); + + grad_grad_psi[psiIndex][0] = signed_one * h_xx_r; + 
grad_grad_psi[psiIndex][1] = signed_one * h_xy_r; + grad_grad_psi[psiIndex][2] = signed_one * h_xz_r; + grad_grad_psi[psiIndex][3] = signed_one * h_yx_r; + grad_grad_psi[psiIndex][4] = signed_one * h_yy_r; + grad_grad_psi[psiIndex][5] = signed_one * h_yz_r; + grad_grad_psi[psiIndex][6] = signed_one * h_zx_r; + grad_grad_psi[psiIndex][7] = signed_one * h_zy_r; + grad_grad_psi[psiIndex][8] = signed_one * h_zz_r; + } } -template -void SplineR2RT::evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) +template +void +SplineR2RT::evaluateVGH(const ParticleSetT& P, const int iat, + ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi) { - const PointType& r = P.activeR(iat); - PointType ru; - int bc_sign = convertPos(r, ru); + const PointType& r = P.activeR(iat); + PointType ru; + int bc_sign = convertPos(r, ru); #pragma omp parallel - { - int first, last; - FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); + { + int first, last; + FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), + omp_get_thread_num(), first, last); - spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgh(bc_sign, psi, dpsi, grad_grad_psi, first, last); - } + spline2::evaluate3d_vgh( + SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); + assign_vgh(bc_sign, psi, dpsi, grad_grad_psi, first, last); + } } -template -void SplineR2RT::assign_vghgh(int bc_sign, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first, - int last) const +template +void +SplineR2RT::assign_vghgh(int bc_sign, ValueVector& psi, GradVector& dpsi, + HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, int first, + int last) const { - // protect last - last = last < 0 ? this->kPoints.size() : (last > this->kPoints.size() ? 
this->kPoints.size() : last); - - const ST signed_one = (bc_sign & 1) ? -1 : 1; - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - - const ST* restrict gh000 = mygH.data(0); - const ST* restrict gh001 = mygH.data(1); - const ST* restrict gh002 = mygH.data(2); - const ST* restrict gh011 = mygH.data(3); - const ST* restrict gh012 = mygH.data(4); - const ST* restrict gh022 = mygH.data(5); - const ST* restrict gh111 = mygH.data(6); - const ST* restrict gh112 = mygH.data(7); - const ST* restrict gh122 = mygH.data(8); - const ST* restrict gh222 = mygH.data(9); - - //SIMD doesn't work quite right yet. Comment out until further debugging. - //#pragma omp simd - for (size_t j = first; j < last; ++j) - { - const ST val_r = myV[j]; - - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[j] + g01 * g1[j] + g02 * g2[j]; - const ST dY_r = g10 * g0[j] + g11 * g1[j] + g12 * g2[j]; - const ST dZ_r = g20 * g0[j] + g21 * g1[j] + g22 * g2[j]; - - const size_t psiIndex = j + this->first_spo; - psi[psiIndex] = signed_one * val_r; - dpsi[psiIndex][0] = signed_one * dX_r; - dpsi[psiIndex][1] = signed_one * dY_r; - dpsi[psiIndex][2] = signed_one * dZ_r; - - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. 
- const ST f_xx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g00, g01, g02); - const ST f_xy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g10, g11, g12); - const ST f_xz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g20, g21, g22); - const ST f_yy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g10, g11, g12); - const ST f_yz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g20, g21, g22); - const ST f_zz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g20, g21, g22, g20, g21, g22); - - /* const ST h_xx_r=f_xx_r; - const ST h_xy_r=f_xy_r+(kX*dY_i+kY*dX_i)-kX*kY*val_r; - const ST h_xz_r=f_xz_r+(kX*dZ_i+kZ*dX_i)-kX*kZ*val_r; - const ST h_yy_r=f_yy_r+2*kY*dY_i-kY*kY*val_r; - const ST h_yz_r=f_yz_r+(kY*dZ_i+kZ*dY_i)-kY*kZ*val_r; - const ST h_zz_r=f_zz_r+2*kZ*dZ_i-kZ*kZ*val_r; */ - - grad_grad_psi[psiIndex][0] = f_xx_r * signed_one; - grad_grad_psi[psiIndex][1] = f_xy_r * signed_one; - grad_grad_psi[psiIndex][2] = f_xz_r * signed_one; - grad_grad_psi[psiIndex][4] = f_yy_r * signed_one; - grad_grad_psi[psiIndex][5] = f_yz_r * signed_one; - grad_grad_psi[psiIndex][8] = f_zz_r * signed_one; - - //symmetry: - grad_grad_psi[psiIndex][3] = grad_grad_psi[psiIndex][1]; - grad_grad_psi[psiIndex][6] = grad_grad_psi[psiIndex][2]; - grad_grad_psi[psiIndex][7] = grad_grad_psi[psiIndex][5]; - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. 
- - const ST f3_xxx_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) - /* const ST gh_xxx_r= f3_xxx_r + 3*kX*f_xx_i - 3*kX*kX*dX_r 
- kX*kX*kX*val_i; - const ST gh_xxy_r= f3_xxy_r +(kY*f_xx_i+2*kX*f_xy_i) - (kX*kX*dY_r+2*kX*kY*dX_r)-kX*kX*kY*val_i; - const ST gh_xxz_r= f3_xxz_r +(kZ*f_xx_i+2*kX*f_xz_i) - (kX*kX*dZ_r+2*kX*kZ*dX_r)-kX*kX*kZ*val_i; - const ST gh_xyy_r= f3_xyy_r +(2*kY*f_xy_i+kX*f_yy_i) - (2*kX*kY*dY_r+kY*kY*dX_r)-kX*kY*kY*val_i; - const ST gh_xyz_r= f3_xyz_r +(kX*f_yz_i+kY*f_xz_i+kZ*f_xy_i)-(kX*kY*dZ_r+kY*kZ*dX_r+kZ*kX*dY_r) - kX*kY*kZ*val_i; - const ST gh_xzz_r= f3_xzz_r +(2*kZ*f_xz_i+kX*f_zz_i) - (2*kX*kZ*dZ_r+kZ*kZ*dX_r)-kX*kZ*kZ*val_i; - const ST gh_yyy_r= f3_yyy_r + 3*kY*f_yy_i - 3*kY*kY*dY_r - kY*kY*kY*val_i; - const ST gh_yyz_r= f3_yyz_r +(kZ*f_yy_i+2*kY*f_yz_i) - (kY*kY*dZ_r+2*kY*kZ*dY_r)-kY*kY*kZ*val_i; - const ST gh_yzz_r= f3_yzz_r +(2*kZ*f_yz_i+kY*f_zz_i) - (2*kY*kZ*dZ_r+kZ*kZ*dY_r)-kY*kZ*kZ*val_i; - const ST gh_zzz_r= f3_zzz_r + 3*kZ*f_zz_i - 3*kZ*kZ*dZ_r - kZ*kZ*kZ*val_i;*/ - //[x][xx] //These are the unique entries - grad_grad_grad_psi[psiIndex][0][0] = signed_one * f3_xxx_r; - grad_grad_grad_psi[psiIndex][0][1] = signed_one * f3_xxy_r; - grad_grad_grad_psi[psiIndex][0][2] = signed_one * f3_xxz_r; - grad_grad_grad_psi[psiIndex][0][4] = signed_one * f3_xyy_r; - grad_grad_grad_psi[psiIndex][0][5] = signed_one * f3_xyz_r; - grad_grad_grad_psi[psiIndex][0][8] = signed_one * f3_xzz_r; - - //filling in the symmetric terms. 
Filling out the xij terms - grad_grad_grad_psi[psiIndex][0][3] = grad_grad_grad_psi[psiIndex][0][1]; - grad_grad_grad_psi[psiIndex][0][6] = grad_grad_grad_psi[psiIndex][0][2]; - grad_grad_grad_psi[psiIndex][0][7] = grad_grad_grad_psi[psiIndex][0][5]; - - //Now for everything that's a permutation of the above: - grad_grad_grad_psi[psiIndex][1][0] = grad_grad_grad_psi[psiIndex][0][1]; - grad_grad_grad_psi[psiIndex][1][1] = grad_grad_grad_psi[psiIndex][0][4]; - grad_grad_grad_psi[psiIndex][1][2] = grad_grad_grad_psi[psiIndex][0][5]; - grad_grad_grad_psi[psiIndex][1][3] = grad_grad_grad_psi[psiIndex][0][4]; - grad_grad_grad_psi[psiIndex][1][6] = grad_grad_grad_psi[psiIndex][0][5]; - - grad_grad_grad_psi[psiIndex][2][0] = grad_grad_grad_psi[psiIndex][0][2]; - grad_grad_grad_psi[psiIndex][2][1] = grad_grad_grad_psi[psiIndex][0][5]; - grad_grad_grad_psi[psiIndex][2][2] = grad_grad_grad_psi[psiIndex][0][8]; - grad_grad_grad_psi[psiIndex][2][3] = grad_grad_grad_psi[psiIndex][0][5]; - grad_grad_grad_psi[psiIndex][2][6] = grad_grad_grad_psi[psiIndex][0][8]; - - grad_grad_grad_psi[psiIndex][1][4] = signed_one * f3_yyy_r; - grad_grad_grad_psi[psiIndex][1][5] = signed_one * f3_yyz_r; - grad_grad_grad_psi[psiIndex][1][8] = signed_one * f3_yzz_r; - - grad_grad_grad_psi[psiIndex][1][7] = grad_grad_grad_psi[psiIndex][1][5]; - grad_grad_grad_psi[psiIndex][2][4] = grad_grad_grad_psi[psiIndex][1][5]; - grad_grad_grad_psi[psiIndex][2][5] = grad_grad_grad_psi[psiIndex][1][8]; - grad_grad_grad_psi[psiIndex][2][7] = grad_grad_grad_psi[psiIndex][1][8]; - - grad_grad_grad_psi[psiIndex][2][8] = signed_one * f3_zzz_r; - } + // protect last + last = last < 0 ? + this->kPoints.size() : + (last > this->kPoints.size() ? this->kPoints.size() : last); + + const ST signed_one = (bc_sign & 1) ? 
-1 : 1; + const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), + g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), + g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), + g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), + g22 = PrimLattice.G(8); + + const ST* restrict g0 = myG.data(0); + const ST* restrict g1 = myG.data(1); + const ST* restrict g2 = myG.data(2); + const ST* restrict h00 = myH.data(0); + const ST* restrict h01 = myH.data(1); + const ST* restrict h02 = myH.data(2); + const ST* restrict h11 = myH.data(3); + const ST* restrict h12 = myH.data(4); + const ST* restrict h22 = myH.data(5); + + const ST* restrict gh000 = mygH.data(0); + const ST* restrict gh001 = mygH.data(1); + const ST* restrict gh002 = mygH.data(2); + const ST* restrict gh011 = mygH.data(3); + const ST* restrict gh012 = mygH.data(4); + const ST* restrict gh022 = mygH.data(5); + const ST* restrict gh111 = mygH.data(6); + const ST* restrict gh112 = mygH.data(7); + const ST* restrict gh122 = mygH.data(8); + const ST* restrict gh222 = mygH.data(9); + + // SIMD doesn't work quite right yet. Comment out until further debugging. + // #pragma omp simd + for (size_t j = first; j < last; ++j) { + const ST val_r = myV[j]; + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g00 * g0[j] + g01 * g1[j] + g02 * g2[j]; + const ST dY_r = g10 * g0[j] + g11 * g1[j] + g12 * g2[j]; + const ST dZ_r = g20 * g0[j] + g21 * g1[j] + g22 * g2[j]; + + const size_t psiIndex = j + this->first_spo; + psi[psiIndex] = signed_one * val_r; + dpsi[psiIndex][0] = signed_one * dX_r; + dpsi[psiIndex][1] = signed_one * dY_r; + dpsi[psiIndex][2] = signed_one * dZ_r; + + // intermediates for computation of hessian. \partial_i \partial_j phi + // in cartesian coordinates. 
+ const ST f_xx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], + g00, g01, g02, g00, g01, g02); + const ST f_xy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], + g00, g01, g02, g10, g11, g12); + const ST f_xz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], + g00, g01, g02, g20, g21, g22); + const ST f_yy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], + g10, g11, g12, g10, g11, g12); + const ST f_yz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], + g10, g11, g12, g20, g21, g22); + const ST f_zz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], + g20, g21, g22, g20, g21, g22); + + /* const ST h_xx_r=f_xx_r; + const ST h_xy_r=f_xy_r+(kX*dY_i+kY*dX_i)-kX*kY*val_r; + const ST h_xz_r=f_xz_r+(kX*dZ_i+kZ*dX_i)-kX*kZ*val_r; + const ST h_yy_r=f_yy_r+2*kY*dY_i-kY*kY*val_r; + const ST h_yz_r=f_yz_r+(kY*dZ_i+kZ*dY_i)-kY*kZ*val_r; + const ST h_zz_r=f_zz_r+2*kZ*dZ_i-kZ*kZ*val_r; */ + + grad_grad_psi[psiIndex][0] = f_xx_r * signed_one; + grad_grad_psi[psiIndex][1] = f_xy_r * signed_one; + grad_grad_psi[psiIndex][2] = f_xz_r * signed_one; + grad_grad_psi[psiIndex][4] = f_yy_r * signed_one; + grad_grad_psi[psiIndex][5] = f_yz_r * signed_one; + grad_grad_psi[psiIndex][8] = f_zz_r * signed_one; + + // symmetry: + grad_grad_psi[psiIndex][3] = grad_grad_psi[psiIndex][1]; + grad_grad_psi[psiIndex][6] = grad_grad_psi[psiIndex][2]; + grad_grad_psi[psiIndex][7] = grad_grad_psi[psiIndex][5]; + // These are the real and imaginary components of the third SPO + // derivative. _xxx denotes + // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, + // and z, and so on. 
+ + const ST f3_xxx_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], + gh012[j], gh022[j], gh111[j], gh112[j], gh122[j], gh222[j], g00, + g01, g02, g00, g01, g02, g00, g01, g02); + const ST f3_xxy_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], + gh012[j], gh022[j], gh111[j], gh112[j], gh122[j], gh222[j], g00, + g01, g02, g00, g01, g02, g10, g11, g12); + const ST f3_xxz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], + gh012[j], gh022[j], gh111[j], gh112[j], gh122[j], gh222[j], g00, + g01, g02, g00, g01, g02, g20, g21, g22); + const ST f3_xyy_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], + gh012[j], gh022[j], gh111[j], gh112[j], gh122[j], gh222[j], g00, + g01, g02, g10, g11, g12, g10, g11, g12); + const ST f3_xyz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], + gh012[j], gh022[j], gh111[j], gh112[j], gh122[j], gh222[j], g00, + g01, g02, g10, g11, g12, g20, g21, g22); + const ST f3_xzz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], + gh012[j], gh022[j], gh111[j], gh112[j], gh122[j], gh222[j], g00, + g01, g02, g20, g21, g22, g20, g21, g22); + const ST f3_yyy_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], + gh012[j], gh022[j], gh111[j], gh112[j], gh122[j], gh222[j], g10, + g11, g12, g10, g11, g12, g10, g11, g12); + const ST f3_yyz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], + gh012[j], gh022[j], gh111[j], gh112[j], gh122[j], gh222[j], g10, + g11, g12, g10, g11, g12, g20, g21, g22); + const ST f3_yzz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], + gh012[j], gh022[j], gh111[j], gh112[j], gh122[j], gh222[j], g10, + g11, g12, g20, g21, g22, g20, g21, g22); + const ST f3_zzz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], + gh012[j], gh022[j], gh111[j], gh112[j], gh122[j], gh222[j], g20, + g21, g22, g20, g21, g22, g20, g21, g22); + + // Here is where we build up the components of the physical hessian + // gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) + /* const ST gh_xxx_r= f3_xxx_r + 
3*kX*f_xx_i - 3*kX*kX*dX_r - + kX*kX*kX*val_i; const ST gh_xxy_r= f3_xxy_r +(kY*f_xx_i+2*kX*f_xy_i) - + (kX*kX*dY_r+2*kX*kY*dX_r)-kX*kX*kY*val_i; const ST gh_xxz_r= f3_xxz_r + +(kZ*f_xx_i+2*kX*f_xz_i) - (kX*kX*dZ_r+2*kX*kZ*dX_r)-kX*kX*kZ*val_i; + const ST gh_xyy_r= f3_xyy_r +(2*kY*f_xy_i+kX*f_yy_i) - + (2*kX*kY*dY_r+kY*kY*dX_r)-kX*kY*kY*val_i; const ST gh_xyz_r= f3_xyz_r + +(kX*f_yz_i+kY*f_xz_i+kZ*f_xy_i)-(kX*kY*dZ_r+kY*kZ*dX_r+kZ*kX*dY_r) - + kX*kY*kZ*val_i; const ST gh_xzz_r= f3_xzz_r +(2*kZ*f_xz_i+kX*f_zz_i) - + (2*kX*kZ*dZ_r+kZ*kZ*dX_r)-kX*kZ*kZ*val_i; const ST gh_yyy_r= f3_yyy_r + + 3*kY*f_yy_i - 3*kY*kY*dY_r - kY*kY*kY*val_i; const ST gh_yyz_r= + f3_yyz_r +(kZ*f_yy_i+2*kY*f_yz_i) - + (kY*kY*dZ_r+2*kY*kZ*dY_r)-kY*kY*kZ*val_i; const ST gh_yzz_r= f3_yzz_r + +(2*kZ*f_yz_i+kY*f_zz_i) - (2*kY*kZ*dZ_r+kZ*kZ*dY_r)-kY*kZ*kZ*val_i; + const ST gh_zzz_r= f3_zzz_r + 3*kZ*f_zz_i - 3*kZ*kZ*dZ_r - + kZ*kZ*kZ*val_i;*/ + //[x][xx] //These are the unique entries + grad_grad_grad_psi[psiIndex][0][0] = signed_one * f3_xxx_r; + grad_grad_grad_psi[psiIndex][0][1] = signed_one * f3_xxy_r; + grad_grad_grad_psi[psiIndex][0][2] = signed_one * f3_xxz_r; + grad_grad_grad_psi[psiIndex][0][4] = signed_one * f3_xyy_r; + grad_grad_grad_psi[psiIndex][0][5] = signed_one * f3_xyz_r; + grad_grad_grad_psi[psiIndex][0][8] = signed_one * f3_xzz_r; + + // filling in the symmetric terms. 
Filling out the xij terms + grad_grad_grad_psi[psiIndex][0][3] = grad_grad_grad_psi[psiIndex][0][1]; + grad_grad_grad_psi[psiIndex][0][6] = grad_grad_grad_psi[psiIndex][0][2]; + grad_grad_grad_psi[psiIndex][0][7] = grad_grad_grad_psi[psiIndex][0][5]; + + // Now for everything that's a permutation of the above: + grad_grad_grad_psi[psiIndex][1][0] = grad_grad_grad_psi[psiIndex][0][1]; + grad_grad_grad_psi[psiIndex][1][1] = grad_grad_grad_psi[psiIndex][0][4]; + grad_grad_grad_psi[psiIndex][1][2] = grad_grad_grad_psi[psiIndex][0][5]; + grad_grad_grad_psi[psiIndex][1][3] = grad_grad_grad_psi[psiIndex][0][4]; + grad_grad_grad_psi[psiIndex][1][6] = grad_grad_grad_psi[psiIndex][0][5]; + + grad_grad_grad_psi[psiIndex][2][0] = grad_grad_grad_psi[psiIndex][0][2]; + grad_grad_grad_psi[psiIndex][2][1] = grad_grad_grad_psi[psiIndex][0][5]; + grad_grad_grad_psi[psiIndex][2][2] = grad_grad_grad_psi[psiIndex][0][8]; + grad_grad_grad_psi[psiIndex][2][3] = grad_grad_grad_psi[psiIndex][0][5]; + grad_grad_grad_psi[psiIndex][2][6] = grad_grad_grad_psi[psiIndex][0][8]; + + grad_grad_grad_psi[psiIndex][1][4] = signed_one * f3_yyy_r; + grad_grad_grad_psi[psiIndex][1][5] = signed_one * f3_yyz_r; + grad_grad_grad_psi[psiIndex][1][8] = signed_one * f3_yzz_r; + + grad_grad_grad_psi[psiIndex][1][7] = grad_grad_grad_psi[psiIndex][1][5]; + grad_grad_grad_psi[psiIndex][2][4] = grad_grad_grad_psi[psiIndex][1][5]; + grad_grad_grad_psi[psiIndex][2][5] = grad_grad_grad_psi[psiIndex][1][8]; + grad_grad_grad_psi[psiIndex][2][7] = grad_grad_grad_psi[psiIndex][1][8]; + + grad_grad_grad_psi[psiIndex][2][8] = signed_one * f3_zzz_r; + } } -template -void SplineR2RT::evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) +template +void +SplineR2RT::evaluateVGHGH(const ParticleSetT& P, const int iat, + ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) { - const 
PointType& r = P.activeR(iat); - PointType ru; - int bc_sign = convertPos(r, ru); + const PointType& r = P.activeR(iat); + PointType ru; + int bc_sign = convertPos(r, ru); #pragma omp parallel - { - int first, last; - FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); - assign_vghgh(bc_sign, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first, last); - } + { + int first, last; + FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), + omp_get_thread_num(), first, last); + + spline2::evaluate3d_vghgh( + SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); + assign_vghgh( + bc_sign, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first, last); + } } template class SplineR2RT; diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h index 302d745f05..f265561e18 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h @@ -1,222 +1,269 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2019 QMCPACK developers. // -// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of +// Illinois at Urbana-Champaign Mark A. Berrill, +// berrillma@ornl.gov, Oak Ridge National Laboratory Ye Luo, +// yeluo@anl.gov, Argonne National Laboratory // -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// - #ifndef QMCPLUSPLUS_SPLINE_R2R_H #define QMCPLUSPLUS_SPLINE_R2R_H -#include -#include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h" #include "OhmmsSoA/VectorSoaContainer.h" -#include "spline2/MultiBspline.hpp" +#include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h" #include "Utilities/FairDivide.h" +#include "spline2/MultiBspline.hpp" + +#include namespace qmcplusplus { /** class to match ST real spline with BsplineSet::ValueType (real) SPOs * @tparam ST precision of spline * - * Requires temporage storage and multiplication of the sign of the real part of the phase - * Internal storage ST type arrays are aligned and padded. + * Requires temporage storage and multiplication of the sign of the real part of + * the phase Internal storage ST type arrays are aligned and padded. 
*/ -template +template class SplineR2RT : public BsplineSetT { public: - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = ST; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - // types for evaluation results - using TT = typename BsplineSetT::ValueType; - using GGGVector = typename BsplineSetT::GGGVector; - using ValueMatrix = typename BsplineSetT::ValueMatrix; - using GradVector = typename BsplineSetT::GradVector; - using HessVector = typename BsplineSetT::HessVector; - using ValueVector = typename BsplineSetT::ValueVector; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - using ghContainer_type = VectorSoaContainer; - - using RealType = typename SPOSetT::RealType; + using SplineType = typename bspline_traits::SplineType; + using BCType = typename bspline_traits::BCType; + using DataType = ST; + using PointType = TinyVector; + using SingleSplineType = UBspline_3d_d; + // types for evaluation results + using TT = typename BsplineSetT::ValueType; + using GGGVector = typename BsplineSetT::GGGVector; + using ValueMatrix = typename BsplineSetT::ValueMatrix; + using GradVector = typename BsplineSetT::GradVector; + using HessVector = typename BsplineSetT::HessVector; + using ValueVector = typename BsplineSetT::ValueVector; + + using vContainer_type = Vector>; + using gContainer_type = VectorSoaContainer; + using hContainer_type = VectorSoaContainer; + using ghContainer_type = VectorSoaContainer; + + using RealType = typename SPOSetT::RealType; private: - bool IsGamma; - ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. 
Hessian - Tensor GGt; - ///multi bspline set - std::shared_ptr> SplineInst; - - ///Copy of original splines for orbital rotation - std::shared_ptr> coef_copy_; + bool IsGamma; + ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to + /// CartesianUnit, e.g. Hessian + Tensor GGt; + /// multi bspline set + std::shared_ptr> SplineInst; - ///thread private ratios for reduction when using nested threading, numVP x numThread - Matrix ratios_private; + /// Copy of original splines for orbital rotation + std::shared_ptr> coef_copy_; + /// thread private ratios for reduction when using nested threading, numVP x + /// numThread + Matrix ratios_private; protected: - ///primitive cell - CrystalLattice PrimLattice; - /// intermediate result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; + /// primitive cell + CrystalLattice PrimLattice; + /// intermediate result vectors + vContainer_type myV; + vContainer_type myL; + gContainer_type myG; + hContainer_type myH; + ghContainer_type mygH; public: - SplineR2RT(const std::string& my_name) : BsplineSetT(my_name) {} - - SplineR2RT(const SplineR2RT& in); - virtual std::string getClassName() const override { return "SplineR2RT"; } - virtual std::string getKeyword() const override { return "SplineR2RT"; } - bool isComplex() const override { return false; }; - bool isRotationSupported() const override { return true; } - - std::unique_ptr> makeClone() const override { return std::make_unique>(*this); } - - /// Store an original copy of the spline coefficients for orbital rotation - void storeParamsBeforeRotation() override; - - /* - Implements orbital rotations via [1,2]. - Should be called by RotatedSPOs::apply_rotation() - - This implementation requires that NSPOs > Nelec. In other words, - if you want to run a orbopt wfn, you must include some virtual orbitals! - - Some results (using older Berkeley branch) were published in [3]. 
- - [1] Filippi & Fahy, JCP 112, (2000) - [2] Toulouse & Umrigar, JCP 126, (2007) - [3] Townsend et al., PRB 102, (2020) - */ - void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) override; - - inline void resizeStorage(size_t n, size_t nvals) - { - this->init_base(n); - const size_t npad = getAlignedSize(n); - this->myV.resize(npad); - this->myG.resize(npad); - this->myL.resize(npad); - this->myH.resize(npad); - this->mygH.resize(npad); - - IsGamma = ((this->HalfG[0] == 0) && (this->HalfG[1] == 0) && (this->HalfG[2] == 0)); - } - - void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } - - void gather_tables(Communicate* comm) - { - if (comm->size() == 1) - return; - const int Nbands = this->kPoints.size(); - const int Nbandgroups = comm->size(); - this->offset.resize(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, this->offset); - gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, this->offset); - } - - template - void create_spline(GT& xyz_g, BCT& xyz_bc) - { - GGt = dot(transpose(PrimLattice.G), PrimLattice.G); - SplineInst = std::make_shared>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - - app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " - << "for the coefficients in 3D spline orbital representation" << std::endl; - } - - inline void flush_zero() { SplineInst->flush_zero(); } - - void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); - - bool read_splines(hdf_archive& h5f); - - bool write_splines(hdf_archive& h5f); - - /** convert position in PrimLattice unit and return sign */ - inline int convertPos(const PointType& r, PointType& ru) - { - ru = PrimLattice.toUnit(r); - int bc_sign = 0; - for (int i = 0; i < this->D; i++) - if (-std::numeric_limits::epsilon() < ru[i] && ru[i] < 0) - ru[i] = ST(0.0); - else - { - ST img = std::floor(ru[i]); - ru[i] -= img; - bc_sign += 
this->HalfG[i] * (int)img; - } - return bc_sign; - } - - void assign_v(int bc_sign, const vContainer_type& myV, ValueVector& psi, int first, int last) const; - - void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override; - - void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override; - - void assign_vgl(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last) const; - - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ - void assign_vgl_from_l(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - void evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) override; - - void assign_vgh(int bc_sign, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi, int first, int last) - const; - - void evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override; - - void assign_vghgh(int bc_sign, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first = 0, - int last = -1) const; - - void evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; + SplineR2RT(const std::string& my_name) : BsplineSetT(my_name) + { + } + + SplineR2RT(const SplineR2RT& in); + virtual std::string + getClassName() const override + { + return "SplineR2RT"; + } + virtual std::string + getKeyword() const override + { + return "SplineR2RT"; + } + bool + isComplex() const override + { + return false; + }; + bool + isRotationSupported() const override + { + return true; + } + + std::unique_ptr> + makeClone() const override + { + return 
std::make_unique>(*this); + } + + /// Store an original copy of the spline coefficients for orbital rotation + void + storeParamsBeforeRotation() override; + + /* + Implements orbital rotations via [1,2]. + Should be called by RotatedSPOs::apply_rotation() + + This implementation requires that NSPOs > Nelec. In other words, + if you want to run a orbopt wfn, you must include some virtual orbitals! + + Some results (using older Berkeley branch) were published in [3]. + + [1] Filippi & Fahy, JCP 112, (2000) + [2] Toulouse & Umrigar, JCP 126, (2007) + [3] Townsend et al., PRB 102, (2020) + */ + void + applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) override; + + inline void + resizeStorage(size_t n, size_t nvals) + { + this->init_base(n); + const size_t npad = getAlignedSize(n); + this->myV.resize(npad); + this->myG.resize(npad); + this->myL.resize(npad); + this->myH.resize(npad); + this->mygH.resize(npad); + + IsGamma = ((this->HalfG[0] == 0) && (this->HalfG[1] == 0) && + (this->HalfG[2] == 0)); + } + + void + bcast_tables(Communicate* comm) + { + chunked_bcast(comm, SplineInst->getSplinePtr()); + } + + void + gather_tables(Communicate* comm) + { + if (comm->size() == 1) + return; + const int Nbands = this->kPoints.size(); + const int Nbandgroups = comm->size(); + this->offset.resize(Nbandgroups + 1, 0); + FairDivideLow(Nbands, Nbandgroups, this->offset); + gatherv(comm, SplineInst->getSplinePtr(), + SplineInst->getSplinePtr()->z_stride, this->offset); + } + + template + void + create_spline(GT& xyz_g, BCT& xyz_bc) + { + GGt = dot(transpose(PrimLattice.G), PrimLattice.G); + SplineInst = std::make_shared>(); + SplineInst->create(xyz_g, xyz_bc, myV.size()); + + app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) + << " MB allocated " + << "for the coefficients in 3D spline orbital representation" + << std::endl; + } + + inline void + flush_zero() + { + SplineInst->flush_zero(); + } + + void + set_spline(SingleSplineType* spline_r, 
SingleSplineType* spline_i, + int twist, int ispline, int level); + + bool + read_splines(hdf_archive& h5f); + + bool + write_splines(hdf_archive& h5f); + + /** convert position in PrimLattice unit and return sign */ + inline int + convertPos(const PointType& r, PointType& ru) + { + ru = PrimLattice.toUnit(r); + int bc_sign = 0; + for (int i = 0; i < this->D; i++) + if (-std::numeric_limits::epsilon() < ru[i] && ru[i] < 0) + ru[i] = ST(0.0); + else { + ST img = std::floor(ru[i]); + ru[i] -= img; + bc_sign += this->HalfG[i] * (int)img; + } + return bc_sign; + } + + void + assign_v(int bc_sign, const vContainer_type& myV, ValueVector& psi, + int first, int last) const; + + void + evaluateValue( + const ParticleSetT& P, const int iat, ValueVector& psi) override; + + void + evaluateDetRatios(const VirtualParticleSetT& VP, ValueVector& psi, + const ValueVector& psiinv, std::vector& ratios) override; + + void + assign_vgl(int bc_sign, ValueVector& psi, GradVector& dpsi, + ValueVector& d2psi, int first, int last) const; + + /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in + * cartesian + */ + void + assign_vgl_from_l( + int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); + + void + evaluateVGL(const ParticleSetT& P, const int iat, ValueVector& psi, + GradVector& dpsi, ValueVector& d2psi) override; + + void + assign_vgh(int bc_sign, ValueVector& psi, GradVector& dpsi, + HessVector& grad_grad_psi, int first, int last) const; + + void + evaluateVGH(const ParticleSetT& P, const int iat, ValueVector& psi, + GradVector& dpsi, HessVector& grad_grad_psi) override; + + void + assign_vghgh(int bc_sign, ValueVector& psi, GradVector& dpsi, + HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, int first = 0, + int last = -1) const; + + void + evaluateVGHGH(const ParticleSetT& P, const int iat, ValueVector& psi, + GradVector& dpsi, HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) override; + + template + friend struct 
SplineSetReader; + friend struct BsplineReaderBase; }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt index 61fa6756fc..8e7c080bd6 100644 --- a/src/QMCWaveFunctions/CMakeLists.txt +++ b/src/QMCWaveFunctions/CMakeLists.txt @@ -24,6 +24,7 @@ add_subdirectory(detail) set(WFBASE_SRCS OptimizableFunctorBase.cpp VariableSet.cpp + VariableSetT.cpp WaveFunctionPool.cpp WaveFunctionComponent.cpp WaveFunctionComponentBuilder.cpp @@ -41,14 +42,16 @@ set(WFBASE_SRCS HarmonicOscillator/SHOSetBuilder.cpp HarmonicOscillator/SHOSetBuilderT.cpp ExampleHeBuilder.cpp - ExampleHeComponent.cpp) + ExampleHeComponent.cpp + RotatedSPOsT.cpp + SpinorSetT.cpp) if(NOT QMC_COMPLEX) - set(WFBASE_SRCS ${WFBASE_SRCS} RotatedSPOs.cpp RotatedSPOsT.cpp) + set(WFBASE_SRCS ${WFBASE_SRCS} RotatedSPOs.cpp) endif(NOT QMC_COMPLEX) if(QMC_COMPLEX) - set(WFBASE_SRCS ${WFBASE_SRCS} SpinorSet.cpp SpinorSetT.cpp) + set(WFBASE_SRCS ${WFBASE_SRCS} SpinorSet.cpp) endif(QMC_COMPLEX) ######################## # build jastrows @@ -85,18 +88,21 @@ if(OHMMS_DIM MATCHES 3) LCAO/LCAOrbitalBuilderT.cpp LCAO/MultiQuinticSpline1D.cpp LCAO/AOBasisBuilder.cpp - LCAO/SoaLocalizedBasisSet.cpp) + LCAO/AOBasisBuilderT.cpp + LCAO/SoaLocalizedBasisSet.cpp + LCAO/SoaLocalizedBasisSetT.cpp + LCAO/LCAOSpinorBuilderT.cpp + LCAO/LCAOrbitalSetWithCorrectionT.cpp + LCAO/CuspCorrectionConstructionT.cpp + LCAO/SoaCuspCorrectionT.cpp) if(QMC_COMPLEX) - set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOSpinorBuilder.cpp LCAO/LCAOSpinorBuilder.cpp) + set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOSpinorBuilder.cpp) else(QMC_COMPLEX) #LCAO cusp correction is not ready for complex set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOrbitalSetWithCorrection.cpp - LCAO/LCAOrbitalSetWithCorrectionT.cpp LCAO/CuspCorrectionConstruction.cpp - LCAO/CuspCorrectionConstructionT.cpp - LCAO/SoaCuspCorrection.cpp - LCAO/SoaCuspCorrectionT.cpp) + LCAO/SoaCuspCorrection.cpp) endif(QMC_COMPLEX) if(HAVE_EINSPLINE) 
diff --git a/src/QMCWaveFunctions/CompositeSPOSetT.cpp b/src/QMCWaveFunctions/CompositeSPOSetT.cpp index 1a0c574b5b..31a3f71399 100644 --- a/src/QMCWaveFunctions/CompositeSPOSetT.cpp +++ b/src/QMCWaveFunctions/CompositeSPOSetT.cpp @@ -103,7 +103,7 @@ CompositeSPOSetT::makeClone() const template void CompositeSPOSetT::evaluateValue( - const ParticleSet& P, int iat, ValueVector& psi) + const ParticleSetT& P, int iat, ValueVector& psi) { int n = 0; for (int c = 0; c < components.size(); ++c) { @@ -117,7 +117,7 @@ CompositeSPOSetT::evaluateValue( template void -CompositeSPOSetT::evaluateVGL(const ParticleSet& P, int iat, +CompositeSPOSetT::evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { int n = 0; @@ -136,7 +136,7 @@ CompositeSPOSetT::evaluateVGL(const ParticleSet& P, int iat, template void -CompositeSPOSetT::evaluate_notranspose(const ParticleSet& P, int first, +CompositeSPOSetT::evaluate_notranspose(const ParticleSetT& P, int first, int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) { const int nat = last - first; @@ -155,7 +155,7 @@ CompositeSPOSetT::evaluate_notranspose(const ParticleSet& P, int first, template void -CompositeSPOSetT::evaluate_notranspose(const ParticleSet& P, int first, +CompositeSPOSetT::evaluate_notranspose(const ParticleSetT& P, int first, int last, ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet) { @@ -175,7 +175,7 @@ CompositeSPOSetT::evaluate_notranspose(const ParticleSet& P, int first, template void -CompositeSPOSetT::evaluate_notranspose(const ParticleSet& P, int first, +CompositeSPOSetT::evaluate_notranspose(const ParticleSetT& P, int first, int last, ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet, GGGMatrix& grad_grad_grad_logdet) { diff --git a/src/QMCWaveFunctions/CompositeSPOSetT.h b/src/QMCWaveFunctions/CompositeSPOSetT.h index db7344bff9..ec597a7eb4 100644 --- a/src/QMCWaveFunctions/CompositeSPOSetT.h +++ 
b/src/QMCWaveFunctions/CompositeSPOSetT.h @@ -79,10 +79,10 @@ class CompositeSPOSetT : public SPOSetT makeClone() const override; void - evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override; + evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override; void - evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, + evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; /// unimplemented functions call this to abort @@ -94,15 +94,15 @@ class CompositeSPOSetT : public SPOSetT // methods to be implemented in the future (possibly) void - evaluate_notranspose(const ParticleSet& P, int first, int last, + evaluate_notranspose(const ParticleSetT& P, int first, int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) override; void - evaluate_notranspose(const ParticleSet& P, int first, int last, + evaluate_notranspose(const ParticleSetT& P, int first, int last, ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& ddlogdet) override; void - evaluate_notranspose(const ParticleSet& P, int first, int last, + evaluate_notranspose(const ParticleSetT& P, int first, int last, ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& ddlogdet, GGGMatrix& dddlogdet) override; }; diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.cpp b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.cpp index 4257021557..b028eb039b 100644 --- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.cpp +++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.cpp @@ -1,13 +1,13 @@ #include "OhmmsData/AttributeSet.h" #include "LongRange/StructFact.h" -#include "LongRange/KContainer.h" +#include "LongRange/KContainerT.h" #include "QMCWaveFunctions/ElectronGas/FreeOrbitalT.h" #include "QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h" namespace qmcplusplus { template -FreeOrbitalBuilderT::FreeOrbitalBuilderT(ParticleSet& els, Communicate* comm, xmlNodePtr cur) 
+FreeOrbitalBuilderT::FreeOrbitalBuilderT(ParticleSetT& els, Communicate* comm, xmlNodePtr cur) : SPOSetBuilderT("PW", comm), targetPtcl(els) {} @@ -55,7 +55,7 @@ std::unique_ptr> FreeOrbitalBuilderT::createSPOSetFromXML(xmlNodeP // extract npw k-points from container // kpts_cart is sorted by magnitude std::vector kpts(npw); - KContainer klists; + KContainerT klists; RealType kcut = lattice.LR_kc; // to-do: reduce kcut to >~ kf klists.updateKLists(lattice, kcut, lattice.ndim, twist); diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h index dcd69fd4b8..06e4b730a3 100644 --- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h +++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h @@ -12,13 +12,13 @@ class FreeOrbitalBuilderT : public SPOSetBuilderT using RealType = typename SPOSetBuilderT::RealType; using PosType = typename SPOSetBuilderT::PosType; - FreeOrbitalBuilderT(ParticleSet& els, Communicate* comm, xmlNodePtr cur); + FreeOrbitalBuilderT(ParticleSetT& els, Communicate* comm, xmlNodePtr cur); ~FreeOrbitalBuilderT() {} std::unique_ptr> createSPOSetFromXML(xmlNodePtr cur) override; private: - ParticleSet& targetPtcl; + ParticleSetT& targetPtcl; bool in_list(const int j, const std::vector l); }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp index 82428ebfe1..497f65227e 100644 --- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp +++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp @@ -1,18 +1,23 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. 
// // Copyright (c) 2022 QMCPACK developers. // -// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// Yubo "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron -// William F Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory +// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore +// National Laboratory +// Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign Jeongnim Kim, +// jeongnim.kim@gmail.com, University of Illinois at +// Urbana-Champaign Jaron T. Krogel, krogeljt@ornl.gov, Oak +// Ridge National Laboratory Mark A. Berrill, +// berrillma@ornl.gov, Oak Ridge National Laboratory Yubo +// "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron +// William F Godoy, godoywf@ornl.gov, Oak Ridge National +// Laboratory // -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// #include "FreeOrbitalT.h" @@ -20,689 +25,653 @@ namespace qmcplusplus { - -template -void FreeOrbitalT::evaluateVGL(const ParticleSet& P, - int iat, - ValueVector& pvec, - GradVector& dpvec, - ValueVector& d2pvec) -{} - -template<> -void FreeOrbitalT::evaluateVGL(const ParticleSet& P, - int iat, - ValueVector& pvec, - GradVector& dpvec, - ValueVector& d2pvec) +template +void +FreeOrbitalT::evaluateVGL(const ParticleSetT& P, int iat, + ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec) { - const PosType& r = P.activeR(iat); - RealType 
sinkr, coskr; - for (int ik = mink; ik < maxk; ik++) - { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - const int j2 = 2 * ik; - const int j1 = j2 - 1; - pvec[j1] = coskr; - pvec[j2] = sinkr; - dpvec[j1] = -sinkr * kvecs[ik]; - dpvec[j2] = coskr * kvecs[ik]; - d2pvec[j1] = k2neg[ik] * coskr; - d2pvec[j2] = k2neg[ik] * sinkr; - } - pvec[0] = 1.0; - dpvec[0] = 0.0; - d2pvec[0] = 0.0; } -template<> -void FreeOrbitalT::evaluateVGL(const ParticleSet& P, - int iat, - ValueVector& pvec, - GradVector& dpvec, - ValueVector& d2pvec) +template <> +void +FreeOrbitalT::evaluateVGL(const ParticleSetT& P, int iat, + ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec) { - const PosType& r = P.activeR(iat); - RealType sinkr, coskr; - for (int ik = mink; ik < maxk; ik++) - { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - const int j2 = 2 * ik; - const int j1 = j2 - 1; - pvec[j1] = coskr; - pvec[j2] = sinkr; - dpvec[j1] = -sinkr * kvecs[ik]; - dpvec[j2] = coskr * kvecs[ik]; - d2pvec[j1] = k2neg[ik] * coskr; - d2pvec[j2] = k2neg[ik] * sinkr; - } - pvec[0] = 1.0; - dpvec[0] = 0.0; - d2pvec[0] = 0.0; + const PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < maxk; ik++) { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + pvec[j1] = coskr; + pvec[j2] = sinkr; + dpvec[j1] = -sinkr * kvecs[ik]; + dpvec[j2] = coskr * kvecs[ik]; + d2pvec[j1] = k2neg[ik] * coskr; + d2pvec[j2] = k2neg[ik] * sinkr; + } + pvec[0] = 1.0; + dpvec[0] = 0.0; + d2pvec[0] = 0.0; } - -template<> -void FreeOrbitalT>::evaluateVGL(const ParticleSet& P, - int iat, - ValueVector& pvec, - GradVector& dpvec, - ValueVector& d2pvec) +template <> +void +FreeOrbitalT::evaluateVGL(const ParticleSetT& P, int iat, + ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec) { - const PosType& r = P.activeR(iat); - RealType sinkr, coskr; - for (int ik = mink; ik < maxk; ik++) - { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - - pvec[ik] = 
ValueType(coskr, sinkr); - dpvec[ik] = ValueType(-sinkr, coskr) * kvecs[ik]; - d2pvec[ik] = ValueType(k2neg[ik] * coskr, k2neg[ik] * sinkr); - } + const PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < maxk; ik++) { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + pvec[j1] = coskr; + pvec[j2] = sinkr; + dpvec[j1] = -sinkr * kvecs[ik]; + dpvec[j2] = coskr * kvecs[ik]; + d2pvec[j1] = k2neg[ik] * coskr; + d2pvec[j2] = k2neg[ik] * sinkr; + } + pvec[0] = 1.0; + dpvec[0] = 0.0; + d2pvec[0] = 0.0; } -template<> -void FreeOrbitalT>::evaluateVGL(const ParticleSet& P, - int iat, - ValueVector& pvec, - GradVector& dpvec, - ValueVector& d2pvec) +template <> +void +FreeOrbitalT>::evaluateVGL( + const ParticleSetT>& P, int iat, ValueVector& pvec, + GradVector& dpvec, ValueVector& d2pvec) { - const PosType& r = P.activeR(iat); - RealType sinkr, coskr; - for (int ik = mink; ik < maxk; ik++) - { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - - pvec[ik] = ValueType(coskr, sinkr); - dpvec[ik] = ValueType(-sinkr, coskr) * kvecs[ik]; - d2pvec[ik] = ValueType(k2neg[ik] * coskr, k2neg[ik] * sinkr); - } -} + const PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < maxk; ik++) { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + pvec[ik] = ValueType(coskr, sinkr); + dpvec[ik] = ValueType(-sinkr, coskr) * kvecs[ik]; + d2pvec[ik] = ValueType(k2neg[ik] * coskr, k2neg[ik] * sinkr); + } +} -template<> -void FreeOrbitalT::evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec) +template <> +void +FreeOrbitalT>::evaluateVGL( + const ParticleSetT>& P, int iat, ValueVector& pvec, + GradVector& dpvec, ValueVector& d2pvec) { - const PosType& r = P.activeR(iat); - RealType sinkr, coskr; - for (int ik = mink; ik < maxk; ik++) - { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - const int j2 = 2 * ik; - const int j1 = j2 - 1; - pvec[j1] = coskr; - pvec[j2] = sinkr; - } - pvec[0] = 1.0; + const 
PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < maxk; ik++) { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + + pvec[ik] = ValueType(coskr, sinkr); + dpvec[ik] = ValueType(-sinkr, coskr) * kvecs[ik]; + d2pvec[ik] = ValueType(k2neg[ik] * coskr, k2neg[ik] * sinkr); + } } -template<> -void FreeOrbitalT::evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec) +template <> +void +FreeOrbitalT::evaluateValue( + const ParticleSetT& P, int iat, ValueVector& pvec) { - const PosType& r = P.activeR(iat); - RealType sinkr, coskr; - for (int ik = mink; ik < maxk; ik++) - { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - const int j2 = 2 * ik; - const int j1 = j2 - 1; - pvec[j1] = coskr; - pvec[j2] = sinkr; - } - pvec[0] = 1.0; + const PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < maxk; ik++) { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + pvec[j1] = coskr; + pvec[j2] = sinkr; + } + pvec[0] = 1.0; } -template<> -void FreeOrbitalT>::evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec) +template <> +void +FreeOrbitalT::evaluateValue( + const ParticleSetT& P, int iat, ValueVector& pvec) { - const PosType& r = P.activeR(iat); - RealType sinkr, coskr; - for (int ik = mink; ik < maxk; ik++) - { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - - pvec[ik] = std::complex(coskr, sinkr); - const int j2 = 2 * ik; - const int j1 = j2 - 1; - pvec[j1] = coskr; - pvec[j2] = sinkr; - } + const PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < maxk; ik++) { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + pvec[j1] = coskr; + pvec[j2] = sinkr; + } + pvec[0] = 1.0; } -template<> -void FreeOrbitalT>::evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec) +template <> +void +FreeOrbitalT>::evaluateValue( + const ParticleSetT>& P, int iat, ValueVector& pvec) { - const PosType& r = 
P.activeR(iat); - RealType sinkr, coskr; - for (int ik = mink; ik < maxk; ik++) - { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - - pvec[ik] = std::complex(coskr, sinkr); - const int j2 = 2 * ik; - const int j1 = j2 - 1; - pvec[j1] = coskr; - pvec[j2] = sinkr; - } + const PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < maxk; ik++) { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + + pvec[ik] = std::complex(coskr, sinkr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + pvec[j1] = coskr; + pvec[j2] = sinkr; + } } -template -void FreeOrbitalT::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - HessMatrix& d2phi_mat) -{} - - -template<> -void FreeOrbitalT::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - HessMatrix& d2phi_mat) +template <> +void +FreeOrbitalT>::evaluateValue( + const ParticleSetT>& P, int iat, ValueVector& pvec) { - RealType sinkr, coskr; - float phi_of_r; - for (int iat = first, i = 0; iat < last; iat++, i++) - { - ValueVector p(phi[i], this->OrbitalSetSize); - GradVector dp(dphi[i], this->OrbitalSetSize); - HessVector hess(d2phi_mat[i], this->OrbitalSetSize); - const PosType& r = P.activeR(iat); - for (int ik = mink; ik < maxk; ik++) - { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - const int j2 = 2 * ik; - const int j1 = j2 - 1; - p[j1] = coskr; - p[j2] = sinkr; - dp[j1] = -sinkr * kvecs[ik]; - dp[j2] = coskr * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) - { - hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; - hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) - { - hess[j1](la, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j2](la, lb) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j1](lb, la) = hess[j1](la, lb); - hess[j2](lb, la) = hess[j2](la, lb); - } - } + RealType sinkr, coskr; + for (int ik = 
mink; ik < maxk; ik++) { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + + pvec[ik] = std::complex(coskr, sinkr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + pvec[j1] = coskr; + pvec[j2] = sinkr; } - p[0] = 1.0; - dp[0] = 0.0; - hess[0] = 0.0; - } } -template<> -void FreeOrbitalT::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - HessMatrix& d2phi_mat) +template +void +FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, int first, + int last, ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat) { - RealType sinkr, coskr; - double phi_of_r; - for (int iat = first, i = 0; iat < last; iat++, i++) - { - ValueVector p(phi[i], this->OrbitalSetSize); - GradVector dp(dphi[i], this->OrbitalSetSize); - HessVector hess(d2phi_mat[i], this->OrbitalSetSize); +} - const PosType& r = P.activeR(iat); - for (int ik = mink; ik < maxk; ik++) - { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - const int j2 = 2 * ik; - const int j1 = j2 - 1; - p[j1] = coskr; - p[j2] = sinkr; - dp[j1] = -sinkr * kvecs[ik]; - dp[j2] = coskr * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) - { - hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; - hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) - { - hess[j1](la, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j2](la, lb) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j1](lb, la) = hess[j1](la, lb); - hess[j2](lb, la) = hess[j2](la, lb); +template <> +void +FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, + int first, int last, ValueMatrix& phi, GradMatrix& dphi, + HessMatrix& d2phi_mat) +{ + RealType sinkr, coskr; + float phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) { + ValueVector p(phi[i], this->OrbitalSetSize); + GradVector dp(dphi[i], this->OrbitalSetSize); + HessVector hess(d2phi_mat[i], this->OrbitalSetSize); + + const PosType& r = P.activeR(iat); + for 
(int ik = mink; ik < maxk; ik++) { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + p[j1] = coskr; + p[j2] = sinkr; + dp[j1] = -sinkr * kvecs[ik]; + dp[j2] = coskr * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) { + hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; + hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) { + hess[j1](la, lb) = + -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j2](la, lb) = + -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j1](lb, la) = hess[j1](la, lb); + hess[j2](lb, la) = hess[j2](la, lb); + } + } } - } + p[0] = 1.0; + dp[0] = 0.0; + hess[0] = 0.0; } - p[0] = 1.0; - dp[0] = 0.0; - hess[0] = 0.0; - } } - -template<> -void FreeOrbitalT>::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - HessMatrix& d2phi_mat) +template <> +void +FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, + int first, int last, ValueMatrix& phi, GradMatrix& dphi, + HessMatrix& d2phi_mat) { - RealType sinkr, coskr; - std::complex phi_of_r; - for (int iat = first, i = 0; iat < last; iat++, i++) - { - ValueVector p(phi[i], this->OrbitalSetSize); - GradVector dp(dphi[i], this->OrbitalSetSize); - HessVector hess(d2phi_mat[i], this->OrbitalSetSize); - - const PosType& r = P.activeR(iat); - for (int ik = mink; ik < maxk; ik++) - { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - - phi_of_r = std::complex(coskr, sinkr); - p[ik] = phi_of_r; - - dp[ik] = std::complex(-sinkr, coskr) * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) - { - hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) - { - hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[ik](lb, la) = hess[ik](la, lb); + RealType sinkr, coskr; + double phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) { + ValueVector 
p(phi[i], this->OrbitalSetSize); + GradVector dp(dphi[i], this->OrbitalSetSize); + HessVector hess(d2phi_mat[i], this->OrbitalSetSize); + + const PosType& r = P.activeR(iat); + for (int ik = mink; ik < maxk; ik++) { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + p[j1] = coskr; + p[j2] = sinkr; + dp[j1] = -sinkr * kvecs[ik]; + dp[j2] = coskr * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) { + hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; + hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) { + hess[j1](la, lb) = + -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j2](la, lb) = + -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j1](lb, la) = hess[j1](la, lb); + hess[j2](lb, la) = hess[j2](la, lb); + } + } } - } + p[0] = 1.0; + dp[0] = 0.0; + hess[0] = 0.0; } - } } -template<> -void FreeOrbitalT>::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - HessMatrix& d2phi_mat) +template <> +void +FreeOrbitalT>::evaluate_notranspose( + const ParticleSetT>& P, int first, int last, + ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat) { - RealType sinkr, coskr; - std::complex phi_of_r; - for (int iat = first, i = 0; iat < last; iat++, i++) - { - ValueVector p(phi[i], this->OrbitalSetSize); - GradVector dp(dphi[i], this->OrbitalSetSize); - HessVector hess(d2phi_mat[i], this->OrbitalSetSize); - - const PosType& r = P.activeR(iat); - for (int ik = mink; ik < maxk; ik++) - { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - - phi_of_r = std::complex(coskr, sinkr); - p[ik] = phi_of_r; - - dp[ik] = std::complex(-sinkr, coskr) * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) - { - hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) - { - hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[ik](lb, la) = 
hess[ik](la, lb); + RealType sinkr, coskr; + std::complex phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) { + ValueVector p(phi[i], this->OrbitalSetSize); + GradVector dp(dphi[i], this->OrbitalSetSize); + HessVector hess(d2phi_mat[i], this->OrbitalSetSize); + + const PosType& r = P.activeR(iat); + for (int ik = mink; ik < maxk; ik++) { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + + phi_of_r = std::complex(coskr, sinkr); + p[ik] = phi_of_r; + + dp[ik] = std::complex(-sinkr, coskr) * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) { + hess[ik](la, la) = + -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) { + hess[ik](la, lb) = + -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[ik](lb, la) = hess[ik](la, lb); + } + } } - } } - } } -template -void FreeOrbitalT::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - HessMatrix& d2phi_mat, - GGGMatrix& d3phi_mat) -{} - -template<> -void FreeOrbitalT::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - HessMatrix& d2phi_mat, - GGGMatrix& d3phi_mat) +template <> +void +FreeOrbitalT>::evaluate_notranspose( + const ParticleSetT>& P, int first, int last, + ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat) { - RealType sinkr, coskr; - ValueType phi_of_r; - for (int iat = first, i = 0; iat < last; iat++, i++) - { - ValueVector p(phi[i], OrbitalSetSize); - GradVector dp(dphi[i], OrbitalSetSize); - HessVector hess(d2phi_mat[i], OrbitalSetSize); - GGGVector ggg(d3phi_mat[i], OrbitalSetSize); - - const PosType& r = P.activeR(iat); - for (int ik = mink; ik < maxk; ik++) - { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - const int j2 = 2 * ik; - const int j1 = j2 - 1; - p[j1] = coskr; - p[j2] = sinkr; - dp[j1] = -sinkr * kvecs[ik]; - dp[j2] = coskr * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) - { - hess[j1](la, la) = -coskr * 
(kvecs[ik])[la] * (kvecs[ik])[la]; - hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; - ggg[j1][la](la, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la]; - ggg[j2][la](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) - { - hess[j1](la, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j2](la, lb) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j1](lb, la) = hess[j1](la, lb); - hess[j2](lb, la) = hess[j2](la, lb); - ggg[j1][la](lb, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la]; - ggg[j2][la](lb, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la]; - ggg[j1][la](la, lb) = ggg[j1][la](lb, la); - ggg[j2][la](la, lb) = ggg[j2][la](lb, la); - ggg[j1][lb](la, la) = ggg[j1][la](lb, la); - ggg[j2][lb](la, la) = ggg[j2][la](lb, la); - ggg[j1][la](lb, lb) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb]; - ggg[j2][la](lb, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb]; - ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb); - ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb); - ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb); - ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb); - for (int lc = lb + 1; lc < OHMMS_DIM; lc++) - { - ggg[j1][la](lb, lc) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc]; - ggg[j2][la](lb, lc) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc]; - ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc); - ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc); - ggg[j1][lb](la, lc) = ggg[j1][la](lb, lc); - ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc); - ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc); - ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc); - ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc); - ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc); - ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc); - ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc); - } + RealType sinkr, coskr; + std::complex phi_of_r; + for (int iat = first, i = 0; iat < last; 
iat++, i++) { + ValueVector p(phi[i], this->OrbitalSetSize); + GradVector dp(dphi[i], this->OrbitalSetSize); + HessVector hess(d2phi_mat[i], this->OrbitalSetSize); + + const PosType& r = P.activeR(iat); + for (int ik = mink; ik < maxk; ik++) { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + + phi_of_r = std::complex(coskr, sinkr); + p[ik] = phi_of_r; + + dp[ik] = std::complex(-sinkr, coskr) * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) { + hess[ik](la, la) = + -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) { + hess[ik](la, lb) = + -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[ik](lb, la) = hess[ik](la, lb); + } + } } - } } - - p[0] = 1.0; - dp[0] = 0.0; - hess[0] = 0.0; - ggg[0] = 0.0; - } } -template<> -void FreeOrbitalT::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - HessMatrix& d2phi_mat, - GGGMatrix& d3phi_mat) +template +void +FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, int first, + int last, ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat, + GGGMatrix& d3phi_mat) { - RealType sinkr, coskr; - ValueType phi_of_r; - for (int iat = first, i = 0; iat < last; iat++, i++) - { - ValueVector p(phi[i], OrbitalSetSize); - GradVector dp(dphi[i], OrbitalSetSize); - HessVector hess(d2phi_mat[i], OrbitalSetSize); - GGGVector ggg(d3phi_mat[i], OrbitalSetSize); +} - const PosType& r = P.activeR(iat); - for (int ik = mink; ik < maxk; ik++) - { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - const int j2 = 2 * ik; - const int j1 = j2 - 1; - p[j1] = coskr; - p[j2] = sinkr; - dp[j1] = -sinkr * kvecs[ik]; - dp[j2] = coskr * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) - { - hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; - hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; - ggg[j1][la](la, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la]; - ggg[j2][la](la, la) = -coskr * 
(kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) - { - hess[j1](la, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j2](la, lb) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j1](lb, la) = hess[j1](la, lb); - hess[j2](lb, la) = hess[j2](la, lb); - ggg[j1][la](lb, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la]; - ggg[j2][la](lb, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la]; - ggg[j1][la](la, lb) = ggg[j1][la](lb, la); - ggg[j2][la](la, lb) = ggg[j2][la](lb, la); - ggg[j1][lb](la, la) = ggg[j1][la](lb, la); - ggg[j2][lb](la, la) = ggg[j2][la](lb, la); - ggg[j1][la](lb, lb) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb]; - ggg[j2][la](lb, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb]; - ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb); - ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb); - ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb); - ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb); - for (int lc = lb + 1; lc < OHMMS_DIM; lc++) - { - ggg[j1][la](lb, lc) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc]; - ggg[j2][la](lb, lc) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc]; - ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc); - ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc); - ggg[j1][lb](la, lc) = ggg[j1][la](lb, lc); - ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc); - ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc); - ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc); - ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc); - ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc); - ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc); - ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc); - } +template <> +void +FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, + int first, int last, ValueMatrix& phi, GradMatrix& dphi, + HessMatrix& d2phi_mat, GGGMatrix& d3phi_mat) +{ + RealType sinkr, coskr; + ValueType phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) { + ValueVector 
p(phi[i], OrbitalSetSize); + GradVector dp(dphi[i], OrbitalSetSize); + HessVector hess(d2phi_mat[i], OrbitalSetSize); + GGGVector ggg(d3phi_mat[i], OrbitalSetSize); + + const PosType& r = P.activeR(iat); + for (int ik = mink; ik < maxk; ik++) { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + p[j1] = coskr; + p[j2] = sinkr; + dp[j1] = -sinkr * kvecs[ik]; + dp[j2] = coskr * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) { + hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; + hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; + ggg[j1][la](la, la) = + sinkr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la]; + ggg[j2][la](la, la) = -coskr * (kvecs[ik])[la] * + (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) { + hess[j1](la, lb) = + -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j2](la, lb) = + -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j1](lb, la) = hess[j1](la, lb); + hess[j2](lb, la) = hess[j2](la, lb); + ggg[j1][la](lb, la) = sinkr * (kvecs[ik])[la] * + (kvecs[ik])[lb] * (kvecs[ik])[la]; + ggg[j2][la](lb, la) = -coskr * (kvecs[ik])[la] * + (kvecs[ik])[lb] * (kvecs[ik])[la]; + ggg[j1][la](la, lb) = ggg[j1][la](lb, la); + ggg[j2][la](la, lb) = ggg[j2][la](lb, la); + ggg[j1][lb](la, la) = ggg[j1][la](lb, la); + ggg[j2][lb](la, la) = ggg[j2][la](lb, la); + ggg[j1][la](lb, lb) = sinkr * (kvecs[ik])[la] * + (kvecs[ik])[lb] * (kvecs[ik])[lb]; + ggg[j2][la](lb, lb) = -coskr * (kvecs[ik])[la] * + (kvecs[ik])[lb] * (kvecs[ik])[lb]; + ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb); + ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb); + ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb); + ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb); + for (int lc = lb + 1; lc < OHMMS_DIM; lc++) { + ggg[j1][la](lb, lc) = sinkr * (kvecs[ik])[la] * + (kvecs[ik])[lb] * (kvecs[ik])[lc]; + ggg[j2][la](lb, lc) = -coskr * (kvecs[ik])[la] * + (kvecs[ik])[lb] * (kvecs[ik])[lc]; + ggg[j1][la](lc, lb) = 
ggg[j1][la](lb, lc); + ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc); + ggg[j1][lb](la, lc) = ggg[j1][la](lb, lc); + ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc); + ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc); + ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc); + ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc); + ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc); + ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc); + ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc); + } + } + } } - } - } - p[0] = 1.0; - dp[0] = 0.0; - hess[0] = 0.0; - ggg[0] = 0.0; - } + p[0] = 1.0; + dp[0] = 0.0; + hess[0] = 0.0; + ggg[0] = 0.0; + } } -template<> -void FreeOrbitalT>::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - HessMatrix& d2phi_mat, - GGGMatrix& d3phi_mat) +template <> +void +FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, + int first, int last, ValueMatrix& phi, GradMatrix& dphi, + HessMatrix& d2phi_mat, GGGMatrix& d3phi_mat) { - RealType sinkr, coskr; - ValueType phi_of_r; - for (int iat = first, i = 0; iat < last; iat++, i++) - { - ValueVector p(phi[i], OrbitalSetSize); - GradVector dp(dphi[i], OrbitalSetSize); - HessVector hess(d2phi_mat[i], OrbitalSetSize); - GGGVector ggg(d3phi_mat[i], OrbitalSetSize); - - const PosType& r = P.activeR(iat); - for (int ik = mink; ik < maxk; ik++) - { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - const ValueType compi(0, 1); - phi_of_r = ValueType(coskr, sinkr); - p[ik] = phi_of_r; - dp[ik] = compi * phi_of_r * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) - { - hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) - { - hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[ik](lb, la) = hess[ik](la, lb); + RealType sinkr, coskr; + ValueType phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) { + ValueVector p(phi[i], OrbitalSetSize); + GradVector dp(dphi[i], OrbitalSetSize); + HessVector hess(d2phi_mat[i], 
OrbitalSetSize); + GGGVector ggg(d3phi_mat[i], OrbitalSetSize); + + const PosType& r = P.activeR(iat); + for (int ik = mink; ik < maxk; ik++) { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + p[j1] = coskr; + p[j2] = sinkr; + dp[j1] = -sinkr * kvecs[ik]; + dp[j2] = coskr * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) { + hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; + hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; + ggg[j1][la](la, la) = + sinkr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la]; + ggg[j2][la](la, la) = -coskr * (kvecs[ik])[la] * + (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) { + hess[j1](la, lb) = + -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j2](la, lb) = + -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j1](lb, la) = hess[j1](la, lb); + hess[j2](lb, la) = hess[j2](la, lb); + ggg[j1][la](lb, la) = sinkr * (kvecs[ik])[la] * + (kvecs[ik])[lb] * (kvecs[ik])[la]; + ggg[j2][la](lb, la) = -coskr * (kvecs[ik])[la] * + (kvecs[ik])[lb] * (kvecs[ik])[la]; + ggg[j1][la](la, lb) = ggg[j1][la](lb, la); + ggg[j2][la](la, lb) = ggg[j2][la](lb, la); + ggg[j1][lb](la, la) = ggg[j1][la](lb, la); + ggg[j2][lb](la, la) = ggg[j2][la](lb, la); + ggg[j1][la](lb, lb) = sinkr * (kvecs[ik])[la] * + (kvecs[ik])[lb] * (kvecs[ik])[lb]; + ggg[j2][la](lb, lb) = -coskr * (kvecs[ik])[la] * + (kvecs[ik])[lb] * (kvecs[ik])[lb]; + ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb); + ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb); + ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb); + ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb); + for (int lc = lb + 1; lc < OHMMS_DIM; lc++) { + ggg[j1][la](lb, lc) = sinkr * (kvecs[ik])[la] * + (kvecs[ik])[lb] * (kvecs[ik])[lc]; + ggg[j2][la](lb, lc) = -coskr * (kvecs[ik])[la] * + (kvecs[ik])[lb] * (kvecs[ik])[lc]; + ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc); + ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc); + ggg[j1][lb](la, lc) = 
ggg[j1][la](lb, lc); + ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc); + ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc); + ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc); + ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc); + ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc); + ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc); + ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc); + } + } + } } - } - for (int la = 0; la < OHMMS_DIM; la++) - { - ggg[ik][la] = compi * (kvecs[ik])[la] * hess[ik]; - } + + p[0] = 1.0; + dp[0] = 0.0; + hess[0] = 0.0; + ggg[0] = 0.0; } - } } -template<> -void FreeOrbitalT>::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - HessMatrix& d2phi_mat, - GGGMatrix& d3phi_mat) +template <> +void +FreeOrbitalT>::evaluate_notranspose( + const ParticleSetT>& P, int first, int last, + ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat, + GGGMatrix& d3phi_mat) { - RealType sinkr, coskr; - ValueType phi_of_r; - for (int iat = first, i = 0; iat < last; iat++, i++) - { - ValueVector p(phi[i], OrbitalSetSize); - GradVector dp(dphi[i], OrbitalSetSize); - HessVector hess(d2phi_mat[i], OrbitalSetSize); - GGGVector ggg(d3phi_mat[i], OrbitalSetSize); + RealType sinkr, coskr; + ValueType phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) { + ValueVector p(phi[i], OrbitalSetSize); + GradVector dp(dphi[i], OrbitalSetSize); + HessVector hess(d2phi_mat[i], OrbitalSetSize); + GGGVector ggg(d3phi_mat[i], OrbitalSetSize); + + const PosType& r = P.activeR(iat); + for (int ik = mink; ik < maxk; ik++) { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const ValueType compi(0, 1); + phi_of_r = ValueType(coskr, sinkr); + p[ik] = phi_of_r; + dp[ik] = compi * phi_of_r * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) { + hess[ik](la, la) = + -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) { + hess[ik](la, lb) = + -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[ik](lb, la) = hess[ik](la, 
lb); + } + } + for (int la = 0; la < OHMMS_DIM; la++) { + ggg[ik][la] = compi * (kvecs[ik])[la] * hess[ik]; + } + } + } +} - const PosType& r = P.activeR(iat); - for (int ik = mink; ik < maxk; ik++) - { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - const ValueType compi(0, 1); - phi_of_r = ValueType(coskr, sinkr); - p[ik] = phi_of_r; - dp[ik] = compi * phi_of_r * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) - { - hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) - { - hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[ik](lb, la) = hess[ik](la, lb); +template <> +void +FreeOrbitalT>::evaluate_notranspose( + const ParticleSetT>& P, int first, int last, + ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat, + GGGMatrix& d3phi_mat) +{ + RealType sinkr, coskr; + ValueType phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) { + ValueVector p(phi[i], OrbitalSetSize); + GradVector dp(dphi[i], OrbitalSetSize); + HessVector hess(d2phi_mat[i], OrbitalSetSize); + GGGVector ggg(d3phi_mat[i], OrbitalSetSize); + + const PosType& r = P.activeR(iat); + for (int ik = mink; ik < maxk; ik++) { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const ValueType compi(0, 1); + phi_of_r = ValueType(coskr, sinkr); + p[ik] = phi_of_r; + dp[ik] = compi * phi_of_r * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) { + hess[ik](la, la) = + -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) { + hess[ik](la, lb) = + -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[ik](lb, la) = hess[ik](la, lb); + } + } + for (int la = 0; la < OHMMS_DIM; la++) { + ggg[ik][la] = compi * (kvecs[ik])[la] * hess[ik]; + } } - } - for (int la = 0; la < OHMMS_DIM; la++) - { - ggg[ik][la] = compi * (kvecs[ik])[la] * hess[ik]; - } } - } } // generic implementation -template +template FreeOrbitalT::~FreeOrbitalT() -{} - -template -void 
FreeOrbitalT::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - ValueMatrix& d2phi) { - for (int iat = first, i = 0; iat < last; iat++, i++) - { - ValueVector p(phi[i], this->OrbitalSetSize); - GradVector dp(dphi[i], this->OrbitalSetSize); - ValueVector d2p(d2phi[i], this->OrbitalSetSize); - evaluateVGL(P, iat, p, dp, d2p); - } } -//Explicit template specialization -template<> -FreeOrbitalT::FreeOrbitalT(const std::string& my_name, const std::vector& kpts_cart) - : SPOSetT(my_name), - kvecs(kpts_cart), - mink(1), // treat k=0 as special case - maxk(kpts_cart.size()), - k2neg(maxk) +template +void +FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, int first, + int last, ValueMatrix& phi, GradMatrix& dphi, ValueMatrix& d2phi) { - this->OrbitalSetSize = 2 * maxk - 1; // k=0 has no (cos, sin) split, SPOSet member - for (int ik = 0; ik < maxk; ik++) - k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); + for (int iat = first, i = 0; iat < last; iat++, i++) { + ValueVector p(phi[i], this->OrbitalSetSize); + GradVector dp(dphi[i], this->OrbitalSetSize); + ValueVector d2p(d2phi[i], this->OrbitalSetSize); + evaluateVGL(P, iat, p, dp, d2p); + } } -template<> -FreeOrbitalT::FreeOrbitalT(const std::string& my_name, const std::vector& kpts_cart) - : SPOSetT(my_name), - kvecs(kpts_cart), - mink(1), // treat k=0 as special case - maxk(kpts_cart.size()), - k2neg(maxk) +// Explicit template specialization +template <> +FreeOrbitalT::FreeOrbitalT( + const std::string& my_name, const std::vector& kpts_cart) : + SPOSetT(my_name), + kvecs(kpts_cart), + mink(1), // treat k=0 as special case + maxk(kpts_cart.size()), + k2neg(maxk) { - this->OrbitalSetSize = 2 * maxk - 1; // k=0 has no (cos, sin) split, SPOSet member - for (int ik = 0; ik < maxk; ik++) - k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); + this->OrbitalSetSize = + 2 * maxk - 1; // k=0 has no (cos, sin) split, SPOSet member + for (int ik = 0; ik < maxk; ik++) + k2neg[ik] = 
-dot(kvecs[ik], kvecs[ik]); } -template<> -FreeOrbitalT>::FreeOrbitalT(const std::string& my_name, const std::vector& kpts_cart) - : SPOSetT>(my_name), - kvecs(kpts_cart), - mink(0), // treat k=0 as special case - maxk(kpts_cart.size()), - k2neg(maxk) +template <> +FreeOrbitalT::FreeOrbitalT( + const std::string& my_name, const std::vector& kpts_cart) : + SPOSetT(my_name), + kvecs(kpts_cart), + mink(1), // treat k=0 as special case + maxk(kpts_cart.size()), + k2neg(maxk) { - this->OrbitalSetSize = maxk; // SPOSet member - for (int ik = 0; ik < maxk; ik++) - k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); + this->OrbitalSetSize = + 2 * maxk - 1; // k=0 has no (cos, sin) split, SPOSet member + for (int ik = 0; ik < maxk; ik++) + k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); } -template<> -FreeOrbitalT>::FreeOrbitalT(const std::string& my_name, const std::vector& kpts_cart) - : SPOSetT>(my_name), - kvecs(kpts_cart), - mink(0), // treat k=0 as special case - maxk(kpts_cart.size()), - k2neg(maxk) +template <> +FreeOrbitalT>::FreeOrbitalT( + const std::string& my_name, const std::vector& kpts_cart) : + SPOSetT>(my_name), + kvecs(kpts_cart), + mink(0), // treat k=0 as special case + maxk(kpts_cart.size()), + k2neg(maxk) { - this->OrbitalSetSize = maxk; // SPOSet member - for (int ik = 0; ik < maxk; ik++) - k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); + this->OrbitalSetSize = maxk; // SPOSet member + for (int ik = 0; ik < maxk; ik++) + k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); } +template <> +FreeOrbitalT>::FreeOrbitalT( + const std::string& my_name, const std::vector& kpts_cart) : + SPOSetT>(my_name), + kvecs(kpts_cart), + mink(0), // treat k=0 as special case + maxk(kpts_cart.size()), + k2neg(maxk) +{ + this->OrbitalSetSize = maxk; // SPOSet member + for (int ik = 0; ik < maxk; ik++) + k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); +} -template -void FreeOrbitalT::report(const std::string& pad) const +template +void +FreeOrbitalT::report(const std::string& pad) const { - app_log() << pad << 
"FreeOrbital report" << std::endl; - for (int ik = 0; ik < kvecs.size(); ik++) - { - app_log() << pad << ik << " " << kvecs[ik] << std::endl; - } - app_log() << pad << "end FreeOrbital report" << std::endl; - app_log().flush(); + app_log() << pad << "FreeOrbital report" << std::endl; + for (int ik = 0; ik < kvecs.size(); ik++) { + app_log() << pad << ik << " " << kvecs[ik] << std::endl; + } + app_log() << pad << "end FreeOrbital report" << std::endl; + app_log().flush(); } template class FreeOrbitalT; @@ -710,5 +679,4 @@ template class FreeOrbitalT; template class FreeOrbitalT>; template class FreeOrbitalT>; - } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h index d2f2f450b8..18e8899cca 100644 --- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h +++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h @@ -1,18 +1,23 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2022 QMCPACK developers. // -// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// Yubo "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron -// William F Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory +// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore +// National Laboratory +// Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign Jeongnim Kim, +// jeongnim.kim@gmail.com, University of Illinois at +// Urbana-Champaign Jaron T. Krogel, krogeljt@ornl.gov, Oak +// Ridge National Laboratory Mark A. Berrill, +// berrillma@ornl.gov, Oak Ridge National Laboratory Yubo +// "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron +// William F Godoy, godoywf@ornl.gov, Oak Ridge National +// Laboratory // -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// #ifndef QMCPLUSPLUS_FREE_ORBITALT_H @@ -22,66 +27,74 @@ namespace qmcplusplus { -template +template class FreeOrbitalT : public SPOSetT { public: - using ValueVector = typename SPOSetT::ValueVector; - using GradVector = typename SPOSetT::GradVector; - using HessVector = typename SPOSetT::HessVector; - using ValueMatrix = typename SPOSetT::ValueMatrix; - using GradMatrix = typename SPOSetT::GradMatrix; - using HessMatrix = typename SPOSetT::HessMatrix; - using GGGMatrix = typename SPOSetT::GGGMatrix; - using RealType = typename SPOSetT::RealType; - using PosType = typename SPOSetT::PosType; - using ValueType = typename SPOSetT::ValueType; + using ValueVector = typename SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; + using HessVector = typename SPOSetT::HessVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + using HessMatrix = typename 
SPOSetT::HessMatrix; + using GGGMatrix = typename SPOSetT::GGGMatrix; + using RealType = typename SPOSetT::RealType; + using PosType = typename SPOSetT::PosType; + using ValueType = typename SPOSetT::ValueType; - FreeOrbitalT(const std::string& my_name, const std::vector& kpts_cart); - ~FreeOrbitalT(); + FreeOrbitalT( + const std::string& my_name, const std::vector& kpts_cart); + ~FreeOrbitalT(); - inline std::string getClassName() const final { return "FreeOrbital"; } + inline std::string + getClassName() const final + { + return "FreeOrbital"; + } - // phi[i][j] is phi_j(r_i), i.e. electron i in orbital j - // i \in [first, last) - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - ValueMatrix& d2phi) final; + // phi[i][j] is phi_j(r_i), i.e. electron i in orbital j + // i \in [first, last) + void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& phi, GradMatrix& dphi, ValueMatrix& d2phi) final; - // plug r_i into all orbitals - void evaluateVGL(const ParticleSet& P, int i, ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec) final; - void evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec) final; + // plug r_i into all orbitals + void + evaluateVGL(const ParticleSetT& P, int i, ValueVector& pvec, + GradVector& dpvec, ValueVector& d2pvec) final; + void + evaluateValue(const ParticleSetT& P, int iat, ValueVector& pvec) final; - // hessian matrix is needed by backflow - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - HessMatrix& d2phi_mat) final; + // hessian matrix is needed by backflow + void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat) final; - // derivative of hessian is needed to optimize backflow - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - 
GradMatrix& dphi, - HessMatrix& d2phi_mat, - GGGMatrix& d3phi_mat) override; + // derivative of hessian is needed to optimize backflow + void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat, + GGGMatrix& d3phi_mat) override; - void report(const std::string& pad) const override; - // ---- begin required overrides - std::unique_ptr> makeClone() const final { return std::make_unique>(*this); } - void setOrbitalSetSize(int norbs) final { throw std::runtime_error("not implemented"); } - // required overrides end ---- + void + report(const std::string& pad) const override; + // ---- begin required overrides + std::unique_ptr> + makeClone() const final + { + return std::make_unique>(*this); + } + void + setOrbitalSetSize(int norbs) final + { + throw std::runtime_error("not implemented"); + } + // required overrides end ---- private: - const std::vector kvecs; // kvecs vectors - const int mink; // minimum k index - const int maxk; // maximum number of kvecs vectors - std::vector k2neg; // minus kvecs^2 + const std::vector kvecs; // kvecs vectors + const int mink; // minimum k index + const int maxk; // maximum number of kvecs vectors + std::vector k2neg; // minus kvecs^2 }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp index 0e1638f765..77ae1eda5a 100644 --- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp +++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp @@ -1,204 +1,211 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. 
// // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. // -// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National +// Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National +// Laboratory // -// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National +// Laboratory ////////////////////////////////////////////////////////////////////////////////////// - #include "SHOSetBuilderT.h" -#include "QMCWaveFunctions/SPOSetInputInfo.h" + #include "OhmmsData/AttributeSet.h" +#include "QMCWaveFunctions/SPOSetInputInfo.h" #include "Utilities/IteratorUtility.h" #include "Utilities/string_utils.h" - namespace qmcplusplus { -template -SHOSetBuilderT::SHOSetBuilderT(ParticleSet& P, Communicate* comm) : SPOSetBuilderT("SHO", comm), Ps(P) +template +SHOSetBuilderT::SHOSetBuilderT(ParticleSetT& P, Communicate* comm) : + SPOSetBuilderT("SHO", comm), + Ps(P) { - this->ClassName = "SHOSetBuilderT"; - this->legacy = false; - app_log() << "Constructing SHOSetBuilderT" << std::endl; - reset(); + this->ClassName = "SHOSetBuilderT"; + this->legacy = false; + app_log() << "Constructing SHOSetBuilderT" << std::endl; + reset(); } -template +template SHOSetBuilderT::~SHOSetBuilderT() = default; -template -void SHOSetBuilderT::reset() +template +void +SHOSetBuilderT::reset() { - nstates = 0; - mass = -1.0; - energy = -1.0; - length = -1.0; - center = 0.0; + nstates = 0; + mass = -1.0; + energy = -1.0; + length = -1.0; + center = 0.0; } -template -std::unique_ptr> SHOSetBuilderT::createSPOSetFromXML(xmlNodePtr cur) +template +std::unique_ptr> +SHOSetBuilderT::createSPOSetFromXML(xmlNodePtr cur) { - APP_ABORT("SHOSetBuilderT::createSPOSetFromXML SHOSetBuilder should not use legacy interface"); + 
APP_ABORT("SHOSetBuilderT::createSPOSetFromXML SHOSetBuilder should not " + "use legacy interface"); - app_log() << "SHOSetBuilderT::createSHOSet(xml) " << std::endl; + app_log() << "SHOSetBuilderT::createSHOSet(xml) " << std::endl; - SPOSetInputInfo input(cur); + SPOSetInputInfo input(cur); - return createSPOSet(cur, input); + return createSPOSet(cur, input); } -template -std::unique_ptr> SHOSetBuilderT::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) +template +std::unique_ptr> +SHOSetBuilderT::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) { - app_log() << "SHOSetBuilderT::createSHOSet(indices) " << std::endl; - reset(); - - // read parameters - std::string spo_name = "sho"; - OhmmsAttributeSet attrib; - attrib.add(spo_name, "name"); - attrib.add(spo_name, "id"); - attrib.add(mass, "mass"); - attrib.add(energy, "energy"); - attrib.add(energy, "frequency"); - attrib.add(length, "length"); - attrib.add(center, "center"); - attrib.add(nstates, "size"); - attrib.put(cur); - - if (energy < 0.0) - energy = 1.0; - if (mass < 0.0 && length < 0.0) - length = 1.0; - if (mass < 0.0) - mass = 1.0 / (energy * length * length); - else if (length < 0.0) - length = 1.0 / std::sqrt(mass * energy); - - // initialize states and/or adjust basis - int smax = -1; - if (input.has_index_info) - smax = std::max(smax, input.max_index()); - if (input.has_energy_info) - { - smax = std::max(smax, (int)std::ceil(input.max_energy() / energy)); - } - if (smax < 0) - APP_ABORT("SHOSetBuilderT::Initialize\n invalid basis size"); - update_basis_states(smax); - - // create sho state request - indices_t& indices = input.get_indices(this->states); - std::vector sho_states; - for (int i = 0; i < indices.size(); ++i) - sho_states.push_back(basis_states[indices[i]]); - - // make the sposet - auto sho = std::make_unique>(spo_name, length, center, sho_states); - - sho->report(" "); - return sho; + app_log() << "SHOSetBuilderT::createSHOSet(indices) " << std::endl; + reset(); + + // read 
parameters + std::string spo_name = "sho"; + OhmmsAttributeSet attrib; + attrib.add(spo_name, "name"); + attrib.add(spo_name, "id"); + attrib.add(mass, "mass"); + attrib.add(energy, "energy"); + attrib.add(energy, "frequency"); + attrib.add(length, "length"); + attrib.add(center, "center"); + attrib.add(nstates, "size"); + attrib.put(cur); + + if (energy < 0.0) + energy = 1.0; + if (mass < 0.0 && length < 0.0) + length = 1.0; + if (mass < 0.0) + mass = 1.0 / (energy * length * length); + else if (length < 0.0) + length = 1.0 / std::sqrt(mass * energy); + + // initialize states and/or adjust basis + int smax = -1; + if (input.has_index_info) + smax = std::max(smax, input.max_index()); + if (input.has_energy_info) { + smax = std::max(smax, (int)std::ceil(input.max_energy() / energy)); + } + if (smax < 0) + APP_ABORT("SHOSetBuilderT::Initialize\n invalid basis size"); + update_basis_states(smax); + + // create sho state request + indices_t& indices = input.get_indices(this->states); + std::vector sho_states; + for (int i = 0; i < indices.size(); ++i) + sho_states.push_back(basis_states[indices[i]]); + + // make the sposet + auto sho = + std::make_unique>(spo_name, length, center, sho_states); + + sho->report(" "); + return sho; } -template -void SHOSetBuilderT::update_basis_states(int smax) +template +void +SHOSetBuilderT::update_basis_states(int smax) { - int states_required = smax - basis_states.size() + 1; - if (states_required > 0) - { - RealType N = smax + 1; - if (QMCTraits::DIM == 1) - nmax = smax; - else if (QMCTraits::DIM == 2) - nmax = std::ceil(.5 * std::sqrt(8. * N + 1.) - 1.5); - else if (QMCTraits::DIM == 3) - { - RealType f = std::exp(1.0 / 3.0 * std::log(81. * N + 3. * std::sqrt(729. * N * N - 3.))); - nmax = std::ceil(f / 3. + 1. 
/ f - 2.); - } - else - APP_ABORT("SHOSetBuilderT::update_basis_states dimensions other than 1, 2, or 3 are not supported"); - int ndim = nmax + 1; - ind_dims[QMCTraits::DIM - 1] = 1; - for (int d = QMCTraits::DIM - 2; d > -1; --d) - ind_dims[d] = ind_dims[d + 1] * ndim; - int s = 0; - int ntot = pow(ndim, QMCTraits::DIM); - TinyVector qnumber; - for (int m = 0; m < ntot; ++m) - { - int n = 0; // principal quantum number - int nrem = m; - for (int d = 0; d < QMCTraits::DIM; ++d) - { - int i = nrem / ind_dims[d]; - nrem -= i * ind_dims[d]; - qnumber[d] = i; - n += i; - } - if (n <= nmax) - { - SHOState* st; - if (s < basis_states.size()) - st = basis_states[s]; + int states_required = smax - basis_states.size() + 1; + if (states_required > 0) { + RealType N = smax + 1; + if (QMCTraits::DIM == 1) + nmax = smax; + else if (QMCTraits::DIM == 2) + nmax = std::ceil(.5 * std::sqrt(8. * N + 1.) - 1.5); + else if (QMCTraits::DIM == 3) { + RealType f = std::exp(1.0 / 3.0 * + std::log(81. * N + 3. * std::sqrt(729. * N * N - 3.))); + nmax = std::ceil(f / 3. + 1. 
/ f - 2.); + } else - { - st = new SHOState(); - basis_states.add(st); + APP_ABORT("SHOSetBuilderT::update_basis_states dimensions other " + "than 1, 2, or 3 are not supported"); + int ndim = nmax + 1; + ind_dims[QMCTraits::DIM - 1] = 1; + for (int d = QMCTraits::DIM - 2; d > -1; --d) + ind_dims[d] = ind_dims[d + 1] * ndim; + int s = 0; + int ntot = pow(ndim, QMCTraits::DIM); + TinyVector qnumber; + for (int m = 0; m < ntot; ++m) { + int n = 0; // principal quantum number + int nrem = m; + for (int d = 0; d < QMCTraits::DIM; ++d) { + int i = nrem / ind_dims[d]; + nrem -= i * ind_dims[d]; + qnumber[d] = i; + n += i; + } + if (n <= nmax) { + SHOState* st; + if (s < basis_states.size()) + st = basis_states[s]; + else { + st = new SHOState(); + basis_states.add(st); + } + RealType e = energy * (n + .5 * QMCTraits::DIM); + st->set(qnumber, e); + s++; + } } - RealType e = energy * (n + .5 * QMCTraits::DIM); - st->set(qnumber, e); - s++; - } + basis_states.energy_sort(1e-6, true); + } + + // reset energy scale even if no states need to be added + for (int i = 0; i < basis_states.size(); ++i) { + SHOState& state = *basis_states[i]; + const TinyVector& qnumber = state.quantum_number; + int n = 0; + for (int d = 0; d < QMCTraits::DIM; ++d) + n += qnumber[d]; + state.energy = energy * (n + .5 * QMCTraits::DIM); } - basis_states.energy_sort(1e-6, true); - } - - // reset energy scale even if no states need to be added - for (int i = 0; i < basis_states.size(); ++i) - { - SHOState& state = *basis_states[i]; - const TinyVector& qnumber = state.quantum_number; - int n = 0; - for (int d = 0; d < QMCTraits::DIM; ++d) - n += qnumber[d]; - state.energy = energy * (n + .5 * QMCTraits::DIM); - } - - //somewhat redundant, but necessary - this->clear_states(0); - this->states[0]->finish(basis_states.states); - - if (basis_states.size() <= smax) - APP_ABORT("SHOSetBuilderT::update_basis_states failed to make enough states"); + + // somewhat redundant, but necessary + this->clear_states(0); 
+ this->states[0]->finish(basis_states.states); + + if (basis_states.size() <= smax) + APP_ABORT("SHOSetBuilderT::update_basis_states failed to make enough " + "states"); } -template -void SHOSetBuilderT::report(const std::string& pad) +template +void +SHOSetBuilderT::report(const std::string& pad) { - app_log() << pad << "SHOSetBuilderT report" << std::endl; - app_log() << pad << " dimension = " << QMCTraits::DIM << std::endl; - app_log() << pad << " mass = " << mass << std::endl; - app_log() << pad << " frequency = " << energy << std::endl; - app_log() << pad << " energy = " << energy << std::endl; - app_log() << pad << " length = " << length << std::endl; - app_log() << pad << " center = " << center << std::endl; - app_log() << pad << " nstates = " << nstates << std::endl; - app_log() << pad << " nmax = " << nmax << std::endl; - app_log() << pad << " ind_dims = " << ind_dims << std::endl; - app_log() << pad << " # basis states = " << basis_states.size() << std::endl; - app_log() << pad << " basis_states" << std::endl; - for (int s = 0; s < basis_states.size(); ++s) - basis_states[s]->report(pad + " " + int2string(s) + " "); - app_log() << pad << "end SHOSetBuilderT report" << std::endl; - app_log().flush(); + app_log() << pad << "SHOSetBuilderT report" << std::endl; + app_log() << pad << " dimension = " << QMCTraits::DIM << std::endl; + app_log() << pad << " mass = " << mass << std::endl; + app_log() << pad << " frequency = " << energy << std::endl; + app_log() << pad << " energy = " << energy << std::endl; + app_log() << pad << " length = " << length << std::endl; + app_log() << pad << " center = " << center << std::endl; + app_log() << pad << " nstates = " << nstates << std::endl; + app_log() << pad << " nmax = " << nmax << std::endl; + app_log() << pad << " ind_dims = " << ind_dims << std::endl; + app_log() << pad << " # basis states = " << basis_states.size() + << std::endl; + app_log() << pad << " basis_states" << std::endl; + for (int s = 0; s < 
basis_states.size(); ++s) + basis_states[s]->report(pad + " " + int2string(s) + " "); + app_log() << pad << "end SHOSetBuilderT report" << std::endl; + app_log().flush(); } template class SHOSetBuilderT; diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h index 7b3e9430d8..96237ab55e 100644 --- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h +++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h @@ -1,16 +1,18 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. // -// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National +// Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National +// Laboratory // -// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// File created by: Jaron T. 
Krogel, krogeljt@ornl.gov, Oak Ridge National +// Laboratory ////////////////////////////////////////////////////////////////////////////////////// - #ifndef QMCPLUSPLUS_SHO_BASIS_BUILDERT_H #define QMCPLUSPLUS_SHO_BASIS_BUILDERT_H @@ -20,43 +22,48 @@ namespace qmcplusplus { -template +template class SHOSetBuilderT : public SPOSetBuilderT { public: - using RealType = typename SPOSetT::RealType; - using PosType = typename SPOSetT::PosType; - using indices_t = typename SPOSetBuilderT::indices_t; + using RealType = typename SPOSetT::RealType; + using PosType = typename SPOSetT::PosType; + using indices_t = typename SPOSetBuilderT::indices_t; - ParticleSet& Ps; + ParticleSetT& Ps; - RealType length; - RealType mass; - RealType energy; - PosType center; + RealType length; + RealType mass; + RealType energy; + PosType center; - int nstates; - int nmax; - TinyVector ind_dims; + int nstates; + int nmax; + TinyVector ind_dims; - SPOSetInfoSimple basis_states; + SPOSetInfoSimple basis_states; - //construction/destruction - SHOSetBuilderT(ParticleSet& P, Communicate* comm); + // construction/destruction + SHOSetBuilderT(ParticleSetT& P, Communicate* comm); - ~SHOSetBuilderT() override; + ~SHOSetBuilderT() override; - //reset parameters - void reset(); + // reset parameters + void + reset(); - //SPOSetBuilder interface - std::unique_ptr> createSPOSetFromXML(xmlNodePtr cur) override; + // SPOSetBuilder interface + std::unique_ptr> + createSPOSetFromXML(xmlNodePtr cur) override; - std::unique_ptr> createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) override; + std::unique_ptr> + createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) override; - //local functions - void update_basis_states(int smax); - void report(const std::string& pad = ""); + // local functions + void + update_basis_states(int smax); + void + report(const std::string& pad = ""); }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp 
b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp index 76a606151d..b4e55a258d 100644 --- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp +++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp @@ -1,571 +1,555 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. // -// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National +// Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National +// Laboratory // -// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// File created by: Jaron T. 
Krogel, krogeljt@ornl.gov, Oak Ridge National +// Laboratory ////////////////////////////////////////////////////////////////////////////////////// - #include "SHOSetT.h" + #include "Utilities/string_utils.h" namespace qmcplusplus { template -SHOSetT::SHOSetT(const std::string& my_name, RealType l, PosType c, const std::vector& sho_states) - : SPOSetT(my_name), length(l), center(c) +SHOSetT::SHOSetT(const std::string& my_name, RealType l, PosType c, + const std::vector& sho_states) : + SPOSetT(my_name), + length(l), + center(c) { - state_info.resize(sho_states.size()); - for (int s = 0; s < sho_states.size(); ++s) - state_info[s] = *sho_states[s]; - initialize(); + state_info.resize(sho_states.size()); + for (int s = 0; s < sho_states.size(); ++s) + state_info[s] = *sho_states[s]; + initialize(); } template -void SHOSetT::initialize() +void +SHOSetT::initialize() { - using std::sqrt; + using std::sqrt; - this->OrbitalSetSize = state_info.size(); + this->OrbitalSetSize = state_info.size(); - qn_max = -1; - for (int s = 0; s < state_info.size(); ++s) + qn_max = -1; + for (int s = 0; s < state_info.size(); ++s) + for (int d = 0; d < QMCTraits::DIM; ++d) + qn_max[d] = std::max(qn_max[d], state_info[s].quantum_number[d]); + qn_max += 1; + + nmax = -1; for (int d = 0; d < QMCTraits::DIM; ++d) - qn_max[d] = std::max(qn_max[d], state_info[s].quantum_number[d]); - qn_max += 1; - - nmax = -1; - for (int d = 0; d < QMCTraits::DIM; ++d) - nmax = std::max(nmax, qn_max[d]); - - prefactors.resize(nmax); - hermite.resize(QMCTraits::DIM, nmax); - bvalues.resize(QMCTraits::DIM, nmax); - - if (nmax > 0) - { - prefactors[0] = 1.0 / (sqrt(sqrt(M_PI) * length)); - for (int n = 1; n < nmax; ++n) - prefactors[n] = prefactors[n - 1] / sqrt(2. 
* n); - } + nmax = std::max(nmax, qn_max[d]); + + prefactors.resize(nmax); + hermite.resize(QMCTraits::DIM, nmax); + bvalues.resize(QMCTraits::DIM, nmax); + + if (nmax > 0) { + prefactors[0] = 1.0 / (sqrt(sqrt(M_PI) * length)); + for (int n = 1; n < nmax; ++n) + prefactors[n] = prefactors[n - 1] / sqrt(2. * n); + } } template SHOSetT::~SHOSetT() = default; template -std::unique_ptr> SHOSetT::makeClone() const { return std::make_unique>(*this); } - -template -void SHOSetT::report(const std::string& pad) const +std::unique_ptr> +SHOSetT::makeClone() const { - app_log() << pad << "SHOSet report" << std::endl; - app_log() << pad << " length = " << length << std::endl; - app_log() << pad << " center = " << center << std::endl; - app_log() << pad << " nmax = " << nmax << std::endl; - app_log() << pad << " qn_max = " << qn_max << std::endl; - app_log() << pad << " # states = " << state_info.size() << std::endl; - app_log() << pad << " states" << std::endl; - for (int s = 0; s < state_info.size(); ++s) - state_info[s].sho_report(pad + " " + int2string(s) + " "); - app_log() << pad << "end SHOSet report" << std::endl; - app_log().flush(); + return std::make_unique>(*this); } template -void SHOSetT::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) +void +SHOSetT::report(const std::string& pad) const { - const PosType& r(P.activeR(iat)); - ValueVector p(&psi[0], this->size()); - evaluate_v(r, p); + app_log() << pad << "SHOSet report" << std::endl; + app_log() << pad << " length = " << length << std::endl; + app_log() << pad << " center = " << center << std::endl; + app_log() << pad << " nmax = " << nmax << std::endl; + app_log() << pad << " qn_max = " << qn_max << std::endl; + app_log() << pad << " # states = " << state_info.size() << std::endl; + app_log() << pad << " states" << std::endl; + for (int s = 0; s < state_info.size(); ++s) + state_info[s].sho_report(pad + " " + int2string(s) + " "); + app_log() << pad << "end SHOSet report" << std::endl; + 
app_log().flush(); } template -void SHOSetT::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +void +SHOSetT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) { - const PosType& r(P.activeR(iat)); - ValueVector p(&psi[0], this->size()); - GradVector dp(&dpsi[0], this->size()); - ValueVector d2p(&d2psi[0], this->size()); - evaluate_vgl(r, p, dp, d2p); + const PosType& r(P.activeR(iat)); + ValueVector p(&psi[0], this->size()); + evaluate_v(r, p); } template -void SHOSetT::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) +void +SHOSetT::evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, + GradVector& dpsi, ValueVector& d2psi) { - for (int iat = first, i = 0; iat < last; ++iat, ++i) - { - ValueVector p(logdet[i], this->size()); - GradVector dp(dlogdet[i], this->size()); - ValueVector d2p(d2logdet[i], this->size()); - evaluate_vgl(P.R[iat], p, dp, d2p); - } + const PosType& r(P.activeR(iat)); + ValueVector p(&psi[0], this->size()); + GradVector dp(&dpsi[0], this->size()); + ValueVector d2p(&d2psi[0], this->size()); + evaluate_vgl(r, p, dp, d2p); } template -void SHOSetT::evaluate_v(PosType r, ValueVector& psi) +void +SHOSetT::evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) { - PosType x = (r - center) / length; - evaluate_hermite(x); - evaluate_d0(x, psi); + for (int iat = first, i = 0; iat < last; ++iat, ++i) { + ValueVector p(logdet[i], this->size()); + GradVector dp(dlogdet[i], this->size()); + ValueVector d2p(d2logdet[i], this->size()); + evaluate_vgl(P.R[iat], p, dp, d2p); + } } template -void SHOSetT::evaluate_vgl(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +void +SHOSetT::evaluate_v(PosType r, ValueVector& psi) { - PosType x = (r - center) / length; - evaluate_hermite(x); - evaluate_d0(x, psi); 
- evaluate_d1(x, psi, dpsi); - evaluate_d2(x, psi, d2psi); + PosType x = (r - center) / length; + evaluate_hermite(x); + evaluate_d0(x, psi); } template -void SHOSetT::evaluate_hermite(const PosType& xpos) +void +SHOSetT::evaluate_vgl( + PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { - for (int d = 0; d < QMCTraits::DIM; ++d) - { - int nh = qn_max[d]; - if (nh > 0) - { - RealType x = xpos[d]; - hermite(d, 0) = 1.0; - RealType Hnm2 = 0.0; - RealType Hnm1 = 1.0; - for (int n = 1; n < nh; ++n) - { - RealType Hn = 2 * (x * Hnm1 - (n - 1) * Hnm2); - hermite(d, n) = Hn; - Hnm2 = Hnm1; - Hnm1 = Hn; - } - } - } + PosType x = (r - center) / length; + evaluate_hermite(x); + evaluate_d0(x, psi); + evaluate_d1(x, psi, dpsi); + evaluate_d2(x, psi, d2psi); } template -void SHOSetT::evaluate_d0(const PosType& xpos, ValueVector& psi) +void +SHOSetT::evaluate_hermite(const PosType& xpos) { - using std::exp; - for (int d = 0; d < QMCTraits::DIM; ++d) - { - RealType x = xpos[d]; - RealType g = exp(-.5 * x * x); - for (int n = 0; n < qn_max[d]; ++n) - { - bvalues(d, n) = prefactors[n] * g * hermite(d, n); + for (int d = 0; d < QMCTraits::DIM; ++d) { + int nh = qn_max[d]; + if (nh > 0) { + RealType x = xpos[d]; + hermite(d, 0) = 1.0; + RealType Hnm2 = 0.0; + RealType Hnm1 = 1.0; + for (int n = 1; n < nh; ++n) { + RealType Hn = 2 * (x * Hnm1 - (n - 1) * Hnm2); + hermite(d, n) = Hn; + Hnm2 = Hnm1; + Hnm1 = Hn; + } + } } - } - for (int s = 0; s < state_info.size(); ++s) - { - const SHOState& state = state_info[s]; - RealType phi = 1.0; - for (int d = 0; d < QMCTraits::DIM; ++d) - phi *= bvalues(d, state.quantum_number[d]); - psi[s] = phi; - } } template -void SHOSetT::evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi) +void +SHOSetT::evaluate_d0(const PosType& xpos, ValueVector& psi) { - RealType ol = 1.0 / length; - for (int d = 0; d < QMCTraits::DIM; ++d) - { - RealType x = xpos[d]; - RealType Hnm1 = 0.0; - for (int n = 0; n < qn_max[d]; ++n) - { 
- RealType Hn = hermite(d, n); - bvalues(d, n) = (-x + 2 * n * Hnm1 / Hn) * ol; - Hnm1 = Hn; + using std::exp; + for (int d = 0; d < QMCTraits::DIM; ++d) { + RealType x = xpos[d]; + RealType g = exp(-.5 * x * x); + for (int n = 0; n < qn_max[d]; ++n) { + bvalues(d, n) = prefactors[n] * g * hermite(d, n); + } + } + for (int s = 0; s < state_info.size(); ++s) { + const SHOState& state = state_info[s]; + RealType phi = 1.0; + for (int d = 0; d < QMCTraits::DIM; ++d) + phi *= bvalues(d, state.quantum_number[d]); + psi[s] = phi; } - } - for (int s = 0; s < state_info.size(); ++s) - { - const SHOState& state = state_info[s]; - TinyVector dphi; - for (int d = 0; d < QMCTraits::DIM; ++d) - dphi[d] = bvalues(d, state.quantum_number[d]); - dphi *= psi[s]; - dpsi[s] = dphi; - } } template -void SHOSetT::evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& d2psi) +void +SHOSetT::evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi) { - RealType ol2 = 1.0 / (length * length); - for (int d = 0; d < QMCTraits::DIM; ++d) - { - RealType x = xpos[d]; - RealType x2 = x * x; - for (int n = 0; n < qn_max[d]; ++n) - { - bvalues(d, n) = (-1.0 + x2 - 2 * n) * ol2; + RealType ol = 1.0 / length; + for (int d = 0; d < QMCTraits::DIM; ++d) { + RealType x = xpos[d]; + RealType Hnm1 = 0.0; + for (int n = 0; n < qn_max[d]; ++n) { + RealType Hn = hermite(d, n); + bvalues(d, n) = (-x + 2 * n * Hnm1 / Hn) * ol; + Hnm1 = Hn; + } + } + for (int s = 0; s < state_info.size(); ++s) { + const SHOState& state = state_info[s]; + TinyVector dphi; + for (int d = 0; d < QMCTraits::DIM; ++d) + dphi[d] = bvalues(d, state.quantum_number[d]); + dphi *= psi[s]; + dpsi[s] = dphi; } - } - for (int s = 0; s < state_info.size(); ++s) - { - const SHOState& state = state_info[s]; - T d2phi = 0.0; - for (int d = 0; d < QMCTraits::DIM; ++d) - d2phi += bvalues(d, state.quantum_number[d]); - d2phi *= psi[s]; - d2psi[s] = d2phi; - } } template -void SHOSetT::evaluate_check(PosType r, ValueVector& 
psi, GradVector& dpsi, ValueVector& d2psi) +void +SHOSetT::evaluate_d2( + const PosType& xpos, ValueVector& psi, ValueVector& d2psi) { - using std::exp; - using std::sqrt; - - evaluate_vgl(r, psi, dpsi, d2psi); - - const int N = 6; - RealType H[N], dH[N], d2H[N], pre[N]; - RealType p[N], dp[N], d2p[N]; - - pre[0] = 1.0 / (sqrt(sqrt(M_PI) * length)); - for (int n = 1; n < N; ++n) - pre[n] = pre[n - 1] / sqrt(2. * n); - - for (int d = 0; d < QMCTraits::DIM; ++d) - { - RealType x = (r[d] - center[d]) / length; - RealType x2 = x * x, x3 = x * x * x, x4 = x * x * x * x, x5 = x * x * x * x * x; - H[0] = 1; - dH[0] = 0; - d2H[0] = 0; - H[1] = 2 * x; - dH[1] = 2; - d2H[1] = 0; - H[2] = 4 * x2 - 2; - dH[2] = 8 * x; - d2H[2] = 8; - H[3] = 8 * x3 - 12 * x; - dH[3] = 24 * x2 - 12; - d2H[3] = 48 * x; - H[4] = 16 * x4 - 48 * x2 + 12; - dH[4] = 64 * x3 - 96 * x; - d2H[4] = 192 * x2 - 96; - H[5] = 32 * x5 - 160 * x3 + 120 * x; - dH[5] = 160 * x4 - 480 * x2 + 120; - d2H[5] = 640 * x3 - 960 * x; - RealType g = exp(-x2 / 2); - for (int n = 0; n < N; ++n) - { - p[n] = pre[n] * g * H[n]; - dp[n] = pre[n] * g * (-x * H[n] + dH[n]); - d2p[n] = pre[n] * g * ((x2 - 1) * H[n] - 2 * x * dH[n] + d2H[n]); - } - app_log() << "eval check dim = " << d << " x = " << x << std::endl; - app_log() << " hermite check" << std::endl; - for (int n = 0; n < qn_max[d]; ++n) - { - app_log() << " " << n << " " << H[n] << std::endl; - app_log() << " " << n << " " << hermite(d, n) << std::endl; - } - app_log() << " phi d0 check" << std::endl; - for (int n = 0; n < qn_max[d]; ++n) - { - app_log() << " " << n << " " << p[n] << std::endl; - app_log() << " " << n << " " << d0_values(d, n) << std::endl; - } - app_log() << " phi d1 check" << std::endl; - for (int n = 0; n < qn_max[d]; ++n) - { - app_log() << " " << n << " " << dp[n] / p[n] << std::endl; - app_log() << " " << n << " " << d1_values(d, n) << std::endl; + RealType ol2 = 1.0 / (length * length); + for (int d = 0; d < QMCTraits::DIM; ++d) { + RealType x = 
xpos[d]; + RealType x2 = x * x; + for (int n = 0; n < qn_max[d]; ++n) { + bvalues(d, n) = (-1.0 + x2 - 2 * n) * ol2; + } } - app_log() << " phi d2 check" << std::endl; - for (int n = 0; n < qn_max[d]; ++n) - { - app_log() << " " << n << " " << d2p[n] / p[n] << std::endl; - app_log() << " " << n << " " << d2_values(d, n) << std::endl; + for (int s = 0; s < state_info.size(); ++s) { + const SHOState& state = state_info[s]; + T d2phi = 0.0; + for (int d = 0; d < QMCTraits::DIM; ++d) + d2phi += bvalues(d, state.quantum_number[d]); + d2phi *= psi[s]; + d2psi[s] = d2phi; } - } } template -void SHOSetT::test_derivatives() +void +SHOSetT::evaluate_check( + PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { - int n = 3; - PosType c = 5.123; - PosType L = 1.0; - PosType drg = L / n; - PosType dr = L / 1000; - int nphi = state_info.size(); - - PosType o2dr, odr2; - - ValueVector vpsi, vpsitmp; - GradVector vdpsi, vdpsin; - ValueVector vd2psi, vd2psin; - - - vpsi.resize(nphi); - vdpsi.resize(nphi); - vd2psi.resize(nphi); - - vpsitmp.resize(nphi); - vdpsin.resize(nphi); - vd2psin.resize(nphi); - - - ValueVector psi(&vpsi[0], this->size()); - GradVector dpsi(&vdpsi[0], this->size()); - ValueVector d2psi(&vd2psi[0], this->size()); - - ValueVector psitmp(&vpsitmp[0], this->size()); - GradVector dpsin(&vdpsin[0], this->size()); - ValueVector d2psin(&vd2psin[0], this->size()); - - - app_log() << " loading dr" << std::endl; - - RealType odr2sum = 0.0; - for (int d = 0; d < QMCTraits::DIM; ++d) - { - RealType odr = 1.0 / dr[d]; - o2dr[d] = .5 * odr; - odr2[d] = odr * odr; - odr2sum += odr2[d]; - } - - app_log() << "SHOSet::test_derivatives" << std::endl; - - const SimulationCell simulation_cell; - ParticleSet Ps(simulation_cell); - - int p = 0; - PosType r, rtmp; - for (int i = 0; i < n; ++i) - { - r[0] = c[0] + i * drg[0]; - for (int j = 0; j < n; ++j) - { - r[1] = c[1] + j * drg[1]; - for (int k = 0; k < n; ++k) - { - r[2] = c[2] + k * drg[2]; - - evaluate_vgl(r, 
psi, dpsi, d2psi); - - for (int m = 0; m < nphi; ++m) - d2psin[m] = -2 * odr2sum * psi[m]; - for (int d = 0; d < QMCTraits::DIM; ++d) - { - rtmp = r; - rtmp[d] += dr[d]; - evaluate_v(rtmp, psitmp); - for (int m = 0; m < nphi; ++m) - { - T phi = psitmp[m]; - dpsin[m][d] = phi * o2dr[d]; - d2psin[m] += phi * odr2[d]; - } - rtmp = r; - rtmp[d] -= dr[d]; - evaluate_v(rtmp, psitmp); - for (int m = 0; m < nphi; ++m) - { - T phi = psitmp[m]; - dpsin[m][d] -= phi * o2dr[d]; - d2psin[m] += phi * odr2[d]; - } + using std::exp; + using std::sqrt; + + evaluate_vgl(r, psi, dpsi, d2psi); + + const int N = 6; + RealType H[N], dH[N], d2H[N], pre[N]; + RealType p[N], dp[N], d2p[N]; + + pre[0] = 1.0 / (sqrt(sqrt(M_PI) * length)); + for (int n = 1; n < N; ++n) + pre[n] = pre[n - 1] / sqrt(2. * n); + + for (int d = 0; d < QMCTraits::DIM; ++d) { + RealType x = (r[d] - center[d]) / length; + RealType x2 = x * x, x3 = x * x * x, x4 = x * x * x * x, + x5 = x * x * x * x * x; + H[0] = 1; + dH[0] = 0; + d2H[0] = 0; + H[1] = 2 * x; + dH[1] = 2; + d2H[1] = 0; + H[2] = 4 * x2 - 2; + dH[2] = 8 * x; + d2H[2] = 8; + H[3] = 8 * x3 - 12 * x; + dH[3] = 24 * x2 - 12; + d2H[3] = 48 * x; + H[4] = 16 * x4 - 48 * x2 + 12; + dH[4] = 64 * x3 - 96 * x; + d2H[4] = 192 * x2 - 96; + H[5] = 32 * x5 - 160 * x3 + 120 * x; + dH[5] = 160 * x4 - 480 * x2 + 120; + d2H[5] = 640 * x3 - 960 * x; + RealType g = exp(-x2 / 2); + for (int n = 0; n < N; ++n) { + p[n] = pre[n] * g * H[n]; + dp[n] = pre[n] * g * (-x * H[n] + dH[n]); + d2p[n] = pre[n] * g * ((x2 - 1) * H[n] - 2 * x * dH[n] + d2H[n]); } - - RealType dphi_diff = 0.0; - RealType d2phi_diff = 0.0; - for (int m = 0; m < nphi; ++m) - for (int d = 0; d < QMCTraits::DIM; ++d) - dphi_diff = std::max(dphi_diff, std::abs(dpsi[m][d] - dpsin[m][d]) / std::abs(dpsin[m][d])); - for (int m = 0; m < nphi; ++m) - d2phi_diff = std::max(d2phi_diff, std::abs(d2psi[m] - d2psin[m]) / std::abs(d2psin[m])); - app_log() << " " << p << " " << dphi_diff << " " << d2phi_diff << std::endl; 
- app_log() << " derivatives" << std::endl; - for (int m = 0; m < nphi; ++m) - { - std::string qn = ""; - for (int d = 0; d < QMCTraits::DIM; ++d) - qn += int2string(state_info[m].quantum_number[d]) + " "; - app_log() << " " << qn; - for (int d = 0; d < QMCTraits::DIM; ++d) - app_log() << real(dpsi[m][d]) << " "; - app_log() << std::endl; - app_log() << " " << qn; - for (int d = 0; d < QMCTraits::DIM; ++d) - app_log() << real(dpsin[m][d]) << " "; - app_log() << std::endl; + app_log() << "eval check dim = " << d << " x = " << x << std::endl; + app_log() << " hermite check" << std::endl; + for (int n = 0; n < qn_max[d]; ++n) { + app_log() << " " << n << " " << H[n] << std::endl; + app_log() << " " << n << " " << hermite(d, n) << std::endl; } - app_log() << " laplacians" << std::endl; - PosType x = r / length; - for (int m = 0; m < nphi; ++m) - { - std::string qn = ""; - for (int d = 0; d < QMCTraits::DIM; ++d) - qn += int2string(state_info[m].quantum_number[d]) + " "; - app_log() << " " << qn << real(d2psi[m] / psi[m]) << std::endl; - app_log() << " " << qn << real(d2psin[m] / psi[m]) << std::endl; + app_log() << " phi d0 check" << std::endl; + for (int n = 0; n < qn_max[d]; ++n) { + app_log() << " " << n << " " << p[n] << std::endl; + app_log() << " " << n << " " << d0_values(d, n) << std::endl; + } + app_log() << " phi d1 check" << std::endl; + for (int n = 0; n < qn_max[d]; ++n) { + app_log() << " " << n << " " << dp[n] / p[n] << std::endl; + app_log() << " " << n << " " << d1_values(d, n) << std::endl; + } + app_log() << " phi d2 check" << std::endl; + for (int n = 0; n < qn_max[d]; ++n) { + app_log() << " " << n << " " << d2p[n] / p[n] << std::endl; + app_log() << " " << n << " " << d2_values(d, n) << std::endl; } - p++; - } } - } - - app_log() << "end SHOSet::test_derivatives" << std::endl; } template -void SHOSetT::test_overlap() +void +SHOSetT::test_derivatives() { - app_log() << "SHOSet::test_overlap" << std::endl; + int n = 3; + PosType c = 5.123; + PosType 
L = 1.0; + PosType drg = L / n; + PosType dr = L / 1000; + int nphi = state_info.size(); + + PosType o2dr, odr2; + + ValueVector vpsi, vpsitmp; + GradVector vdpsi, vdpsin; + ValueVector vd2psi, vd2psin; + + vpsi.resize(nphi); + vdpsi.resize(nphi); + vd2psi.resize(nphi); + + vpsitmp.resize(nphi); + vdpsin.resize(nphi); + vd2psin.resize(nphi); + + ValueVector psi(&vpsi[0], this->size()); + GradVector dpsi(&vdpsi[0], this->size()); + ValueVector d2psi(&vd2psi[0], this->size()); + + ValueVector psitmp(&vpsitmp[0], this->size()); + GradVector dpsin(&vdpsin[0], this->size()); + ValueVector d2psin(&vd2psin[0], this->size()); + + app_log() << " loading dr" << std::endl; + + RealType odr2sum = 0.0; + for (int d = 0; d < QMCTraits::DIM; ++d) { + RealType odr = 1.0 / dr[d]; + o2dr[d] = .5 * odr; + odr2[d] = odr * odr; + odr2sum += odr2[d]; + } + app_log() << "SHOSet::test_derivatives" << std::endl; + + const SimulationCellT simulation_cell; + ParticleSetT Ps(simulation_cell); + + int p = 0; + PosType r, rtmp; + for (int i = 0; i < n; ++i) { + r[0] = c[0] + i * drg[0]; + for (int j = 0; j < n; ++j) { + r[1] = c[1] + j * drg[1]; + for (int k = 0; k < n; ++k) { + r[2] = c[2] + k * drg[2]; + + evaluate_vgl(r, psi, dpsi, d2psi); + + for (int m = 0; m < nphi; ++m) + d2psin[m] = -2 * odr2sum * psi[m]; + for (int d = 0; d < QMCTraits::DIM; ++d) { + rtmp = r; + rtmp[d] += dr[d]; + evaluate_v(rtmp, psitmp); + for (int m = 0; m < nphi; ++m) { + T phi = psitmp[m]; + dpsin[m][d] = phi * o2dr[d]; + d2psin[m] += phi * odr2[d]; + } + rtmp = r; + rtmp[d] -= dr[d]; + evaluate_v(rtmp, psitmp); + for (int m = 0; m < nphi; ++m) { + T phi = psitmp[m]; + dpsin[m][d] -= phi * o2dr[d]; + d2psin[m] += phi * odr2[d]; + } + } + + RealType dphi_diff = 0.0; + RealType d2phi_diff = 0.0; + for (int m = 0; m < nphi; ++m) + for (int d = 0; d < QMCTraits::DIM; ++d) + dphi_diff = std::max(dphi_diff, + std::abs(dpsi[m][d] - dpsin[m][d]) / + std::abs(dpsin[m][d])); + for (int m = 0; m < nphi; ++m) + d2phi_diff = 
std::max(d2phi_diff, + std::abs(d2psi[m] - d2psin[m]) / std::abs(d2psin[m])); + app_log() << " " << p << " " << dphi_diff << " " << d2phi_diff + << std::endl; + app_log() << " derivatives" << std::endl; + for (int m = 0; m < nphi; ++m) { + std::string qn = ""; + for (int d = 0; d < QMCTraits::DIM; ++d) + qn += int2string(state_info[m].quantum_number[d]) + " "; + app_log() << " " << qn; + for (int d = 0; d < QMCTraits::DIM; ++d) + app_log() << real(dpsi[m][d]) << " "; + app_log() << std::endl; + app_log() << " " << qn; + for (int d = 0; d < QMCTraits::DIM; ++d) + app_log() << real(dpsin[m][d]) << " "; + app_log() << std::endl; + } + app_log() << " laplacians" << std::endl; + PosType x = r / length; + for (int m = 0; m < nphi; ++m) { + std::string qn = ""; + for (int d = 0; d < QMCTraits::DIM; ++d) + qn += int2string(state_info[m].quantum_number[d]) + " "; + app_log() + << " " << qn << real(d2psi[m] / psi[m]) << std::endl; + app_log() << " " << qn << real(d2psin[m] / psi[m]) + << std::endl; + } + p++; + } + } + } - //linear - int d = 0; + app_log() << "end SHOSet::test_derivatives" << std::endl; +} - app_log() << " length = " << length << std::endl; - app_log() << " prefactors" << std::endl; - for (int n = 0; n < qn_max[d]; ++n) - app_log() << " " << n << " " << prefactors[n] << std::endl; +template +void +SHOSetT::test_overlap() +{ + app_log() << "SHOSet::test_overlap" << std::endl; - app_log() << " 1d overlap" << std::endl; + // linear + int d = 0; - ValueVector vpsi; - vpsi.resize(this->size()); - ValueVector psi(&vpsi[0], this->size()); + app_log() << " length = " << length << std::endl; + app_log() << " prefactors" << std::endl; + for (int n = 0; n < qn_max[d]; ++n) + app_log() << " " << n << " " << prefactors[n] << std::endl; - double xmax = 4.0; - double dx = .1; - double dr = length * dx; + app_log() << " 1d overlap" << std::endl; - int nphi = qn_max[d]; - Array omat; - omat.resize(nphi, nphi); - for (int i = 0; i < nphi; ++i) - for (int j = 0; j < nphi; ++j) 
- omat(i, j) = 0.0; + ValueVector vpsi; + vpsi.resize(this->size()); + ValueVector psi(&vpsi[0], this->size()); - PosType xp = 0.0; - for (double x = -xmax; x < xmax; x += dx) - { - xp[d] = x; - evaluate_hermite(xp); - evaluate_d0(xp, psi); + double xmax = 4.0; + double dx = .1; + double dr = length * dx; + int nphi = qn_max[d]; + Array omat; + omat.resize(nphi, nphi); for (int i = 0; i < nphi; ++i) - for (int j = 0; j < nphi; ++j) - omat(i, j) += bvalues(d, i) * bvalues(d, j) * dr; - } + for (int j = 0; j < nphi; ++j) + omat(i, j) = 0.0; - for (int i = 0; i < nphi; ++i) - { - app_log() << std::endl; - for (int j = 0; j < nphi; ++j) - app_log() << omat(i, j) << " "; - } - app_log() << std::endl; - - - //volumetric - app_log() << " 3d overlap" << std::endl; - double dV = dr * dr * dr; - nphi = this->size(); - omat.resize(nphi, nphi); - for (int i = 0; i < nphi; ++i) - for (int j = 0; j < nphi; ++j) - omat(i, j) = 0.0; - for (double x = -xmax; x < xmax; x += dx) - for (double y = -xmax; y < xmax; y += dx) - for (double z = -xmax; z < xmax; z += dx) - { - xp[0] = x; - xp[1] = y; - xp[2] = z; + PosType xp = 0.0; + for (double x = -xmax; x < xmax; x += dx) { + xp[d] = x; evaluate_hermite(xp); evaluate_d0(xp, psi); for (int i = 0; i < nphi; ++i) - for (int j = 0; j < nphi; ++j) - omat(i, j) += std::abs(psi[i] * psi[j]) * dV; - } - for (int i = 0; i < nphi; ++i) - { + for (int j = 0; j < nphi; ++j) + omat(i, j) += bvalues(d, i) * bvalues(d, j) * dr; + } + + for (int i = 0; i < nphi; ++i) { + app_log() << std::endl; + for (int j = 0; j < nphi; ++j) + app_log() << omat(i, j) << " "; + } app_log() << std::endl; - for (int j = 0; j < nphi; ++j) - app_log() << omat(i, j) << " "; - } - app_log() << std::endl; + // volumetric + app_log() << " 3d overlap" << std::endl; + double dV = dr * dr * dr; + nphi = this->size(); + omat.resize(nphi, nphi); + for (int i = 0; i < nphi; ++i) + for (int j = 0; j < nphi; ++j) + omat(i, j) = 0.0; + for (double x = -xmax; x < xmax; x += dx) + for 
(double y = -xmax; y < xmax; y += dx) + for (double z = -xmax; z < xmax; z += dx) { + xp[0] = x; + xp[1] = y; + xp[2] = z; + evaluate_hermite(xp); + evaluate_d0(xp, psi); + + for (int i = 0; i < nphi; ++i) + for (int j = 0; j < nphi; ++j) + omat(i, j) += std::abs(psi[i] * psi[j]) * dV; + } + for (int i = 0; i < nphi; ++i) { + app_log() << std::endl; + for (int j = 0; j < nphi; ++j) + app_log() << omat(i, j) << " "; + } + app_log() << std::endl; - app_log() << "end SHOSet::test_overlap" << std::endl; + app_log() << "end SHOSet::test_overlap" << std::endl; } template -void SHOSetT::evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& grad_grad_grad_logdet) +void +SHOSetT::evaluateThirdDeriv(const ParticleSetT& P, int first, int last, + GGGMatrix& grad_grad_grad_logdet) { - not_implemented("evaluateThirdDeriv(P,first,last,dddlogdet)"); + not_implemented("evaluateThirdDeriv(P,first,last,dddlogdet)"); } template -void SHOSetT::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) +void +SHOSetT::evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet) { - not_implemented("evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet)"); + not_implemented( + "evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet)"); } template -void SHOSetT::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) +void +SHOSetT::evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet, + GGGMatrix& grad_grad_grad_logdet) { - not_implemented("evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet,dddlogdet)"); + not_implemented( + 
"evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet,dddlogdet)"); } template -void SHOSetT::evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi) +void +SHOSetT::evaluateGradSource(const ParticleSetT& P, int first, int last, + const ParticleSetT& source, int iat_src, GradMatrix& gradphi) { - not_implemented("evaluateGradSource(P,first,last,source,iat,dphi)"); + not_implemented("evaluateGradSource(P,first,last,source,iat,dphi)"); } template -void SHOSetT::evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, - GradMatrix& grad_lapl_phi) +void +SHOSetT::evaluateGradSource(const ParticleSetT& P, int first, int last, + const ParticleSetT& source, int iat_src, GradMatrix& grad_phi, + HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi) { - not_implemented("evaluateGradSource(P,first,last,source,iat,dphi,ddphi,dd2phi)"); + not_implemented( + "evaluateGradSource(P,first,last,source,iat,dphi,ddphi,dd2phi)"); } // Class concrete types from ValueType diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h index 6ef256df92..d8e89e9e0e 100644 --- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h +++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h @@ -1,158 +1,177 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. // -// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National +// Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National +// Laboratory // -// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National +// Laboratory ////////////////////////////////////////////////////////////////////////////////////// - #ifndef QMCPLUSPLUS_SHOSETT_H #define QMCPLUSPLUS_SHOSETT_H -#include "QMCWaveFunctions/SPOSetT.h" #include "QMCWaveFunctions/SPOInfo.h" +#include "QMCWaveFunctions/SPOSetT.h" namespace qmcplusplus { struct SHOState : public SPOInfo { - TinyVector quantum_number; - - SHOState() - { - quantum_number = -1; - energy = 0.0; - } - - ~SHOState() override {} - - inline void set(TinyVector qn, RealType e) - { - quantum_number = qn; - energy = e; - } - - inline void sho_report(const std::string& pad = "") const - { - app_log() << pad << "qn=" << quantum_number << " e=" << energy << std::endl; - } + TinyVector quantum_number; + + SHOState() + { + quantum_number = -1; + energy = 0.0; + } + + ~SHOState() override + { + } + + inline void + set(TinyVector qn, RealType e) + { + quantum_number = qn; + energy = e; + } + + inline void + sho_report(const std::string& pad = "") const + { + app_log() << pad << "qn=" << quantum_number << " e=" << energy + << std::endl; + } }; -template +template class SHOSetT : public SPOSetT { public: - using GradVector = typename SPOSetT::GradVector; - using ValueVector = typename SPOSetT::ValueVector; - using ValueMatrix = typename SPOSetT::ValueMatrix; - using GradMatrix = typename SPOSetT::GradMatrix; - using value_type = typename ValueMatrix::value_type; - using grad_type = typename GradMatrix::value_type; - using RealType = typename SPOSetT::RealType; - using PosType = TinyVector; - using HessType = typename OrbitalSetTraits::HessType; - using HessMatrix = 
typename OrbitalSetTraits::HessMatrix; - using GGGType = TinyVector; - using GGGVector = Vector; - using GGGMatrix = Matrix; - - RealType length; - PosType center; - - int nmax; - TinyVector qn_max; - std::vector state_info; - std::vector prefactors; - Array hermite; - Array bvalues; - Array d0_values; - Array d1_values; - Array d2_values; - - //construction/destruction - SHOSetT(const std::string& my_name, RealType l, PosType c, const std::vector& sho_states); - - ~SHOSetT() override; - - std::string getClassName() const override { return "SHOSet"; } - - void initialize(); - - //SPOSet interface methods - std::unique_ptr> makeClone() const override; - - void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override; - - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - - - //local functions - void evaluate_v(PosType r, ValueVector& psi); - void evaluate_vgl(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - void evaluate_hermite(const PosType& xpos); - void evaluate_d0(const PosType& xpos, ValueVector& psi); - void evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi); - void evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& d2psi); - void report(const std::string& pad = "") const override; - void test_derivatives(); - void test_overlap(); - void evaluate_check(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - //empty methods - /// number of orbitals is determined only by initial request - inline void setOrbitalSetSize(int norbs) override {} - - ///unimplemented functions call this to abort - inline void not_implemented(const std::string& method) - { - APP_ABORT("SHOSet::" + method + " has not been implemented."); - } - - - //methods to be implemented in the 
future (possibly) - void evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& dddlogdet) override; - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& ddlogdet) override; - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& ddlogdet, - GGGMatrix& dddlogdet) override; - void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi) override; - void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& dphi, - HessMatrix& ddphi, - GradMatrix& dlapl_phi) override; + using GradVector = typename SPOSetT::GradVector; + using ValueVector = typename SPOSetT::ValueVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + using value_type = typename ValueMatrix::value_type; + using grad_type = typename GradMatrix::value_type; + using RealType = typename SPOSetT::RealType; + using PosType = TinyVector; + using HessType = typename OrbitalSetTraits::HessType; + using HessMatrix = typename OrbitalSetTraits::HessMatrix; + using GGGType = TinyVector; + using GGGVector = Vector; + using GGGMatrix = Matrix; + + RealType length; + PosType center; + + int nmax; + TinyVector qn_max; + std::vector state_info; + std::vector prefactors; + Array hermite; + Array bvalues; + Array d0_values; + Array d1_values; + Array d2_values; + + // construction/destruction + SHOSetT(const std::string& my_name, RealType l, PosType c, + const std::vector& sho_states); + + ~SHOSetT() override; + + std::string + getClassName() const override + { + return "SHOSet"; + } + + void + initialize(); + + // SPOSet interface methods + std::unique_ptr> + makeClone() const override; + + void + evaluateValue(const ParticleSetT& P, int iat, 
ValueVector& psi) override; + + void + evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, + GradVector& dpsi, ValueVector& d2psi) override; + + void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, + ValueMatrix& d2logdet) override; + + // local functions + void + evaluate_v(PosType r, ValueVector& psi); + void + evaluate_vgl( + PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); + void + evaluate_hermite(const PosType& xpos); + void + evaluate_d0(const PosType& xpos, ValueVector& psi); + void + evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi); + void + evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& d2psi); + void + report(const std::string& pad = "") const override; + void + test_derivatives(); + void + test_overlap(); + void + evaluate_check( + PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); + + // empty methods + /// number of orbitals is determined only by initial request + inline void + setOrbitalSetSize(int norbs) override + { + } + + /// unimplemented functions call this to abort + inline void + not_implemented(const std::string& method) + { + APP_ABORT("SHOSet::" + method + " has not been implemented."); + } + + // methods to be implemented in the future (possibly) + void + evaluateThirdDeriv(const ParticleSetT& P, int first, int last, + GGGMatrix& dddlogdet) override; + void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, + HessMatrix& ddlogdet) override; + void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& ddlogdet, + GGGMatrix& dddlogdet) override; + void + evaluateGradSource(const ParticleSetT& P, int first, int last, + const ParticleSetT& source, int iat_src, + GradMatrix& gradphi) override; + void + evaluateGradSource(const ParticleSetT& P, int first, int last, + const 
ParticleSetT& source, int iat_src, GradMatrix& dphi, + HessMatrix& ddphi, GradMatrix& dlapl_phi) override; }; } // namespace qmcplusplus - #endif diff --git a/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp new file mode 100644 index 0000000000..022d6db4a5 --- /dev/null +++ b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp @@ -0,0 +1,923 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of +// Illinois at Urbana-Champaign Jaron T. Krogel, +// krogeljt@ornl.gov, Oak Ridge National Laboratory Mark A. +// Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore +// National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#include "AOBasisBuilderT.h" + +#include "MultiFunctorAdapter.h" +#include "MultiQuinticSpline1D.h" +#include "Numerics/SoaCartesianTensor.h" +#include "Numerics/SoaSphericalTensor.h" +#include "OhmmsData/AttributeSet.h" +#include "RadialOrbitalSetBuilder.h" +#include "SoaAtomicBasisSetT.h" +#include "Utilities/ProgressReportEngine.h" + +namespace qmcplusplus +{ +template +AOBasisBuilderT::AOBasisBuilderT( + const std::string& eName, Communicate* comm) : + MPIObjectBase(comm), + addsignforM(false), + expandlm(GAUSSIAN_EXPAND), + Morder("gaussian"), + sph("default"), + basisType("Numerical"), + elementType(eName), + Normalized("yes") +{ + // mmorales: for "Cartesian Gaussian", m is an integer that 
maps + // the component to Gamess notation, see + // Numerics/CartesianTensor.h + nlms_id["n"] = q_n; + nlms_id["l"] = q_l; + nlms_id["m"] = q_m; + nlms_id["s"] = q_s; +} + +template +bool +AOBasisBuilderT::put(xmlNodePtr cur) +{ + ReportEngine PRE("AtomicBasisBuilder", "put(xmlNodePtr)"); + // Register valid attributes attributes + OhmmsAttributeSet aAttrib; + aAttrib.add(basisType, "type"); + aAttrib.add(sph, "angular"); + aAttrib.add(addsignforM, "expM"); + aAttrib.add(Morder, "expandYlm"); + aAttrib.add(Normalized, "normalized"); + aAttrib.put(cur); + PRE.echo(cur); + if (sph == "spherical") + addsignforM = 1; // include (-1)^m + + if (Morder == "gaussian") + expandlm = GAUSSIAN_EXPAND; + else if (Morder == "natural") + expandlm = NATURAL_EXPAND; + else if (Morder == "no") + expandlm = DONOT_EXPAND; + else if (Morder == "pyscf") { + expandlm = MOD_NATURAL_EXPAND; + addsignforM = 1; + if (sph != "spherical") { + myComm->barrier_and_abort( + " Error: expandYlm='pyscf' only compatible with " + "angular='spherical'. Aborting.\n"); + } + } + + if (sph == "cartesian" || Morder == "Gamess") { + expandlm = CARTESIAN_EXPAND; + addsignforM = 0; + } + + if (Morder == "Dirac") { + expandlm = DIRAC_CARTESIAN_EXPAND; + addsignforM = 0; + if (sph != "cartesian") + myComm->barrier_and_abort( + " Error: expandYlm='Dirac' only compatible with " + "angular='cartesian'. 
Aborting\n"); + } + + // Numerical basis is a special case + if (basisType == "Numerical") + myComm->barrier_and_abort( + "Purely numerical atomic orbitals are not supported any longer."); + + return true; +} + +template +bool +AOBasisBuilderT::putH5(hdf_archive& hin) +{ + ReportEngine PRE("AtomicBasisBuilder", "putH5(hin)"); + std::string CenterID, basisName; + + if (myComm->rank() == 0) { + hin.read(sph, "angular"); + hin.read(CenterID, "elementType"); + hin.read(Normalized, "normalized"); + hin.read(Morder, "expandYlm"); + hin.read(basisName, "name"); + } + + myComm->bcast(sph); + myComm->bcast(Morder); + myComm->bcast(CenterID); + myComm->bcast(Normalized); + myComm->bcast(basisName); + myComm->bcast(basisType); + myComm->bcast(addsignforM); + + if (sph == "spherical") + addsignforM = 1; // include (-1)^m + + if (Morder == "gaussian") + expandlm = GAUSSIAN_EXPAND; + else if (Morder == "natural") + expandlm = NATURAL_EXPAND; + else if (Morder == "no") + expandlm = DONOT_EXPAND; + else if (Morder == "pyscf") { + expandlm = MOD_NATURAL_EXPAND; + addsignforM = 1; + if (sph != "spherical") { + myComm->barrier_and_abort( + " Error: expandYlm='pyscf' only compatible with " + "angular='spherical'. Aborting.\n"); + } + } + + if (sph == "cartesian" || Morder == "Gamess") { + expandlm = CARTESIAN_EXPAND; + addsignforM = 0; + } + + if (Morder == "Dirac") { + expandlm = DIRAC_CARTESIAN_EXPAND; + addsignforM = 0; + if (sph != "cartesian") + myComm->barrier_and_abort( + " Error: expandYlm='Dirac' only compatible with " + "angular='cartesian'. 
Aborting\n"); + } + app_log() << R"(" << std::endl; + + return true; +} + +template +std::unique_ptr +AOBasisBuilderT::createAOSet(xmlNodePtr cur) +{ + ReportEngine PRE("AtomicBasisBuilder", "createAOSet(xmlNodePtr)"); + app_log() << " AO BasisSet for " << elementType << "\n"; + + if (expandlm != CARTESIAN_EXPAND) { + if (addsignforM) + app_log() << " Spherical Harmonics contain (-1)^m factor" + << std::endl; + else + app_log() << " Spherical Harmonics DO NOT contain (-1)^m factor" + << std::endl; + } + + switch (expandlm) { + case (GAUSSIAN_EXPAND): + app_log() << " Angular momentum m expanded according to Gaussian" + << std::endl; + break; + case (NATURAL_EXPAND): + app_log() << " Angular momentum m expanded as -l, ... ,l" + << std::endl; + break; + case (MOD_NATURAL_EXPAND): + app_log() << " Angular momentum m expanded as -l, ... ,l, with the " + "exception of L=1 (1,-1,0)" + << std::endl; + break; + case (CARTESIAN_EXPAND): + app_log() << " Angular momentum expanded in cartesian functions x^lx " + "y^ly z^lz according to Gamess" + << std::endl; + break; + case (DIRAC_CARTESIAN_EXPAND): + app_log() << " Angular momentum expanded in cartesian functions in " + "DIRAC ordering" + << std::endl; + break; + default: + app_log() << " Angular momentum m is explicitly given." 
<< std::endl; + } + + QuantumNumberType nlms; + std::string rnl; + int Lmax(0); // maxmimum angular momentum of this center + int num(0); // the number of localized basis functions of this center + // process the basic property: maximun angular momentum, the number of basis + // functions to be added + std::vector radGroup; + xmlNodePtr cur1 = cur->xmlChildrenNode; + xmlNodePtr gptr = 0; + while (cur1 != NULL) { + std::string cname1((const char*)(cur1->name)); + if (cname1 == "basisGroup") { + radGroup.push_back(cur1); + const int l = std::stoi(getXMLAttributeValue(cur1, "l")); + Lmax = std::max(Lmax, l); + // expect that only Rnl is given + if (expandlm == CARTESIAN_EXPAND || + expandlm == DIRAC_CARTESIAN_EXPAND) + num += (l + 1) * (l + 2) / 2; + else if (expandlm) + num += 2 * l + 1; + else + num++; + } + else if (cname1 == "grid") { + gptr = cur1; + } + cur1 = cur1->next; + } + + // create a new set of atomic orbitals sharing a center with (Lmax, num) + // if(addsignforM) the basis function has (-1)^m sqrt(2)Re(Ylm) + auto aos = std::make_unique(Lmax, addsignforM); + aos->LM.resize(num); + aos->NL.resize(num); + + // Now, add distinct Radial Orbitals and (l,m) channels + RadialOrbitalSetBuilder radFuncBuilder(myComm, *aos); + radFuncBuilder.Normalized = (Normalized == "yes"); + radFuncBuilder.addGrid( + gptr, basisType); // assign a radial grid for the new center + std::vector::iterator it(radGroup.begin()); + std::vector::iterator it_end(radGroup.end()); + std::vector all_nl; + while (it != it_end) { + cur1 = (*it); + xmlAttrPtr att = cur1->properties; + while (att != NULL) { + std::string aname((const char*)(att->name)); + if (aname == "rid" || aname == "id") + // accept id/rid + { + rnl = (const char*)(att->children->content); + } + else { + std::map::iterator iit = nlms_id.find(aname); + if (iit != nlms_id.end()) + // valid for n,l,m,s + { + nlms[(*iit).second] = + atoi((const char*)(att->children->content)); + } + } + att = att->next; + } + // add Ylm 
channels + app_log() << " R(n,l,m,s) " << nlms[0] << " " << nlms[1] << " " + << nlms[2] << " " << nlms[3] << std::endl; + std::map::iterator rnl_it = RnlID.find(rnl); + if (rnl_it == RnlID.end()) { + int nl = aos->RnlID.size(); + if (radFuncBuilder.addRadialOrbital(cur1, basisType, nlms)) + RnlID[rnl] = nl; + all_nl.push_back(nl); + } + else { + all_nl.push_back((*rnl_it).second); + } + ++it; + } + + if (expandYlm(aos.get(), all_nl, expandlm) != num) + myComm->barrier_and_abort( + "expandYlm doesn't match the number of basis."); + radFuncBuilder.finalize(); + // aos->Rmax can be set small + // aos->setRmax(0); + aos->setBasisSetSize(-1); + app_log() << " Maximum Angular Momentum = " << aos->Ylm.lmax() + << std::endl + << " Number of Radial functors = " << aos->RnlID.size() + << std::endl + << " Basis size = " << aos->getBasisSetSize() + << "\n\n"; + return aos; +} + +template +std::unique_ptr +AOBasisBuilderT::createAOSetH5(hdf_archive& hin) +{ + ReportEngine PRE("AOBasisBuilderT:", "createAOSetH5(std::string)"); + app_log() << " AO BasisSet for " << elementType << "\n"; + + if (expandlm != CARTESIAN_EXPAND) { + if (addsignforM) + app_log() << " Spherical Harmonics contain (-1)^m factor" + << std::endl; + else + app_log() << " Spherical Harmonics DO NOT contain (-1)^m factor" + << std::endl; + } + + switch (expandlm) { + case (GAUSSIAN_EXPAND): + app_log() << " Angular momentum m expanded according to Gaussian" + << std::endl; + break; + case (NATURAL_EXPAND): + app_log() << " Angular momentum m expanded as -l, ... ,l" + << std::endl; + break; + case (MOD_NATURAL_EXPAND): + app_log() << " Angular momentum m expanded as -l, ... 
,l, with the " + "exception of L=1 (1,-1,0)" + << std::endl; + break; + case (CARTESIAN_EXPAND): + app_log() << " Angular momentum expanded in cartesian functions x^lx " + "y^ly z^lz according to Gamess" + << std::endl; + break; + case (DIRAC_CARTESIAN_EXPAND): + app_log() << " Angular momentum expanded in cartesian functions in " + "DIRAC ordering" + << std::endl; + break; + default: + app_log() << " Angular momentum m is explicitly given." << std::endl; + } + + QuantumNumberType nlms; + std::string rnl; + int Lmax(0); // maxmimum angular momentum of this center + int num(0); // the number of localized basis functions of this center + + int numbasisgroups(0); + if (myComm->rank() == 0) { + if (!hin.readEntry(numbasisgroups, "NbBasisGroups")) + PRE.error( + "Could not read NbBasisGroups in H5; Probably Corrupt H5 file", + true); + } + myComm->bcast(numbasisgroups); + + for (int i = 0; i < numbasisgroups; i++) { + std::string basisGroupID = "basisGroup" + std::to_string(i); + int l(0); + if (myComm->rank() == 0) { + hin.push(basisGroupID); + hin.read(l, "l"); + hin.pop(); + } + myComm->bcast(l); + + Lmax = std::max(Lmax, l); + // expect that only Rnl is given + if (expandlm == CARTESIAN_EXPAND || expandlm == DIRAC_CARTESIAN_EXPAND) + num += (l + 1) * (l + 2) / 2; + else if (expandlm) + num += 2 * l + 1; + else + num++; + } + + // create a new set of atomic orbitals sharing a center with (Lmax, num) + // if(addsignforM) the basis function has (-1)^m sqrt(2)Re(Ylm) + auto aos = std::make_unique(Lmax, addsignforM); + aos->LM.resize(num); + aos->NL.resize(num); + + // Now, add distinct Radial Orbitals and (l,m) channels + RadialOrbitalSetBuilder radFuncBuilder(myComm, *aos); + radFuncBuilder.Normalized = (Normalized == "yes"); + radFuncBuilder.addGridH5(hin); // assign a radial grid for the new center + std::vector all_nl; + for (int i = 0; i < numbasisgroups; i++) { + std::string basisGroupID = "basisGroup" + std::to_string(i); + if (myComm->rank() == 0) { + 
hin.push(basisGroupID); + hin.read(rnl, "rid"); + hin.read(nlms[0], "n"); + hin.read(nlms[1], "l"); + } + myComm->bcast(rnl); + myComm->bcast(nlms[0]); + myComm->bcast(nlms[1]); + + // add Ylm channels + app_log() << " R(n,l,m,s) " << nlms[0] << " " << nlms[1] << " " + << nlms[2] << " " << nlms[3] << std::endl; + std::map::iterator rnl_it = RnlID.find(rnl); + if (rnl_it == RnlID.end()) { + int nl = aos->RnlID.size(); + if (radFuncBuilder.addRadialOrbitalH5(hin, basisType, nlms)) + RnlID[rnl] = nl; + all_nl.push_back(nl); + } + else { + all_nl.push_back((*rnl_it).second); + } + + if (myComm->rank() == 0) + hin.pop(); + } + + if (expandYlm(aos.get(), all_nl, expandlm) != num) + myComm->barrier_and_abort( + "expandYlm doesn't match the number of basis."); + radFuncBuilder.finalize(); + // aos->Rmax can be set small + // aos->setRmax(0); + aos->setBasisSetSize(-1); + app_log() << " Maximum Angular Momentum = " << aos->Ylm.lmax() + << std::endl + << " Number of Radial functors = " << aos->RnlID.size() + << std::endl + << " Basis size = " << aos->getBasisSetSize() + << "\n\n"; + return aos; +} + +template +int +AOBasisBuilderT::expandYlm( + COT* aos, std::vector& all_nl, int expandlm) +{ + int num = 0; + if (expandlm == GAUSSIAN_EXPAND) { + app_log() << "Expanding Ylm according to Gaussian98" << std::endl; + for (int nl = 0; nl < aos->RnlID.size(); nl++) { + int l = aos->RnlID[nl][q_l]; + app_log() << "Adding " << 2 * l + 1 + << " spherical orbitals for l= " << l << std::endl; + switch (l) { + case (0): + aos->LM[num] = aos->Ylm.index(0, 0); + aos->NL[num] = nl; + num++; + break; + case (1): // px(1),py(-1),pz(0) + aos->LM[num] = aos->Ylm.index(1, 1); + aos->NL[num] = nl; + num++; + aos->LM[num] = aos->Ylm.index(1, -1); + aos->NL[num] = nl; + num++; + aos->LM[num] = aos->Ylm.index(1, 0); + aos->NL[num] = nl; + num++; + break; + default: // 0,1,-1,2,-2,...,l,-l + aos->LM[num] = aos->Ylm.index(l, 0); + aos->NL[num] = nl; + num++; + for (int tm = 1; tm <= l; tm++) { + 
aos->LM[num] = aos->Ylm.index(l, tm); + aos->NL[num] = nl; + num++; + aos->LM[num] = aos->Ylm.index(l, -tm); + aos->NL[num] = nl; + num++; + } + break; + } + } + } + else if (expandlm == MOD_NATURAL_EXPAND) { + app_log() + << "Expanding Ylm as L=1 as (1,-1,0) and L>1 as -l,-l+1,...,l-1,l" + << std::endl; + for (int nl = 0; nl < aos->RnlID.size(); nl++) { + int l = aos->RnlID[nl][q_l]; + app_log() << " Adding " << 2 * l + 1 << " spherical orbitals" + << std::endl; + if (l == 1) { + // px(1),py(-1),pz(0) + aos->LM[num] = aos->Ylm.index(1, 1); + aos->NL[num] = nl; + num++; + aos->LM[num] = aos->Ylm.index(1, -1); + aos->NL[num] = nl; + num++; + aos->LM[num] = aos->Ylm.index(1, 0); + aos->NL[num] = nl; + num++; + } + else { + for (int tm = -l; tm <= l; tm++, num++) { + aos->LM[num] = aos->Ylm.index(l, tm); + aos->NL[num] = nl; + } + } + } + } + else if (expandlm == NATURAL_EXPAND) { + app_log() << "Expanding Ylm as -l,-l+1,...,l-1,l" << std::endl; + for (int nl = 0; nl < aos->RnlID.size(); nl++) { + int l = aos->RnlID[nl][q_l]; + app_log() << " Adding " << 2 * l + 1 << " spherical orbitals" + << std::endl; + for (int tm = -l; tm <= l; tm++, num++) { + aos->LM[num] = aos->Ylm.index(l, tm); + aos->NL[num] = nl; + } + } + } + else if (expandlm == CARTESIAN_EXPAND) { + app_log() << "Expanding Ylm (angular function) according to Gamess " + "using cartesian gaussians" + << std::endl; + for (int nl = 0; nl < aos->RnlID.size(); nl++) { + int l = aos->RnlID[nl][q_l]; + app_log() << "Adding " << (l + 1) * (l + 2) / 2 + << " cartesian gaussian orbitals for l= " << l + << std::endl; + int nbefore = 0; + for (int i = 0; i < l; i++) + nbefore += (i + 1) * (i + 2) / 2; + for (int i = 0; i < (l + 1) * (l + 2) / 2; i++) { + aos->LM[num] = nbefore + i; + aos->NL[num] = nl; + num++; + } + } + } + else if (expandlm == DIRAC_CARTESIAN_EXPAND) { + app_log() << "Expanding Ylm (angular function) according to DIRAC " + "using cartesian gaussians" + << std::endl; + for (int nl = 0; nl < 
aos->RnlID.size(); nl++) { + int l = aos->RnlID[nl][q_l]; + app_log() << "Adding " << (l + 1) * (l + 2) / 2 + << " cartesian gaussian orbitals for l= " << l + << std::endl; + int nbefore = 0; + for (int i = 0; i < l; i++) + nbefore += (i + 1) * (i + 2) / 2; + switch (l) { + case (0): + aos->LM[num] = nbefore + 0; + aos->NL[num] = nl; + num++; + break; + case (1): + aos->LM[num] = nbefore + 0; + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 1; + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 2; + aos->NL[num] = nl; + num++; + break; + case (2): + aos->LM[num] = nbefore + 0; // xx + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 3; // xy + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 4; // xz + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 1; // yy + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 5; // yz + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 2; // zz + aos->NL[num] = nl; + num++; + break; + case (3): + aos->LM[num] = nbefore + 0; // xxx + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 3; // xxy + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 4; // xxz + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 5; // xyy + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 9; // xyz + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 7; // xzz + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 1; // yyy + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 6; // yyz + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 8; // yzz + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 2; // zzz + aos->NL[num] = nl; + num++; + break; + case (4): + aos->LM[num] = nbefore + 0; // 400 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 3; // 310 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 4; // 301 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 9; // 220 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 12; // 211 + aos->NL[num] = 
nl; + num++; + aos->LM[num] = nbefore + 10; // 202 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 5; // 130 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 13; // 121 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 14; // 112 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 7; // 103 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 1; // 040 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 6; // 031 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 11; // 022 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 8; // 013 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 2; // 004 + aos->NL[num] = nl; + num++; + break; + case (5): + aos->LM[num] = nbefore + 0; // 500 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 3; // 410 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 4; // 401 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 9; // 320 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 15; // 311 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 10; // 302 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 11; // 230 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 18; // 221 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 19; // 212 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 13; // 203 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 5; // 140 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 16; // 131 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 20; // 122 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 17; // 113 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 7; // 104 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 1; // 050 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 6; // 041 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 12; // 032 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 14; // 023 + aos->NL[num] = nl; + num++; + 
aos->LM[num] = nbefore + 8; // 014 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 2; // 005 + aos->NL[num] = nl; + num++; + break; + case (6): + aos->LM[num] = nbefore + 0; // 600 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 3; // 510 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 4; // 501 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 9; // 420 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 15; // 411 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 10; // 402 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 18; // 330 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 21; // 321 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 22; // 312 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 19; // 303 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 11; // 240 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 23; // 231 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 27; // 222 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 25; // 213 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 13; // 204 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 5; // 150 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 16; // 141 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 24; // 132 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 26; // 123 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 17; // 114 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 7; // 105 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 1; // 060 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 6; // 051 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 12; // 042 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 20; // 033 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 14; // 024 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 8; // 015 + aos->NL[num] = nl; + num++; + aos->LM[num] = 
nbefore + 2; // 006 + aos->NL[num] = nl; + num++; + break; + default: + myComm->barrier_and_abort( + "Cartesian Tensor only defined up to Lmax=6. Aborting\n"); + break; + } + } + } + else { + for (int ind = 0; ind < all_nl.size(); ind++) { + int nl = all_nl[ind]; + int l = aos->RnlID[nl][q_l]; + int m = aos->RnlID[nl][q_m]; + // assign the index for real Spherical Harmonic with (l,m) + aos->LM[num] = aos->Ylm.index(l, m); + // assign the index for radial orbital with (n,l) + aos->NL[num] = nl; + // increment number of basis functions + num++; + } + } + return num; +} + +template class AOBasisBuilderT, + SoaCartesianTensor, double>>; +template class AOBasisBuilderT, + SoaCartesianTensor, std::complex>>; +template class AOBasisBuilderT, + SoaCartesianTensor, float>>; +template class AOBasisBuilderT, + SoaCartesianTensor, std::complex>>; + +template class AOBasisBuilderT, + SoaSphericalTensor, double>>; +template class AOBasisBuilderT, + SoaSphericalTensor, std::complex>>; +template class AOBasisBuilderT, + SoaSphericalTensor, float>>; +template class AOBasisBuilderT, + SoaSphericalTensor, std::complex>>; + +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, + SoaCartesianTensor, double>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, + SoaCartesianTensor, std::complex>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, + SoaCartesianTensor, float>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, + SoaCartesianTensor, std::complex>>; + +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, + SoaSphericalTensor, double>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, + SoaSphericalTensor, std::complex>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, + SoaSphericalTensor, float>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, + SoaSphericalTensor, std::complex>>; + +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, + SoaCartesianTensor, double>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, + 
SoaCartesianTensor, std::complex>>; +template class AOBasisBuilderT>, SoaCartesianTensor, float>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, + SoaCartesianTensor, std::complex>>; + +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, + SoaSphericalTensor, double>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, + SoaSphericalTensor, std::complex>>; +template class AOBasisBuilderT>, SoaSphericalTensor, float>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, + SoaSphericalTensor, std::complex>>; + +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.h b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.h new file mode 100644 index 0000000000..144b2b4dc9 --- /dev/null +++ b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.h @@ -0,0 +1,75 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + + +#ifndef QMCPLUSPLUS_ATOMICORBITALBUILDERT_H +#define QMCPLUSPLUS_ATOMICORBITALBUILDERT_H + + +#include "Message/MPIObjectBase.h" +#include "hdf/hdf_archive.h" +#include "QMCWaveFunctions/SPOSet.h" + +namespace qmcplusplus +{ +/** atomic basisset builder + * @tparam COT, CenteredOrbitalType = SoaAtomicBasisSet + * + * Reimplement AtomiSPOSetBuilder.h + */ +template +class AOBasisBuilderT : public MPIObjectBase +{ +public: + enum + { + DONOT_EXPAND = 0, + GAUSSIAN_EXPAND = 1, + NATURAL_EXPAND, + CARTESIAN_EXPAND, + MOD_NATURAL_EXPAND, + DIRAC_CARTESIAN_EXPAND + }; + +private: + bool addsignforM; + int expandlm; + std::string Morder; + std::string sph; + std::string basisType; + std::string elementType; + std::string Normalized; + + ///map for the radial orbitals + std::map RnlID; + + ///map for (n,l,m,s) to its quantum number index + std::map nlms_id; + +public: + AOBasisBuilderT(const std::string& eName, Communicate* comm); + + bool put(xmlNodePtr cur); + bool putH5(hdf_archive& hin); + + SPOSet* createSPOSetFromXML(xmlNodePtr cur) { return 0; } + + std::unique_ptr createAOSet(xmlNodePtr cur); + std::unique_ptr createAOSetH5(hdf_archive& hin); + + int expandYlm(COT* aos, std::vector& all_nl, int expandlm = DONOT_EXPAND); +}; +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp index 1178491533..d41624e9db 100644 --- a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp +++ b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp @@ -81,8 +81,8 @@ CuspCorrectionConstructionT::removeSTypeOrbitals( // rc template 
void -CuspCorrectionConstructionT::computeRadialPhiBar(ParticleSet* targetP, - ParticleSet* sourceP, int curOrb_, int curCenter_, SPOSetT* Phi, +CuspCorrectionConstructionT::computeRadialPhiBar(ParticleSetT* targetP, + ParticleSetT* sourceP, int curOrb_, int curCenter_, SPOSetT* Phi, Vector& xgrid, Vector& rad_orb, const CuspCorrectionParametersT& data) { @@ -363,9 +363,9 @@ CuspCorrectionConstructionT::minimizeForRc(CuspCorrectionT& cusp, template void CuspCorrectionConstructionT::applyCuspCorrection( - const Matrix>& info, ParticleSet& targetPtcl, - ParticleSet& sourcePtcl, LCAOrbitalSetT& lcao, - SoaCuspCorrectionT& cusp, const std::string& id) + const Matrix>& info, + ParticleSetT& targetPtcl, ParticleSetT& sourcePtcl, + LCAOrbitalSetT& lcao, SoaCuspCorrectionT& cusp, const std::string& id) { const int num_centers = info.rows(); const int orbital_set_size = info.cols(); @@ -459,9 +459,9 @@ CuspCorrectionConstructionT::applyCuspCorrection( template void CuspCorrectionConstructionT::generateCuspInfo( - Matrix>& info, const ParticleSet& targetPtcl, - const ParticleSet& sourcePtcl, const LCAOrbitalSetT& lcao, - const std::string& id, Communicate& Comm) + Matrix>& info, + const ParticleSetT& targetPtcl, const ParticleSetT& sourcePtcl, + const LCAOrbitalSetT& lcao, const std::string& id, Communicate& Comm) { const int num_centers = info.rows(); const int orbital_set_size = info.cols(); @@ -507,8 +507,8 @@ CuspCorrectionConstructionT::generateCuspInfo( #pragma omp parallel for schedule(dynamic) collapse(2) for (int center_idx = 0; center_idx < num_centers; center_idx++) { for (int mo_idx = start_mo; mo_idx < end_mo; mo_idx++) { - ParticleSet localTargetPtcl(targetPtcl); - ParticleSet localSourcePtcl(sourcePtcl); + ParticleSetT localTargetPtcl(targetPtcl); + ParticleSetT localSourcePtcl(sourcePtcl); LCAOrbitalSetT local_phi("local_phi", std::unique_ptr::basis_type>( @@ -684,7 +684,7 @@ CuspCorrectionConstructionT::readCuspInfo(const std::string& cuspInfoFile, if (cname 
== "orbital") { int orb = -1; OhmmsAttributeSet orbAttrib; - QMCTraits::RealType a1(0.0), a2, a3, a4, a5, a6, a7, a8, a9; + RealType a1(0.0), a2, a3, a4, a5, a6, a7, a8, a9; orbAttrib.add(orb, "num"); orbAttrib.add(a1, "redo"); orbAttrib.add(a2, "C"); diff --git a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h index 300443c4a0..497898bfe8 100644 --- a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h +++ b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h @@ -22,7 +22,8 @@ class Communicate; namespace qmcplusplus { -class ParticleSet; +template +class ParticleSetT; template class OneMolecularOrbitalT @@ -64,7 +65,7 @@ class OneMolecularOrbitalT } OneMolecularOrbitalT( - ParticleSet* targetP, ParticleSet* sourceP, SPOSetPtr Phi) : + ParticleSetT* targetP, ParticleSetT* sourceP, SPOSetPtr Phi) : targetPtcl(targetP), sourcePtcl(sourceP), curOrb(0), @@ -91,9 +92,9 @@ class OneMolecularOrbitalT ValueVector lap1; /// target ParticleSet - ParticleSet* targetPtcl; + ParticleSetT* targetPtcl; /// source ParticleSet - ParticleSet* sourcePtcl; + ParticleSetT* sourcePtcl; /// Index of orbital int curOrb; @@ -134,8 +135,8 @@ class CuspCorrectionConstructionT /// Compute the radial part of the corrected wavefunction static void - computeRadialPhiBar(ParticleSet* targetP, ParticleSet* sourceP, int curOrb_, - int curCenter_, SPOSetT* Phi, Vector& xgrid, + computeRadialPhiBar(ParticleSetT* targetP, ParticleSetT* sourceP, + int curOrb_, int curCenter_, SPOSetT* Phi, Vector& xgrid, Vector& rad_orb, const CuspCorrectionParametersT& data); /** Ideal local energy at one point @@ -274,13 +275,13 @@ class CuspCorrectionConstructionT // Modifies orbital set lcwc static void applyCuspCorrection(const Matrix>& info, - ParticleSet& targetPtcl, ParticleSet& sourcePtcl, + ParticleSetT& targetPtcl, ParticleSetT& sourcePtcl, LCAOrbitalSetT& lcao, SoaCuspCorrectionT& cusp, const std::string& id); static void 
generateCuspInfo(Matrix>& info, - const ParticleSet& targetPtcl, const ParticleSet& sourcePtcl, + const ParticleSetT& targetPtcl, const ParticleSetT& sourcePtcl, const LCAOrbitalSetT& lcao, const std::string& id, Communicate& Comm); diff --git a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp index 4e5a3fd2b0..6b71c88bd3 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp +++ b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp @@ -1,205 +1,208 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2020 QMCPACK developers. // -// File developed by: Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories +// File developed by: Cody A. Melton, cmelton@sandia.gov, Sandia National +// Laboratories // -// File created by: Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories +// File created by: Cody A. 
Melton, cmelton@sandia.gov, Sandia National +// Laboratories ////////////////////////////////////////////////////////////////////////////////////// -#include "LCAOSpinorBuilder.h" -#include "QMCWaveFunctions/SpinorSet.h" +#include "LCAOSpinorBuilderT.h" + +#include "Message/CommOperators.h" #include "OhmmsData/AttributeSet.h" +#include "QMCWaveFunctions/SpinorSetT.h" #include "Utilities/ProgressReportEngine.h" #include "hdf/hdf_archive.h" -#include "Message/CommOperators.h" namespace qmcplusplus { -template -LCAOSpinorBuilderT::LCAOSpinorBuilderT(ParticleSet& els, ParticleSet& ions, Communicate* comm, xmlNodePtr cur) - : LCAOrbitalBuilder(els, ions, comm, cur) +template +LCAOSpinorBuilderT::LCAOSpinorBuilderT(ParticleSetT& els, + ParticleSetT& ions, Communicate* comm, xmlNodePtr cur) : + LCAOrbitalBuilderT(els, ions, comm, cur) { - ClassName = "LCAOSpinorBuilder"; + this->ClassName = "LCAOSpinorBuilder"; - if (h5_path == "") - myComm->barrier_and_abort("LCAOSpinorBuilder only works with href"); + if (this->h5_path == "") + this->myComm->barrier_and_abort( + "LCAOSpinorBuilder only works with href"); } -template -std::unique_ptr> LCAOSpinorBuilderT::createSPOSetFromXML(xmlNodePtr cur) +template +std::unique_ptr> +LCAOSpinorBuilderT::createSPOSetFromXML(xmlNodePtr cur) { - ReportEngine PRE(ClassName, "createSPO(xmlNodePtr)"); - std::string spo_name(""), optimize("no"); - std::string basisset_name("LCAOBSet"); - OhmmsAttributeSet spoAttrib; - spoAttrib.add(spo_name, "name"); - spoAttrib.add(optimize, "optimize"); - spoAttrib.add(basisset_name, "basisset"); - spoAttrib.put(cur); - - BasisSet_t* myBasisSet = nullptr; - if (basisset_map_.find(basisset_name) == basisset_map_.end()) - myComm->barrier_and_abort("basisset \"" + basisset_name + "\" cannot be found\n"); - else - myBasisSet = basisset_map_[basisset_name].get(); - - if (optimize == "yes") - app_log() << " SPOSet " << spo_name << " is optimizable\n"; - - std::unique_ptr upspo = - std::make_unique(spo_name + 
"_up", std::unique_ptr(myBasisSet->makeClone())); - std::unique_ptr dnspo = - std::make_unique(spo_name + "_dn", std::unique_ptr(myBasisSet->makeClone())); - - loadMO(*upspo, *dnspo, cur); - - //create spinor and register up/dn - auto spinor_set = std::make_unique(spo_name); - spinor_set->set_spos(std::move(upspo), std::move(dnspo)); - return spinor_set; + ReportEngine PRE(this->ClassName, "createSPO(xmlNodePtr)"); + std::string spo_name(""), optimize("no"); + std::string basisset_name("LCAOBSet"); + OhmmsAttributeSet spoAttrib; + spoAttrib.add(spo_name, "name"); + spoAttrib.add(optimize, "optimize"); + spoAttrib.add(basisset_name, "basisset"); + spoAttrib.put(cur); + + BasisSet_t* myBasisSet = nullptr; + if (this->basisset_map_.find(basisset_name) == this->basisset_map_.end()) + this->myComm->barrier_and_abort( + "basisset \"" + basisset_name + "\" cannot be found\n"); + else + myBasisSet = this->basisset_map_[basisset_name].get(); + + if (optimize == "yes") + app_log() << " SPOSet " << spo_name << " is optimizable\n"; + + auto upspo = std::make_unique>( + spo_name + "_up", std::unique_ptr(myBasisSet->makeClone())); + auto dnspo = std::make_unique>( + spo_name + "_dn", std::unique_ptr(myBasisSet->makeClone())); + + loadMO(*upspo, *dnspo, cur); + + // create spinor and register up/dn + auto spinor_set = std::make_unique>(spo_name); + spinor_set->set_spos(std::move(upspo), std::move(dnspo)); + return spinor_set; } -template -bool LCAOSpinorBuilderT::loadMO(LCAOrbitalSetT& up, LCAOrbitalSetT& dn, xmlNodePtr cur) +template +bool +LCAOSpinorBuilderT::loadMO( + LCAOrbitalSetT& up, LCAOrbitalSetT& dn, xmlNodePtr cur) { - bool PBC = false; - int norb = up.getBasisSetSize(); - std::string debugc("no"); - OhmmsAttributeSet aAttrib; - aAttrib.add(norb, "size"); - aAttrib.add(debugc, "debug"); - aAttrib.put(cur); - - up.setOrbitalSetSize(norb); - dn.setOrbitalSetSize(norb); - - xmlNodePtr occ_ptr = nullptr; - cur = cur->xmlChildrenNode; - while (cur != nullptr) - { - 
std::string cname((const char*)(cur->name)); - if (cname == "occupation") - { - occ_ptr = cur; + bool PBC = false; + int norb = up.getBasisSetSize(); + std::string debugc("no"); + OhmmsAttributeSet aAttrib; + aAttrib.add(norb, "size"); + aAttrib.add(debugc, "debug"); + aAttrib.put(cur); + + up.setOrbitalSetSize(norb); + dn.setOrbitalSetSize(norb); + + xmlNodePtr occ_ptr = nullptr; + cur = cur->xmlChildrenNode; + while (cur != nullptr) { + std::string cname((const char*)(cur->name)); + if (cname == "occupation") { + occ_ptr = cur; + } + cur = cur->next; } - cur = cur->next; - } - - hdf_archive hin(myComm); - if (myComm->rank() == 0) - { - if (!hin.open(h5_path, H5F_ACC_RDONLY)) - myComm->barrier_and_abort("LCAOSpinorBuilder::loadMO missing or incorrect path to H5 file."); - hin.push("PBC"); - PBC = false; - hin.read(PBC, "PBC"); - hin.close(); - } - myComm->bcast(PBC); - if (PBC) - myComm->barrier_and_abort("LCAOSpinorBuilder::loadMO lcao spinors not implemented in PBC"); - - bool success = putFromH5(up, dn, occ_ptr); - - - if (debugc == "yes") - { - app_log() << "UP: Single-particle orbital coefficients dims=" << up.C->rows() << " x " << up.C->cols() - << std::endl; - app_log() << *up.C << std::endl; - app_log() << "DN: Single-particle orbital coefficients dims=" << dn.C->rows() << " x " << dn.C->cols() - << std::endl; - app_log() << *dn.C << std::endl; - } - return success; -} -template -bool LCAOSpinorBuilderT::putFromH5(LCAOrbitalSetT& up, LCAOrbitalSetT& dn, xmlNodePtr occ_ptr) -{ - if (up.getBasisSetSize() == 0 || dn.getBasisSetSize() == 0) - { - myComm->barrier_and_abort("LCASpinorBuilder::loadMO detected ZERO BasisSetSize"); - return false; - } - - bool success = true; - hdf_archive hin(myComm); - if (myComm->rank() == 0) - { - if (!hin.open(h5_path, H5F_ACC_RDONLY)) -
myComm->barrier_and_abort("LCAOSpinorBuilder::putFromH5 missing or incorrect path to H5 file"); - - Matrix upReal; - Matrix upImag; - std::string setname = "/Super_Twist/eigenset_0"; - readRealMatrixFromH5(hin, setname, upReal); - setname += "_imag"; - readRealMatrixFromH5(hin, setname, upImag); - - assert(upReal.rows() == upImag.rows()); - assert(upReal.cols() == upImag.cols()); - - Matrix upTemp(upReal.rows(), upReal.cols()); - for (int i = 0; i < upTemp.rows(); i++) - { - for (int j = 0; j < upTemp.cols(); j++) - { - upTemp[i][j] = ValueType(upReal[i][j], upImag[i][j]); - } + hdf_archive hin(this->myComm); + if (this->myComm->rank() == 0) { + if (!hin.open(this->h5_path, H5F_ACC_RDONLY)) + this->myComm->barrier_and_abort("LCAOSpinorBuilder::loadMO missing " + "or incorrect path to H5 file."); + hin.push("PBC"); + PBC = false; + hin.read(PBC, "PBC"); + hin.close(); } - - Matrix dnReal; - Matrix dnImag; - setname = "/Super_Twist/eigenset_1"; - readRealMatrixFromH5(hin, setname, dnReal); - setname += "_imag"; - readRealMatrixFromH5(hin, setname, dnImag); - - assert(dnReal.rows() == dnImag.rows()); - assert(dnReal.cols() == dnImag.cols()); - - Matrix dnTemp(dnReal.rows(), dnReal.cols()); - for (int i = 0; i < dnTemp.rows(); i++) - { - for (int j = 0; j < dnTemp.cols(); j++) - { - dnTemp[i][j] = ValueType(dnReal[i][j], dnImag[i][j]); - } + this->myComm->bcast(PBC); + if (PBC) + this->myComm->barrier_and_abort( + "LCAOSpinorBuilder::loadMO lcao spinors not implemented in PBC"); + + bool success = putFromH5(up, dn, occ_ptr); + + if (debugc == "yes") { + app_log() << "UP: Single-particle orbital coefficients dims=" + << up.C->rows() << " x " << up.C->cols() << std::endl; + app_log() << *up.C << std::endl; + app_log() << "DN: Single-particle orbital coefficients dims=" + << dn.C->rows() << " x " << dn.C->cols() << std::endl; + app_log() << *dn.C << std::endl; } + return success; +} - assert(upReal.rows() == dnReal.rows()); - assert(upReal.cols() == dnReal.cols()); - - Occ.resize(upReal.rows()); - success = putOccupation(up, occ_ptr); - - int norbs = up.getOrbitalSetSize(); - - int n = 0, i = 0; - while (i < norbs) - { - if (Occ[n] > 
0.0) - { - std::copy(upTemp[n], upTemp[n + 1], (*up.C)[i]); - std::copy(dnTemp[n], dnTemp[n + 1], (*dn.C)[i]); - i++; - } - n++; +template +bool +LCAOSpinorBuilderT::putFromH5( + LCAOrbitalSetT& up, LCAOrbitalSetT& dn, xmlNodePtr occ_ptr) +{ + if (up.getBasisSetSize() == 0 || dn.getBasisSetSize() == 0) { + this->myComm->barrier_and_abort( + "LCASpinorBuilder::loadMO detected ZERO BasisSetSize"); + return false; } - hin.close(); - } + bool success = true; + hdf_archive hin(this->myComm); + if (this->myComm->rank() == 0) { + if (!hin.open(this->h5_path, H5F_ACC_RDONLY)) + this->myComm->barrier_and_abort( + "LCAOSpinorBuilder::putFromH5 missing or " + "incorrect path to H5 file"); + + Matrix upReal; + Matrix upImag; + std::string setname = "/Super_Twist/eigenset_0"; + this->readRealMatrixFromH5(hin, setname, upReal); + setname += "_imag"; + this->readRealMatrixFromH5(hin, setname, upImag); + + assert(upReal.rows() == upImag.rows()); + assert(upReal.cols() == upImag.cols()); + + Matrix upTemp(upReal.rows(), upReal.cols()); + for (int i = 0; i < upTemp.rows(); i++) { + for (int j = 0; j < upTemp.cols(); j++) { + upTemp[i][j] = ValueType(upReal[i][j], upImag[i][j]); + } + } + + Matrix dnReal; + Matrix dnImag; + setname = "/Super_Twist/eigenset_1"; + this->readRealMatrixFromH5(hin, setname, dnReal); + setname += "_imag"; + this->readRealMatrixFromH5(hin, setname, dnImag); + + assert(dnReal.rows() == dnImag.rows()); + assert(dnReal.cols() == dnImag.cols()); + + Matrix dnTemp(dnReal.rows(), dnReal.cols()); + for (int i = 0; i < dnTemp.rows(); i++) { + for (int j = 0; j < dnTemp.cols(); j++) { + dnTemp[i][j] = ValueType(dnReal[i][j], dnImag[i][j]); + } + } + + assert(upReal.rows() == dnReal.rows()); + assert(upReal.cols() == dnReal.cols()); + + this->Occ.resize(upReal.rows()); + success = this->putOccupation(up, occ_ptr); + + int norbs = up.getOrbitalSetSize(); + + int n = 0, i = 0; + while (i < norbs) { + if (this->Occ[n] > 0.0) { + std::copy(upTemp[n], upTemp[n + 1], 
(*up.C)[i]); + std::copy(dnTemp[n], dnTemp[n + 1], (*dn.C)[i]); + i++; + } + n++; + } + + hin.close(); + } #ifdef HAVE_MPI - myComm->comm.broadcast_n(up.C->data(), up.C->size()); - myComm->comm.broadcast_n(dn.C->data(), dn.C->size()); + this->myComm->comm.broadcast_n(up.C->data(), up.C->size()); + this->myComm->comm.broadcast_n(dn.C->data(), dn.C->size()); #endif - return success; + return success; } template class LCAOSpinorBuilderT>; diff --git a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h index 62b40b43b1..e23014f44d 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h +++ b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h @@ -1,64 +1,74 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2020 QMCPACK developers. // -// File developed by: Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories +// File developed by: Cody A. Melton, cmelton@sandia.gov, Sandia National +// Laboratories // -// File created by: Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories +// File created by: Cody A. Melton, cmelton@sandia.gov, Sandia National +// Laboratories ////////////////////////////////////////////////////////////////////////////////////// - -#ifndef QMCPLUSPLUS_SOA_LCAO_SPINOR_BUILDER_H -#define QMCPLUSPLUS_SOA_LCAO_SPINOR_BUILDER_H +#ifndef QMCPLUSPLUS_SOA_LCAO_SPINOR_BUILDERT_H +#define QMCPLUSPLUS_SOA_LCAO_SPINOR_BUILDERT_H #include "QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h" namespace qmcplusplus { /** @file LCAOSpinorBuidler.h - * - * Derives from LCAOrbitalBuilder.h. 
Overrides createSPOSetFromXML method to read up and - * down channel from HDF5 and construct SpinorSet - * - */ -template + * + * Derives from LCAOrbitalBuilder.h. Overrides createSPOSetFromXML method to + * read up and down channel from HDF5 and construct SpinorSet + * + */ +template class LCAOSpinorBuilderT : public LCAOrbitalBuilderT { public: - /** constructor + using BasisSet_t = typename LCAOrbitalBuilderT::BasisSet_t; + using RealType = typename LCAOrbitalBuilderT::RealType; + using ValueType = typename LCAOrbitalBuilderT::ValueType; + + /** constructor * \param els reference to the electrons * \param ions reference to the ions * * Derives from LCAOrbitalBuilder, but will require an h5_path to be set */ - LCAOSpinorBuilderT(ParticleSet& els, ParticleSet& ions, Communicate* comm, xmlNodePtr cur); + LCAOSpinorBuilderT(ParticleSetT& els, ParticleSetT& ions, + Communicate* comm, xmlNodePtr cur); - /** creates and returns SpinorSet - * - * Creates an up and down LCAOrbitalSet - * calls LCAOSpinorBuilder::loadMO to build up and down from the H5 file - * registers up and down into a SpinorSet and returns - */ - std::unique_ptr> createSPOSetFromXML(xmlNodePtr cur) override; + /** creates and returns SpinorSet + * + * Creates an up and down LCAOrbitalSet + * calls LCAOSpinorBuilder::loadMO to build up and down from the H5 file + * registers up and down into a SpinorSet and returns + */ + std::unique_ptr> + createSPOSetFromXML(xmlNodePtr cur) override; private: - /** load the up and down MO sets - * - * checks to make sure not PBC and initialize the Occ vector. - * call putFromH5 to parse the up and down MO coefficients - */ - bool loadMO(LCAOrbitalSetT& up, LCAOrbitalSetT& dn, xmlNodePtr cur); + /** load the up and down MO sets + * + * checks to make sure not PBC and initialize the Occ vector. 
+ * call putFromH5 to parse the up and down MO coefficients + */ + bool + loadMO(LCAOrbitalSetT& up, LCAOrbitalSetT& dn, xmlNodePtr cur); - /** parse h5 file for spinor info - * - * assumes the h5 file has KPTS_0/eigenset_0(_imag) for the real/imag part of up component of spinor - * assumes the h5 file as KPTS_0/eigenset_1(_imag) for the real/imag part of dn component of spinor - * reads the various coefficient matricies and broadcast - * after this, we have up/dn LCAOrbitalSet that can be registered to the SpinorSet - */ - bool putFromH5(LCAOrbitalSetT& up, LCAOrbitalSetT& dn, xmlNodePtr); + /** parse h5 file for spinor info + * + * assumes the h5 file has KPTS_0/eigenset_0(_imag) for the real/imag part + * of up component of spinor assumes the h5 file as KPTS_0/eigenset_1(_imag) + * for the real/imag part of dn component of spinor reads the various + * coefficient matricies and broadcast after this, we have up/dn + * LCAOrbitalSet that can be registered to the SpinorSet + */ + bool + putFromH5(LCAOrbitalSetT& up, LCAOrbitalSetT& dn, xmlNodePtr); }; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp index 4e1e4f6bd1..39ea3953ee 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp @@ -19,22 +19,20 @@ #include "LCAOrbitalBuilderT.h" -#include "AOBasisBuilder.h" +#include "AOBasisBuilderT.h" +#include "CPU/math.hpp" +#include "CuspCorrectionConstructionT.h" #include "LCAOrbitalSetT.h" +#include "LCAOrbitalSetWithCorrectionT.h" +#include "Message/CommOperators.h" #include "MultiFunctorAdapter.h" #include "MultiQuinticSpline1D.h" #include "Numerics/SoaCartesianTensor.h" #include "Numerics/SoaSphericalTensor.h" #include "OhmmsData/AttributeSet.h" #include "QMCWaveFunctions/SPOSetT.h" -#include "SoaAtomicBasisSet.h" -#include "SoaLocalizedBasisSet.h" -#if !defined(QMC_COMPLEX) -#include 
"CuspCorrectionConstructionT.h" -#include "LCAOrbitalSetWithCorrectionT.h" -#endif -#include "CPU/math.hpp" -#include "Message/CommOperators.h" +#include "SoaAtomicBasisSetT.h" +#include "SoaLocalizedBasisSetT.h" #include "Utilities/ProgressReportEngine.h" #include "hdf/hdf_archive.h" @@ -61,8 +59,8 @@ struct ao_traits { using radial_type = MultiQuinticSpline1D; using angular_type = SoaCartesianTensor; - using ao_type = SoaAtomicBasisSet; - using basis_type = SoaLocalizedBasisSet; + using ao_type = SoaAtomicBasisSetT; + using basis_type = SoaLocalizedBasisSetT; }; /** specialization for numerical-spherical AO */ @@ -71,8 +69,8 @@ struct ao_traits { using radial_type = MultiQuinticSpline1D; using angular_type = SoaSphericalTensor; - using ao_type = SoaAtomicBasisSet; - using basis_type = SoaLocalizedBasisSet; + using ao_type = SoaAtomicBasisSetT; + using basis_type = SoaLocalizedBasisSetT; }; /** specialization for GTO-cartesian AO */ @@ -81,8 +79,8 @@ struct ao_traits { using radial_type = MultiFunctorAdapter>; using angular_type = SoaCartesianTensor; - using ao_type = SoaAtomicBasisSet; - using basis_type = SoaLocalizedBasisSet; + using ao_type = SoaAtomicBasisSetT; + using basis_type = SoaLocalizedBasisSetT; }; /** specialization for GTO-cartesian AO */ @@ -91,8 +89,8 @@ struct ao_traits { using radial_type = MultiFunctorAdapter>; using angular_type = SoaSphericalTensor; - using ao_type = SoaAtomicBasisSet; - using basis_type = SoaLocalizedBasisSet; + using ao_type = SoaAtomicBasisSetT; + using basis_type = SoaLocalizedBasisSetT; }; /** specialization for STO-spherical AO */ @@ -101,8 +99,8 @@ struct ao_traits { using radial_type = MultiFunctorAdapter>; using angular_type = SoaSphericalTensor; - using ao_type = SoaAtomicBasisSet; - using basis_type = SoaLocalizedBasisSet; + using ao_type = SoaAtomicBasisSetT; + using basis_type = SoaLocalizedBasisSetT; }; inline bool @@ -112,8 +110,8 @@ is_same(const xmlChar* a, const char* b) } template 
-LCAOrbitalBuilderT::LCAOrbitalBuilderT( - ParticleSet& els, ParticleSet& ions, Communicate* comm, xmlNodePtr cur) : +LCAOrbitalBuilderT::LCAOrbitalBuilderT(ParticleSetT& els, + ParticleSetT& ions, Communicate* comm, xmlNodePtr cur) : SPOSetBuilderT("LCAO", comm), targetPtcl(els), sourcePtcl(ions), @@ -243,7 +241,7 @@ LCAOrbitalBuilderT::loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent) /** process atomicBasisSet per ion species */ switch (radialOrbType) { case (0): // numerical - app_log() << " LCAO: SoaAtomicBasisSet" + app_log() << " LCAO: SoaAtomicBasisSetT" << std::endl; if (ylm) myBasisSet = createBasisSet<0, 1>(cur); @@ -251,7 +249,7 @@ LCAOrbitalBuilderT::loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent) myBasisSet = createBasisSet<0, 0>(cur); break; case (1): // gto - app_log() << " LCAO: SoaAtomicBasisSet" + app_log() << " LCAO: SoaAtomicBasisSetT" << std::endl; if (ylm) myBasisSet = createBasisSet<1, 1>(cur); @@ -259,12 +257,12 @@ LCAOrbitalBuilderT::loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent) myBasisSet = createBasisSet<1, 0>(cur); break; case (2): // sto - app_log() << " LCAO: SoaAtomicBasisSet" + app_log() << " LCAO: SoaAtomicBasisSetT" << std::endl; myBasisSet = createBasisSet<2, 1>(cur); break; default: - PRE.error("Cannot construct SoaAtomicBasisSet.", true); + PRE.error("Cannot construct SoaAtomicBasisSetT.", true); break; } @@ -312,7 +310,7 @@ LCAOrbitalBuilderT::loadBasisSetFromH5(xmlNodePtr parent) /** process atomicBasisSet per ion species */ switch (radialOrbType) { case (0): // numerical - app_log() << " LCAO: SoaAtomicBasisSet" + app_log() << " LCAO: SoaAtomicBasisSetT" << std::endl; if (ylm) myBasisSet = createBasisSetH5<0, 1>(); @@ -320,7 +318,7 @@ LCAOrbitalBuilderT::loadBasisSetFromH5(xmlNodePtr parent) myBasisSet = createBasisSetH5<0, 0>(); break; case (1): // gto - app_log() << " LCAO: SoaAtomicBasisSet" + app_log() << " LCAO: SoaAtomicBasisSetT" << std::endl; if (ylm) myBasisSet = createBasisSetH5<1, 1>(); @@ 
-328,12 +326,12 @@ LCAOrbitalBuilderT::loadBasisSetFromH5(xmlNodePtr parent) myBasisSet = createBasisSetH5<1, 0>(); break; case (2): // sto - app_log() << " LCAO: SoaAtomicBasisSet" + app_log() << " LCAO: SoaAtomicBasisSetT" << std::endl; myBasisSet = createBasisSetH5<2, 1>(); break; default: - PRE.error("Cannot construct SoaAtomicBasisSet.", true); + PRE.error("Cannot construct SoaAtomicBasisSetT.", true); break; } return std::unique_ptr(myBasisSet); @@ -374,7 +372,7 @@ LCAOrbitalBuilderT::createBasisSet(xmlNodePtr cur) auto it = std::find( ao_built_centers.begin(), ao_built_centers.end(), elementType); if (it == ao_built_centers.end()) { - AOBasisBuilder any(elementType, this->myComm); + AOBasisBuilderT any(elementType, this->myComm); any.put(cur); auto aoBasis = any.createAOSet(cur); if (aoBasis) { @@ -453,7 +451,7 @@ LCAOrbitalBuilderT::createBasisSetH5() auto it = std::find( ao_built_centers.begin(), ao_built_centers.end(), elementType); if (it == ao_built_centers.end()) { - AOBasisBuilder any(elementType, this->myComm); + AOBasisBuilderT any(elementType, this->myComm); any.putH5(hin); auto aoBasis = any.createAOSetH5(hin); if (aoBasis) { @@ -478,6 +476,176 @@ LCAOrbitalBuilderT::createBasisSetH5() return mBasisSet; } +template <> +std::unique_ptr> +LCAOrbitalBuilderT::createWithCuspCorrection(xmlNodePtr cur, + const std::string& spo_name, std::string cusp_file, + std::unique_ptr&& myBasisSet) +{ + app_summary() << " Using cusp correction." << std::endl; + std::unique_ptr> sposet; + { + auto lcwc = std::make_unique>( + spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet)); + loadMO(lcwc->lcao, cur); + lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize()); + sposet = std::move(lcwc); + } + + // Create a temporary particle set to use for cusp initialization. + // The particle coordinates left at the end are unsuitable for further + // computations. 
The coordinates get set to nuclear positions, which + // leads to zero e-N distance, which causes a NaN in SoaAtomicBasisSet.h + // This problem only appears when the electron positions are specified + // in the input. The random particle placement step executes after this + // part of the code, overwriting the leftover positions from the cusp + // initialization. + ParticleSetT tmp_targetPtcl(targetPtcl); + + const int num_centers = sourcePtcl.getTotalNum(); + auto& lcwc = dynamic_cast&>(*sposet); + + const int orbital_set_size = lcwc.getOrbitalSetSize(); + Matrix> info( + num_centers, orbital_set_size); + + // set a default file name if not given + if (cusp_file.empty()) + cusp_file = spo_name + ".cuspInfo.xml"; + + bool file_exists( + this->myComm->rank() == 0 && std::ifstream(cusp_file).good()); + this->myComm->bcast(file_exists); + app_log() << " Cusp correction file " << cusp_file + << (file_exists ? " exits." : " doesn't exist.") << std::endl; + + // validate file if it exists + if (file_exists) { + bool valid = 0; + if (this->myComm->rank() == 0) + valid = CuspCorrectionConstructionT::readCuspInfo( + cusp_file, spo_name, orbital_set_size, info); + this->myComm->bcast(valid); + if (!valid) + this->myComm->barrier_and_abort( + "Invalid cusp correction file " + cusp_file); +#ifdef HAVE_MPI + for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++) + for (int center_idx = 0; center_idx < num_centers; center_idx++) + CuspCorrectionConstructionT::broadcastCuspInfo( + info(center_idx, orb_idx), *this->myComm, 0); +#endif + } + else { + CuspCorrectionConstructionT::generateCuspInfo(info, + tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name, *this->myComm); + if (this->myComm->rank() == 0) + CuspCorrectionConstructionT::saveCusp( + cusp_file, info, spo_name); + } + + CuspCorrectionConstructionT::applyCuspCorrection( + info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp, spo_name); + + return sposet; +} + +template <> +std::unique_ptr> 
+LCAOrbitalBuilderT::createWithCuspCorrection(xmlNodePtr cur, + const std::string& spo_name, std::string cusp_file, + std::unique_ptr&& myBasisSet) +{ + app_summary() << " Using cusp correction." << std::endl; + std::unique_ptr> sposet; + { + auto lcwc = std::make_unique>( + spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet)); + loadMO(lcwc->lcao, cur); + lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize()); + sposet = std::move(lcwc); + } + + // Create a temporary particle set to use for cusp initialization. + // The particle coordinates left at the end are unsuitable for further + // computations. The coordinates get set to nuclear positions, which + // leads to zero e-N distance, which causes a NaN in SoaAtomicBasisSet.h + // This problem only appears when the electron positions are specified + // in the input. The random particle placement step executes after this + // part of the code, overwriting the leftover positions from the cusp + // initialization. + ParticleSetT tmp_targetPtcl(targetPtcl); + + const int num_centers = sourcePtcl.getTotalNum(); + auto& lcwc = dynamic_cast&>(*sposet); + + const int orbital_set_size = lcwc.getOrbitalSetSize(); + Matrix> info( + num_centers, orbital_set_size); + + // set a default file name if not given + if (cusp_file.empty()) + cusp_file = spo_name + ".cuspInfo.xml"; + + bool file_exists( + this->myComm->rank() == 0 && std::ifstream(cusp_file).good()); + this->myComm->bcast(file_exists); + app_log() << " Cusp correction file " << cusp_file + << (file_exists ? " exits." 
: " doesn't exist.") << std::endl; + + // validate file if it exists + if (file_exists) { + bool valid = 0; + if (this->myComm->rank() == 0) + valid = CuspCorrectionConstructionT::readCuspInfo( + cusp_file, spo_name, orbital_set_size, info); + this->myComm->bcast(valid); + if (!valid) + this->myComm->barrier_and_abort( + "Invalid cusp correction file " + cusp_file); +#ifdef HAVE_MPI + for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++) + for (int center_idx = 0; center_idx < num_centers; center_idx++) + CuspCorrectionConstructionT::broadcastCuspInfo( + info(center_idx, orb_idx), *this->myComm, 0); +#endif + } + else { + CuspCorrectionConstructionT::generateCuspInfo(info, + tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name, *this->myComm); + if (this->myComm->rank() == 0) + CuspCorrectionConstructionT::saveCusp( + cusp_file, info, spo_name); + } + + CuspCorrectionConstructionT::applyCuspCorrection( + info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp, spo_name); + + return sposet; +} + +template <> +std::unique_ptr>> +LCAOrbitalBuilderT>::createWithCuspCorrection( + xmlNodePtr, const std::string&, std::string, std::unique_ptr&&) +{ + this->myComm->barrier_and_abort( + "LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not " + "supported on complex LCAO."); + return std::unique_ptr>>{}; +} + +template <> +std::unique_ptr>> +LCAOrbitalBuilderT>::createWithCuspCorrection( + xmlNodePtr, const std::string&, std::string, std::unique_ptr&&) +{ + this->myComm->barrier_and_abort( + "LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not " + "supported on complex LCAO."); + return std::unique_ptr>>{}; +} + template std::unique_ptr> LCAOrbitalBuilderT::createSPOSetFromXML(xmlNodePtr cur) @@ -501,18 +669,8 @@ LCAOrbitalBuilderT::createSPOSetFromXML(xmlNodePtr cur) std::unique_ptr> sposet; if (doCuspCorrection) { -#if defined(QMC_COMPLEX) - this->myComm->barrier_and_abort( - "LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not " - "supported 
on complex LCAO."); -#else - app_summary() << " Using cusp correction." << std::endl; - auto lcwc = std::make_unique>( - spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet)); - loadMO(lcwc->lcao, cur); - lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize()); - sposet = std::move(lcwc); -#endif + sposet = createWithCuspCorrection( + cur, spo_name, cusp_file, std::move(myBasisSet)); } else { auto lcos = std::make_unique>( @@ -521,65 +679,6 @@ LCAOrbitalBuilderT::createSPOSetFromXML(xmlNodePtr cur) sposet = std::move(lcos); } -#if !defined(QMC_COMPLEX) - if (doCuspCorrection) { - // Create a temporary particle set to use for cusp initialization. - // The particle coordinates left at the end are unsuitable for further - // computations. The coordinates get set to nuclear positions, which - // leads to zero e-N distance, which causes a NaN in SoaAtomicBasisSet.h - // This problem only appears when the electron positions are specified - // in the input. The random particle placement step executes after this - // part of the code, overwriting the leftover positions from the cusp - // initialization. - ParticleSet tmp_targetPtcl(targetPtcl); - - const int num_centers = sourcePtcl.getTotalNum(); - auto& lcwc = dynamic_cast&>(*sposet); - - const int orbital_set_size = lcwc.getOrbitalSetSize(); - Matrix> info( - num_centers, orbital_set_size); - - // set a default file name if not given - if (cusp_file.empty()) - cusp_file = spo_name + ".cuspInfo.xml"; - - bool file_exists( - this->myComm->rank() == 0 && std::ifstream(cusp_file).good()); - this->myComm->bcast(file_exists); - app_log() << " Cusp correction file " << cusp_file - << (file_exists ? " exits." 
: " doesn't exist.") << std::endl; - - // validate file if it exists - if (file_exists) { - bool valid = 0; - if (this->myComm->rank() == 0) - valid = CuspCorrectionConstructionT::readCuspInfo( - cusp_file, spo_name, orbital_set_size, info); - this->myComm->bcast(valid); - if (!valid) - this->myComm->barrier_and_abort( - "Invalid cusp correction file " + cusp_file); -#ifdef HAVE_MPI - for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++) - for (int center_idx = 0; center_idx < num_centers; center_idx++) - CuspCorrectionConstructionT::broadcastCuspInfo( - info(center_idx, orb_idx), *this->myComm, 0); -#endif - } - else { - CuspCorrectionConstructionT::generateCuspInfo(info, - tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name, *this->myComm); - if (this->myComm->rank() == 0) - CuspCorrectionConstructionT::saveCusp( - cusp_file, info, spo_name); - } - - CuspCorrectionConstructionT::applyCuspCorrection( - info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp, spo_name); - } -#endif - return sposet; } diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h index a746326df7..5cff3a5612 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h @@ -20,7 +20,6 @@ #ifndef QMCPLUSPLUS_SOA_LCAO_ORBITAL_BUILDERT_H #define QMCPLUSPLUS_SOA_LCAO_ORBITAL_BUILDERT_H -#include "QMCWaveFunctions/BasisSetBase.h" #include "QMCWaveFunctions/LCAO/LCAOrbitalSetT.h" #include "QMCWaveFunctions/SPOSetBuilderT.h" @@ -39,23 +38,24 @@ class LCAOrbitalBuilderT : public SPOSetBuilderT public: using BasisSet_t = typename LCAOrbitalSetT::basis_type; using RealType = typename LCAOrbitalSetT::RealType; + using ValueType = typename LCAOrbitalSetT::ValueType; using PosType = typename LCAOrbitalSetT::PosType; /** constructor * \param els reference to the electrons * \param ions reference to the ions */ - LCAOrbitalBuilderT( - ParticleSet& els, ParticleSet& ions, Communicate* comm, xmlNodePtr 
cur); + LCAOrbitalBuilderT(ParticleSetT& els, ParticleSetT& ions, + Communicate* comm, xmlNodePtr cur); ~LCAOrbitalBuilderT() override; std::unique_ptr> createSPOSetFromXML(xmlNodePtr cur) override; protected: /// target ParticleSet - ParticleSet& targetPtcl; + ParticleSetT& targetPtcl; /// source ParticleSet - ParticleSet& sourcePtcl; + ParticleSetT& sourcePtcl; /// localized basis set map std::map> basisset_map_; /// if true, add cusp correction to orbitals @@ -125,6 +125,10 @@ class LCAOrbitalBuilderT : public SPOSetBuilderT Matrix& Creal) const; private: + /// enable cusp correction + std::unique_ptr> + createWithCuspCorrection(xmlNodePtr cur, const std::string& spo_name, + std::string cusp_file, std::unique_ptr&& myBasisSet); /// load a basis set from XML input std::unique_ptr loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent); diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp index dba20478b7..6abd2d8b22 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp @@ -1,6 +1,6 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. // @@ -9,952 +9,948 @@ // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. 
////////////////////////////////////////////////////////////////////////////////////// - #include "LCAOrbitalSetT.h" -#include "Numerics/MatrixOperators.h" + #include "CPU/BLAS.hpp" +#include "Numerics/MatrixOperators.h" #include namespace qmcplusplus { -template +template struct LCAOrbitalSetT::LCAOMultiWalkerMem : public Resource { - LCAOMultiWalkerMem() : Resource("LCAOrbitalSetT") {} - LCAOMultiWalkerMem(const LCAOMultiWalkerMem&) : LCAOMultiWalkerMem() {} + LCAOMultiWalkerMem() : Resource("LCAOrbitalSetT") + { + } + LCAOMultiWalkerMem(const LCAOMultiWalkerMem&) : LCAOMultiWalkerMem() + { + } - std::unique_ptr makeClone() const override { return std::make_unique(*this); } + std::unique_ptr + makeClone() const override + { + return std::make_unique(*this); + } - OffloadMWVGLArray phi_vgl_v; // [5][NW][NumMO] - OffloadMWVGLArray basis_mw; // [5][NW][NumAO] - OffloadMWVArray phi_v; // [NW][NumMO] - OffloadMWVArray basis_v_mw; // [NW][NumMO] + OffloadMWVGLArray phi_vgl_v; // [5][NW][NumMO] + OffloadMWVGLArray basis_mw; // [5][NW][NumAO] + OffloadMWVArray phi_v; // [NW][NumMO] + OffloadMWVArray basis_v_mw; // [NW][NumMO] }; -template -LCAOrbitalSetT::LCAOrbitalSetT(const std::string& my_name, std::unique_ptr&& bs) - : SPOSetT(my_name), - BasisSetSize(bs ? 
bs->getBasisSetSize() : 0), - Identity(true), - basis_timer_(createGlobalTimer("LCAOrbitalSetT::Basis", timer_level_fine)), - mo_timer_(createGlobalTimer("LCAOrbitalSetT::MO", timer_level_fine)) -{ - if (!bs) - throw std::runtime_error("LCAOrbitalSetT cannot take nullptr as its basis set!"); - myBasisSet = std::move(bs); - Temp.resize(BasisSetSize); - Temph.resize(BasisSetSize); - Tempgh.resize(BasisSetSize); - this->OrbitalSetSize = BasisSetSize; - LCAOrbitalSetT::checkObject(); -} - -template -LCAOrbitalSetT::LCAOrbitalSetT(const LCAOrbitalSetT& in) - : SPOSetT(in), - myBasisSet(in.myBasisSet->makeClone()), - C(in.C), - BasisSetSize(in.BasisSetSize), - C_copy(in.C_copy), - Identity(in.Identity), - basis_timer_(in.basis_timer_), - mo_timer_(in.mo_timer_) -{ - Temp.resize(BasisSetSize); - Temph.resize(BasisSetSize); - Tempgh.resize(BasisSetSize); - if (!in.Identity) - { +template +LCAOrbitalSetT::LCAOrbitalSetT( + const std::string& my_name, std::unique_ptr&& bs) : + SPOSetT(my_name), + BasisSetSize(bs ? 
bs->getBasisSetSize() : 0), + Identity(true), + basis_timer_(createGlobalTimer("LCAOrbitalSetT::Basis", timer_level_fine)), + mo_timer_(createGlobalTimer("LCAOrbitalSetT::MO", timer_level_fine)) +{ + if (!bs) + throw std::runtime_error( + "LCAOrbitalSetT cannot take nullptr as its basis set!"); + myBasisSet = std::move(bs); + Temp.resize(BasisSetSize); + Temph.resize(BasisSetSize); + Tempgh.resize(BasisSetSize); + this->OrbitalSetSize = BasisSetSize; + LCAOrbitalSetT::checkObject(); +} + +template +LCAOrbitalSetT::LCAOrbitalSetT(const LCAOrbitalSetT& in) : + SPOSetT(in), + myBasisSet(in.myBasisSet->makeClone()), + C(in.C), + BasisSetSize(in.BasisSetSize), + C_copy(in.C_copy), + Identity(in.Identity), + basis_timer_(in.basis_timer_), + mo_timer_(in.mo_timer_) +{ + Temp.resize(BasisSetSize); + Temph.resize(BasisSetSize); + Tempgh.resize(BasisSetSize); + if (!in.Identity) { + Tempv.resize(this->OrbitalSetSize); + Temphv.resize(this->OrbitalSetSize); + Tempghv.resize(this->OrbitalSetSize); + } + LCAOrbitalSetT::checkObject(); +} + +template +void +LCAOrbitalSetT::setOrbitalSetSize(int norbs) +{ + if (C) + throw std::runtime_error("LCAOrbitalSetT::setOrbitalSetSize cannot " + "reset existing MO coefficients"); + + Identity = false; + this->OrbitalSetSize = norbs; + C = std::make_shared(this->OrbitalSetSize, BasisSetSize); Tempv.resize(this->OrbitalSetSize); Temphv.resize(this->OrbitalSetSize); Tempghv.resize(this->OrbitalSetSize); - } - LCAOrbitalSetT::checkObject(); + LCAOrbitalSetT::checkObject(); } -template -void LCAOrbitalSetT::setOrbitalSetSize(int norbs) +template +void +LCAOrbitalSetT::checkObject() const { - if (C) - throw std::runtime_error("LCAOrbitalSetT::setOrbitalSetSize cannot reset existing MO coefficients"); + if (Identity) { + if (this->OrbitalSetSize != BasisSetSize) + throw std::runtime_error( + "LCAOrbitalSetT::checkObject OrbitalSetSize and BasisSetSize " + "must be equal if Identity = true!"); + if (C) + throw 
std::runtime_error("LCAOrbitalSetT::checkObject C should be " + "nullptr if Identity = true!"); + } + else { + if (!C) + throw std::runtime_error("LCAOrbitalSetT::checkObject C should not " + "be nullptr if Identity = false!"); + if (this->OrbitalSetSize != C->rows()) + throw std::runtime_error("LCAOrbitalSetT::checkObject C rows " + "doesn't match OrbitalSetSize."); + if (BasisSetSize != C->cols()) + throw std::runtime_error("LCAOrbitalSetT::checkObject C columns " + "doesn't match BasisSetSize."); + } +} - Identity = false; - this->OrbitalSetSize = norbs; - C = std::make_shared(this->OrbitalSetSize, BasisSetSize); - Tempv.resize(this->OrbitalSetSize); - Temphv.resize(this->OrbitalSetSize); - Tempghv.resize(this->OrbitalSetSize); - LCAOrbitalSetT::checkObject(); +template +void +LCAOrbitalSetT::createResource(ResourceCollection& collection) const +{ + auto resource_index = + collection.addResource(std::make_unique()); } -template -void LCAOrbitalSetT::checkObject() const +template +void +LCAOrbitalSetT::acquireResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const { - if (Identity) - { - if (this->OrbitalSetSize != BasisSetSize) - throw std::runtime_error( - "LCAOrbitalSetT::checkObject OrbitalSetSize and BasisSetSize must be equal if Identity = true!"); - if (C) - throw std::runtime_error("LCAOrbitalSetT::checkObject C should be nullptr if Identity = true!"); - } - else - { - if (!C) - throw std::runtime_error("LCAOrbitalSetT::checkObject C should not be nullptr if Identity = false!"); - if (this->OrbitalSetSize != C->rows()) - throw std::runtime_error("LCAOrbitalSetT::checkObject C rows doesn't match OrbitalSetSize."); - if (BasisSetSize != C->cols()) - throw std::runtime_error("LCAOrbitalSetT::checkObject C columns doesn't match BasisSetSize."); - } + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + spo_leader.mw_mem_handle_ = collection.lendResource(); } -template -void 
LCAOrbitalSetT::createResource(ResourceCollection& collection) const +template +void +LCAOrbitalSetT::releaseResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const { - auto resource_index = collection.addResource(std::make_unique()); + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + collection.takebackResource(spo_leader.mw_mem_handle_); } -template -void LCAOrbitalSetT::acquireResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const +template +std::unique_ptr> +LCAOrbitalSetT::makeClone() const { - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.template getCastedLeader>(); - spo_leader.mw_mem_handle_ = collection.lendResource(); + return std::make_unique>(*this); } -template -void LCAOrbitalSetT::releaseResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const +template +void +LCAOrbitalSetT::evaluateValue( + const ParticleSetT& P, int iat, ValueVector& psi) { - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.template getCastedLeader>(); - collection.takebackResource(spo_leader.mw_mem_handle_); + if (Identity) { // PAY ATTENTION TO COMPLEX + myBasisSet->evaluateV(P, iat, psi.data()); + } + else { + Vector vTemp(Temp.data(0), BasisSetSize); + this->myBasisSet->evaluateV(P, iat, vTemp.data()); + assert(psi.size() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); + MatrixOperators::product(C_partial_view, vTemp, psi); + } +} + +/** Find a better place for other user classes, Matrix should be padded as well + */ +template +static void +Product_ABt(const VectorSoaContainer& A, const Matrix& B, + VectorSoaContainer& C) +{ + constexpr char transa = 't'; + constexpr char transb = 'n'; + constexpr T zone(1); + constexpr T zero(0); + BLAS::gemm(transa, transb, B.rows(), D, B.cols(), zone, B.data(), B.cols(), + A.data(), A.capacity(), zero, C.data(), 
C.capacity()); +} + +template +void +LCAOrbitalSetT::evaluate_vgl_impl(const vgl_type& temp, ValueVector& psi, + GradVector& dpsi, ValueVector& d2psi) const +{ + const size_t output_size = psi.size(); + std::copy_n(temp.data(0), output_size, psi.data()); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + for (size_t j = 0; j < output_size; j++) { + dpsi[j][0] = gx[j]; + dpsi[j][1] = gy[j]; + dpsi[j][2] = gz[j]; + } + std::copy_n(temp.data(4), output_size, d2psi.data()); +} + +template +void +LCAOrbitalSetT::evaluate_vgh_impl(const vgh_type& temp, ValueVector& psi, + GradVector& dpsi, HessVector& d2psi) const +{ + const size_t output_size = psi.size(); + std::copy_n(temp.data(0), output_size, psi.data()); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + const T* restrict hxx = temp.data(4); + const T* restrict hxy = temp.data(5); + const T* restrict hxz = temp.data(6); + const T* restrict hyy = temp.data(7); + const T* restrict hyz = temp.data(8); + const T* restrict hzz = temp.data(9); + + for (size_t j = 0; j < output_size; j++) { + dpsi[j][0] = gx[j]; + dpsi[j][1] = gy[j]; + dpsi[j][2] = gz[j]; + + d2psi[j](0, 0) = hxx[j]; + d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j]; + d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j]; + d2psi[j](1, 1) = hyy[j]; + d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j]; + d2psi[j](2, 2) = hzz[j]; + } +} + +template +void +LCAOrbitalSetT::evaluate_vghgh_impl(const vghgh_type& temp, int i, + ValueMatrix& psi, GradMatrix& dpsi, HessMatrix& d2psi, + GGGMatrix& dghpsi) const +{ + const size_t output_size = psi.cols(); + std::copy_n(temp.data(0), output_size, psi[i]); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + const T* restrict hxx = temp.data(4); + const T* restrict hxy = temp.data(5); + const T* restrict hxz = temp.data(6); + const T* restrict hyy 
= temp.data(7); + const T* restrict hyz = temp.data(8); + const T* restrict hzz = temp.data(9); + const T* restrict gh_xxx = temp.data(10); + const T* restrict gh_xxy = temp.data(11); + const T* restrict gh_xxz = temp.data(12); + const T* restrict gh_xyy = temp.data(13); + const T* restrict gh_xyz = temp.data(14); + const T* restrict gh_xzz = temp.data(15); + const T* restrict gh_yyy = temp.data(16); + const T* restrict gh_yyz = temp.data(17); + const T* restrict gh_yzz = temp.data(18); + const T* restrict gh_zzz = temp.data(19); + + for (size_t j = 0; j < output_size; j++) { + dpsi[i][j][0] = gx[j]; + dpsi[i][j][1] = gy[j]; + dpsi[i][j][2] = gz[j]; + + d2psi[i][j](0, 0) = hxx[j]; + d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j]; + d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j]; + d2psi[i][j](1, 1) = hyy[j]; + d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j]; + d2psi[i][j](2, 2) = hzz[j]; + + dghpsi[i][j][0](0, 0) = gh_xxx[j]; // x|xx + dghpsi[i][j][0](0, 1) = gh_xxy[j]; // x|xy + dghpsi[i][j][0](0, 2) = gh_xxz[j]; // x|xz + dghpsi[i][j][0](1, 0) = gh_xxy[j]; // x|yx = xxy + dghpsi[i][j][0](1, 1) = gh_xyy[j]; // x|yy + dghpsi[i][j][0](1, 2) = gh_xyz[j]; // x|yz + dghpsi[i][j][0](2, 0) = gh_xxz[j]; // x|zx = xxz + dghpsi[i][j][0](2, 1) = gh_xyz[j]; // x|zy = xyz + dghpsi[i][j][0](2, 2) = gh_xzz[j]; // x|zz + + dghpsi[i][j][1](0, 0) = gh_xxy[j]; // y|xx = xxy + dghpsi[i][j][1](0, 1) = gh_xyy[j]; // y|xy = xyy + dghpsi[i][j][1](0, 2) = gh_xyz[j]; // y|xz = xyz + dghpsi[i][j][1](1, 0) = gh_xyy[j]; // y|yx = xyy + dghpsi[i][j][1](1, 1) = gh_yyy[j]; // y|yy + dghpsi[i][j][1](1, 2) = gh_yyz[j]; // y|yz + dghpsi[i][j][1](2, 0) = gh_xyz[j]; // y|zx = xyz + dghpsi[i][j][1](2, 1) = gh_yyz[j]; // y|zy = yyz + dghpsi[i][j][1](2, 2) = gh_yzz[j]; // y|zz + + dghpsi[i][j][2](0, 0) = gh_xxz[j]; // z|xx = xxz + dghpsi[i][j][2](0, 1) = gh_xyz[j]; // z|xy = xyz + dghpsi[i][j][2](0, 2) = gh_xzz[j]; // z|xz = xzz + dghpsi[i][j][2](1, 0) = gh_xyz[j]; // z|yx = xyz + dghpsi[i][j][2](1, 1) = 
gh_yyz[j]; // z|yy = yyz + dghpsi[i][j][2](1, 2) = gh_yzz[j]; // z|yz = yzz + dghpsi[i][j][2](2, 0) = gh_xzz[j]; // z|zx = xzz + dghpsi[i][j][2](2, 1) = gh_yzz[j]; // z|zy = yzz + dghpsi[i][j][2](2, 2) = gh_zzz[j]; // z|zz + } +} + +template +void +LCAOrbitalSetT::evaluate_vghgh_impl(const vghgh_type& temp, ValueVector& psi, + GradVector& dpsi, HessVector& d2psi, GGGVector& dghpsi) const +{ + const size_t output_size = psi.size(); + std::copy_n(temp.data(0), output_size, psi.data()); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + const T* restrict hxx = temp.data(4); + const T* restrict hxy = temp.data(5); + const T* restrict hxz = temp.data(6); + const T* restrict hyy = temp.data(7); + const T* restrict hyz = temp.data(8); + const T* restrict hzz = temp.data(9); + const T* restrict gh_xxx = temp.data(10); + const T* restrict gh_xxy = temp.data(11); + const T* restrict gh_xxz = temp.data(12); + const T* restrict gh_xyy = temp.data(13); + const T* restrict gh_xyz = temp.data(14); + const T* restrict gh_xzz = temp.data(15); + const T* restrict gh_yyy = temp.data(16); + const T* restrict gh_yyz = temp.data(17); + const T* restrict gh_yzz = temp.data(18); + const T* restrict gh_zzz = temp.data(19); + + for (size_t j = 0; j < output_size; j++) { + dpsi[j][0] = gx[j]; + dpsi[j][1] = gy[j]; + dpsi[j][2] = gz[j]; + + d2psi[j](0, 0) = hxx[j]; + d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j]; + d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j]; + d2psi[j](1, 1) = hyy[j]; + d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j]; + d2psi[j](2, 2) = hzz[j]; + + dghpsi[j][0](0, 0) = gh_xxx[j]; // x|xx + dghpsi[j][0](0, 1) = gh_xxy[j]; // x|xy + dghpsi[j][0](0, 2) = gh_xxz[j]; // x|xz + dghpsi[j][0](1, 0) = gh_xxy[j]; // x|yx = xxy + dghpsi[j][0](1, 1) = gh_xyy[j]; // x|yy + dghpsi[j][0](1, 2) = gh_xyz[j]; // x|yz + dghpsi[j][0](2, 0) = gh_xxz[j]; // x|zx = xxz + dghpsi[j][0](2, 1) = gh_xyz[j]; // x|zy = xyz + dghpsi[j][0](2, 2) = 
gh_xzz[j]; // x|zz + + dghpsi[j][1](0, 0) = gh_xxy[j]; // y|xx = xxy + dghpsi[j][1](0, 1) = gh_xyy[j]; // y|xy = xyy + dghpsi[j][1](0, 2) = gh_xyz[j]; // y|xz = xyz + dghpsi[j][1](1, 0) = gh_xyy[j]; // y|yx = xyy + dghpsi[j][1](1, 1) = gh_yyy[j]; // y|yy + dghpsi[j][1](1, 2) = gh_yyz[j]; // y|yz + dghpsi[j][1](2, 0) = gh_xyz[j]; // y|zx = xyz + dghpsi[j][1](2, 1) = gh_yyz[j]; // y|zy = yyz + dghpsi[j][1](2, 2) = gh_yzz[j]; // y|zz + + dghpsi[j][2](0, 0) = gh_xxz[j]; // z|xx = xxz + dghpsi[j][2](0, 1) = gh_xyz[j]; // z|xy = xyz + dghpsi[j][2](0, 2) = gh_xzz[j]; // z|xz = xzz + dghpsi[j][2](1, 0) = gh_xyz[j]; // z|yx = xyz + dghpsi[j][2](1, 1) = gh_yyz[j]; // z|yy = yyz + dghpsi[j][2](1, 2) = gh_yzz[j]; // z|yz = yzz + dghpsi[j][2](2, 0) = gh_xzz[j]; // z|zx = xzz + dghpsi[j][2](2, 1) = gh_yzz[j]; // z|zy = yzz + dghpsi[j][2](2, 2) = gh_zzz[j]; // z|zz + } } -template -std::unique_ptr> LCAOrbitalSetT::makeClone() const { return std::make_unique>(*this); } +template +void +LCAOrbitalSetT::evaluate_ionderiv_v_row_impl( + const vgl_type& temp, GradVector& dpsi) const +{ + const size_t output_size = dpsi.size(); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + + for (size_t j = 0; j < output_size; j++) { + // As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property + // that + // for an atomic center, the ion gradient is the negative of the + // electron gradient. Hence minus signs for each of these. 
+ dpsi[j][0] = -gx[j]; + dpsi[j][1] = -gy[j]; + dpsi[j][2] = -gz[j]; + } +} -template -void LCAOrbitalSetT::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) +template +void +LCAOrbitalSetT::evaluateVGL(const ParticleSetT& P, int iat, + ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { - if (Identity) - { //PAY ATTENTION TO COMPLEX - myBasisSet->evaluateV(P, iat, psi.data()); - } - else - { - Vector vTemp(Temp.data(0), BasisSetSize); - this->myBasisSet->evaluateV(P, iat, vTemp.data()); - assert(psi.size() <= this->OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); - MatrixOperators::product(C_partial_view, vTemp, psi); - } -} - -/** Find a better place for other user classes, Matrix should be padded as well */ -template -static void Product_ABt(const VectorSoaContainer& A, const Matrix& B, VectorSoaContainer& C) -{ - constexpr char transa = 't'; - constexpr char transb = 'n'; - constexpr T zone(1); - constexpr T zero(0); - BLAS::gemm(transa, transb, B.rows(), D, B.cols(), zone, B.data(), B.cols(), A.data(), A.capacity(), zero, C.data(), - C.capacity()); -} - -template -void LCAOrbitalSetT::evaluate_vgl_impl(const vgl_type& temp, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) const -{ - const size_t output_size = psi.size(); - std::copy_n(temp.data(0), output_size, psi.data()); - const T* restrict gx = temp.data(1); - const T* restrict gy = temp.data(2); - const T* restrict gz = temp.data(3); - for (size_t j = 0; j < output_size; j++) - { - dpsi[j][0] = gx[j]; - dpsi[j][1] = gy[j]; - dpsi[j][2] = gz[j]; - } - std::copy_n(temp.data(4), output_size, d2psi.data()); -} - -template -void LCAOrbitalSetT::evaluate_vgh_impl(const vgh_type& temp, - ValueVector& psi, - GradVector& dpsi, - HessVector& d2psi) const -{ - const size_t output_size = psi.size(); - std::copy_n(temp.data(0), output_size, psi.data()); - const T* restrict gx = temp.data(1); - const T* restrict gy = temp.data(2); - const T* restrict gz 
= temp.data(3); - const T* restrict hxx = temp.data(4); - const T* restrict hxy = temp.data(5); - const T* restrict hxz = temp.data(6); - const T* restrict hyy = temp.data(7); - const T* restrict hyz = temp.data(8); - const T* restrict hzz = temp.data(9); - - for (size_t j = 0; j < output_size; j++) - { - dpsi[j][0] = gx[j]; - dpsi[j][1] = gy[j]; - dpsi[j][2] = gz[j]; - - d2psi[j](0, 0) = hxx[j]; - d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j]; - d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j]; - d2psi[j](1, 1) = hyy[j]; - d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j]; - d2psi[j](2, 2) = hzz[j]; - } -} - -template -void LCAOrbitalSetT::evaluate_vghgh_impl(const vghgh_type& temp, - int i, - ValueMatrix& psi, - GradMatrix& dpsi, - HessMatrix& d2psi, - GGGMatrix& dghpsi) const -{ - const size_t output_size = psi.cols(); - std::copy_n(temp.data(0), output_size, psi[i]); - const T* restrict gx = temp.data(1); - const T* restrict gy = temp.data(2); - const T* restrict gz = temp.data(3); - const T* restrict hxx = temp.data(4); - const T* restrict hxy = temp.data(5); - const T* restrict hxz = temp.data(6); - const T* restrict hyy = temp.data(7); - const T* restrict hyz = temp.data(8); - const T* restrict hzz = temp.data(9); - const T* restrict gh_xxx = temp.data(10); - const T* restrict gh_xxy = temp.data(11); - const T* restrict gh_xxz = temp.data(12); - const T* restrict gh_xyy = temp.data(13); - const T* restrict gh_xyz = temp.data(14); - const T* restrict gh_xzz = temp.data(15); - const T* restrict gh_yyy = temp.data(16); - const T* restrict gh_yyz = temp.data(17); - const T* restrict gh_yzz = temp.data(18); - const T* restrict gh_zzz = temp.data(19); - - for (size_t j = 0; j < output_size; j++) - { - dpsi[i][j][0] = gx[j]; - dpsi[i][j][1] = gy[j]; - dpsi[i][j][2] = gz[j]; - - d2psi[i][j](0, 0) = hxx[j]; - d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j]; - d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j]; - d2psi[i][j](1, 1) = hyy[j]; - d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j]; - 
d2psi[i][j](2, 2) = hzz[j]; - - dghpsi[i][j][0](0, 0) = gh_xxx[j]; //x|xx - dghpsi[i][j][0](0, 1) = gh_xxy[j]; //x|xy - dghpsi[i][j][0](0, 2) = gh_xxz[j]; //x|xz - dghpsi[i][j][0](1, 0) = gh_xxy[j]; //x|yx = xxy - dghpsi[i][j][0](1, 1) = gh_xyy[j]; //x|yy - dghpsi[i][j][0](1, 2) = gh_xyz[j]; //x|yz - dghpsi[i][j][0](2, 0) = gh_xxz[j]; //x|zx = xxz - dghpsi[i][j][0](2, 1) = gh_xyz[j]; //x|zy = xyz - dghpsi[i][j][0](2, 2) = gh_xzz[j]; //x|zz - - dghpsi[i][j][1](0, 0) = gh_xxy[j]; //y|xx = xxy - dghpsi[i][j][1](0, 1) = gh_xyy[j]; //y|xy = xyy - dghpsi[i][j][1](0, 2) = gh_xyz[j]; //y|xz = xyz - dghpsi[i][j][1](1, 0) = gh_xyy[j]; //y|yx = xyy - dghpsi[i][j][1](1, 1) = gh_yyy[j]; //y|yy - dghpsi[i][j][1](1, 2) = gh_yyz[j]; //y|yz - dghpsi[i][j][1](2, 0) = gh_xyz[j]; //y|zx = xyz - dghpsi[i][j][1](2, 1) = gh_yyz[j]; //y|zy = yyz - dghpsi[i][j][1](2, 2) = gh_yzz[j]; //y|zz - - dghpsi[i][j][2](0, 0) = gh_xxz[j]; //z|xx = xxz - dghpsi[i][j][2](0, 1) = gh_xyz[j]; //z|xy = xyz - dghpsi[i][j][2](0, 2) = gh_xzz[j]; //z|xz = xzz - dghpsi[i][j][2](1, 0) = gh_xyz[j]; //z|yx = xyz - dghpsi[i][j][2](1, 1) = gh_yyz[j]; //z|yy = yyz - dghpsi[i][j][2](1, 2) = gh_yzz[j]; //z|yz = yzz - dghpsi[i][j][2](2, 0) = gh_xzz[j]; //z|zx = xzz - dghpsi[i][j][2](2, 1) = gh_yzz[j]; //z|zy = yzz - dghpsi[i][j][2](2, 2) = gh_zzz[j]; //z|zz - } -} - -template -void LCAOrbitalSetT::evaluate_vghgh_impl(const vghgh_type& temp, - ValueVector& psi, - GradVector& dpsi, - HessVector& d2psi, - GGGVector& dghpsi) const -{ - const size_t output_size = psi.size(); - std::copy_n(temp.data(0), output_size, psi.data()); - const T* restrict gx = temp.data(1); - const T* restrict gy = temp.data(2); - const T* restrict gz = temp.data(3); - const T* restrict hxx = temp.data(4); - const T* restrict hxy = temp.data(5); - const T* restrict hxz = temp.data(6); - const T* restrict hyy = temp.data(7); - const T* restrict hyz = temp.data(8); - const T* restrict hzz = temp.data(9); - const T* restrict gh_xxx = temp.data(10); - 
const T* restrict gh_xxy = temp.data(11); - const T* restrict gh_xxz = temp.data(12); - const T* restrict gh_xyy = temp.data(13); - const T* restrict gh_xyz = temp.data(14); - const T* restrict gh_xzz = temp.data(15); - const T* restrict gh_yyy = temp.data(16); - const T* restrict gh_yyz = temp.data(17); - const T* restrict gh_yzz = temp.data(18); - const T* restrict gh_zzz = temp.data(19); - - for (size_t j = 0; j < output_size; j++) - { - dpsi[j][0] = gx[j]; - dpsi[j][1] = gy[j]; - dpsi[j][2] = gz[j]; - - d2psi[j](0, 0) = hxx[j]; - d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j]; - d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j]; - d2psi[j](1, 1) = hyy[j]; - d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j]; - d2psi[j](2, 2) = hzz[j]; - - dghpsi[j][0](0, 0) = gh_xxx[j]; //x|xx - dghpsi[j][0](0, 1) = gh_xxy[j]; //x|xy - dghpsi[j][0](0, 2) = gh_xxz[j]; //x|xz - dghpsi[j][0](1, 0) = gh_xxy[j]; //x|yx = xxy - dghpsi[j][0](1, 1) = gh_xyy[j]; //x|yy - dghpsi[j][0](1, 2) = gh_xyz[j]; //x|yz - dghpsi[j][0](2, 0) = gh_xxz[j]; //x|zx = xxz - dghpsi[j][0](2, 1) = gh_xyz[j]; //x|zy = xyz - dghpsi[j][0](2, 2) = gh_xzz[j]; //x|zz - - dghpsi[j][1](0, 0) = gh_xxy[j]; //y|xx = xxy - dghpsi[j][1](0, 1) = gh_xyy[j]; //y|xy = xyy - dghpsi[j][1](0, 2) = gh_xyz[j]; //y|xz = xyz - dghpsi[j][1](1, 0) = gh_xyy[j]; //y|yx = xyy - dghpsi[j][1](1, 1) = gh_yyy[j]; //y|yy - dghpsi[j][1](1, 2) = gh_yyz[j]; //y|yz - dghpsi[j][1](2, 0) = gh_xyz[j]; //y|zx = xyz - dghpsi[j][1](2, 1) = gh_xyy[j]; //y|xy = xyy - dghpsi[j][1](2, 2) = gh_yzz[j]; //y|zz - - dghpsi[j][2](0, 0) = gh_xzz[j]; //z|xx = xzz - dghpsi[j][2](0, 1) = gh_xyz[j]; //z|xy = xyz - dghpsi[j][2](0, 2) = gh_xzz[j]; //z|xz = xzz - dghpsi[j][2](1, 0) = gh_xyz[j]; //z|yx = xyz - dghpsi[j][2](1, 1) = gh_yyz[j]; //z|yy = yyz - dghpsi[j][2](1, 2) = gh_yzz[j]; //z|yz = yzz - dghpsi[j][2](2, 0) = gh_xzz[j]; //z|zx = xzz - dghpsi[j][2](2, 1) = gh_yzz[j]; //z|zy = yzz - dghpsi[j][2](2, 2) = gh_zzz[j]; //z|zz - } -} - -template -void 
LCAOrbitalSetT::evaluate_ionderiv_v_row_impl(const vgl_type& temp, GradVector& dpsi) const -{ - const size_t output_size = dpsi.size(); - const T* restrict gx = temp.data(1); - const T* restrict gy = temp.data(2); - const T* restrict gz = temp.data(3); - - for (size_t j = 0; j < output_size; j++) - { - //As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property that - // for an atomic center, the ion gradient is the negative of the elecron gradient. - // Hence minus signs for each of these. - dpsi[j][0] = -gx[j]; - dpsi[j][1] = -gy[j]; - dpsi[j][2] = -gz[j]; - } -} - -template -void LCAOrbitalSetT::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) -{ - //TAKE CARE OF IDENTITY - { - ScopedTimer local(basis_timer_); - myBasisSet->evaluateVGL(P, iat, Temp); - } - - if (Identity) - evaluate_vgl_impl(Temp, psi, dpsi, d2psi); - else - { - assert(psi.size() <= this->OrbitalSetSize); - { - ScopedTimer local(mo_timer_); - ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); - Product_ABt(Temp, C_partial_view, Tempv); - } - evaluate_vgl_impl(Tempv, psi, dpsi, d2psi); - } -} - -template -void LCAOrbitalSetT::mw_evaluateVGL(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.template getCastedLeader>(); - auto& phi_vgl_v = spo_leader.mw_mem_handle_.getResource().phi_vgl_v; - - phi_vgl_v.resize(QMCTraits::DIM_VGL, spo_list.size(), this->OrbitalSetSize); - mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v); - - const size_t nw = phi_vgl_v.size(1); - - //TODO: make this cleaner? 
- for (int iw = 0; iw < nw; iw++) - { - const size_t output_size = psi_v_list[iw].get().size(); - std::copy_n(phi_vgl_v.data_at(0, iw, 0), output_size, psi_v_list[iw].get().data()); - std::copy_n(phi_vgl_v.data_at(4, iw, 0), output_size, d2psi_v_list[iw].get().data()); - // grads are [dim, walker, orb] in phi_vgl_v - // [walker][orb, dim] in dpsi_v_list - for (size_t idim = 0; idim < QMCTraits::DIM; idim++) - BLAS::copy(output_size, phi_vgl_v.data_at(idim + 1, iw, 0), 1, &dpsi_v_list[iw].get().data()[0][idim], QMCTraits::DIM); - } -} - -template -void LCAOrbitalSetT::mw_evaluateVGLImplGEMM(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int iat, - OffloadMWVGLArray& phi_vgl_v) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.template getCastedLeader>(); - auto& basis_mw = spo_leader.mw_mem_handle_.getResource().basis_mw; - basis_mw.resize(QMCTraits::DIM_VGL, spo_list.size(), BasisSetSize); - - { - ScopedTimer local(basis_timer_); - myBasisSet->mw_evaluateVGL(P_list, iat, basis_mw); - } - - if (Identity) - { - // output_size can be smaller than BasisSetSize - const size_t output_size = phi_vgl_v.size(2); - const size_t nw = phi_vgl_v.size(1); - - for (size_t idim = 0; idim < QMCTraits::DIM_VGL; idim++) - for (int iw = 0; iw < nw; iw++) - std::copy_n(basis_mw.data_at(idim, iw, 0), output_size, phi_vgl_v.data_at(idim, iw, 0)); - } - else - { - const size_t requested_orb_size = phi_vgl_v.size(2); - assert(requested_orb_size <= this->OrbitalSetSize); + // TAKE CARE OF IDENTITY { - ScopedTimer local(mo_timer_); - ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize); - // TODO: make class for general blas interface in Platforms - // have instance of that class as member of LCAOrbitalSetT, call gemm through that - BLAS::gemm('T', 'N', - requested_orb_size, // MOs - spo_list.size() * QMCTraits::DIM_VGL, // walkers * DIM_VGL - BasisSetSize, // AOs - 1, C_partial_view.data(), BasisSetSize, 
basis_mw.data(), BasisSetSize, 0, phi_vgl_v.data(), - requested_orb_size); - } - } -} - -template -void LCAOrbitalSetT::mw_evaluateValue(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.template getCastedLeader>(); - auto& phi_v = spo_leader.mw_mem_handle_.getResource().phi_v; - phi_v.resize(spo_list.size(), this->OrbitalSetSize); - mw_evaluateValueImplGEMM(spo_list, P_list, iat, phi_v); - - const size_t output_size = phi_v.size(1); - const size_t nw = phi_v.size(0); - - for (int iw = 0; iw < nw; iw++) - std::copy_n(phi_v.data_at(iw, 0), output_size, psi_v_list[iw].get().data()); -} - -template -void LCAOrbitalSetT::mw_evaluateValueImplGEMM(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int iat, - OffloadMWVArray& phi_v) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.template getCastedLeader>(); - const size_t nw = spo_list.size(); - auto& basis_v_mw = spo_leader.mw_mem_handle_.getResource().basis_v_mw; - basis_v_mw.resize(nw, BasisSetSize); - - myBasisSet->mw_evaluateValue(P_list, iat, basis_v_mw); - - if (Identity) - { - std::copy_n(basis_v_mw.data_at(0, 0), this->OrbitalSetSize * nw, phi_v.data_at(0, 0)); - } - else - { - const size_t requested_orb_size = phi_v.size(1); - assert(requested_orb_size <= this->OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize); - BLAS::gemm('T', 'N', - requested_orb_size, // MOs - spo_list.size(), // walkers - BasisSetSize, // AOs - 1, C_partial_view.data(), BasisSetSize, basis_v_mw.data(), BasisSetSize, 0, phi_v.data(), - requested_orb_size); - } -} - -template -void LCAOrbitalSetT::mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const -{ - 
const size_t nw = spo_list.size(); - for (size_t iw = 0; iw < nw; iw++) - { - for (size_t iat = 0; iat < vp_list[iw].getTotalNum(); iat++) + ScopedTimer local(basis_timer_); + myBasisSet->evaluateVGL(P, iat, Temp); + } + + if (Identity) + evaluate_vgl_impl(Temp, psi, dpsi, d2psi); + else { + assert(psi.size() <= this->OrbitalSetSize); + { + ScopedTimer local(mo_timer_); + ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); + Product_ABt(Temp, C_partial_view, Tempv); + } + evaluate_vgl_impl(Tempv, psi, dpsi, d2psi); + } +} + +template +void +LCAOrbitalSetT::mw_evaluateVGL( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const +{ + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& phi_vgl_v = spo_leader.mw_mem_handle_.getResource().phi_vgl_v; + + phi_vgl_v.resize(QMCTraits::DIM_VGL, spo_list.size(), this->OrbitalSetSize); + mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v); + + const size_t nw = phi_vgl_v.size(1); + + // TODO: make this cleaner? 
+ for (int iw = 0; iw < nw; iw++) { + const size_t output_size = psi_v_list[iw].get().size(); + std::copy_n(phi_vgl_v.data_at(0, iw, 0), output_size, + psi_v_list[iw].get().data()); + std::copy_n(phi_vgl_v.data_at(4, iw, 0), output_size, + d2psi_v_list[iw].get().data()); + // grads are [dim, walker, orb] in phi_vgl_v + // [walker][orb, dim] in dpsi_v_list + for (size_t idim = 0; idim < QMCTraits::DIM; idim++) + BLAS::copy(output_size, phi_vgl_v.data_at(idim + 1, iw, 0), 1, + &dpsi_v_list[iw].get().data()[0][idim], QMCTraits::DIM); + } +} + +template +void +LCAOrbitalSetT::mw_evaluateVGLImplGEMM( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + OffloadMWVGLArray& phi_vgl_v) const +{ + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& basis_mw = spo_leader.mw_mem_handle_.getResource().basis_mw; + basis_mw.resize(QMCTraits::DIM_VGL, spo_list.size(), BasisSetSize); + { - spo_list[iw].evaluateValue(vp_list[iw], iat, psi_list[iw]); - ratios_list[iw][iat] = simd::dot(psi_list[iw].get().data(), invRow_ptr_list[iw], psi_list[iw].get().size()); + ScopedTimer local(basis_timer_); + myBasisSet->mw_evaluateVGL(P_list, iat, basis_mw); + } + + if (Identity) { + // output_size can be smaller than BasisSetSize + const size_t output_size = phi_vgl_v.size(2); + const size_t nw = phi_vgl_v.size(1); + + for (size_t idim = 0; idim < QMCTraits::DIM_VGL; idim++) + for (int iw = 0; iw < nw; iw++) + std::copy_n(basis_mw.data_at(idim, iw, 0), output_size, + phi_vgl_v.data_at(idim, iw, 0)); + } + else { + const size_t requested_orb_size = phi_vgl_v.size(2); + assert(requested_orb_size <= this->OrbitalSetSize); + { + ScopedTimer local(mo_timer_); + ValueMatrix C_partial_view( + C->data(), requested_orb_size, BasisSetSize); + // TODO: make class for general blas interface in Platforms + // have instance of that class as member of LCAOrbitalSetT, call + // gemm through that + BLAS::gemm('T', 'N', + 
requested_orb_size, // MOs + spo_list.size() * QMCTraits::DIM_VGL, // walkers * DIM_VGL + BasisSetSize, // AOs + 1, C_partial_view.data(), BasisSetSize, basis_mw.data(), + BasisSetSize, 0, phi_vgl_v.data(), requested_orb_size); + } } - } } -template -void LCAOrbitalSetT::evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) +template +void +LCAOrbitalSetT::mw_evaluateValue( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const RefVector& psi_v_list) const { - Vector vTemp(Temp.data(0), BasisSetSize); - Vector invTemp(Temp.data(1), BasisSetSize); + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& phi_v = spo_leader.mw_mem_handle_.getResource().phi_v; + phi_v.resize(spo_list.size(), this->OrbitalSetSize); + mw_evaluateValueImplGEMM(spo_list, P_list, iat, phi_v); - { - ScopedTimer local(mo_timer_); - // when only a subset of orbitals is used, extract limited rows of C. 
- Matrix C_occupied(C->data(), psiinv.size(), BasisSetSize); - MatrixOperators::product_Atx(C_occupied, psiinv, invTemp); - } + const size_t output_size = phi_v.size(1); + const size_t nw = phi_v.size(0); + + for (int iw = 0; iw < nw; iw++) + std::copy_n( + phi_v.data_at(iw, 0), output_size, psi_v_list[iw].get().data()); +} + +template +void +LCAOrbitalSetT::mw_evaluateValueImplGEMM( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + OffloadMWVArray& phi_v) const +{ + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + const size_t nw = spo_list.size(); + auto& basis_v_mw = spo_leader.mw_mem_handle_.getResource().basis_v_mw; + basis_v_mw.resize(nw, BasisSetSize); + + myBasisSet->mw_evaluateValue(P_list, iat, basis_v_mw); + + if (Identity) { + std::copy_n(basis_v_mw.data_at(0, 0), this->OrbitalSetSize * nw, + phi_v.data_at(0, 0)); + } + else { + const size_t requested_orb_size = phi_v.size(1); + assert(requested_orb_size <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize); + BLAS::gemm('T', 'N', + requested_orb_size, // MOs + spo_list.size(), // walkers + BasisSetSize, // AOs + 1, C_partial_view.data(), BasisSetSize, basis_v_mw.data(), + BasisSetSize, 0, phi_v.data(), requested_orb_size); + } +} + +template +void +LCAOrbitalSetT::mw_evaluateDetRatios( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const +{ + const size_t nw = spo_list.size(); + for (size_t iw = 0; iw < nw; iw++) { + for (size_t iat = 0; iat < vp_list[iw].getTotalNum(); iat++) { + spo_list[iw].evaluateValue(vp_list[iw], iat, psi_list[iw]); + ratios_list[iw][iat] = simd::dot(psi_list[iw].get().data(), + invRow_ptr_list[iw], psi_list[iw].get().size()); + } + } +} + +template +void +LCAOrbitalSetT::evaluateDetRatios(const VirtualParticleSetT& VP, 
+ ValueVector& psi, const ValueVector& psiinv, std::vector& ratios) +{ + Vector vTemp(Temp.data(0), BasisSetSize); + Vector invTemp(Temp.data(1), BasisSetSize); - for (size_t j = 0; j < VP.getTotalNum(); j++) - { { - ScopedTimer local(basis_timer_); - myBasisSet->evaluateV(VP, j, vTemp.data()); - } - ratios[j] = simd::dot(vTemp.data(), invTemp.data(), BasisSetSize); - } -} - -template -void LCAOrbitalSetT::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const -{ - assert(this == &spo_list.getLeader()); - assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL); - assert(phi_vgl_v.size(1) == spo_list.size()); - - mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v); - // Device data of phi_vgl_v must be up-to-date upon return - phi_vgl_v.updateTo(); - - const size_t nw = spo_list.size(); - const size_t norb_requested = phi_vgl_v.size(2); - for (int iw = 0; iw < nw; iw++) - { - ratios[iw] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(0, iw, 0), norb_requested); - GradType dphi; - for (size_t idim = 0; idim < QMCTraits::DIM; idim++) - dphi[idim] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(idim + 1, iw, 0), norb_requested) / ratios[iw]; - grads[iw] = dphi; - } -} - -template -void LCAOrbitalSetT::evaluateVGH(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, HessVector& dhpsi) -{ - //TAKE CARE OF IDENTITY - myBasisSet->evaluateVGH(P, iat, Temph); - if (Identity) - evaluate_vgh_impl(Temph, psi, dpsi, dhpsi); - else - { - assert(psi.size() <= this->OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); - Product_ABt(Temph, C_partial_view, Temphv); - evaluate_vgh_impl(Temphv, psi, dpsi, dhpsi); - } -} - -template -void LCAOrbitalSetT::evaluateVGHGH(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& dhpsi, 
- GGGVector& dghpsi) -{ - // APP_ABORT("LCAORbitalSet::evaluate(psi,gpsi,hpsi,ghpsi) not implemented\n"); - - //TAKE CARE OF IDENTITY - myBasisSet->evaluateVGHGH(P, iat, Tempgh); - if (Identity) - evaluate_vghgh_impl(Tempgh, psi, dpsi, dhpsi, dghpsi); - else - { - assert(psi.size() <= this->OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); - Product_ABt(Tempgh, C_partial_view, Tempghv); - evaluate_vghgh_impl(Tempghv, psi, dpsi, dhpsi, dghpsi); - } + ScopedTimer local(mo_timer_); + // when only a subset of orbitals is used, extract limited rows of C. + Matrix C_occupied(C->data(), psiinv.size(), BasisSetSize); + MatrixOperators::product_Atx(C_occupied, psiinv, invTemp); + } + + for (size_t j = 0; j < VP.getTotalNum(); j++) { + { + ScopedTimer local(basis_timer_); + myBasisSet->evaluateV(VP, j, vTemp.data()); + } + ratios[j] = simd::dot(vTemp.data(), invTemp.data(), BasisSetSize); + } +} + +template +void +LCAOrbitalSetT::mw_evaluateVGLandDetRatioGrads( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const std::vector& invRow_ptr_list, OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, std::vector& grads) const +{ + assert(this == &spo_list.getLeader()); + assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL); + assert(phi_vgl_v.size(1) == spo_list.size()); + + mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v); + // Device data of phi_vgl_v must be up-to-date upon return + phi_vgl_v.updateTo(); + + const size_t nw = spo_list.size(); + const size_t norb_requested = phi_vgl_v.size(2); + for (int iw = 0; iw < nw; iw++) { + ratios[iw] = simd::dot( + invRow_ptr_list[iw], phi_vgl_v.data_at(0, iw, 0), norb_requested); + GradType dphi; + for (size_t idim = 0; idim < QMCTraits::DIM; idim++) + dphi[idim] = + simd::dot(invRow_ptr_list[iw], + phi_vgl_v.data_at(idim + 1, iw, 0), norb_requested) / + ratios[iw]; + grads[iw] = dphi; + } +} + +template +void +LCAOrbitalSetT::evaluateVGH(const ParticleSetT& 
P, int iat, + ValueVector& psi, GradVector& dpsi, HessVector& dhpsi) +{ + // TAKE CARE OF IDENTITY + myBasisSet->evaluateVGH(P, iat, Temph); + if (Identity) + evaluate_vgh_impl(Temph, psi, dpsi, dhpsi); + else { + assert(psi.size() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); + Product_ABt(Temph, C_partial_view, Temphv); + evaluate_vgh_impl(Temphv, psi, dpsi, dhpsi); + } +} + +template +void +LCAOrbitalSetT::evaluateVGHGH(const ParticleSetT& P, int iat, + ValueVector& psi, GradVector& dpsi, HessVector& dhpsi, GGGVector& dghpsi) +{ + // APP_ABORT("LCAORbitalSet::evaluate(psi,gpsi,hpsi,ghpsi) not + // implemented\n"); + + // TAKE CARE OF IDENTITY + myBasisSet->evaluateVGHGH(P, iat, Tempgh); + if (Identity) + evaluate_vghgh_impl(Tempgh, psi, dpsi, dhpsi, dghpsi); + else { + assert(psi.size() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); + Product_ABt(Tempgh, C_partial_view, Tempghv); + evaluate_vghgh_impl(Tempghv, psi, dpsi, dhpsi, dghpsi); + } } /* implement using gemm algorithm */ -template -inline void LCAOrbitalSetT::evaluate_vgl_impl(const vgl_type& temp, - int i, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) const -{ - const size_t output_size = logdet.cols(); - std::copy_n(temp.data(0), output_size, logdet[i]); - const T* restrict gx = temp.data(1); - const T* restrict gy = temp.data(2); - const T* restrict gz = temp.data(3); - for (size_t j = 0; j < output_size; j++) - { - dlogdet[i][j][0] = gx[j]; - dlogdet[i][j][1] = gy[j]; - dlogdet[i][j][2] = gz[j]; - } - std::copy_n(temp.data(4), output_size, d2logdet[i]); -} -template -void LCAOrbitalSetT::evaluate_vgh_impl(const vgh_type& temp, - int i, - ValueMatrix& psi, - GradMatrix& dpsi, - HessMatrix& d2psi) const -{ - const size_t output_size = psi.cols(); - std::copy_n(temp.data(0), output_size, psi[i]); - const T* restrict gx = temp.data(1); - const T* restrict gy = temp.data(2); - const T* 
restrict gz = temp.data(3); - const T* restrict hxx = temp.data(4); - const T* restrict hxy = temp.data(5); - const T* restrict hxz = temp.data(6); - const T* restrict hyy = temp.data(7); - const T* restrict hyz = temp.data(8); - const T* restrict hzz = temp.data(9); - - for (size_t j = 0; j < output_size; j++) - { - dpsi[i][j][0] = gx[j]; - dpsi[i][j][1] = gy[j]; - dpsi[i][j][2] = gz[j]; - - d2psi[i][j](0, 0) = hxx[j]; - d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j]; - d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j]; - d2psi[i][j](1, 1) = hyy[j]; - d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j]; - d2psi[i][j](2, 2) = hzz[j]; - } -} - -template -void LCAOrbitalSetT::evaluate_ionderiv_v_impl(const vgl_type& temp, int i, GradMatrix& dpsi) const -{ - const size_t output_size = dpsi.cols(); - const T* restrict gx = temp.data(1); - const T* restrict gy = temp.data(2); - const T* restrict gz = temp.data(3); - - for (size_t j = 0; j < output_size; j++) - { - //As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property that - // for an atomic center, the ion gradient is the negative of the elecron gradient. - // Hence minus signs for each of these. 
- dpsi[i][j][0] = -gx[j]; - dpsi[i][j][1] = -gy[j]; - dpsi[i][j][2] = -gz[j]; - } -} - -template -void LCAOrbitalSetT::evaluate_ionderiv_vgl_impl(const vghgh_type& temp, - int i, - GradMatrix& dpsi, - HessMatrix& dgpsi, - GradMatrix& dlpsi) const -{ - const size_t output_size = dpsi.cols(); - const T* restrict gx = temp.data(1); - const T* restrict gy = temp.data(2); - const T* restrict gz = temp.data(3); - const T* restrict hxx = temp.data(4); - const T* restrict hxy = temp.data(5); - const T* restrict hxz = temp.data(6); - const T* restrict hyy = temp.data(7); - const T* restrict hyz = temp.data(8); - const T* restrict hzz = temp.data(9); - const T* restrict gh_xxx = temp.data(10); - const T* restrict gh_xxy = temp.data(11); - const T* restrict gh_xxz = temp.data(12); - const T* restrict gh_xyy = temp.data(13); - const T* restrict gh_xzz = temp.data(15); - const T* restrict gh_yyy = temp.data(16); - const T* restrict gh_yyz = temp.data(17); - const T* restrict gh_yzz = temp.data(18); - const T* restrict gh_zzz = temp.data(19); - - for (size_t j = 0; j < output_size; j++) - { - //As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property that - // for an atomic center, the ion gradient is the negative of the elecron gradient. - // Hence minus signs for each of these. - dpsi[i][j][0] = -gx[j]; - dpsi[i][j][1] = -gy[j]; - dpsi[i][j][2] = -gz[j]; - - dgpsi[i][j](0, 0) = -hxx[j]; - dgpsi[i][j](0, 1) = dgpsi[i][j](1, 0) = -hxy[j]; - dgpsi[i][j](0, 2) = dgpsi[i][j](2, 0) = -hxz[j]; - dgpsi[i][j](1, 1) = -hyy[j]; - dgpsi[i][j](2, 1) = dgpsi[i][j](1, 2) = -hyz[j]; - dgpsi[i][j](2, 2) = -hzz[j]; - - //Since this returns the ion gradient of the laplacian, we have to trace the grad hessian vector. 
- dlpsi[i][j][0] = -(gh_xxx[j] + gh_xyy[j] + gh_xzz[j]); - dlpsi[i][j][1] = -(gh_xxy[j] + gh_yyy[j] + gh_yzz[j]); - dlpsi[i][j][2] = -(gh_xxz[j] + gh_yyz[j] + gh_zzz[j]); - } -} - -template -void LCAOrbitalSetT::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) -{ - if (Identity) - { - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateVGL(P, iat, Temp); - evaluate_vgl_impl(Temp, i, logdet, dlogdet, d2logdet); - } - } - else - { - assert(logdet.cols() <= this->OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateVGL(P, iat, Temp); - Product_ABt(Temp, C_partial_view, Tempv); - evaluate_vgl_impl(Tempv, i, logdet, dlogdet, d2logdet); +template +inline void +LCAOrbitalSetT::evaluate_vgl_impl(const vgl_type& temp, int i, + ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) const +{ + const size_t output_size = logdet.cols(); + std::copy_n(temp.data(0), output_size, logdet[i]); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + for (size_t j = 0; j < output_size; j++) { + dlogdet[i][j][0] = gx[j]; + dlogdet[i][j][1] = gy[j]; + dlogdet[i][j][2] = gz[j]; + } + std::copy_n(temp.data(4), output_size, d2logdet[i]); +} +template +void +LCAOrbitalSetT::evaluate_vgh_impl(const vgh_type& temp, int i, + ValueMatrix& psi, GradMatrix& dpsi, HessMatrix& d2psi) const +{ + const size_t output_size = psi.cols(); + std::copy_n(temp.data(0), output_size, psi[i]); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + const T* restrict hxx = temp.data(4); + const T* restrict hxy = temp.data(5); + const T* restrict hxz = temp.data(6); + const T* restrict hyy = temp.data(7); + const T* restrict hyz = temp.data(8); 
+ const T* restrict hzz = temp.data(9); + + for (size_t j = 0; j < output_size; j++) { + dpsi[i][j][0] = gx[j]; + dpsi[i][j][1] = gy[j]; + dpsi[i][j][2] = gz[j]; + + d2psi[i][j](0, 0) = hxx[j]; + d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j]; + d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j]; + d2psi[i][j](1, 1) = hyy[j]; + d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j]; + d2psi[i][j](2, 2) = hzz[j]; } - } } -template -void LCAOrbitalSetT::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) +template +void +LCAOrbitalSetT::evaluate_ionderiv_v_impl( + const vgl_type& temp, int i, GradMatrix& dpsi) const +{ + const size_t output_size = dpsi.cols(); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + + for (size_t j = 0; j < output_size; j++) { + // As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property + // that + // for an atomic center, the ion gradient is the negative of the + // elecron gradient. Hence minus signs for each of these. 
+ dpsi[i][j][0] = -gx[j]; + dpsi[i][j][1] = -gy[j]; + dpsi[i][j][2] = -gz[j]; + } +} + +template +void +LCAOrbitalSetT::evaluate_ionderiv_vgl_impl(const vghgh_type& temp, int i, + GradMatrix& dpsi, HessMatrix& dgpsi, GradMatrix& dlpsi) const +{ + const size_t output_size = dpsi.cols(); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + const T* restrict hxx = temp.data(4); + const T* restrict hxy = temp.data(5); + const T* restrict hxz = temp.data(6); + const T* restrict hyy = temp.data(7); + const T* restrict hyz = temp.data(8); + const T* restrict hzz = temp.data(9); + const T* restrict gh_xxx = temp.data(10); + const T* restrict gh_xxy = temp.data(11); + const T* restrict gh_xxz = temp.data(12); + const T* restrict gh_xyy = temp.data(13); + const T* restrict gh_xzz = temp.data(15); + const T* restrict gh_yyy = temp.data(16); + const T* restrict gh_yyz = temp.data(17); + const T* restrict gh_yzz = temp.data(18); + const T* restrict gh_zzz = temp.data(19); + + for (size_t j = 0; j < output_size; j++) { + // As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property + // that + // for an atomic center, the ion gradient is the negative of the + // elecron gradient. Hence minus signs for each of these. + dpsi[i][j][0] = -gx[j]; + dpsi[i][j][1] = -gy[j]; + dpsi[i][j][2] = -gz[j]; + + dgpsi[i][j](0, 0) = -hxx[j]; + dgpsi[i][j](0, 1) = dgpsi[i][j](1, 0) = -hxy[j]; + dgpsi[i][j](0, 2) = dgpsi[i][j](2, 0) = -hxz[j]; + dgpsi[i][j](1, 1) = -hyy[j]; + dgpsi[i][j](2, 1) = dgpsi[i][j](1, 2) = -hyz[j]; + dgpsi[i][j](2, 2) = -hzz[j]; + + // Since this returns the ion gradient of the laplacian, we have to + // trace the grad hessian vector. 
+ dlpsi[i][j][0] = -(gh_xxx[j] + gh_xyy[j] + gh_xzz[j]); + dlpsi[i][j][1] = -(gh_xxy[j] + gh_yyy[j] + gh_yzz[j]); + dlpsi[i][j][2] = -(gh_xxz[j] + gh_yyz[j] + gh_zzz[j]); + } +} + +template +void +LCAOrbitalSetT::evaluate_notranspose(const ParticleSetT& P, int first, + int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) { - if (Identity) - { - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateVGH(P, iat, Temph); - evaluate_vgh_impl(Temph, i, logdet, dlogdet, grad_grad_logdet); - } - } - else - { - assert(logdet.cols() <= this->OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateVGH(P, iat, Temph); - Product_ABt(Temph, C_partial_view, Temphv); - evaluate_vgh_impl(Temphv, i, logdet, dlogdet, grad_grad_logdet); + if (Identity) { + for (size_t i = 0, iat = first; iat < last; i++, iat++) { + myBasisSet->evaluateVGL(P, iat, Temp); + evaluate_vgl_impl(Temp, i, logdet, dlogdet, d2logdet); + } + } + else { + assert(logdet.cols() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); + for (size_t i = 0, iat = first; iat < last; i++, iat++) { + myBasisSet->evaluateVGL(P, iat, Temp); + Product_ABt(Temp, C_partial_view, Tempv); + evaluate_vgl_impl(Tempv, i, logdet, dlogdet, d2logdet); + } } - } } -template -void LCAOrbitalSetT::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) +template +void +LCAOrbitalSetT::evaluate_notranspose(const ParticleSetT& P, int first, + int last, ValueMatrix& logdet, GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet) { - if (Identity) - { - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateVGHGH(P, iat, Tempgh); - evaluate_vghgh_impl(Tempgh, i, logdet, dlogdet, 
grad_grad_logdet, grad_grad_grad_logdet); - } - } - else - { - assert(logdet.cols() <= this->OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateVGHGH(P, iat, this->Tempgh); - Product_ABt(this->Tempgh, C_partial_view, this->Tempghv); - evaluate_vghgh_impl(this->Tempghv, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet); + if (Identity) { + for (size_t i = 0, iat = first; iat < last; i++, iat++) { + myBasisSet->evaluateVGH(P, iat, Temph); + evaluate_vgh_impl(Temph, i, logdet, dlogdet, grad_grad_logdet); + } + } + else { + assert(logdet.cols() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); + for (size_t i = 0, iat = first; iat < last; i++, iat++) { + myBasisSet->evaluateVGH(P, iat, Temph); + Product_ABt(Temph, C_partial_view, Temphv); + evaluate_vgh_impl(Temphv, i, logdet, dlogdet, grad_grad_logdet); + } } - } } -template -void LCAOrbitalSetT::evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi) +template +void +LCAOrbitalSetT::evaluate_notranspose(const ParticleSetT& P, int first, + int last, ValueMatrix& logdet, GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet, GGGMatrix& grad_grad_grad_logdet) { - if (Identity) - { - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateGradSourceV(P, iat, source, iat_src, this->Temp); - evaluate_ionderiv_v_impl(Temp, i, gradphi); + if (Identity) { + for (size_t i = 0, iat = first; iat < last; i++, iat++) { + myBasisSet->evaluateVGHGH(P, iat, Tempgh); + evaluate_vghgh_impl(Tempgh, i, logdet, dlogdet, grad_grad_logdet, + grad_grad_grad_logdet); + } } - } - else - { - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateGradSourceV(P, iat, source, iat_src, this->Temp); - Product_ABt(this->Temp, *C, this->Tempv); - 
evaluate_ionderiv_v_impl(this->Tempv, i, gradphi); - } - } -} - -template -void LCAOrbitalSetT::evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, - GradMatrix& grad_lapl_phi) -{ - if (Identity) - { - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateGradSourceVGL(P, iat, source, iat_src, this->Tempgh); - evaluate_ionderiv_vgl_impl(this->Tempgh, i, grad_phi, grad_grad_phi, grad_lapl_phi); + else { + assert(logdet.cols() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); + for (size_t i = 0, iat = first; iat < last; i++, iat++) { + myBasisSet->evaluateVGHGH(P, iat, this->Tempgh); + Product_ABt(this->Tempgh, C_partial_view, this->Tempghv); + evaluate_vghgh_impl(this->Tempghv, i, logdet, dlogdet, + grad_grad_logdet, grad_grad_grad_logdet); + } } - } - else - { - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateGradSourceVGL(P, iat, source, iat_src, this->Tempgh); - Product_ABt(this->Tempgh, *C, this->Tempghv); - evaluate_ionderiv_vgl_impl(this->Tempghv, i, grad_phi, grad_grad_phi, grad_lapl_phi); - } - } -} - -template -void LCAOrbitalSetT::evaluateGradSourceRow(const ParticleSet& P, - int iel, - const ParticleSet& source, - int iat_src, - GradVector& gradphi) -{ - if (Identity) - { - myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp); - evaluate_ionderiv_v_row_impl(this->Temp, gradphi); - } - else - { - myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp); - Product_ABt(Temp, *C, this->Tempv); - evaluate_ionderiv_v_row_impl(this->Tempv, gradphi); - } -} - -template -void LCAOrbitalSetT::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) -{ - if (!use_stored_copy) - *C_copy = *C; - //gemm is out-of-place - BLAS::gemm('N', 'T', BasisSetSize, this->OrbitalSetSize, this->OrbitalSetSize, RealType(1.0), 
C_copy->data(), BasisSetSize, - rot_mat.data(), this->OrbitalSetSize, RealType(0.0), C->data(), BasisSetSize); - - /* debugging code - app_log() << "PRINTING MO COEFFICIENTS AFTER ROTATION " << objectName << std::endl; - for (int j = 0; j < OrbitalSetSize; j++) - for (int i = 0; i < BasisSetSize; i++) - { - app_log() << " " << std::right << std::fixed << std::setprecision(16) << std::setw(23) << std::scientific - << *(C->data() + j * BasisSetSize + i); +} - if ((j * BasisSetSize + i + 1) % 4 == 0) - app_log() << std::endl; +template +void +LCAOrbitalSetT::evaluateGradSource(const ParticleSetT& P, int first, + int last, const ParticleSetT& source, int iat_src, GradMatrix& gradphi) +{ + if (Identity) { + for (size_t i = 0, iat = first; iat < last; i++, iat++) { + myBasisSet->evaluateGradSourceV( + P, iat, source, iat_src, this->Temp); + evaluate_ionderiv_v_impl(Temp, i, gradphi); + } + } + else { + for (size_t i = 0, iat = first; iat < last; i++, iat++) { + myBasisSet->evaluateGradSourceV( + P, iat, source, iat_src, this->Temp); + Product_ABt(this->Temp, *C, this->Tempv); + evaluate_ionderiv_v_impl(this->Tempv, i, gradphi); + } + } +} + +template +void +LCAOrbitalSetT::evaluateGradSource(const ParticleSetT& P, int first, + int last, const ParticleSetT& source, int iat_src, GradMatrix& grad_phi, + HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi) +{ + if (Identity) { + for (size_t i = 0, iat = first; iat < last; i++, iat++) { + myBasisSet->evaluateGradSourceVGL( + P, iat, source, iat_src, this->Tempgh); + evaluate_ionderiv_vgl_impl( + this->Tempgh, i, grad_phi, grad_grad_phi, grad_lapl_phi); + } + } + else { + for (size_t i = 0, iat = first; iat < last; i++, iat++) { + myBasisSet->evaluateGradSourceVGL( + P, iat, source, iat_src, this->Tempgh); + Product_ABt(this->Tempgh, *C, this->Tempghv); + evaluate_ionderiv_vgl_impl( + this->Tempghv, i, grad_phi, grad_grad_phi, grad_lapl_phi); + } } - */ +} + +template +void +LCAOrbitalSetT::evaluateGradSourceRow(const 
ParticleSetT& P, int iel, + const ParticleSetT& source, int iat_src, GradVector& gradphi) +{ + if (Identity) { + myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp); + evaluate_ionderiv_v_row_impl(this->Temp, gradphi); + } + else { + myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp); + Product_ABt(Temp, *C, this->Tempv); + evaluate_ionderiv_v_row_impl(this->Tempv, gradphi); + } +} + +template +void +LCAOrbitalSetT::applyRotation( + const ValueMatrix& rot_mat, bool use_stored_copy) +{ + if (!use_stored_copy) + *C_copy = *C; + // gemm is out-of-place + BLAS::gemm('N', 'T', BasisSetSize, this->OrbitalSetSize, + this->OrbitalSetSize, RealType(1.0), C_copy->data(), BasisSetSize, + rot_mat.data(), this->OrbitalSetSize, RealType(0.0), C->data(), + BasisSetSize); + + /* debugging code + app_log() << "PRINTING MO COEFFICIENTS AFTER ROTATION " << objectName << + std::endl; for (int j = 0; j < OrbitalSetSize; j++) for (int i = 0; i < + BasisSetSize; i++) + { + app_log() << " " << std::right << std::fixed << std::setprecision(16) << + std::setw(23) << std::scientific + << *(C->data() + j * BasisSetSize + i); + + if ((j * BasisSetSize + i + 1) % 4 == 0) + app_log() << std::endl; + } + */ } // Class concrete types from ValueType diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h index 974add33b6..f8bf40d017 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h @@ -14,7 +14,7 @@ #include "Numerics/DeterminantOperators.h" #include "Numerics/MatrixOperators.h" -#include "QMCWaveFunctions/BasisSetBase.h" +#include "QMCWaveFunctions/BasisSetBaseT.h" #include "QMCWaveFunctions/SPOSetT.h" #include @@ -31,7 +31,7 @@ template class LCAOrbitalSetT : public SPOSetT { public: - using basis_type = SoaBasisSetBase; + using basis_type = SoaBasisSetBaseT; using vgl_type = typename basis_type::vgl_type; using vgh_type = typename basis_type::vgh_type; using 
vghgh_type = typename basis_type::vghgh_type; @@ -122,63 +122,63 @@ class LCAOrbitalSetT : public SPOSetT checkObject() const final; void - evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) final; + evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) final; void - evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, + evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final; void mw_evaluateValue(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, int iat, + const RefVectorWithLeader>& P_list, int iat, const RefVector& psi_v_list) const final; void mw_evaluateVGL(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, int iat, + const RefVectorWithLeader>& P_list, int iat, const RefVector& psi_v_list, const RefVector& dpsi_v_list, const RefVector& d2psi_v_list) const final; void mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& vp_list, + const RefVectorWithLeader>& vp_list, const RefVector& psi_list, const std::vector& invRow_ptr_list, std::vector>& ratios_list) const final; void - evaluateDetRatios(const VirtualParticleSet& VP, ValueVector& psi, + evaluateDetRatios(const VirtualParticleSetT& VP, ValueVector& psi, const ValueVector& psiinv, std::vector& ratios) final; void mw_evaluateVGLandDetRatioGrads( const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, int iat, + const RefVectorWithLeader>& P_list, int iat, const std::vector& invRow_ptr_list, OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, std::vector& grads) const final; void - evaluateVGH(const ParticleSet& P, int iat, ValueVector& psi, + evaluateVGH(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi) final; void - evaluateVGHGH(const ParticleSet& P, int iat, ValueVector& psi, + evaluateVGHGH(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, HessVector& 
grad_grad_psi, GGGVector& grad_grad_grad_psi) final; void - evaluate_notranspose(const ParticleSet& P, int first, int last, + evaluate_notranspose(const ParticleSetT& P, int first, int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) final; void - evaluate_notranspose(const ParticleSet& P, int first, int last, + evaluate_notranspose(const ParticleSetT& P, int first, int last, ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet) final; void - evaluate_notranspose(const ParticleSet& P, int first, int last, + evaluate_notranspose(const ParticleSetT& P, int first, int last, ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet, GGGMatrix& grad_grad_grad_logdet) final; @@ -242,8 +242,8 @@ class LCAOrbitalSetT : public SPOSetT * orbitals. */ void - evaluateGradSource(const ParticleSet& P, int first, int last, - const ParticleSet& source, int iat_src, GradMatrix& grad_phi) final; + evaluateGradSource(const ParticleSetT& P, int first, int last, + const ParticleSetT& source, int iat_src, GradMatrix& grad_phi) final; /** * \brief Calculate ion derivatives of SPO's, their gradients, and their @@ -262,13 +262,13 @@ class LCAOrbitalSetT : public SPOSetT * for all particles and all orbitals. 
*/ void - evaluateGradSource(const ParticleSet& P, int first, int last, - const ParticleSet& source, int iat_src, GradMatrix& grad_phi, + evaluateGradSource(const ParticleSetT& P, int first, int last, + const ParticleSetT& source, int iat_src, GradMatrix& grad_phi, HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi) final; void - evaluateGradSourceRow(const ParticleSet& P, int iel, - const ParticleSet& source, int iat_src, GradVector& grad_phi) final; + evaluateGradSourceRow(const ParticleSetT& P, int iel, + const ParticleSetT& source, int iat_src, GradVector& grad_phi) final; void createResource(ResourceCollection& collection) const final; @@ -362,13 +362,13 @@ class LCAOrbitalSetT : public SPOSetT void mw_evaluateVGLImplGEMM(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, int iat, + const RefVectorWithLeader>& P_list, int iat, OffloadMWVGLArray& phi_vgl_v) const; /// packed walker GEMM implementation void mw_evaluateValueImplGEMM(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, int iat, + const RefVectorWithLeader>& P_list, int iat, OffloadMWVArray& phi_v) const; struct LCAOMultiWalkerMem; diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp index f713646d82..87b4e719d0 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp @@ -1,6 +1,6 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2018 Jeongnim Kim and QMCPACK developers. // @@ -9,62 +9,64 @@ // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. 
////////////////////////////////////////////////////////////////////////////////////// - #include "LCAOrbitalSetWithCorrectionT.h" namespace qmcplusplus { -template -LCAOrbitalSetWithCorrectionT::LCAOrbitalSetWithCorrectionT(const std::string& my_name, - ParticleSet& ions, - ParticleSet& els, - std::unique_ptr&& bs) - : SPOSetT(my_name), lcao(my_name + "_modified", std::move(bs)), cusp(ions, els) -{} +template +LCAOrbitalSetWithCorrectionT::LCAOrbitalSetWithCorrectionT( + const std::string& my_name, ParticleSetT& ions, ParticleSetT& els, + std::unique_ptr&& bs) : + SPOSetT(my_name), + lcao(my_name + "_modified", std::move(bs)), + cusp(ions, els) +{ +} -template -void LCAOrbitalSetWithCorrectionT::setOrbitalSetSize(int norbs) +template +void +LCAOrbitalSetWithCorrectionT::setOrbitalSetSize(int norbs) { - assert(lcao.getOrbitalSetSize() == norbs && "norbs doesn't agree with lcao!"); - this->OrbitalSetSize = norbs; - cusp.setOrbitalSetSize(norbs); + assert( + lcao.getOrbitalSetSize() == norbs && "norbs doesn't agree with lcao!"); + this->OrbitalSetSize = norbs; + cusp.setOrbitalSetSize(norbs); } -template -std::unique_ptr> LCAOrbitalSetWithCorrectionT::makeClone() const +template +std::unique_ptr> +LCAOrbitalSetWithCorrectionT::makeClone() const { - return std::make_unique>(*this); + return std::make_unique>(*this); } -template -void LCAOrbitalSetWithCorrectionT::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) +template +void +LCAOrbitalSetWithCorrectionT::evaluateValue( + const ParticleSetT& P, int iat, ValueVector& psi) { - lcao.evaluateValue(P, iat, psi); - cusp.addV(P, iat, psi); + lcao.evaluateValue(P, iat, psi); + cusp.addV(P, iat, psi); } -template -void LCAOrbitalSetWithCorrectionT::evaluateVGL(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) +template +void +LCAOrbitalSetWithCorrectionT::evaluateVGL(const ParticleSetT& P, int iat, + ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { - 
lcao.evaluateVGL(P, iat, psi, dpsi, d2psi); - cusp.add_vector_vgl(P, iat, psi, dpsi, d2psi); + lcao.evaluateVGL(P, iat, psi, dpsi, d2psi); + cusp.add_vector_vgl(P, iat, psi, dpsi, d2psi); } -template -void LCAOrbitalSetWithCorrectionT::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) +template +void +LCAOrbitalSetWithCorrectionT::evaluate_notranspose(const ParticleSetT& P, + int first, int last, ValueMatrix& logdet, GradMatrix& dlogdet, + ValueMatrix& d2logdet) { - lcao.evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet); - for (size_t i = 0, iat = first; iat < last; i++, iat++) - cusp.add_vgl(P, iat, i, logdet, dlogdet, d2logdet); + lcao.evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet); + for (size_t i = 0, iat = first; iat < last; i++, iat++) + cusp.add_vgl(P, iat, i, logdet, dlogdet, d2logdet); } template class LCAOrbitalSetWithCorrectionT; diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h index 30c3f188e6..8b0003d18f 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h @@ -1,6 +1,6 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. // @@ -9,68 +9,72 @@ // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. 
////////////////////////////////////////////////////////////////////////////////////// - #ifndef QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_WITH_CORRECTIONT_H #define QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_WITH_CORRECTIONT_H -#include "QMCWaveFunctions/SPOSetT.h" -#include "QMCWaveFunctions/BasisSetBase.h" #include "LCAOrbitalSetT.h" +#include "QMCWaveFunctions/BasisSetBaseT.h" +#include "QMCWaveFunctions/SPOSetT.h" #include "SoaCuspCorrectionT.h" - namespace qmcplusplus { /** class to add cusp correction to LCAOrbitalSet. - * - */ + * + */ -template +template class LCAOrbitalSetWithCorrectionT : public SPOSetT { public: - using basis_type = typename LCAOrbitalSetT::basis_type; - using ValueVector = typename SPOSetT::ValueVector; - using GradVector = typename SPOSetT::GradVector; - using ValueMatrix = typename SPOSetT::ValueMatrix; - using GradMatrix = typename SPOSetT::GradMatrix; - /** constructor + using basis_type = typename LCAOrbitalSetT::basis_type; + using ValueVector = typename SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + /** constructor * @param ions * @param els * @param bs pointer to the BasisSet * @param rl report level */ - LCAOrbitalSetWithCorrectionT(const std::string& my_name, - ParticleSet& ions, - ParticleSet& els, - std::unique_ptr&& bs); + LCAOrbitalSetWithCorrectionT(const std::string& my_name, + ParticleSetT& ions, ParticleSetT& els, + std::unique_ptr&& bs); - LCAOrbitalSetWithCorrectionT(const LCAOrbitalSetWithCorrectionT& in) = default; + LCAOrbitalSetWithCorrectionT( + const LCAOrbitalSetWithCorrectionT& in) = default; - std::string getClassName() const final { return "LCAOrbitalSetWithCorrectionT"; } + std::string + getClassName() const final + { + return "LCAOrbitalSetWithCorrectionT"; + } - std::unique_ptr> makeClone() const final; + std::unique_ptr> + makeClone() const final; - void 
setOrbitalSetSize(int norbs) final; + void + setOrbitalSetSize(int norbs) final; - void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) final; + void + evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) final; - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final; + void + evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, + GradVector& dpsi, ValueVector& d2psi) final; - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) final; + void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) final; - template - friend class LCAOrbitalBuilderT; + template + friend class LCAOrbitalBuilderT; private: - LCAOrbitalSetT lcao; + LCAOrbitalSetT lcao; - SoaCuspCorrectionT cusp; + SoaCuspCorrectionT cusp; }; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/MultiFunctorAdapter.h b/src/QMCWaveFunctions/LCAO/MultiFunctorAdapter.h index 110491c006..ee9ecde7fe 100644 --- a/src/QMCWaveFunctions/LCAO/MultiFunctorAdapter.h +++ b/src/QMCWaveFunctions/LCAO/MultiFunctorAdapter.h @@ -21,6 +21,7 @@ #include "hdf/hdf_archive.h" #include "LCAO/MultiQuinticSpline1D.h" #include "LCAO/SoaAtomicBasisSet.h" +#include "LCAO/SoaAtomicBasisSetT.h" namespace qmcplusplus { @@ -145,5 +146,60 @@ class RadialOrbitalSetBuilder, SH>> : m_orbitals.setRmax(0); //set Rmax } }; + +template +class RadialOrbitalSetBuilder, SH, ORBT>> : public MPIObjectBase +{ +public: + using COT = SoaAtomicBasisSetT, SH, ORBT>; + using RadialOrbital_t = MultiFunctorAdapter; + using single_type = typename RadialOrbital_t::single_type; + + ///true, if the RadialOrbitalType is normalized + bool Normalized; + ///orbitals to build + COT& m_orbitals; + + ///constructor + RadialOrbitalSetBuilder(Communicate* comm, COT& aos) : MPIObjectBase(comm), 
Normalized(true), m_orbitals(aos) {} + + ///implement functions used by AOBasisBuilder + bool addGrid(xmlNodePtr cur, const std::string& rad_type) { return true; } + bool addGridH5(hdf_archive& hin) { return true; } + bool openNumericalBasisH5(xmlNodePtr cur) { return true; } + bool put(xmlNodePtr cur) + { + const std::string a(lowerCase(getXMLAttributeValue(cur, "normalized"))); + if (a == "no") + Normalized = false; + return true; + } + + bool addRadialOrbital(xmlNodePtr cur, const std::string& rad_type, const QuantumNumberType& nlms) + { + auto radorb = std::make_unique(nlms[q_l], Normalized); + radorb->putBasisGroup(cur); + + m_orbitals.RnlID.push_back(nlms); + m_orbitals.MultiRnl.Rnl.push_back(std::move(radorb)); + return true; + } + + bool addRadialOrbitalH5(hdf_archive& hin, const std::string& rad_type, const QuantumNumberType& nlms) + { + auto radorb = std::make_unique(nlms[q_l], Normalized); + radorb->putBasisGroupH5(hin, *myComm); + + m_orbitals.RnlID.push_back(nlms); + m_orbitals.MultiRnl.Rnl.push_back(std::move(radorb)); + + return true; + } + + void finalize() + { + m_orbitals.setRmax(0); //set Rmax + } +}; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/RadialOrbitalSetBuilder.h b/src/QMCWaveFunctions/LCAO/RadialOrbitalSetBuilder.h index 71e36230bd..4d03b3d652 100644 --- a/src/QMCWaveFunctions/LCAO/RadialOrbitalSetBuilder.h +++ b/src/QMCWaveFunctions/LCAO/RadialOrbitalSetBuilder.h @@ -166,7 +166,7 @@ bool RadialOrbitalSetBuilder::addGrid(xmlNodePtr cur, const std::string& ra hin.pop(); } else - input_grid = OneDimGridFactory::createGrid(cur); + input_grid = OneDimGridFactory::createGrid(cur); //set zero to use std::max m_rcut_safe = 0; diff --git a/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h b/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h new file mode 100644 index 0000000000..1f1bc53d5e --- /dev/null +++ b/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h @@ -0,0 +1,775 @@ 
+////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +////////////////////////////////////////////////////////////////////////////////////// + +/** @file SoaAtomicBasisSetT.h + */ +#ifndef QMCPLUSPLUS_SOA_SPHERICALORBITAL_BASISSETT_H +#define QMCPLUSPLUS_SOA_SPHERICALORBITAL_BASISSETT_H + +#include "CPU/math.hpp" +#include "OptimizableObject.h" + +namespace qmcplusplus +{ +template +struct CorrectPhaseFunctor +{ + const TinyVector& superTwist; + + template + T + operator()(PosType Tv) const + { + return 1.0; + } +}; + +template +struct CorrectPhaseFunctor> +{ + const TinyVector& superTwist; + + template + std::complex + operator()(PosType Tv) const + { + T phasearg = superTwist[0] * Tv[0] + superTwist[1] * Tv[1] + + superTwist[2] * Tv[2]; + T s, c; + qmcplusplus::sincos(-phasearg, &s, &c); + return {c, s}; + }; +}; + +/* A basis set for a center type + * + * @tparam ROT : radial function type, e.g.,NGFunctor + * @tparam SH : spherical or carteisan Harmonics for (l,m) expansion + * + * \f$ \phi_{n,l,m}({\bf r})=R_{n,l}(r) Y_{l,m}(\theta) \f$ + */ +template +struct SoaAtomicBasisSetT +{ + using RadialOrbital_t = ROT; + using RealType = typename ROT::RealType; + using GridType = typename ROT::GridType; + using ValueType = ORBT; + + /// size of the basis set + int BasisSetSize; + /// Number of Cell images for the evaluation of the orbital with PBC. 
If No + /// PBC, should be 0; + TinyVector PBCImages; + /// Coordinates of SuperTwist + TinyVector SuperTwist; + /// Phase Factor array + std::vector periodic_image_phase_factors; + /// maximum radius of this center + RealType Rmax; + /// spherical harmonics + SH Ylm; + /// radial orbitals + ROT MultiRnl; + /// index of the corresponding real Spherical Harmonic with quantum numbers + /// \f$ (l,m) \f$ + aligned_vector LM; + /**index of the corresponding radial orbital with quantum numbers \f$ (n,l) + * \f$ */ + aligned_vector NL; + /// container for the quantum-numbers + std::vector RnlID; + /// temporary storage + VectorSoaContainer tempS; + + /// the constructor + explicit SoaAtomicBasisSetT(int lmax, bool addsignforM = false) : + Ylm(lmax, addsignforM) + { + } + + void + checkInVariables(opt_variables_type& active) + { + // for(size_t nl=0; nlcheckInVariables(active); + } + + void + checkOutVariables(const opt_variables_type& active) + { + // for(size_t nl=0; nlcheckOutVariables(active); + } + + void + resetParameters(const opt_variables_type& active) + { + // for(size_t nl=0; nlresetParameters(active); + } + + /** return the number of basis functions + */ + inline int + getBasisSetSize() const + { + //=NL.size(); + return BasisSetSize; + } + + /** Set the number of periodic image for the evaluation of the orbitals and + * the phase factor. In the case of Non-PBC, PBCImages=(1,1,1), + * SuperTwist(0,0,0) and the PhaseFactor=1. 
+ */ + void + setPBCParams(const TinyVector& pbc_images, + const TinyVector supertwist, + const std::vector& PeriodicImagePhaseFactors) + { + PBCImages = pbc_images; + periodic_image_phase_factors = PeriodicImagePhaseFactors; + SuperTwist = supertwist; + } + + /** implement a BasisSetBase virtual function + * + * Set Rmax and BasisSetSize + * @todo Should be able to overwrite Rmax to be much smaller than the + * maximum grid + */ + inline void + setBasisSetSize(int n) + { + BasisSetSize = LM.size(); + tempS.resize(std::max(Ylm.size(), RnlID.size())); + } + + /** Set Rmax */ + template + inline void + setRmax(RealType rmax) + { + Rmax = (rmax > 0) ? rmax : MultiRnl.rmax(); + } + + /// set the current offset + inline void + setCenter(int c, int offset) + { + } + + /// Sets a boolean vector for S-type orbitals. Used for cusp correction. + void + queryOrbitalsForSType(std::vector& s_orbitals) const + { + for (int i = 0; i < BasisSetSize; i++) { + s_orbitals[i] = (RnlID[NL[i]][1] == 0); + } + } + + /** evaluate VGL + */ + template + inline void + evaluateVGL(const LAT& lattice, const RealType r, const PosType& dr, + const size_t offset, VGL& vgl, PosType Tv) + { + int TransX, TransY, TransZ; + + PosType dr_new; + RealType r_new; + // RealType psi_new, dpsi_x_new, dpsi_y_new, dpsi_z_new,d2psi_new; + + const ValueType correctphase = + CorrectPhaseFunctor{SuperTwist}(Tv); + + constexpr RealType cone(1); + constexpr RealType ctwo(2); + + // one can assert the alignment + RealType* restrict phi = tempS.data(0); + RealType* restrict dphi = tempS.data(1); + RealType* restrict d2phi = tempS.data(2); + + // V,Gx,Gy,Gz,L + auto* restrict psi = vgl.data(0) + offset; + const RealType* restrict ylm_v = Ylm[0]; // value + auto* restrict dpsi_x = vgl.data(1) + offset; + const RealType* restrict ylm_x = Ylm[1]; // gradX + auto* restrict dpsi_y = vgl.data(2) + offset; + const RealType* restrict ylm_y = Ylm[2]; // gradY + auto* restrict dpsi_z = vgl.data(3) + offset; + const RealType* 
restrict ylm_z = Ylm[3]; // gradZ + auto* restrict d2psi = vgl.data(4) + offset; + const RealType* restrict ylm_l = Ylm[4]; // lap + + for (size_t ib = 0; ib < BasisSetSize; ++ib) { + psi[ib] = 0; + dpsi_x[ib] = 0; + dpsi_y[ib] = 0; + dpsi_z[ib] = 0; + d2psi[ib] = 0; + } + // Phase_idx (iter) needs to be initialized at -1 as it has to be + // incremented first to comply with the if statement (r_new >=Rmax) + int iter = -1; + for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X + { + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); + for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y + { + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); + for (int k = 0; k <= PBCImages[2]; + k++) // loop Translation over Z + { + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); + + dr_new[0] = dr[0] + + (TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + + TransZ * lattice.R(2, 0)); + dr_new[1] = dr[1] + + (TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + + TransZ * lattice.R(2, 1)); + dr_new[2] = dr[2] + + (TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + + TransZ * lattice.R(2, 2)); + + r_new = std::sqrt(dot(dr_new, dr_new)); + + iter++; + if (r_new >= Rmax) + continue; + + // SIGN Change!! + const RealType x = -dr_new[0], y = -dr_new[1], + z = -dr_new[2]; + Ylm.evaluateVGL(x, y, z); + + MultiRnl.evaluate(r_new, phi, dphi, d2phi); + + const RealType rinv = cone / r_new; + + /// Phase for PBC containing the phase for the nearest image + /// displacement and the correction due to the Distance + /// table. 
+ const ValueType Phase = + periodic_image_phase_factors[iter] * correctphase; + + for (size_t ib = 0; ib < BasisSetSize; ++ib) { + const int nl(NL[ib]); + const int lm(LM[ib]); + const RealType drnloverr = rinv * dphi[nl]; + const RealType ang = ylm_v[lm]; + const RealType gr_x = drnloverr * x; + const RealType gr_y = drnloverr * y; + const RealType gr_z = drnloverr * z; + const RealType ang_x = ylm_x[lm]; + const RealType ang_y = ylm_y[lm]; + const RealType ang_z = ylm_z[lm]; + const RealType vr = phi[nl]; + + psi[ib] += ang * vr * Phase; + dpsi_x[ib] += (ang * gr_x + vr * ang_x) * Phase; + dpsi_y[ib] += (ang * gr_y + vr * ang_y) * Phase; + dpsi_z[ib] += (ang * gr_z + vr * ang_z) * Phase; + d2psi[ib] += (ang * (ctwo * drnloverr + d2phi[nl]) + + ctwo * + (gr_x * ang_x + gr_y * ang_y + + gr_z * ang_z) + + vr * ylm_l[lm]) * + Phase; + } + } + } + } + } + + template + inline void + evaluateVGH(const LAT& lattice, const RealType r, const PosType& dr, + const size_t offset, VGH& vgh) + { + int TransX, TransY, TransZ; + + PosType dr_new; + RealType r_new; + + constexpr RealType cone(1); + + // one can assert the alignment + RealType* restrict phi = tempS.data(0); + RealType* restrict dphi = tempS.data(1); + RealType* restrict d2phi = tempS.data(2); + + // V,Gx,Gy,Gz,L + auto* restrict psi = vgh.data(0) + offset; + const RealType* restrict ylm_v = Ylm[0]; // value + auto* restrict dpsi_x = vgh.data(1) + offset; + const RealType* restrict ylm_x = Ylm[1]; // gradX + auto* restrict dpsi_y = vgh.data(2) + offset; + const RealType* restrict ylm_y = Ylm[2]; // gradY + auto* restrict dpsi_z = vgh.data(3) + offset; + const RealType* restrict ylm_z = Ylm[3]; // gradZ + + auto* restrict dhpsi_xx = vgh.data(4) + offset; + const RealType* restrict ylm_xx = Ylm[4]; + auto* restrict dhpsi_xy = vgh.data(5) + offset; + const RealType* restrict ylm_xy = Ylm[5]; + auto* restrict dhpsi_xz = vgh.data(6) + offset; + const RealType* restrict ylm_xz = Ylm[6]; + auto* restrict dhpsi_yy = 
vgh.data(7) + offset; + const RealType* restrict ylm_yy = Ylm[7]; + auto* restrict dhpsi_yz = vgh.data(8) + offset; + const RealType* restrict ylm_yz = Ylm[8]; + auto* restrict dhpsi_zz = vgh.data(9) + offset; + const RealType* restrict ylm_zz = Ylm[9]; + + for (size_t ib = 0; ib < BasisSetSize; ++ib) { + psi[ib] = 0; + dpsi_x[ib] = 0; + dpsi_y[ib] = 0; + dpsi_z[ib] = 0; + dhpsi_xx[ib] = 0; + dhpsi_xy[ib] = 0; + dhpsi_xz[ib] = 0; + dhpsi_yy[ib] = 0; + dhpsi_yz[ib] = 0; + dhpsi_zz[ib] = 0; + // d2psi[ib] = 0; + } + + for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X + { + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); + for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y + { + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); + for (int k = 0; k <= PBCImages[2]; + k++) // loop Translation over Z + { + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); + dr_new[0] = dr[0] + TransX * lattice.R(0, 0) + + TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0); + dr_new[1] = dr[1] + TransX * lattice.R(0, 1) + + TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1); + dr_new[2] = dr[2] + TransX * lattice.R(0, 2) + + TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2); + r_new = std::sqrt(dot(dr_new, dr_new)); + + // const size_t ib_max=NL.size(); + if (r_new >= Rmax) + continue; + + // SIGN Change!! 
+ const RealType x = -dr_new[0], y = -dr_new[1], + z = -dr_new[2]; + Ylm.evaluateVGH(x, y, z); + + MultiRnl.evaluate(r_new, phi, dphi, d2phi); + + const RealType rinv = cone / r_new; + + for (size_t ib = 0; ib < BasisSetSize; ++ib) { + const int nl(NL[ib]); + const int lm(LM[ib]); + const RealType drnloverr = rinv * dphi[nl]; + const RealType ang = ylm_v[lm]; + const RealType gr_x = drnloverr * x; + const RealType gr_y = drnloverr * y; + const RealType gr_z = drnloverr * z; + + // The non-strictly diagonal term in \partial_i + // \partial_j R_{nl} is + // \frac{x_i x_j}{r^2}\left(\frac{\partial^2 + // R_{nl}}{\partial r^2} - \frac{1}{r}\frac{\partial + // R_{nl}}{\partial r}) To save recomputation, I + // evaluate everything except the x_i*x_j term once, + // and store it in gr2_tmp. The full term is obtained + // by x_i*x_j*gr2_tmp. + const RealType gr2_tmp = + rinv * rinv * (d2phi[nl] - drnloverr); + const RealType gr_xx = x * x * gr2_tmp + drnloverr; + const RealType gr_xy = x * y * gr2_tmp; + const RealType gr_xz = x * z * gr2_tmp; + const RealType gr_yy = y * y * gr2_tmp + drnloverr; + const RealType gr_yz = y * z * gr2_tmp; + const RealType gr_zz = z * z * gr2_tmp + drnloverr; + + const RealType ang_x = ylm_x[lm]; + const RealType ang_y = ylm_y[lm]; + const RealType ang_z = ylm_z[lm]; + const RealType ang_xx = ylm_xx[lm]; + const RealType ang_xy = ylm_xy[lm]; + const RealType ang_xz = ylm_xz[lm]; + const RealType ang_yy = ylm_yy[lm]; + const RealType ang_yz = ylm_yz[lm]; + const RealType ang_zz = ylm_zz[lm]; + + const RealType vr = phi[nl]; + + psi[ib] += ang * vr; + dpsi_x[ib] += ang * gr_x + vr * ang_x; + dpsi_y[ib] += ang * gr_y + vr * ang_y; + dpsi_z[ib] += ang * gr_z + vr * ang_z; + + // \partial_i \partial_j (R*Y) = Y \partial_i \partial_j + // R + R \partial_i \partial_j Y + // + (\partial_i R) + // (\partial_j Y) + + // (\partial_j R)(\partial_i + // Y) + dhpsi_xx[ib] += + gr_xx * ang + ang_xx * vr + 2.0 * gr_x * ang_x; + dhpsi_xy[ib] += gr_xy * ang + 
ang_xy * vr + + gr_x * ang_y + gr_y * ang_x; + dhpsi_xz[ib] += gr_xz * ang + ang_xz * vr + + gr_x * ang_z + gr_z * ang_x; + dhpsi_yy[ib] += + gr_yy * ang + ang_yy * vr + 2.0 * gr_y * ang_y; + dhpsi_yz[ib] += gr_yz * ang + ang_yz * vr + + gr_y * ang_z + gr_z * ang_y; + dhpsi_zz[ib] += + gr_zz * ang + ang_zz * vr + 2.0 * gr_z * ang_z; + } + } + } + } + } + + template + inline void + evaluateVGHGH(const LAT& lattice, const RealType r, const PosType& dr, + const size_t offset, VGHGH& vghgh) + { + int TransX, TransY, TransZ; + + PosType dr_new; + RealType r_new; + + constexpr RealType cone(1); + + // one can assert the alignment + RealType* restrict phi = tempS.data(0); + RealType* restrict dphi = tempS.data(1); + RealType* restrict d2phi = tempS.data(2); + RealType* restrict d3phi = tempS.data(3); + + // V,Gx,Gy,Gz,L + auto* restrict psi = vghgh.data(0) + offset; + const RealType* restrict ylm_v = Ylm[0]; // value + auto* restrict dpsi_x = vghgh.data(1) + offset; + const RealType* restrict ylm_x = Ylm[1]; // gradX + auto* restrict dpsi_y = vghgh.data(2) + offset; + const RealType* restrict ylm_y = Ylm[2]; // gradY + auto* restrict dpsi_z = vghgh.data(3) + offset; + const RealType* restrict ylm_z = Ylm[3]; // gradZ + + auto* restrict dhpsi_xx = vghgh.data(4) + offset; + const RealType* restrict ylm_xx = Ylm[4]; + auto* restrict dhpsi_xy = vghgh.data(5) + offset; + const RealType* restrict ylm_xy = Ylm[5]; + auto* restrict dhpsi_xz = vghgh.data(6) + offset; + const RealType* restrict ylm_xz = Ylm[6]; + auto* restrict dhpsi_yy = vghgh.data(7) + offset; + const RealType* restrict ylm_yy = Ylm[7]; + auto* restrict dhpsi_yz = vghgh.data(8) + offset; + const RealType* restrict ylm_yz = Ylm[8]; + auto* restrict dhpsi_zz = vghgh.data(9) + offset; + const RealType* restrict ylm_zz = Ylm[9]; + + auto* restrict dghpsi_xxx = vghgh.data(10) + offset; + const RealType* restrict ylm_xxx = Ylm[10]; + auto* restrict dghpsi_xxy = vghgh.data(11) + offset; + const RealType* restrict 
ylm_xxy = Ylm[11]; + auto* restrict dghpsi_xxz = vghgh.data(12) + offset; + const RealType* restrict ylm_xxz = Ylm[12]; + auto* restrict dghpsi_xyy = vghgh.data(13) + offset; + const RealType* restrict ylm_xyy = Ylm[13]; + auto* restrict dghpsi_xyz = vghgh.data(14) + offset; + const RealType* restrict ylm_xyz = Ylm[14]; + auto* restrict dghpsi_xzz = vghgh.data(15) + offset; + const RealType* restrict ylm_xzz = Ylm[15]; + auto* restrict dghpsi_yyy = vghgh.data(16) + offset; + const RealType* restrict ylm_yyy = Ylm[16]; + auto* restrict dghpsi_yyz = vghgh.data(17) + offset; + const RealType* restrict ylm_yyz = Ylm[17]; + auto* restrict dghpsi_yzz = vghgh.data(18) + offset; + const RealType* restrict ylm_yzz = Ylm[18]; + auto* restrict dghpsi_zzz = vghgh.data(19) + offset; + const RealType* restrict ylm_zzz = Ylm[19]; + + for (size_t ib = 0; ib < BasisSetSize; ++ib) { + psi[ib] = 0; + + dpsi_x[ib] = 0; + dpsi_y[ib] = 0; + dpsi_z[ib] = 0; + + dhpsi_xx[ib] = 0; + dhpsi_xy[ib] = 0; + dhpsi_xz[ib] = 0; + dhpsi_yy[ib] = 0; + dhpsi_yz[ib] = 0; + dhpsi_zz[ib] = 0; + + dghpsi_xxx[ib] = 0; + dghpsi_xxy[ib] = 0; + dghpsi_xxz[ib] = 0; + dghpsi_xyy[ib] = 0; + dghpsi_xyz[ib] = 0; + dghpsi_xzz[ib] = 0; + dghpsi_yyy[ib] = 0; + dghpsi_yyz[ib] = 0; + dghpsi_yzz[ib] = 0; + dghpsi_zzz[ib] = 0; + } + + for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X + { + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); + for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y + { + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); + for (int k = 0; k <= PBCImages[2]; + k++) // loop Translation over Z + { + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... 
+ TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); + dr_new[0] = dr[0] + TransX * lattice.R(0, 0) + + TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0); + dr_new[1] = dr[1] + TransX * lattice.R(0, 1) + + TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1); + dr_new[2] = dr[2] + TransX * lattice.R(0, 2) + + TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2); + r_new = std::sqrt(dot(dr_new, dr_new)); + + // const size_t ib_max=NL.size(); + if (r_new >= Rmax) + continue; + + // SIGN Change!! + const RealType x = -dr_new[0], y = -dr_new[1], + z = -dr_new[2]; + Ylm.evaluateVGHGH(x, y, z); + + MultiRnl.evaluate(r_new, phi, dphi, d2phi, d3phi); + + const RealType rinv = cone / r_new; + const RealType xu = x * rinv, yu = y * rinv, zu = z * rinv; + for (size_t ib = 0; ib < BasisSetSize; ++ib) { + const int nl(NL[ib]); + const int lm(LM[ib]); + const RealType drnloverr = rinv * dphi[nl]; + const RealType ang = ylm_v[lm]; + const RealType gr_x = drnloverr * x; + const RealType gr_y = drnloverr * y; + const RealType gr_z = drnloverr * z; + + // The non-strictly diagonal term in \partial_i + // \partial_j R_{nl} is + // \frac{x_i x_j}{r^2}\left(\frac{\partial^2 + // R_{nl}}{\partial r^2} - \frac{1}{r}\frac{\partial + // R_{nl}}{\partial r}) To save recomputation, I + // evaluate everything except the x_i*x_j term once, + // and store it in gr2_tmp. The full term is obtained + // by x_i*x_j*gr2_tmp. This is p(r) in the notes. + const RealType gr2_tmp = rinv * (d2phi[nl] - drnloverr); + + const RealType gr_xx = x * xu * gr2_tmp + drnloverr; + const RealType gr_xy = x * yu * gr2_tmp; + const RealType gr_xz = x * zu * gr2_tmp; + const RealType gr_yy = y * yu * gr2_tmp + drnloverr; + const RealType gr_yz = y * zu * gr2_tmp; + const RealType gr_zz = z * zu * gr2_tmp + drnloverr; + + // This is q(r) in the notes. + const RealType gr3_tmp = d3phi[nl] - 3.0 * gr2_tmp; + + const RealType gr_xxx = + xu * xu * xu * gr3_tmp + gr2_tmp * (3. 
* xu); + const RealType gr_xxy = + xu * xu * yu * gr3_tmp + gr2_tmp * yu; + const RealType gr_xxz = + xu * xu * zu * gr3_tmp + gr2_tmp * zu; + const RealType gr_xyy = + xu * yu * yu * gr3_tmp + gr2_tmp * xu; + const RealType gr_xyz = xu * yu * zu * gr3_tmp; + const RealType gr_xzz = + xu * zu * zu * gr3_tmp + gr2_tmp * xu; + const RealType gr_yyy = + yu * yu * yu * gr3_tmp + gr2_tmp * (3. * yu); + const RealType gr_yyz = + yu * yu * zu * gr3_tmp + gr2_tmp * zu; + const RealType gr_yzz = + yu * zu * zu * gr3_tmp + gr2_tmp * yu; + const RealType gr_zzz = + zu * zu * zu * gr3_tmp + gr2_tmp * (3. * zu); + + // Angular derivatives up to third + const RealType ang_x = ylm_x[lm]; + const RealType ang_y = ylm_y[lm]; + const RealType ang_z = ylm_z[lm]; + + const RealType ang_xx = ylm_xx[lm]; + const RealType ang_xy = ylm_xy[lm]; + const RealType ang_xz = ylm_xz[lm]; + const RealType ang_yy = ylm_yy[lm]; + const RealType ang_yz = ylm_yz[lm]; + const RealType ang_zz = ylm_zz[lm]; + + const RealType ang_xxx = ylm_xxx[lm]; + const RealType ang_xxy = ylm_xxy[lm]; + const RealType ang_xxz = ylm_xxz[lm]; + const RealType ang_xyy = ylm_xyy[lm]; + const RealType ang_xyz = ylm_xyz[lm]; + const RealType ang_xzz = ylm_xzz[lm]; + const RealType ang_yyy = ylm_yyy[lm]; + const RealType ang_yyz = ylm_yyz[lm]; + const RealType ang_yzz = ylm_yzz[lm]; + const RealType ang_zzz = ylm_zzz[lm]; + + const RealType vr = phi[nl]; + + psi[ib] += ang * vr; + dpsi_x[ib] += ang * gr_x + vr * ang_x; + dpsi_y[ib] += ang * gr_y + vr * ang_y; + dpsi_z[ib] += ang * gr_z + vr * ang_z; + + // \partial_i \partial_j (R*Y) = Y \partial_i \partial_j + // R + R \partial_i \partial_j Y + // + (\partial_i R) + // (\partial_j Y) + + // (\partial_j R)(\partial_i + // Y) + dhpsi_xx[ib] += + gr_xx * ang + ang_xx * vr + 2.0 * gr_x * ang_x; + dhpsi_xy[ib] += gr_xy * ang + ang_xy * vr + + gr_x * ang_y + gr_y * ang_x; + dhpsi_xz[ib] += gr_xz * ang + ang_xz * vr + + gr_x * ang_z + gr_z * ang_x; + dhpsi_yy[ib] += + gr_yy * ang 
+ ang_yy * vr + 2.0 * gr_y * ang_y; + dhpsi_yz[ib] += gr_yz * ang + ang_yz * vr + + gr_y * ang_z + gr_z * ang_y; + dhpsi_zz[ib] += + gr_zz * ang + ang_zz * vr + 2.0 * gr_z * ang_z; + + dghpsi_xxx[ib] += gr_xxx * ang + vr * ang_xxx + + 3.0 * gr_xx * ang_x + 3.0 * gr_x * ang_xx; + dghpsi_xxy[ib] += gr_xxy * ang + vr * ang_xxy + + gr_xx * ang_y + ang_xx * gr_y + + 2.0 * gr_xy * ang_x + 2.0 * ang_xy * gr_x; + dghpsi_xxz[ib] += gr_xxz * ang + vr * ang_xxz + + gr_xx * ang_z + ang_xx * gr_z + + 2.0 * gr_xz * ang_x + 2.0 * ang_xz * gr_x; + dghpsi_xyy[ib] += gr_xyy * ang + vr * ang_xyy + + gr_yy * ang_x + ang_yy * gr_x + + 2.0 * gr_xy * ang_y + 2.0 * ang_xy * gr_y; + dghpsi_xyz[ib] += gr_xyz * ang + vr * ang_xyz + + gr_xy * ang_z + ang_xy * gr_z + gr_yz * ang_x + + ang_yz * gr_x + gr_xz * ang_y + ang_xz * gr_y; + dghpsi_xzz[ib] += gr_xzz * ang + vr * ang_xzz + + gr_zz * ang_x + ang_zz * gr_x + + 2.0 * gr_xz * ang_z + 2.0 * ang_xz * gr_z; + dghpsi_yyy[ib] += gr_yyy * ang + vr * ang_yyy + + 3.0 * gr_yy * ang_y + 3.0 * gr_y * ang_yy; + dghpsi_yyz[ib] += gr_yyz * ang + vr * ang_yyz + + gr_yy * ang_z + ang_yy * gr_z + + 2.0 * gr_yz * ang_y + 2.0 * ang_yz * gr_y; + dghpsi_yzz[ib] += gr_yzz * ang + vr * ang_yzz + + gr_zz * ang_y + ang_zz * gr_y + + 2.0 * gr_yz * ang_z + 2.0 * ang_yz * gr_z; + dghpsi_zzz[ib] += gr_zzz * ang + vr * ang_zzz + + 3.0 * gr_zz * ang_z + 3.0 * gr_z * ang_zz; + } + } + } + } + } + + /** evaluate V + */ + template + inline void + evaluateV(const LAT& lattice, const RealType r, const PosType& dr, + VT* restrict psi, PosType Tv) + { + int TransX, TransY, TransZ; + + PosType dr_new; + RealType r_new; + + const ValueType correctphase = + CorrectPhaseFunctor{SuperTwist}(Tv); + + RealType* restrict ylm_v = tempS.data(0); + RealType* restrict phi_r = tempS.data(1); + + for (size_t ib = 0; ib < BasisSetSize; ++ib) + psi[ib] = 0; + // Phase_idx (iter) needs to be initialized at -1 as it has to be + // incremented first to comply with the if statement (r_new >=Rmax) 
+ int iter = -1; + for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X + { + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); + for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y + { + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); + for (int k = 0; k <= PBCImages[2]; + k++) // loop Translation over Z + { + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); + + dr_new[0] = dr[0] + + (TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + + TransZ * lattice.R(2, 0)); + dr_new[1] = dr[1] + + (TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + + TransZ * lattice.R(2, 1)); + dr_new[2] = dr[2] + + (TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + + TransZ * lattice.R(2, 2)); + + r_new = std::sqrt(dot(dr_new, dr_new)); + iter++; + if (r_new >= Rmax) + continue; + + Ylm.evaluateV(-dr_new[0], -dr_new[1], -dr_new[2], ylm_v); + MultiRnl.evaluate(r_new, phi_r); + /// Phase for PBC containing the phase for the nearest image + /// displacement and the correction due to the Distance + /// table. + const ValueType Phase = + periodic_image_phase_factors[iter] * correctphase; + for (size_t ib = 0; ib < BasisSetSize; ++ib) + psi[ib] += ylm_v[LM[ib]] * phi_r[NL[ib]] * Phase; + } + } + } + } +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp index 57a1312447..85c17ef568 100644 --- a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp +++ b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp @@ -1,6 +1,6 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. 
+// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2021 QMCPACK developers. // @@ -9,160 +9,173 @@ // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. ////////////////////////////////////////////////////////////////////////////////////// - /** @file SoaCuspCorrectionT.cpp */ #include "SoaCuspCorrectionT.h" + #include "SoaCuspCorrectionBasisSet.h" +#include "Particle/DistanceTableT.h" namespace qmcplusplus { -template -SoaCuspCorrectionT::SoaCuspCorrectionT(ParticleSet& ions, ParticleSet& els) : myTableIndex(els.addTable(ions)) +template +SoaCuspCorrectionT::SoaCuspCorrectionT( + ParticleSetT& ions, ParticleSetT& els) : + myTableIndex(els.addTable(ions)) { - NumCenters = ions.getTotalNum(); - NumTargets = els.getTotalNum(); - LOBasisSet.resize(NumCenters); + NumCenters = ions.getTotalNum(); + NumTargets = els.getTotalNum(); + LOBasisSet.resize(NumCenters); } -template -SoaCuspCorrectionT::SoaCuspCorrectionT(const SoaCuspCorrectionT& a) = default; +template +SoaCuspCorrectionT::SoaCuspCorrectionT( + const SoaCuspCorrectionT& a) = default; -template -void SoaCuspCorrectionT::setOrbitalSetSize(int norbs) +template +void +SoaCuspCorrectionT::setOrbitalSetSize(int norbs) { - MaxOrbSize = norbs; - myVGL.resize(5, MaxOrbSize); + MaxOrbSize = norbs; + myVGL.resize(5, MaxOrbSize); } -template -inline void SoaCuspCorrectionT::evaluateVGL(const ParticleSet& P, int iat, VGLVector& vgl) +template +inline void +SoaCuspCorrectionT::evaluateVGL( + const ParticleSetT& P, int iat, VGLVector& vgl) { - assert(MaxOrbSize >= vgl.size()); - myVGL = 0.0; - - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); - const auto& displ = (P.getActivePtcl() == iat) ? 
d_table.getTempDispls() : d_table.getDisplRow(iat); - for (int c = 0; c < NumCenters; c++) - if (LOBasisSet[c]) - LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], myVGL[2], myVGL[3], myVGL[4]); - - { - const auto v_in = myVGL[0]; - const auto gx_in = myVGL[1]; - const auto gy_in = myVGL[2]; - const auto gz_in = myVGL[3]; - const auto l_in = myVGL[4]; - auto v_out = vgl.data(0); - auto gx_out = vgl.data(1); - auto gy_out = vgl.data(2); - auto gz_out = vgl.data(3); - auto l_out = vgl.data(4); - for (size_t i = 0; i < vgl.size(); ++i) + assert(MaxOrbSize >= vgl.size()); + myVGL = 0.0; + + const auto& d_table = P.getDistTableAB(myTableIndex); + const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : + d_table.getDistRow(iat); + const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : + d_table.getDisplRow(iat); + for (int c = 0; c < NumCenters; c++) + if (LOBasisSet[c]) + LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], + myVGL[2], myVGL[3], myVGL[4]); + { - v_out[i] += v_in[i]; - gx_out[i] += gx_in[i]; - gy_out[i] += gy_in[i]; - gz_out[i] += gz_in[i]; - l_out[i] += l_in[i]; + const auto v_in = myVGL[0]; + const auto gx_in = myVGL[1]; + const auto gy_in = myVGL[2]; + const auto gz_in = myVGL[3]; + const auto l_in = myVGL[4]; + auto v_out = vgl.data(0); + auto gx_out = vgl.data(1); + auto gy_out = vgl.data(2); + auto gz_out = vgl.data(3); + auto l_out = vgl.data(4); + for (size_t i = 0; i < vgl.size(); ++i) { + v_out[i] += v_in[i]; + gx_out[i] += gx_in[i]; + gy_out[i] += gy_in[i]; + gz_out[i] += gz_in[i]; + l_out[i] += l_in[i]; + } } - } } -template -void SoaCuspCorrectionT::evaluate_vgl(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) +template +void +SoaCuspCorrectionT::evaluate_vgl(const ParticleSetT& P, int iat, + ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { - assert(MaxOrbSize >= psi.size()); - myVGL = 0.0; - - const auto& d_table = 
P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); - const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat); - for (int c = 0; c < NumCenters; c++) - if (LOBasisSet[c]) - LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], myVGL[2], myVGL[3], myVGL[4]); - - const auto v_in = myVGL[0]; - const auto gx_in = myVGL[1]; - const auto gy_in = myVGL[2]; - const auto gz_in = myVGL[3]; - const auto l_in = myVGL[4]; - for (size_t i = 0; i < psi.size(); ++i) - { - psi[i] += v_in[i]; - dpsi[i][0] += gx_in[i]; - dpsi[i][1] += gy_in[i]; - dpsi[i][2] += gz_in[i]; - d2psi[i] += l_in[i]; - } + assert(MaxOrbSize >= psi.size()); + myVGL = 0.0; + + const auto& d_table = P.getDistTableAB(myTableIndex); + const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : + d_table.getDistRow(iat); + const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : + d_table.getDisplRow(iat); + for (int c = 0; c < NumCenters; c++) + if (LOBasisSet[c]) + LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], + myVGL[2], myVGL[3], myVGL[4]); + + const auto v_in = myVGL[0]; + const auto gx_in = myVGL[1]; + const auto gy_in = myVGL[2]; + const auto gz_in = myVGL[3]; + const auto l_in = myVGL[4]; + for (size_t i = 0; i < psi.size(); ++i) { + psi[i] += v_in[i]; + dpsi[i][0] += gx_in[i]; + dpsi[i][1] += gy_in[i]; + dpsi[i][2] += gz_in[i]; + d2psi[i] += l_in[i]; + } } -template -void SoaCuspCorrectionT::evaluate_vgl(const ParticleSet& P, - int iat, - int idx, - ValueMatrix& psi, - GradMatrix& dpsi, - ValueMatrix& d2psi) +template +void +SoaCuspCorrectionT::evaluate_vgl(const ParticleSetT& P, int iat, int idx, + ValueMatrix& psi, GradMatrix& dpsi, ValueMatrix& d2psi) { - assert(MaxOrbSize >= psi.cols()); - myVGL = 0.0; - - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? 
d_table.getTempDists() : d_table.getDistRow(iat); - const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat); - for (int c = 0; c < NumCenters; c++) - if (LOBasisSet[c]) - LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], myVGL[2], myVGL[3], myVGL[4]); - - const auto v_in = myVGL[0]; - const auto gx_in = myVGL[1]; - const auto gy_in = myVGL[2]; - const auto gz_in = myVGL[3]; - const auto l_in = myVGL[4]; - for (size_t i = 0; i < psi.cols(); ++i) - { - psi[idx][i] += v_in[i]; - dpsi[idx][i][0] += gx_in[i]; - dpsi[idx][i][1] += gy_in[i]; - dpsi[idx][i][2] += gz_in[i]; - d2psi[idx][i] += l_in[i]; - } + assert(MaxOrbSize >= psi.cols()); + myVGL = 0.0; + + const auto& d_table = P.getDistTableAB(myTableIndex); + const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : + d_table.getDistRow(iat); + const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : + d_table.getDisplRow(iat); + for (int c = 0; c < NumCenters; c++) + if (LOBasisSet[c]) + LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], + myVGL[2], myVGL[3], myVGL[4]); + + const auto v_in = myVGL[0]; + const auto gx_in = myVGL[1]; + const auto gy_in = myVGL[2]; + const auto gz_in = myVGL[3]; + const auto l_in = myVGL[4]; + for (size_t i = 0; i < psi.cols(); ++i) { + psi[idx][i] += v_in[i]; + dpsi[idx][i][0] += gx_in[i]; + dpsi[idx][i][1] += gy_in[i]; + dpsi[idx][i][2] += gz_in[i]; + d2psi[idx][i] += l_in[i]; + } } -template -void SoaCuspCorrectionT::evaluateV(const ParticleSet& P, int iat, ValueVector& psi) +template +void +SoaCuspCorrectionT::evaluateV( + const ParticleSetT& P, int iat, ValueVector& psi) { - assert(MaxOrbSize >= psi.size()); - T* tmp_vals = myVGL[0]; + assert(MaxOrbSize >= psi.size()); + T* tmp_vals = myVGL[0]; - std::fill_n(tmp_vals, myVGL.size(), 0.0); + std::fill_n(tmp_vals, myVGL.size(), 0.0); - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() 
== iat) ? d_table.getTempDists() : d_table.getDistRow(iat); + const auto& d_table = P.getDistTableAB(myTableIndex); + const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : + d_table.getDistRow(iat); - //THIS IS SERIAL, only way to avoid this is to use myVGL - for (int c = 0; c < NumCenters; c++) - if (LOBasisSet[c]) - LOBasisSet[c]->evaluate(dist[c], tmp_vals); + // THIS IS SERIAL, only way to avoid this is to use myVGL + for (int c = 0; c < NumCenters; c++) + if (LOBasisSet[c]) + LOBasisSet[c]->evaluate(dist[c], tmp_vals); - { //collect - const auto v_in = myVGL[0]; - for (size_t i = 0; i < psi.size(); ++i) - psi[i] += v_in[i]; - } + { // collect + const auto v_in = myVGL[0]; + for (size_t i = 0; i < psi.size(); ++i) + psi[i] += v_in[i]; + } } -template -void SoaCuspCorrectionT::add(int icenter, std::unique_ptr aos) +template +void +SoaCuspCorrectionT::add(int icenter, std::unique_ptr aos) { - assert(MaxOrbSize == aos->getNumOrbs() && "All the centers should support the same number of orbitals!"); - LOBasisSet[icenter].reset(aos.release()); + assert(MaxOrbSize == aos->getNumOrbs() && + "All the centers should support the same number of orbitals!"); + LOBasisSet[icenter].reset(aos.release()); } template class SoaCuspCorrectionT; diff --git a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h index dca3912f90..0edf61af87 100644 --- a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h +++ b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h @@ -1,6 +1,6 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2021 QMCPACK developers. 
// @@ -9,18 +9,16 @@ // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. ////////////////////////////////////////////////////////////////////////////////////// - /** @file SoaCuspCorrectionT.h */ #ifndef QMCPLUSPLUS_SOA_CUSPCORRECTIONT_H #define QMCPLUSPLUS_SOA_CUSPCORRECTIONT_H -#include "Configuration.h" #include "QMCWaveFunctions/SPOSetT.h" namespace qmcplusplus { -template +template class CuspCorrectionAtomicBasis; /** A localized basis set derived from BasisSetBase @@ -30,88 +28,110 @@ class CuspCorrectionAtomicBasis; * The template parameter COT denotes Centered-Orbital-Type which provides * a set of localized orbitals associated with a center. */ -template +template class SoaCuspCorrectionT { - using RealType = typename SPOSetT::RealType; - using VGLVector = VectorSoaContainer; - using ValueMatrix = typename SPOSetT::ValueMatrix; - using GradMatrix = typename SPOSetT::GradMatrix; - using GradVector = typename SPOSetT::GradVector; - using ValueVector = typename SPOSetT::ValueVector; - using PosType = typename SPOSetT::PosType; - - ///number of centers, e.g., ions - size_t NumCenters; - ///number of quantum particles - size_t NumTargets; - ///number of quantum particles - const int myTableIndex; - /** Maximal number of supported MOs - * this is not the AO basis because cusp correction is applied on the MO directly. 
- */ - int MaxOrbSize = 0; - - ///COMPLEX WON'T WORK - using COT = CuspCorrectionAtomicBasis; - - /** container of the unique pointers to the Atomic Orbitals - * - * size of LOBasisSet = number of centers (atoms) - * should use unique_ptr once COT is fixed for better performance - */ - std::vector> LOBasisSet; - - Matrix myVGL; + using RealType = typename SPOSetT::RealType; + using VGLVector = VectorSoaContainer; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + using GradVector = typename SPOSetT::GradVector; + using ValueVector = typename SPOSetT::ValueVector; + using PosType = typename SPOSetT::PosType; + + /// number of centers, e.g., ions + size_t NumCenters; + /// number of quantum particles + size_t NumTargets; + /// number of quantum particles + const int myTableIndex; + /** Maximal number of supported MOs + * this is not the AO basis because cusp correction is applied on the MO + * directly. + */ + int MaxOrbSize = 0; + + /// COMPLEX WON'T WORK + using COT = CuspCorrectionAtomicBasis; + + /** container of the unique pointers to the Atomic Orbitals + * + * size of LOBasisSet = number of centers (atoms) + * should use unique_ptr once COT is fixed for better performance + */ + std::vector> LOBasisSet; + + Matrix myVGL; public: - /** constructor - * @param ions ionic system - * @param els electronic system - */ - SoaCuspCorrectionT(ParticleSet& ions, ParticleSet& els); - - /** copy constructor */ - SoaCuspCorrectionT(const SoaCuspCorrectionT& a); - - /** set the number of orbitals this cusp correction may serve. call this before adding any correction centers. 
- */ - void setOrbitalSetSize(int norbs); - - /** compute VGL - * @param P quantum particleset - * @param iat active particle - * @param vgl Matrix(5,BasisSetSize) - * @param trialMove if true, use getTempDists()/getTempDispls() - */ - void evaluateVGL(const ParticleSet& P, int iat, VGLVector& vgl); - - void evaluate_vgl(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - void evaluate_vgl(const ParticleSet& P, int iat, int idx, ValueMatrix& psi, GradMatrix& dpsi, ValueMatrix& d2psi); - - /** compute values for the iat-paricle move - * - * Always uses getTempDists() and getTempDispls() - */ - void evaluateV(const ParticleSet& P, int iat, ValueVector& psi); - - /** add a new set of Centered Atomic Orbitals - * @param icenter the index of the center - * @param aos a set of Centered Atomic Orbitals - */ - void add(int icenter, std::unique_ptr aos); - - void addVGL(const ParticleSet& P, int iat, VGLVector& vgl) { evaluateVGL(P, iat, vgl); } - void addV(const ParticleSet& P, int iat, ValueVector& psi) { evaluateV(P, iat, psi); } - void add_vgl(const ParticleSet& P, int iat, int idx, ValueMatrix& vals, GradMatrix& dpsi, ValueMatrix& d2psi) - { - evaluate_vgl(P, iat, idx, vals, dpsi, d2psi); - } - void add_vector_vgl(const ParticleSet& P, int iat, ValueVector& vals, GradVector& dpsi, ValueVector& d2psi) - { - evaluate_vgl(P, iat, vals, dpsi, d2psi); - } + /** constructor + * @param ions ionic system + * @param els electronic system + */ + SoaCuspCorrectionT(ParticleSetT& ions, ParticleSetT& els); + + /** copy constructor */ + SoaCuspCorrectionT(const SoaCuspCorrectionT& a); + + /** set the number of orbitals this cusp correction may serve. call this + * before adding any correction centers. 
+ */ + void + setOrbitalSetSize(int norbs); + + /** compute VGL + * @param P quantum particleset + * @param iat active particle + * @param vgl Matrix(5,BasisSetSize) + * @param trialMove if true, use getTempDists()/getTempDispls() + */ + void + evaluateVGL(const ParticleSetT& P, int iat, VGLVector& vgl); + + void + evaluate_vgl(const ParticleSetT& P, int iat, ValueVector& psi, + GradVector& dpsi, ValueVector& d2psi); + + void + evaluate_vgl(const ParticleSetT& P, int iat, int idx, ValueMatrix& psi, + GradMatrix& dpsi, ValueMatrix& d2psi); + + /** compute values for the iat-paricle move + * + * Always uses getTempDists() and getTempDispls() + */ + void + evaluateV(const ParticleSetT& P, int iat, ValueVector& psi); + + /** add a new set of Centered Atomic Orbitals + * @param icenter the index of the center + * @param aos a set of Centered Atomic Orbitals + */ + void + add(int icenter, std::unique_ptr aos); + + void + addVGL(const ParticleSetT& P, int iat, VGLVector& vgl) + { + evaluateVGL(P, iat, vgl); + } + void + addV(const ParticleSetT& P, int iat, ValueVector& psi) + { + evaluateV(P, iat, psi); + } + void + add_vgl(const ParticleSetT& P, int iat, int idx, ValueMatrix& vals, + GradMatrix& dpsi, ValueMatrix& d2psi) + { + evaluate_vgl(P, iat, idx, vals, dpsi, d2psi); + } + void + add_vector_vgl(const ParticleSetT& P, int iat, ValueVector& vals, + GradVector& dpsi, ValueVector& d2psi) + { + evaluate_vgl(P, iat, vals, dpsi, d2psi); + } }; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp new file mode 100644 index 0000000000..7b62735768 --- /dev/null +++ b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp @@ -0,0 +1,469 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. 
+// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +////////////////////////////////////////////////////////////////////////////////////// + +#include "SoaLocalizedBasisSetT.h" + +#include "MultiFunctorAdapter.h" +#include "MultiQuinticSpline1D.h" +#include "Numerics/SoaCartesianTensor.h" +#include "Numerics/SoaSphericalTensor.h" +#include "Particle/DistanceTableT.h" +#include "SoaAtomicBasisSetT.h" + +#include + +namespace qmcplusplus +{ +template +SoaLocalizedBasisSetT::SoaLocalizedBasisSetT( + ParticleSetT& ions, ParticleSetT& els) : + ions_(ions), + myTableIndex(els.addTable(ions, + DTModes::NEED_FULL_TABLE_ANYTIME | + DTModes::NEED_VP_FULL_TABLE_ON_HOST)), + SuperTwist(0.0) +{ + NumCenters = ions.getTotalNum(); + NumTargets = els.getTotalNum(); + LOBasisSet.resize(ions.getSpeciesSet().getTotalNum()); + BasisOffset.resize(NumCenters + 1); + BasisSetSize = 0; +} + +template +SoaLocalizedBasisSetT::SoaLocalizedBasisSetT( + const SoaLocalizedBasisSetT& a) : + SoaBasisSetBaseT(a), + NumCenters(a.NumCenters), + NumTargets(a.NumTargets), + ions_(a.ions_), + myTableIndex(a.myTableIndex), + SuperTwist(a.SuperTwist), + BasisOffset(a.BasisOffset) +{ + LOBasisSet.reserve(a.LOBasisSet.size()); + for (auto& elem : a.LOBasisSet) + LOBasisSet.push_back(std::make_unique(*elem)); +} + +template +void +SoaLocalizedBasisSetT::setPBCParams( + const TinyVector& PBCImages, const TinyVector Sup_Twist, + const std::vector& phase_factor) +{ + for (int i = 0; i < LOBasisSet.size(); ++i) + LOBasisSet[i]->setPBCParams(PBCImages, Sup_Twist, phase_factor); + + SuperTwist = Sup_Twist; +} + +template +void +SoaLocalizedBasisSetT::setBasisSetSize(int nbs) +{ + const auto& IonID(ions_.GroupID); + if (BasisSetSize > 0 && nbs == BasisSetSize) + return; + + if (auto& mapping = ions_.get_map_storage_to_input(); mapping.empty()) { + // evaluate the total basis dimension and offset for 
each center + BasisOffset[0] = 0; + for (int c = 0; c < NumCenters; c++) + BasisOffset[c + 1] = + BasisOffset[c] + LOBasisSet[IonID[c]]->getBasisSetSize(); + BasisSetSize = BasisOffset[NumCenters]; + } + else { + // when particles are reordered due to grouping, AOs need to restore the + // input order to match MOs. + std::vector map_input_to_storage(mapping.size()); + for (int c = 0; c < NumCenters; c++) + map_input_to_storage[mapping[c]] = c; + + std::vector basis_offset_input_order(BasisOffset.size(), 0); + for (int c = 0; c < NumCenters; c++) + basis_offset_input_order[c + 1] = basis_offset_input_order[c] + + LOBasisSet[IonID[map_input_to_storage[c]]]->getBasisSetSize(); + + for (int c = 0; c < NumCenters; c++) + BasisOffset[c] = basis_offset_input_order[mapping[c]]; + + BasisSetSize = basis_offset_input_order[NumCenters]; + } +} + +template +void +SoaLocalizedBasisSetT::queryOrbitalsForSType( + const std::vector& corrCenter, std::vector& is_s_orbital) const +{ + const auto& IonID(ions_.GroupID); + for (int c = 0; c < NumCenters; c++) { + int idx = BasisOffset[c]; + int bss = LOBasisSet[IonID[c]]->BasisSetSize; + std::vector local_is_s_orbital(bss); + LOBasisSet[IonID[c]]->queryOrbitalsForSType(local_is_s_orbital); + for (int k = 0; k < bss; k++) { + if (corrCenter[c]) { + is_s_orbital[idx++] = local_is_s_orbital[k]; + } + else { + is_s_orbital[idx++] = false; + } + } + } +} + +template +void +SoaLocalizedBasisSetT::evaluateVGL( + const ParticleSetT& P, int iat, vgl_type& vgl) +{ + const auto& IonID(ions_.GroupID); + const auto& coordR = P.activeR(iat); + const auto& d_table = P.getDistTableAB(myTableIndex); + const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : + d_table.getDistRow(iat); + const auto& displ = (P.getActivePtcl() == iat) ? 
d_table.getTempDispls() : + d_table.getDisplRow(iat); + + PosType Tv; + for (int c = 0; c < NumCenters; c++) { + Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0]; + Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1]; + Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2]; + LOBasisSet[IonID[c]]->evaluateVGL( + P.getLattice(), dist[c], displ[c], BasisOffset[c], vgl, Tv); + } +} + +template +void +SoaLocalizedBasisSetT::mw_evaluateVGL( + const RefVectorWithLeader>& P_list, int iat, + OffloadMWVGLArray& vgl_v) +{ + for (size_t iw = 0; iw < P_list.size(); iw++) { + const auto& IonID(ions_.GroupID); + const auto& coordR = P_list[iw].activeR(iat); + const auto& d_table = P_list[iw].getDistTableAB(myTableIndex); + const auto& dist = (P_list[iw].getActivePtcl() == iat) ? + d_table.getTempDists() : + d_table.getDistRow(iat); + const auto& displ = (P_list[iw].getActivePtcl() == iat) ? + d_table.getTempDispls() : + d_table.getDisplRow(iat); + + PosType Tv; + + // number of walkers * BasisSetSize + auto stride = vgl_v.size(1) * BasisSetSize; + assert(BasisSetSize == vgl_v.size(2)); + vgl_type vgl_iw(vgl_v.data_at(0, iw, 0), BasisSetSize, stride); + + for (int c = 0; c < NumCenters; c++) { + Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0]; + Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1]; + Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2]; + LOBasisSet[IonID[c]]->evaluateVGL(P_list[iw].getLattice(), dist[c], + displ[c], BasisOffset[c], vgl_iw, Tv); + } + } +} + +template +void +SoaLocalizedBasisSetT::evaluateVGH( + const ParticleSetT& P, int iat, vgh_type& vgh) +{ + const auto& IonID(ions_.GroupID); + const auto& d_table = P.getDistTableAB(myTableIndex); + const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : + d_table.getDistRow(iat); + const auto& displ = (P.getActivePtcl() == iat) ? 
d_table.getTempDispls() : + d_table.getDisplRow(iat); + for (int c = 0; c < NumCenters; c++) { + LOBasisSet[IonID[c]]->evaluateVGH( + P.getLattice(), dist[c], displ[c], BasisOffset[c], vgh); + } +} + +template +void +SoaLocalizedBasisSetT::evaluateVGHGH( + const ParticleSetT& P, int iat, vghgh_type& vghgh) +{ + // APP_ABORT("SoaLocalizedBasisSetT::evaluateVGH() not implemented\n"); + + const auto& IonID(ions_.GroupID); + const auto& d_table = P.getDistTableAB(myTableIndex); + const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : + d_table.getDistRow(iat); + const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : + d_table.getDisplRow(iat); + for (int c = 0; c < NumCenters; c++) { + LOBasisSet[IonID[c]]->evaluateVGHGH( + P.getLattice(), dist[c], displ[c], BasisOffset[c], vghgh); + } +} + +template +void +SoaLocalizedBasisSetT::evaluateV( + const ParticleSetT& P, int iat, ORBT* restrict vals) +{ + const auto& IonID(ions_.GroupID); + const auto& coordR = P.activeR(iat); + const auto& d_table = P.getDistTableAB(myTableIndex); + const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : + d_table.getDistRow(iat); + const auto& displ = (P.getActivePtcl() == iat) ? 
d_table.getTempDispls() : + d_table.getDisplRow(iat); + + PosType Tv; + for (int c = 0; c < NumCenters; c++) { + Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0]; + Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1]; + Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2]; + LOBasisSet[IonID[c]]->evaluateV( + P.getLattice(), dist[c], displ[c], vals + BasisOffset[c], Tv); + } +} + +template +void +SoaLocalizedBasisSetT::mw_evaluateValue( + const RefVectorWithLeader>& P_list, int iat, + OffloadMWVArray& v) +{ + for (size_t iw = 0; iw < P_list.size(); iw++) + evaluateV(P_list[iw], iat, v.data_at(iw, 0)); +} + +template +void +SoaLocalizedBasisSetT::evaluateGradSourceV( + const ParticleSetT& P, int iat, const ParticleSetT& ions, + int jion, vgl_type& vgl) +{ + // We need to zero out the temporary array vgl. + auto* restrict gx = vgl.data(1); + auto* restrict gy = vgl.data(2); + auto* restrict gz = vgl.data(3); + + for (int ib = 0; ib < BasisSetSize; ib++) { + gx[ib] = 0; + gy[ib] = 0; + gz[ib] = 0; + } + + const auto& IonID(ions_.GroupID); + const auto& d_table = P.getDistTableAB(myTableIndex); + const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : + d_table.getDistRow(iat); + const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : + d_table.getDisplRow(iat); + + PosType Tv; + Tv[0] = Tv[1] = Tv[2] = 0; + // Since LCAO's are written only in terms of (r-R), ionic derivatives only + // exist for the atomic center that we wish to take derivatives of. + // Moreover, we can obtain an ion derivative by multiplying an electron + // derivative by -1.0. Handling this sign is left to LCAOrbitalSet. For + // now, just note this is the electron VGL function. 
+ LOBasisSet[IonID[jion]]->evaluateVGL( + P.getLattice(), dist[jion], displ[jion], BasisOffset[jion], vgl, Tv); +} + +template +void +SoaLocalizedBasisSetT::evaluateGradSourceVGL( + const ParticleSetT& P, int iat, const ParticleSetT& ions, + int jion, vghgh_type& vghgh) +{ + // We need to zero out the temporary array vghgh. + auto* restrict gx = vghgh.data(1); + auto* restrict gy = vghgh.data(2); + auto* restrict gz = vghgh.data(3); + + auto* restrict hxx = vghgh.data(4); + auto* restrict hxy = vghgh.data(5); + auto* restrict hxz = vghgh.data(6); + auto* restrict hyy = vghgh.data(7); + auto* restrict hyz = vghgh.data(8); + auto* restrict hzz = vghgh.data(9); + + auto* restrict gxxx = vghgh.data(10); + auto* restrict gxxy = vghgh.data(11); + auto* restrict gxxz = vghgh.data(12); + auto* restrict gxyy = vghgh.data(13); + auto* restrict gxyz = vghgh.data(14); + auto* restrict gxzz = vghgh.data(15); + auto* restrict gyyy = vghgh.data(16); + auto* restrict gyyz = vghgh.data(17); + auto* restrict gyzz = vghgh.data(18); + auto* restrict gzzz = vghgh.data(19); + + for (int ib = 0; ib < BasisSetSize; ib++) { + gx[ib] = 0; + gy[ib] = 0; + gz[ib] = 0; + + hxx[ib] = 0; + hxy[ib] = 0; + hxz[ib] = 0; + hyy[ib] = 0; + hyz[ib] = 0; + hzz[ib] = 0; + + gxxx[ib] = 0; + gxxy[ib] = 0; + gxxz[ib] = 0; + gxyy[ib] = 0; + gxyz[ib] = 0; + gxzz[ib] = 0; + gyyy[ib] = 0; + gyyz[ib] = 0; + gyzz[ib] = 0; + gzzz[ib] = 0; + } + + // Since jion is indexed on the source ions not the ions_ the distinction + // between ions_ and ions is extremely important. + const auto& IonID(ions.GroupID); + const auto& d_table = P.getDistTableAB(myTableIndex); + const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : + d_table.getDistRow(iat); + const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : + d_table.getDisplRow(iat); + + // Since LCAO's are written only in terms of (r-R), ionic derivatives only + // exist for the atomic center that we wish to take derivatives of. 
+ // Moreover, we can obtain an ion derivative by multiplying an electron + // derivative by -1.0. Handling this sign is left to LCAOrbitalSet. For + // now, just note this is the electron VGL function. + + LOBasisSet[IonID[jion]]->evaluateVGHGH( + P.getLattice(), dist[jion], displ[jion], BasisOffset[jion], vghgh); +} + +template +void +SoaLocalizedBasisSetT::add(int icenter, std::unique_ptr aos) +{ + LOBasisSet[icenter] = std::move(aos); +} + +// TODO: this should be redone with template template parameters + +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT, SoaCartesianTensor, + double>, + double>; +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT, SoaCartesianTensor, + std::complex>, + std::complex>; +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT, SoaCartesianTensor, + float>, + float>; +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT, SoaCartesianTensor, + std::complex>, + std::complex>; + +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT, SoaSphericalTensor, + double>, + double>; +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT, SoaSphericalTensor, + std::complex>, + std::complex>; +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT, SoaSphericalTensor, + float>, + float>; +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT, SoaSphericalTensor, + std::complex>, + std::complex>; + +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT>, + SoaCartesianTensor, double>, + double>; +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT>, + SoaCartesianTensor, std::complex>, + std::complex>; +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT>, + SoaCartesianTensor, float>, + float>; +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT>, + SoaCartesianTensor, std::complex>, + std::complex>; + +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT>, + SoaSphericalTensor, double>, + double>; +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT>, + 
SoaSphericalTensor, std::complex>, + std::complex>; +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT>, + SoaSphericalTensor, float>, + float>; +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT>, + SoaSphericalTensor, std::complex>, + std::complex>; + +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT>, + SoaCartesianTensor, double>, + double>; +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT>, + SoaCartesianTensor, std::complex>, + std::complex>; +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT>, + SoaCartesianTensor, float>, + float>; +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT>, + SoaCartesianTensor, std::complex>, + std::complex>; + +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT>, + SoaSphericalTensor, double>, + double>; +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT>, + SoaSphericalTensor, std::complex>, + std::complex>; +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT>, + SoaSphericalTensor, float>, + float>; +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT>, + SoaSphericalTensor, std::complex>, + std::complex>; +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h new file mode 100644 index 0000000000..6f2e412413 --- /dev/null +++ b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h @@ -0,0 +1,190 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. 
+////////////////////////////////////////////////////////////////////////////////////// + +/** @file SoaLocalizedBasisSetT.h + * @brief A derived class from BasisSetBase + * + * This is intended as a replacement for MolecularWaveFunctionComponent and + * any other localized basis set. + */ +#ifndef QMCPLUSPLUS_SOA_LOCALIZEDBASISSETT_H +#define QMCPLUSPLUS_SOA_LOCALIZEDBASISSETT_H + +#include "OMPTarget/OffloadAlignedAllocators.hpp" +#include "QMCWaveFunctions/BasisSetBaseT.h" + +#include + +namespace qmcplusplus +{ +/** A localized basis set derived from SoaBasisSetBase + * + * This class performs the evaluation of the basis functions and their + * derivatives for each of the N-particles in a configuration. + * The template parameter COT denotes Centered-Orbital-Type which provides + * a set of localized orbitals associated with a center. + * The template parameter ORBT denotes the orbital value return type + */ +template +class SoaLocalizedBasisSetT : public SoaBasisSetBaseT +{ +public: + using RealType = typename COT::RealType; + using BaseType = SoaBasisSetBaseT; + using ValueType = ORBT; + + using vgl_type = typename BaseType::vgl_type; + using vgh_type = typename BaseType::vgh_type; + using vghgh_type = typename BaseType::vghgh_type; + using PosType = typename ParticleSetT::PosType; + using OffloadMWVGLArray = typename BaseType::OffloadMWVGLArray; + using OffloadMWVArray = typename BaseType::OffloadMWVArray; + + using BaseType::BasisSetSize; + + /// number of centers, e.g., ions + size_t NumCenters; + /// number of quantum particles + size_t NumTargets; + /// ion particle set + const ParticleSetT& ions_; + /// number of quantum particles + const int myTableIndex; + /// Global Coordinate of Supertwist read from HDF5 + PosType SuperTwist; + + /** container to store the offsets of the basis functions for each center + * Due to potential reordering of ions, offsets can be in any order. 
+ */ + std::vector BasisOffset; + + /** container of the unique pointers to the Atomic Orbitals + * + * size of LOBasisSet = number of unique centers + */ + std::vector> LOBasisSet; + + /** constructor + * @param ions ionic system + * @param els electronic system + */ + SoaLocalizedBasisSetT(ParticleSetT& ions, ParticleSetT& els); + + /** copy constructor */ + SoaLocalizedBasisSetT(const SoaLocalizedBasisSetT& a); + + /** makeClone */ + BaseType* + makeClone() const override + { + return new SoaLocalizedBasisSetT(*this); + } + + /** set Number of periodic Images to evaluate the orbitals. + Set to 0 for non-PBC, and set manually in the input. + Passes the pre-computed phase factor for evaluation of complex + wavefunction. If WF is real Phase_factor is real and equals 1 if gamma or + -1 if non-Gamma. + */ + void + setPBCParams(const TinyVector& PBCImages, + const TinyVector Sup_Twist, + const std::vector& phase_factor); + + /** set BasisSetSize and allocate mVGL container + */ + void + setBasisSetSize(int nbs) override; + + /** Determine which orbitals are S-type. Used by cusp correction. 
+ */ + void + queryOrbitalsForSType(const std::vector& corrCenter, + std::vector& is_s_orbital) const override; + + /** compute VGL + * @param P quantum particleset + * @param iat active particle + * @param vgl Matrix(5,BasisSetSize) + * @param trialMove if true, use getTempDists()/getTempDispls() + */ + void + evaluateVGL(const ParticleSetT& P, int iat, vgl_type& vgl) override; + + /** compute V using packed array with all walkers + * @param P_list list of quantum particleset (one for each walker) + * @param iat active particle + * @param v Array(n_walkers, BasisSetSize) + */ + void + mw_evaluateValue(const RefVectorWithLeader>& P_list, + int iat, OffloadMWVArray& v) override; + + /** compute VGL using packed array with all walkers + * @param P_list list of quantum particleset (one for each walker) + * @param iat active particle + * @param vgl Array(n_walkers, 5, BasisSetSize) + */ + void + mw_evaluateVGL(const RefVectorWithLeader>& P_list, + int iat, OffloadMWVGLArray& vgl) override; + + /** compute VGH + * @param P quantum particleset + * @param iat active particle + * @param vgl Matrix(10,BasisSetSize) + * @param trialMove if true, use getTempDists()/getTempDispls() + */ + void + evaluateVGH(const ParticleSetT& P, int iat, vgh_type& vgh) override; + + /** compute VGHGH + * @param P quantum particleset + * @param iat active particle + * @param vghgh Matrix(20,BasisSetSize) + * @param trialMove if true, use getTempDists()/getTempDispls() + */ + void + evaluateVGHGH( + const ParticleSetT& P, int iat, vghgh_type& vghgh) override; + + /** compute values for the iat-paricle move + * + * Always uses getTempDists() and getTempDispls() + * Tv is a translation vector; In PBC, in order to reduce the number + * of images that need to be summed over when generating the AO the + * nearest image displacement, dr, is used. 
Tv corresponds to the + * translation that takes the 'general displacement' (displacement + * between ion position and electron position) to the nearest image + * displacement. We need to keep track of Tv because it must be add + * as a phase factor, i.e., exp(i*k*Tv). + */ + void + evaluateV( + const ParticleSetT& P, int iat, ORBT* restrict vals) override; + + void + evaluateGradSourceV(const ParticleSetT& P, int iat, + const ParticleSetT& ions, int jion, vgl_type& vgl) override; + + void + evaluateGradSourceVGL(const ParticleSetT& P, int iat, + const ParticleSetT& ions, int jion, vghgh_type& vghgh) override; + + /** add a new set of Centered Atomic Orbitals + * @param icenter the index of the center + * @param aos a set of Centered Atomic Orbitals + */ + void + add(int icenter, std::unique_ptr aos); +}; +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/OptimizableObjectT.h b/src/QMCWaveFunctions/OptimizableObjectT.h new file mode 100644 index 0000000000..111d812ae4 --- /dev/null +++ b/src/QMCWaveFunctions/OptimizableObjectT.h @@ -0,0 +1,151 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2022 QMCPACK developers. 
+// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_OPTIMIZABLEOBJECTT_H +#define QMCPLUSPLUS_OPTIMIZABLEOBJECTT_H + +#include "VariableSetT.h" +#include "type_traits/template_types.hpp" + +/**@file OptimizableObject.h + *@brief Declaration of OptimizableObject + */ +namespace qmcplusplus +{ +template +using OptVariablesType = optimize::VariableSetT; + +template +class OptimizableObjectT +{ +public: + OptimizableObjectT(const std::string& name) : name_(name) + { + } + + const std::string& + getName() const + { + return name_; + } + bool + isOptimized() const + { + return is_optimized_; + } + +private: + /** Name of the optimizable object + */ + const std::string name_; + /** If true, this object is actively modified during WFOpt + */ + bool is_optimized_ = false; + +public: + /** check in variational parameters to the global list of parameters used by + * the optimizer. + * @param active a super set of optimizable variables + * + * The existing checkInVariables implementation in WFC/SPO/.. are inclusive + * and it calls checkInVariables of its members class A: public SPOSet {} + * class B: public WFC + * { + * A objA; + * checkInVariables() { objA.checkInVariables(); } + * }; + * + * With OptimizableObject, + * class A: public OptimizableObject {} + * class B: public OptimizableObject + * { + * A objA; + * checkInVariablesExclusive() { // should not call + * objA.checkInVariablesExclusive() if objA has been extracted; } + * }; + * A vector of OptimizableObject, will be created by calling + * extractOptimizableObjects(). All the checkInVariablesExclusive() will be + * called through this vector and thus checkInVariablesExclusive + * implementation should only handle non-OptimizableObject members. 
+ */ + virtual void + checkInVariablesExclusive(OptVariablesType& active) = 0; + + /** reset the parameters during optimizations. Exclusive, see + * checkInVariablesExclusive + */ + virtual void + resetParametersExclusive(const OptVariablesType& active) = 0; + + /** print the state, e.g., optimizables */ + virtual void + reportStatus(std::ostream& os) + { + } + + void + setOptimization(bool state) + { + is_optimized_ = state; + } + + /** Write the variational parameters for this object to the VP HDF file + * + * The hout parameter should come from VariableSet::writeToHDF + * + * Objects can use this function to store additional information to the + * file. + * + * By default the parameters are saved in VariableSet::writeToHDF, and + * objects do not need to implement this function (yet). + * + */ + virtual void + writeVariationalParameters(hdf_archive& hout){}; + + /** Read the variational parameters for this object from the VP HDF file + * + * The hin parameter should come from VariableSet::readFromHDF + * + * By default the parameters are read in VariableSet::readFromHDF, and + * objects do not need to implement this function (yet). + */ + virtual void + readVariationalParameters(hdf_archive& hin){}; +}; + +template +class UniqueOptObjRefsT : public RefVector> +{ +public: + OptimizableObjectT& + operator[](size_t i) const + { + return RefVector>::operator[](i); + } + + void + push_back(OptimizableObjectT& obj) + { + if (obj.getName().empty()) + throw std::logic_error("BUG!! 
Only named OptimizableObject object " + "can be added to UniqueOptObjRefs!"); + auto result = std::find_if( + this->begin(), this->end(), [&](OptimizableObjectT& element) { + return element.getName() == obj.getName(); + }); + if (result == this->end()) + RefVector>::push_back(obj); + } +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/PlaneWave/PWBasisT.h b/src/QMCWaveFunctions/PlaneWave/PWBasisT.h index a3acaf7aad..4a092961d9 100644 --- a/src/QMCWaveFunctions/PlaneWave/PWBasisT.h +++ b/src/QMCWaveFunctions/PlaneWave/PWBasisT.h @@ -1,35 +1,37 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. // -// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of +// Illinois at Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign Mark A. 
Berrill, +// berrillma@ornl.gov, Oak Ridge National Laboratory // -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// - /** @file PWBasis.h * @brief Declaration of Plane-wave basis set */ #ifndef QMCPLUSPLUS_PLANEWAVEBASIST_BLAS_H #define QMCPLUSPLUS_PLANEWAVEBASIST_BLAS_H +#include "CPU/e2iphi.h" #include "Configuration.h" -#include "Particle/ParticleSet.h" #include "Message/Communicate.h" -#include "type_traits/complex_help.hpp" -#include "CPU/e2iphi.h" +#include "Particle/ParticleSetT.h" #include "hdf/hdf_archive.h" +#include "type_traits/complex_help.hpp" /** If defined, use recursive method to build the basis set for each position * * performance improvement is questionable: load vs sin/cos */ -//#define PWBASIS_USE_RECURSIVE +// #define PWBASIS_USE_RECURSIVE namespace qmcplusplus { @@ -38,129 +40,145 @@ namespace qmcplusplus * Rewrite of PlaneWaveBasis to utilize blas II or III * Support more general input tags */ -template +template class PWBasisT : public QMCTraits { public: - using RealType = typename RealAlias_impl::value_type; - using ComplexType = T; - using PosType = TinyVector; - using IndexType = QMCTraits::IndexType; - using ParticleLayout = ParticleSet::ParticleLayout; - using GIndex_t = TinyVector; + using RealType = typename RealAlias_impl::value_type; + using ComplexType = T; + using PosType = TinyVector; + using IndexType = QMCTraits::IndexType; + using ParticleLayout = typename ParticleSetT::ParticleLayout; + using GIndex_t = TinyVector; private: - ///max of maxg[i] - int maxmaxg; - //Need to store the maximum translation in each dimension to use recursive PW generation. - GIndex_t maxg; - //The PlaneWave data - keep all of these strictly private to prevent inconsistencies. 
- RealType ecut; - ///twist angle in reduced - PosType twist; - ///twist angle in cartesian - PosType twist_cart; //Twist angle in reduced and Cartesian. + /// max of maxg[i] + int maxmaxg; + // Need to store the maximum translation in each dimension to use recursive + // PW generation. + GIndex_t maxg; + // The PlaneWave data - keep all of these strictly private to prevent + // inconsistencies. + RealType ecut; + /// twist angle in reduced + PosType twist; + /// twist angle in cartesian + PosType twist_cart; // Twist angle in reduced and Cartesian. - ///gvecs in reduced coordiates - std::vector gvecs; - ///Reduced coordinates with offset gvecs_shifted[][idim]=gvecs[][idim]+maxg[idim] - std::vector gvecs_shifted; + /// gvecs in reduced coordiates + std::vector gvecs; + /// Reduced coordinates with offset + /// gvecs_shifted[][idim]=gvecs[][idim]+maxg[idim] + std::vector gvecs_shifted; - std::vector minusModKplusG2; - std::vector kplusgvecs_cart; //Cartesian. + std::vector minusModKplusG2; + std::vector kplusgvecs_cart; // Cartesian. - Matrix C; - //Real wavefunctions here. Now the basis states are cos(Gr) or sin(Gr), not exp(iGr) - //We need a way of switching between them for G -> -G, otherwise the - //determinant will have multiple rows that are equal (to within a constant factor) - //of others, giving a zero determinant. For this, we build a vector (negative) which - //stores whether a vector is "+" or "-" (with some criterion, to be defined). We - //the switch from cos() to sin() based on the value of this input. - std::vector negative; + Matrix C; + // Real wavefunctions here. Now the basis states are cos(Gr) or sin(Gr), not + // exp(iGr) We need a way of switching between them for G -> -G, otherwise + // the determinant will have multiple rows that are equal (to within a + // constant factor) of others, giving a zero determinant. For this, we build + // a vector (negative) which stores whether a vector is "+" or "-" (with + // some criterion, to be defined). 
We the switch from cos() to sin() based + // on the value of this input. + std::vector negative; public: - //enumeration for the value, laplacian, gradients and size - enum - { - PW_VALUE, - PW_LAP, - PW_GRADX, - PW_GRADY, - PW_GRADZ, - PW_MAXINDEX - }; + // enumeration for the value, laplacian, gradients and size + enum + { + PW_VALUE, + PW_LAP, + PW_GRADX, + PW_GRADY, + PW_GRADZ, + PW_MAXINDEX + }; - Matrix Z; + Matrix Z; - Vector Zv; - /* inputmap is used for a memory efficient way of - * - * importing the basis-set and coefficients when the desired energy cutoff may be - * lower than that represented by all data in the wavefunction input file. - * The steps taken are: - * - Read all basis data. - * - Create map. inputmap[i] = j; j is correct PW index, i is input coef index. - * For basis elements outside cutoff, inputmap[i] = gvecs.size(); - * - Coefficients are in same order as PWs in inputfile => simply file into - * storage matrix using the map as the input. All excess coefficients are - * put into [gvecs.size()] and not used. i.e. coefs need to be allocated 1 higher. - * Such an approach is not needed for Gamma-point only calculations because the - * basis is spherically ordered. However, when a twist-angle is used, the "sphere" - * of allowed planewaves is shifted. - */ + Vector Zv; + /* inputmap is used for a memory efficient way of + * + * importing the basis-set and coefficients when the desired energy cutoff + * may be lower than that represented by all data in the wavefunction input + * file. The steps taken are: + * - Read all basis data. + * - Create map. inputmap[i] = j; j is correct PW index, i is input coef + * index. For basis elements outside cutoff, inputmap[i] = gvecs.size(); + * - Coefficients are in same order as PWs in inputfile => simply file into + * storage matrix using the map as the input. All excess coefficients are + * put into [gvecs.size()] and not used. i.e. coefs need to be allocated + * 1 higher. 
Such an approach is not needed for Gamma-point only + * calculations because the basis is spherically ordered. However, when a + * twist-angle is used, the "sphere" of allowed planewaves is shifted. + */ - Vector phi; + Vector phi; - std::vector inputmap; + std::vector inputmap; - ///total number of basis functions - int NumPlaneWaves; + /// total number of basis functions + int NumPlaneWaves; - ///local copy of Lattice - ParticleLayout Lattice; + /// local copy of Lattice + ParticleLayout Lattice; - ///default constructor - PWBasisT() : maxmaxg(0), NumPlaneWaves(0) {} + /// default constructor + PWBasisT() : maxmaxg(0), NumPlaneWaves(0) + { + } - ///constructor - PWBasisT(const PosType& twistangle) : maxmaxg(0), twist(twistangle), NumPlaneWaves(0) {} + /// constructor + PWBasisT(const PosType& twistangle) : + maxmaxg(0), + twist(twistangle), + NumPlaneWaves(0) + { + } - ~PWBasisT() {} + ~PWBasisT() + { + } - ///set the twist angle - void setTwistAngle(const PosType& tang); + /// set the twist angle + void + setTwistAngle(const PosType& tang); - ///reset - void reset(); + /// reset + void + reset(); - /** Read basisset from hdf5 file. Apply ecut. - * @param h5basisgroup h5 node where basis is located - * @param ecutoff cutoff energy - * @param lat CrystalLattice - * @param resizeContainer if true, resize internal storage. - * @return the number of plane waves - */ - int readbasis(hdf_archive& h5basisgroup, - RealType ecutoff, - const ParticleLayout& lat, - const std::string& pwname = "planewaves", - const std::string& pwmultname = "multipliers", - bool resizeContainer = true); + /** Read basisset from hdf5 file. Apply ecut. + * @param h5basisgroup h5 node where basis is located + * @param ecutoff cutoff energy + * @param lat CrystalLattice + * @param resizeContainer if true, resize internal storage. 
+ * @return the number of plane waves + */ + int + readbasis(hdf_archive& h5basisgroup, RealType ecutoff, + const ParticleLayout& lat, const std::string& pwname = "planewaves", + const std::string& pwmultname = "multipliers", + bool resizeContainer = true); - /** Remove basis elements if kinetic energy > ecut. - * - * Keep and indexmap so we know how to match coefficients on read. - */ - void trimforecut(); + /** Remove basis elements if kinetic energy > ecut. + * + * Keep and indexmap so we know how to match coefficients on read. + */ + void + trimforecut(); #if defined(PWBASIS_USE_RECURSIVE) - /** Fill the recursion coefficients matrix. - * - * @todo Generalize to non-orthorohmbic cells - */ - inline void BuildRecursionCoefs(const PosType& pos) - { - PosType tau_red(Lattice.toUnit(pos)); + /** Fill the recursion coefficients matrix. + * + * @todo Generalize to non-orthorohmbic cells + */ + inline void + BuildRecursionCoefs(const PosType& pos) + { + PosType tau_red(Lattice.toUnit(pos)); // RealType phi=TWOPI*tau_red[0]; // RealType nphi=maxg0*phi; // ComplexType ct0(std::cos(phi),std::sin(phi)); @@ -182,162 +200,170 @@ class PWBasisT : public QMCTraits // C2[0]=t; // for(int n=1; n<=2*maxg2; n++) C2[n] = (t *= ct0); #pragma ivdep - for (int idim = 0; idim < 3; idim++) + for (int idim = 0; idim < 3; idim++) { + int ng = maxg[idim]; + RealType phi = TWOPI * tau_red[idim]; + RealType nphi = ng * phi; + ComplexType Ctemp(std::cos(phi), std::sin(phi)); + ComplexType t(std::cos(nphi), -std::sin(nphi)); + ComplexType* restrict cp_ptr = C[idim]; + *cp_ptr++ = t; + for (int n = 1; n <= 2 * ng; n++) { + *cp_ptr++ = (t *= Ctemp); + } + } + // Base version + // #pragma ivdep + // for(int idim=0; idim<3; idim++){ + // RealType phi=TWOPI*tau_red[idim]; + // ComplexType Ctemp(std::cos(phi),std::sin(phi)); + // int ng=maxg[idim]; + // ComplexType* restrict cp_ptr=C[idim]+ng; + // ComplexType* restrict cn_ptr=C[idim]+ng-1; + // *cp_ptr=1.0; + // for(int n=1; n<=ng; n++,cn_ptr--){ 
+ // ComplexType t(Ctemp*(*cp_ptr++)); + // *cp_ptr = t; + // *cn_ptr = conj(t); + // } + // } + // Not valid for general supercell + // // Cartesian of twist for 1,1,1 (reduced coordinates) + // PosType G111(1.0,1.0,1.0); + // G111 = Lattice.k_cart(G111); + // + // //Precompute a small number of complex factors (PWs along + // b1,b2,b3 lines) + // //using a fast recursion algorithm + // #pragma ivdep + // for(int idim=0; idim<3; idim++){ + // //start the recursion with the 111 vector. + // RealType phi = pos[idim] * G111[idim]; + // register ComplexType Ctemp(std::cos(phi), std::sin(phi)); + // int ng=maxg[idim]; + // ComplexType* restrict cp_ptr=C[idim]+ng; + // ComplexType* restrict cn_ptr=C[idim]+ng-1; + // *cp_ptr=1.0; + // for(int n=1; n<=ng; n++,cn_ptr--){ + // ComplexType t(Ctemp*(*cp_ptr++)); + // *cp_ptr = t; + // *cn_ptr = conj(t); + // } + // } + } + + inline void + evaluate(const PosType& pos) { - int ng = maxg[idim]; - RealType phi = TWOPI * tau_red[idim]; - RealType nphi = ng * phi; - ComplexType Ctemp(std::cos(phi), std::sin(phi)); - ComplexType t(std::cos(nphi), -std::sin(nphi)); - ComplexType* restrict cp_ptr = C[idim]; - *cp_ptr++ = t; - for (int n = 1; n <= 2 * ng; n++) - { - *cp_ptr++ = (t *= Ctemp); - } + BuildRecursionCoefs(pos); + RealType twistdotr = dot(twist_cart, pos); + ComplexType pw0(std::cos(twistdotr), std::sin(twistdotr)); + // Evaluate the planewaves for particle iat. 
+ for (int ig = 0; ig < NumPlaneWaves; ig++) { + // PW is initialized as exp(i*twist.r) so that the final basis + // evaluations are for (twist+G).r + ComplexType pw(pw0); // std::cos(twistdotr),std::sin(twistdotr)); + for (int idim = 0; idim < 3; idim++) + pw *= C(idim, gvecs_shifted[ig][idim]); + // pw *= C0[gvecs_shifted[ig][0]]; + // pw *= C1[gvecs_shifted[ig][1]]; + // pw *= C2[gvecs_shifted[ig][2]]; + Zv[ig] = pw; + } } - //Base version - //#pragma ivdep - // for(int idim=0; idim<3; idim++){ - // RealType phi=TWOPI*tau_red[idim]; - // ComplexType Ctemp(std::cos(phi),std::sin(phi)); - // int ng=maxg[idim]; - // ComplexType* restrict cp_ptr=C[idim]+ng; - // ComplexType* restrict cn_ptr=C[idim]+ng-1; - // *cp_ptr=1.0; - // for(int n=1; n<=ng; n++,cn_ptr--){ - // ComplexType t(Ctemp*(*cp_ptr++)); - // *cp_ptr = t; - // *cn_ptr = conj(t); - // } - // } - //Not valid for general supercell + /** Evaluate all planewaves and derivatives for the iat-th particle + * + * The basis functions are evaluated for particles iat: first <= iat < last + * Evaluate the plane-waves at current particle coordinates using a fast + * recursion algorithm. Order of Y,dY and d2Y is kept correct. + * These can be "dotted" with coefficients later to complete orbital + * evaluations. + */ + inline void + evaluateAll(const ParticleSetT& P, int iat) + { + const PosType& r(P.activeR(iat)); + BuildRecursionCoefs(r); + RealType twistdotr = dot(twist_cart, r); + ComplexType pw0(std::cos(twistdotr), std::sin(twistdotr)); + // Evaluate the planewaves and derivatives. 
+ ComplexType* restrict zptr = Z.data(); + for (int ig = 0; ig < NumPlaneWaves; ig++, zptr += 5) { + // PW is initialized as exp(i*twist.r) so that the final basis + // evaluations are for (twist+G).r + ComplexType pw(pw0); + // THE INDEX ORDER OF C DOESN'T LOOK TOO GOOD: this could be fixed + for (int idim = 0; idim < 3; idim++) + pw *= C(idim, gvecs_shifted[ig][idim]); + // pw *= C0[gvecs_shifted[ig][0]]; + // pw *= C1[gvecs_shifted[ig][1]]; + // pw *= C2[gvecs_shifted[ig][2]]; + zptr[0] = pw; + zptr[1] = minusModKplusG2[ig] * pw; + zptr[2] = + kplusgvecs_cart[ig][0] * ComplexType(-pw.imag(), pw.real()); + zptr[3] = + kplusgvecs_cart[ig][1] * ComplexType(-pw.imag(), pw.real()); + zptr[4] = + kplusgvecs_cart[ig][2] * ComplexType(-pw.imag(), pw.real()); + } + } +#else + inline void + evaluate(const PosType& pos) + { + // Evaluate the planewaves for particle iat. + for (int ig = 0; ig < NumPlaneWaves; ig++) + phi[ig] = dot(kplusgvecs_cart[ig], pos); + eval_e2iphi(NumPlaneWaves, phi.data(), Zv.data()); + } + inline void + evaluateAll(const ParticleSetT& P, int iat) + { + const PosType& r(P.activeR(iat)); + evaluate(r); + ComplexType* restrict zptr = Z.data(); + for (int ig = 0; ig < NumPlaneWaves; ig++, zptr += 5) { + // PW is initialized as exp(i*twist.r) so that the final basis + // evaluations are for (twist+G).r + ComplexType& pw = Zv[ig]; + zptr[0] = pw; + zptr[1] = minusModKplusG2[ig] * pw; + zptr[2] = + kplusgvecs_cart[ig][0] * ComplexType(-pw.imag(), pw.real()); + zptr[3] = + kplusgvecs_cart[ig][1] * ComplexType(-pw.imag(), pw.real()); + zptr[4] = + kplusgvecs_cart[ig][2] * ComplexType(-pw.imag(), pw.real()); + } + } +#endif + // /** Fill the recursion coefficients matrix. 
+ // * + // * @todo Generalize to non-orthorohmbic cells + // */ + // void BuildRecursionCoefsByAdd(const PosType& pos) + // { // // Cartesian of twist for 1,1,1 (reduced coordinates) // PosType G111(1.0,1.0,1.0); // G111 = Lattice.k_cart(G111); - // - // //Precompute a small number of complex factors (PWs along b1,b2,b3 lines) - // //using a fast recursion algorithm - //#pragma ivdep - // for(int idim=0; idim<3; idim++){ + // //PosType redP=P.Lattice.toUnit(P.R[iat]); + // //Precompute a small number of complex factors (PWs along b1,b2,b3 + // lines) for(int idim=0; idim<3; idim++){ // //start the recursion with the 111 vector. // RealType phi = pos[idim] * G111[idim]; - // register ComplexType Ctemp(std::cos(phi), std::sin(phi)); - // int ng=maxg[idim]; - // ComplexType* restrict cp_ptr=C[idim]+ng; - // ComplexType* restrict cn_ptr=C[idim]+ng-1; - // *cp_ptr=1.0; + // int ng(maxg[idim]); + // RealType* restrict cp_ptr=logC[idim]+ng; + // RealType* restrict cn_ptr=logC[idim]+ng-1; + // *cp_ptr=0.0; + // //add INTEL vectorization // for(int n=1; n<=ng; n++,cn_ptr--){ - // ComplexType t(Ctemp*(*cp_ptr++)); + // RealType t(phi+*cp_ptr++); // *cp_ptr = t; - // *cn_ptr = conj(t); + // *cn_ptr = -t; // } // } - } - - inline void evaluate(const PosType& pos) - { - BuildRecursionCoefs(pos); - RealType twistdotr = dot(twist_cart, pos); - ComplexType pw0(std::cos(twistdotr), std::sin(twistdotr)); - //Evaluate the planewaves for particle iat. 
- for (int ig = 0; ig < NumPlaneWaves; ig++) - { - //PW is initialized as exp(i*twist.r) so that the final basis evaluations are for (twist+G).r - ComplexType pw(pw0); //std::cos(twistdotr),std::sin(twistdotr)); - for (int idim = 0; idim < 3; idim++) - pw *= C(idim, gvecs_shifted[ig][idim]); - //pw *= C0[gvecs_shifted[ig][0]]; - //pw *= C1[gvecs_shifted[ig][1]]; - //pw *= C2[gvecs_shifted[ig][2]]; - Zv[ig] = pw; - } - } - /** Evaluate all planewaves and derivatives for the iat-th particle - * - * The basis functions are evaluated for particles iat: first <= iat < last - * Evaluate the plane-waves at current particle coordinates using a fast - * recursion algorithm. Order of Y,dY and d2Y is kept correct. - * These can be "dotted" with coefficients later to complete orbital evaluations. - */ - inline void evaluateAll(const ParticleSet& P, int iat) - { - const PosType& r(P.activeR(iat)); - BuildRecursionCoefs(r); - RealType twistdotr = dot(twist_cart, r); - ComplexType pw0(std::cos(twistdotr), std::sin(twistdotr)); - //Evaluate the planewaves and derivatives. - ComplexType* restrict zptr = Z.data(); - for (int ig = 0; ig < NumPlaneWaves; ig++, zptr += 5) - { - //PW is initialized as exp(i*twist.r) so that the final basis evaluations - //are for (twist+G).r - ComplexType pw(pw0); - // THE INDEX ORDER OF C DOESN'T LOOK TOO GOOD: this could be fixed - for (int idim = 0; idim < 3; idim++) - pw *= C(idim, gvecs_shifted[ig][idim]); - //pw *= C0[gvecs_shifted[ig][0]]; - //pw *= C1[gvecs_shifted[ig][1]]; - //pw *= C2[gvecs_shifted[ig][2]]; - zptr[0] = pw; - zptr[1] = minusModKplusG2[ig] * pw; - zptr[2] = kplusgvecs_cart[ig][0] * ComplexType(-pw.imag(), pw.real()); - zptr[3] = kplusgvecs_cart[ig][1] * ComplexType(-pw.imag(), pw.real()); - zptr[4] = kplusgvecs_cart[ig][2] * ComplexType(-pw.imag(), pw.real()); - } - } -#else - inline void evaluate(const PosType& pos) - { - //Evaluate the planewaves for particle iat. 
- for (int ig = 0; ig < NumPlaneWaves; ig++) - phi[ig] = dot(kplusgvecs_cart[ig], pos); - eval_e2iphi(NumPlaneWaves, phi.data(), Zv.data()); - } - inline void evaluateAll(const ParticleSet& P, int iat) - { - const PosType& r(P.activeR(iat)); - evaluate(r); - ComplexType* restrict zptr = Z.data(); - for (int ig = 0; ig < NumPlaneWaves; ig++, zptr += 5) - { - //PW is initialized as exp(i*twist.r) so that the final basis evaluations - //are for (twist+G).r - ComplexType& pw = Zv[ig]; - zptr[0] = pw; - zptr[1] = minusModKplusG2[ig] * pw; - zptr[2] = kplusgvecs_cart[ig][0] * ComplexType(-pw.imag(), pw.real()); - zptr[3] = kplusgvecs_cart[ig][1] * ComplexType(-pw.imag(), pw.real()); - zptr[4] = kplusgvecs_cart[ig][2] * ComplexType(-pw.imag(), pw.real()); - } - } -#endif - // /** Fill the recursion coefficients matrix. - // * - // * @todo Generalize to non-orthorohmbic cells - // */ - // void BuildRecursionCoefsByAdd(const PosType& pos) - // { - // // Cartesian of twist for 1,1,1 (reduced coordinates) - // PosType G111(1.0,1.0,1.0); - // G111 = Lattice.k_cart(G111); - // //PosType redP=P.Lattice.toUnit(P.R[iat]); - // //Precompute a small number of complex factors (PWs along b1,b2,b3 lines) - // for(int idim=0; idim<3; idim++){ - // //start the recursion with the 111 vector. 
- // RealType phi = pos[idim] * G111[idim]; - // int ng(maxg[idim]); - // RealType* restrict cp_ptr=logC[idim]+ng; - // RealType* restrict cn_ptr=logC[idim]+ng-1; - // *cp_ptr=0.0; - // //add INTEL vectorization - // for(int n=1; n<=ng; n++,cn_ptr--){ - // RealType t(phi+*cp_ptr++); - // *cp_ptr = t; - // *cn_ptr = -t; - // } - // } - // } + // } }; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp index a3b1e135ec..6d82f8fdac 100644 --- a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp +++ b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp @@ -1,145 +1,156 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. // -// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign +// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of +// Illinois at Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign Mark A. 
Berrill, +// berrillma@ornl.gov, Oak Ridge National Laboratory Mark +// Dewing, markdewing@gmail.com, University of Illinois at +// Urbana-Champaign // -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// +#include "PWOrbitalSetT.h" #include "Message/Communicate.h" -#include "PWOrbitalSetT.h" #include "Numerics/MatrixOperators.h" namespace qmcplusplus { -template +template PWOrbitalSetT::~PWOrbitalSetT() { - if (OwnBasisSet && myBasisSet) - delete myBasisSet; - if (!IsCloned && this->C != nullptr) - delete this->C; + if (OwnBasisSet && myBasisSet) + delete myBasisSet; + if (!IsCloned && this->C != nullptr) + delete this->C; } -template -std::unique_ptr> PWOrbitalSetT::makeClone() const +template +std::unique_ptr> +PWOrbitalSetT::makeClone() const { - auto myclone = std::make_unique>(*this); - myclone->myBasisSet = new PWBasisT(*myBasisSet); - myclone->IsCloned = true; - return myclone; + auto myclone = std::make_unique>(*this); + myclone->myBasisSet = new PWBasisT(*myBasisSet); + myclone->IsCloned = true; + return myclone; } -template -void PWOrbitalSetT::setOrbitalSetSize(int norbs) {} - -template -void PWOrbitalSetT::resize(PWBasisPtr bset, int nbands, bool cleanup) +template +void +PWOrbitalSetT::setOrbitalSetSize(int norbs) { - myBasisSet = bset; - this->OrbitalSetSize = nbands; - OwnBasisSet = cleanup; - BasisSetSize = myBasisSet->NumPlaneWaves; - this->C = new ValueMatrix(this->OrbitalSetSize, BasisSetSize); - this->Temp.resize(this->OrbitalSetSize, PW_MAXINDEX); - app_log() << " PWOrbitalSetT::resize OrbitalSetSize =" << this->OrbitalSetSize << " BasisSetSize = " << BasisSetSize - << std::endl; } -template -void PWOrbitalSetT::addVector(const std::vector& coefs, int jorb) +template +void +PWOrbitalSetT::resize(PWBasisPtr 
bset, int nbands, bool cleanup) { - int ng = myBasisSet->inputmap.size(); - if (ng != coefs.size()) - { - app_error() << " Input G map does not match the basis size of wave functions " << std::endl; - OHMMS::Controller->abort(); - } - //drop G points for the given TwistAngle - const std::vector& inputmap(myBasisSet->inputmap); - for (int ig = 0; ig < ng; ig++) - { - if (inputmap[ig] > -1) - (*(this->C))[jorb][inputmap[ig]] = coefs[ig]; - } + myBasisSet = bset; + this->OrbitalSetSize = nbands; + OwnBasisSet = cleanup; + BasisSetSize = myBasisSet->NumPlaneWaves; + this->C = new ValueMatrix(this->OrbitalSetSize, BasisSetSize); + this->Temp.resize(this->OrbitalSetSize, PW_MAXINDEX); + app_log() << " PWOrbitalSetT::resize OrbitalSetSize =" + << this->OrbitalSetSize << " BasisSetSize = " << BasisSetSize + << std::endl; } -template -void PWOrbitalSetT::addVector(const std::vector& coefs, int jorb) +template +void +PWOrbitalSetT::addVector(const std::vector& coefs, int jorb) { - int ng = myBasisSet->inputmap.size(); - if (ng != coefs.size()) - { - app_error() << " Input G map does not match the basis size of wave functions " << std::endl; - OHMMS::Controller->abort(); - } - //drop G points for the given TwistAngle - const std::vector& inputmap(myBasisSet->inputmap); - for (int ig = 0; ig < ng; ig++) - { - if (inputmap[ig] > -1) - (*(this->C))[jorb][inputmap[ig]] = coefs[ig]; - } + int ng = myBasisSet->inputmap.size(); + if (ng != coefs.size()) { + app_error() + << " Input G map does not match the basis size of wave functions " + << std::endl; + OHMMS::Controller->abort(); + } + // drop G points for the given TwistAngle + const std::vector& inputmap(myBasisSet->inputmap); + for (int ig = 0; ig < ng; ig++) { + if (inputmap[ig] > -1) + (*(this->C))[jorb][inputmap[ig]] = coefs[ig]; + } } -template -void PWOrbitalSetT::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) +template +void +PWOrbitalSetT::addVector(const std::vector& coefs, int jorb) { - //Evaluate every 
orbital for particle iat. - //Evaluate the basis-set at these coordinates: - //myBasisSet->evaluate(P,iat); - myBasisSet->evaluate(P.activeR(iat)); - MatrixOperators::product(*(this->C), myBasisSet->Zv, psi); + int ng = myBasisSet->inputmap.size(); + if (ng != coefs.size()) { + app_error() + << " Input G map does not match the basis size of wave functions " + << std::endl; + OHMMS::Controller->abort(); + } + // drop G points for the given TwistAngle + const std::vector& inputmap(myBasisSet->inputmap); + for (int ig = 0; ig < ng; ig++) { + if (inputmap[ig] > -1) + (*(this->C))[jorb][inputmap[ig]] = coefs[ig]; + } } -template -void PWOrbitalSetT::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +template +void +PWOrbitalSetT::evaluateValue( + const ParticleSetT& P, int iat, ValueVector& psi) { - //Evaluate the orbitals and derivatives for particle iat only. - myBasisSet->evaluateAll(P, iat); - MatrixOperators::product(*(this->C), myBasisSet->Z, this->Temp); - const T* restrict tptr = this->Temp.data(); - for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX) - { - psi[j] = tptr[PW_VALUE]; - d2psi[j] = tptr[PW_LAP]; - dpsi[j] = GradType(tptr[PW_GRADX], tptr[PW_GRADY], tptr[PW_GRADZ]); - } + // Evaluate every orbital for particle iat. + // Evaluate the basis-set at these coordinates: + // myBasisSet->evaluate(P,iat); + myBasisSet->evaluate(P.activeR(iat)); + MatrixOperators::product(*(this->C), myBasisSet->Zv, psi); } -template -void PWOrbitalSetT::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) +template +void +PWOrbitalSetT::evaluateVGL(const ParticleSetT& P, int iat, + ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { - for (int iat = first, i = 0; iat < last; iat++, i++) - { + // Evaluate the orbitals and derivatives for particle iat only. 
myBasisSet->evaluateAll(P, iat); MatrixOperators::product(*(this->C), myBasisSet->Z, this->Temp); const T* restrict tptr = this->Temp.data(); - for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX) - { - logdet(i, j) = tptr[PW_VALUE]; - d2logdet(i, j) = tptr[PW_LAP]; - dlogdet(i, j) = GradType(tptr[PW_GRADX], tptr[PW_GRADY], tptr[PW_GRADZ]); + for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX) { + psi[j] = tptr[PW_VALUE]; + d2psi[j] = tptr[PW_LAP]; + dpsi[j] = GradType(tptr[PW_GRADX], tptr[PW_GRADY], tptr[PW_GRADZ]); + } +} + +template +void +PWOrbitalSetT::evaluate_notranspose(const ParticleSetT& P, int first, + int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) +{ + for (int iat = first, i = 0; iat < last; iat++, i++) { + myBasisSet->evaluateAll(P, iat); + MatrixOperators::product(*(this->C), myBasisSet->Z, this->Temp); + const T* restrict tptr = this->Temp.data(); + for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX) { + logdet(i, j) = tptr[PW_VALUE]; + d2logdet(i, j) = tptr[PW_LAP]; + dlogdet(i, j) = + GradType(tptr[PW_GRADX], tptr[PW_GRADY], tptr[PW_GRADZ]); + } } - } } // Class concrete types from T -// NOTE: This class only gets compiled if QMC_COMPLEX is defined, thus it is inherently complex -// template class PWOrbitalSetT; -// template class PWOrbitalSetT; +// NOTE: This class only gets compiled if QMC_COMPLEX is defined, thus it is +// inherently complex template class PWOrbitalSetT; template class +// PWOrbitalSetT; template class PWOrbitalSetT>; template class PWOrbitalSetT>; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h index 25c3e0d5c1..d4e13de966 100644 --- a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h +++ b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h @@ -1,128 +1,146 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed 
under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. // -// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign +// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of +// Illinois at Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign Mark Dewing, +// markdewing@gmail.com, University of Illinois at +// Urbana-Champaign // -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// - /** @file PWOrbitalSetT.h * @brief Definition of member functions of Plane-wave basis set */ #ifndef QMCPLUSPLUS_PLANEWAVE_ORBITALSET_BLAS_H #define QMCPLUSPLUS_PLANEWAVE_ORBITALSET_BLAS_H +#include "CPU/BLAS.hpp" #include "QMCWaveFunctions/PlaneWave/PWBasisT.h" -#include "type_traits/complex_help.hpp" #include "QMCWaveFunctions/SPOSetT.h" -#include "CPU/BLAS.hpp" +#include "type_traits/complex_help.hpp" namespace qmcplusplus { -template +template class PWOrbitalSetT : public SPOSetT { public: - using RealType = typename SPOSetT::RealType; - using ComplexType = T; - using PosType = typename SPOSetT::PosType; - using ValueVector = typename SPOSetT::ValueVector; - using GradVector = typename SPOSetT::GradVector; - using ValueMatrix = typename SPOSetT::ValueMatrix; - using GradMatrix = typename SPOSetT::GradMatrix; - using 
GradType = typename SPOSetT::GradType; - using IndexType = typename SPOSetT::IndexType; - - using BasisSet_t = PWBasisT; - using PWBasisPtr = PWBasisT*; - - /** inherit the enum of BasisSet_t */ - enum - { - PW_VALUE = BasisSet_t::PW_VALUE, - PW_LAP = BasisSet_t::PW_LAP, - PW_GRADX = BasisSet_t::PW_GRADX, - PW_GRADY = BasisSet_t::PW_GRADY, - PW_GRADZ = BasisSet_t::PW_GRADZ, - PW_MAXINDEX = BasisSet_t::PW_MAXINDEX - }; - - - /** default constructor - */ - PWOrbitalSetT(const std::string& my_name) - : SPOSetT(my_name), OwnBasisSet(false), myBasisSet(nullptr), BasisSetSize(0), C(nullptr), IsCloned(false) - {} - - std::string getClassName() const override { return "PWOrbitalSetT"; } - - - /** delete BasisSet only it owns this - * - * Builder takes care of who owns what - */ - ~PWOrbitalSetT() override; - - std::unique_ptr> makeClone() const override; - /** resize the orbital base - * @param bset PWBasis - * @param nbands number of bands - * @param cleaup if true, owns PWBasis. Will clean up. 
- */ - void resize(PWBasisPtr bset, int nbands, bool cleanup = false); - - /** Builder class takes care of the assertion - */ - void addVector(const std::vector& coefs, int jorb); - void addVector(const std::vector& coefs, int jorb); - - void setOrbitalSetSize(int norbs) override; - - inline T evaluate(int ib, const PosType& pos) - { - myBasisSet->evaluate(pos); - return BLAS::dot(BasisSetSize, (*C)[ib], myBasisSet->Zv.data()); - } - - void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override; - - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - - /** boolean - * - * If true, this has to delete the BasisSet - */ - bool OwnBasisSet; - ///TwistAngle of this PWOrbitalSetT - PosType TwistAngle; - ///My basis set - PWBasisPtr myBasisSet; - ///number of basis - IndexType BasisSetSize; - /** pointer to matrix containing the coefficients - * - * makeClone makes a shallow copy and flag IsCloned - */ - ValueMatrix* C; - ///if true, do not clean up - bool IsCloned; - - /** temporary array to perform gemm operation */ - Matrix Temp; + using RealType = typename SPOSetT::RealType; + using ComplexType = T; + using PosType = typename SPOSetT::PosType; + using ValueVector = typename SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + using GradType = typename SPOSetT::GradType; + using IndexType = typename SPOSetT::IndexType; + + using BasisSet_t = PWBasisT; + using PWBasisPtr = PWBasisT*; + + /** inherit the enum of BasisSet_t */ + enum + { + PW_VALUE = BasisSet_t::PW_VALUE, + PW_LAP = BasisSet_t::PW_LAP, + PW_GRADX = BasisSet_t::PW_GRADX, + PW_GRADY = BasisSet_t::PW_GRADY, + PW_GRADZ = BasisSet_t::PW_GRADZ, + 
PW_MAXINDEX = BasisSet_t::PW_MAXINDEX + }; + + /** default constructor + */ + PWOrbitalSetT(const std::string& my_name) : + SPOSetT(my_name), + OwnBasisSet(false), + myBasisSet(nullptr), + BasisSetSize(0), + C(nullptr), + IsCloned(false) + { + } + + std::string + getClassName() const override + { + return "PWOrbitalSetT"; + } + + /** delete BasisSet only it owns this + * + * Builder takes care of who owns what + */ + ~PWOrbitalSetT() override; + + std::unique_ptr> + makeClone() const override; + /** resize the orbital base + * @param bset PWBasis + * @param nbands number of bands + * @param cleaup if true, owns PWBasis. Will clean up. + */ + void + resize(PWBasisPtr bset, int nbands, bool cleanup = false); + + /** Builder class takes care of the assertion + */ + void + addVector(const std::vector& coefs, int jorb); + void + addVector(const std::vector& coefs, int jorb); + + void + setOrbitalSetSize(int norbs) override; + + inline T + evaluate(int ib, const PosType& pos) + { + myBasisSet->evaluate(pos); + return BLAS::dot(BasisSetSize, (*C)[ib], myBasisSet->Zv.data()); + } + + void + evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override; + + void + evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, + GradVector& dpsi, ValueVector& d2psi) override; + + void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, + ValueMatrix& d2logdet) override; + + /** boolean + * + * If true, this has to delete the BasisSet + */ + bool OwnBasisSet; + /// TwistAngle of this PWOrbitalSetT + PosType TwistAngle; + /// My basis set + PWBasisPtr myBasisSet; + /// number of basis + IndexType BasisSetSize; + /** pointer to matrix containing the coefficients + * + * makeClone makes a shallow copy and flag IsCloned + */ + ValueMatrix* C; + /// if true, do not clean up + bool IsCloned; + + /** temporary array to perform gemm operation */ + Matrix Temp; }; } // namespace qmcplusplus #endif diff --git 
a/src/QMCWaveFunctions/RotatedSPOsT.cpp b/src/QMCWaveFunctions/RotatedSPOsT.cpp index f76150ec2a..128bca9798 100644 --- a/src/QMCWaveFunctions/RotatedSPOsT.cpp +++ b/src/QMCWaveFunctions/RotatedSPOsT.cpp @@ -21,1022 +21,1006 @@ namespace qmcplusplus { -template -RotatedSPOsT::RotatedSPOsT(const std::string& my_name, std::unique_ptr>&& spos) - : SPOSetT(my_name), OptimizableObject(my_name), Phi(std::move(spos)), nel_major_(0), params_supplied(false) +template +RotatedSPOsT::RotatedSPOsT( + const std::string& my_name, std::unique_ptr>&& spos) : + SPOSetT(my_name), + OptimizableObjectT(my_name), + Phi(std::move(spos)), + nel_major_(0), + params_supplied(false) { - this->OrbitalSetSize = Phi->getOrbitalSetSize(); + this->OrbitalSetSize = Phi->getOrbitalSetSize(); } -template +template RotatedSPOsT::~RotatedSPOsT() -{} +{ +} -template -void RotatedSPOsT::setRotationParameters(const std::vector& param_list) +template +void +RotatedSPOsT::setRotationParameters(const std::vector& param_list) { - params = param_list; - params_supplied = true; + params = param_list; + params_supplied = true; } -template -void RotatedSPOsT::createRotationIndices(int nel, int nmo, RotationIndices& rot_indices) +template +void +RotatedSPOsT::createRotationIndices( + int nel, int nmo, RotationIndices& rot_indices) { - for (int i = 0; i < nel; i++) - for (int j = nel; j < nmo; j++) - rot_indices.emplace_back(i, j); + for (int i = 0; i < nel; i++) + for (int j = nel; j < nmo; j++) + rot_indices.emplace_back(i, j); } -template -void RotatedSPOsT::createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices) +template +void +RotatedSPOsT::createRotationIndicesFull( + int nel, int nmo, RotationIndices& rot_indices) { - rot_indices.reserve(nmo * (nmo - 1) / 2); - - // start with core-active rotations - put them at the beginning of the list - // so it matches the other list of rotation indices - for (int i = 0; i < nel; i++) - for (int j = nel; j < nmo; j++) - rot_indices.emplace_back(i, 
j); - - // Add core-core rotations - put them at the end of the list - for (int i = 0; i < nel; i++) - for (int j = i + 1; j < nel; j++) - rot_indices.emplace_back(i, j); - - // Add active-active rotations - put them at the end of the list - for (int i = nel; i < nmo; i++) - for (int j = i + 1; j < nmo; j++) - rot_indices.emplace_back(i, j); + rot_indices.reserve(nmo * (nmo - 1) / 2); + + // start with core-active rotations - put them at the beginning of the list + // so it matches the other list of rotation indices + for (int i = 0; i < nel; i++) + for (int j = nel; j < nmo; j++) + rot_indices.emplace_back(i, j); + + // Add core-core rotations - put them at the end of the list + for (int i = 0; i < nel; i++) + for (int j = i + 1; j < nel; j++) + rot_indices.emplace_back(i, j); + + // Add active-active rotations - put them at the end of the list + for (int i = nel; i < nmo; i++) + for (int j = i + 1; j < nmo; j++) + rot_indices.emplace_back(i, j); } -template -void RotatedSPOsT::constructAntiSymmetricMatrix(const RotationIndices& rot_indices, - const std::vector& param, - ValueMatrix& rot_mat) +template +void +RotatedSPOsT::constructAntiSymmetricMatrix( + const RotationIndices& rot_indices, const std::vector& param, + ValueMatrix& rot_mat) { - assert(rot_indices.size() == param.size()); - // Assumes rot_mat is of the correct size + assert(rot_indices.size() == param.size()); + // Assumes rot_mat is of the correct size - rot_mat = 0.0; + rot_mat = 0.0; - for (int i = 0; i < rot_indices.size(); i++) - { - const int p = rot_indices[i].first; - const int q = rot_indices[i].second; - const RealType x = param[i]; + for (int i = 0; i < rot_indices.size(); i++) { + const int p = rot_indices[i].first; + const int q = rot_indices[i].second; + const RealType x = param[i]; - rot_mat[q][p] = x; - rot_mat[p][q] = -x; - } + rot_mat[q][p] = x; + rot_mat[p][q] = -x; + } } -template -void RotatedSPOsT::extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices, - const 
ValueMatrix& rot_mat, - std::vector& param) +template +void +RotatedSPOsT::extractParamsFromAntiSymmetricMatrix( + const RotationIndices& rot_indices, const ValueMatrix& rot_mat, + std::vector& param) { - assert(rot_indices.size() == param.size()); - // Assumes rot_mat is of the correct size - - for (int i = 0; i < rot_indices.size(); i++) - { - const int p = rot_indices[i].first; - const int q = rot_indices[i].second; - param[i] = rot_mat[q][p]; - } + assert(rot_indices.size() == param.size()); + // Assumes rot_mat is of the correct size + + for (int i = 0; i < rot_indices.size(); i++) { + const int p = rot_indices[i].first; + const int q = rot_indices[i].second; + param[i] = rot_mat[q][p]; + } } -template -void RotatedSPOsT::resetParametersExclusive(const opt_variables_type& active) +template +void +RotatedSPOsT::resetParametersExclusive(const OptVariablesType& active) { - std::vector delta_param(m_act_rot_inds.size()); - - size_t psize = m_act_rot_inds.size(); - - if (use_global_rot_) - { - psize = m_full_rot_inds.size(); - assert(psize >= m_act_rot_inds.size()); - } - - std::vector old_param(psize); - std::vector new_param(psize); - - for (int i = 0; i < m_act_rot_inds.size(); i++) - { - int loc = this->myVars.where(i); - delta_param[i] = active[loc] - this->myVars[i]; - this->myVars[i] = active[loc]; - } - - if (use_global_rot_) - { - for (int i = 0; i < m_full_rot_inds.size(); i++) - old_param[i] = myVarsFull[i]; - - applyDeltaRotation(delta_param, old_param, new_param); - - // Save the the params - for (int i = 0; i < m_full_rot_inds.size(); i++) - myVarsFull[i] = new_param[i]; - } - else - { - apply_rotation(delta_param, false); - - // Save the parameters in the history list - history_params_.push_back(delta_param); - } + std::vector delta_param(m_act_rot_inds.size()); + + size_t psize = m_act_rot_inds.size(); + + if (use_global_rot_) { + psize = m_full_rot_inds.size(); + assert(psize >= m_act_rot_inds.size()); + } + + std::vector old_param(psize); + 
std::vector new_param(psize); + + for (int i = 0; i < m_act_rot_inds.size(); i++) { + int loc = this->myVars.where(i); + delta_param[i] = active[loc] - this->myVars[i]; + this->myVars[i] = active[loc]; + } + + if (use_global_rot_) { + for (int i = 0; i < m_full_rot_inds.size(); i++) + old_param[i] = myVarsFull[i]; + + applyDeltaRotation(delta_param, old_param, new_param); + + // Save the the params + for (int i = 0; i < m_full_rot_inds.size(); i++) + myVarsFull[i] = new_param[i]; + } + else { + apply_rotation(delta_param, false); + + // Save the parameters in the history list + history_params_.push_back(delta_param); + } } -template -void RotatedSPOsT::writeVariationalParameters(hdf_archive& hout) +template +void +RotatedSPOsT::writeVariationalParameters(hdf_archive& hout) { - hout.push("RotatedSPOsT"); - if (use_global_rot_) - { - hout.push("rotation_global"); - std::string rot_global_name = std::string("rotation_global_") + SPOSetT::getName(); - - int nparam_full = myVarsFull.size(); - std::vector full_params(nparam_full); - for (int i = 0; i < nparam_full; i++) - full_params[i] = myVarsFull[i]; - - hout.write(full_params, rot_global_name); - hout.pop(); - } - else - { - hout.push("rotation_history"); - size_t rows = history_params_.size(); - size_t cols = 0; - if (rows > 0) - cols = history_params_[0].size(); - - Matrix tmp(rows, cols); - for (size_t i = 0; i < rows; i++) - for (size_t j = 0; j < cols; j++) - tmp(i, j) = history_params_[i][j]; - - std::string rot_hist_name = std::string("rotation_history_") + SPOSetT::getName(); - hout.write(tmp, rot_hist_name); - hout.pop(); - } + hout.push("RotatedSPOsT"); + if (use_global_rot_) { + hout.push("rotation_global"); + std::string rot_global_name = + std::string("rotation_global_") + SPOSetT::getName(); + + int nparam_full = myVarsFull.size(); + std::vector full_params(nparam_full); + for (int i = 0; i < nparam_full; i++) + full_params[i] = myVarsFull[i]; + + hout.write(full_params, rot_global_name); + hout.pop(); 
+ } + else { + hout.push("rotation_history"); + size_t rows = history_params_.size(); + size_t cols = 0; + if (rows > 0) + cols = history_params_[0].size(); + + Matrix tmp(rows, cols); + for (size_t i = 0; i < rows; i++) + for (size_t j = 0; j < cols; j++) + tmp(i, j) = history_params_[i][j]; + + std::string rot_hist_name = + std::string("rotation_history_") + SPOSetT::getName(); + hout.write(tmp, rot_hist_name); + hout.pop(); + } - // Save myVars in order to restore object state exactly - // The values aren't meaningful, but they need to match those saved in - // VariableSet - hout.push("rotation_params"); - std::string rot_params_name = std::string("rotation_params_") + SPOSetT::getName(); + // Save myVars in order to restore object state exactly + // The values aren't meaningful, but they need to match those saved in + // VariableSet + hout.push("rotation_params"); + std::string rot_params_name = + std::string("rotation_params_") + SPOSetT::getName(); - int nparam = this->myVars.size(); - std::vector params(nparam); - for (int i = 0; i < nparam; i++) - params[i] = this->myVars[i]; + int nparam = this->myVars.size(); + std::vector params(nparam); + for (int i = 0; i < nparam; i++) + params[i] = this->myVars[i]; - hout.write(params, rot_params_name); - hout.pop(); + hout.write(params, rot_params_name); + hout.pop(); - hout.pop(); + hout.pop(); } -template -void RotatedSPOsT::readVariationalParameters(hdf_archive& hin) +template +void +RotatedSPOsT::readVariationalParameters(hdf_archive& hin) { - hin.push("RotatedSPOsT", false); + hin.push("RotatedSPOsT", false); + + bool grp_hist_exists = hin.is_group("rotation_history"); + bool grp_global_exists = hin.is_group("rotation_global"); + if (!grp_hist_exists && !grp_global_exists) + app_warning() << "Rotation parameters not found in VP file"; + + if (grp_global_exists) { + hin.push("rotation_global", false); + std::string rot_global_name = + std::string("rotation_global_") + SPOSetT::getName(); + + std::vector 
sizes(1); + if (!hin.getShape(rot_global_name, sizes)) + throw std::runtime_error( + "Failed to read rotation_global in VP file"); + + int nparam_full_actual = sizes[0]; + int nparam_full = myVarsFull.size(); + + if (nparam_full != nparam_full_actual) { + std::ostringstream tmp_err; + tmp_err << "Expected number of full rotation parameters (" + << nparam_full << ") does not match number in file (" + << nparam_full_actual << ")"; + throw std::runtime_error(tmp_err.str()); + } + std::vector full_params(nparam_full); + hin.read(full_params, rot_global_name); + for (int i = 0; i < nparam_full; i++) + myVarsFull[i] = full_params[i]; - bool grp_hist_exists = hin.is_group("rotation_history"); - bool grp_global_exists = hin.is_group("rotation_global"); - if (!grp_hist_exists && !grp_global_exists) - app_warning() << "Rotation parameters not found in VP file"; + hin.pop(); - if (grp_global_exists) - { - hin.push("rotation_global", false); - std::string rot_global_name = std::string("rotation_global_") + SPOSetT::getName(); + applyFullRotation(full_params, true); + } + else if (grp_hist_exists) { + hin.push("rotation_history", false); + std::string rot_hist_name = + std::string("rotation_history_") + SPOSetT::getName(); + std::vector sizes(2); + if (!hin.getShape(rot_hist_name, sizes)) + throw std::runtime_error( + "Failed to read rotation history in VP file"); + + int rows = sizes[0]; + int cols = sizes[1]; + history_params_.resize(rows); + Matrix tmp(rows, cols); + hin.read(tmp, rot_hist_name); + for (size_t i = 0; i < rows; i++) { + history_params_[i].resize(cols); + for (size_t j = 0; j < cols; j++) + history_params_[i][j] = tmp(i, j); + } - std::vector sizes(1); - if (!hin.getShape(rot_global_name, sizes)) - throw std::runtime_error("Failed to read rotation_global in VP file"); - - int nparam_full_actual = sizes[0]; - int nparam_full = myVarsFull.size(); - - if (nparam_full != nparam_full_actual) - { - std::ostringstream tmp_err; - tmp_err << "Expected number of full 
rotation parameters (" << nparam_full << ") does not match number in file (" - << nparam_full_actual << ")"; - throw std::runtime_error(tmp_err.str()); + hin.pop(); + + applyRotationHistory(); } - std::vector full_params(nparam_full); - hin.read(full_params, rot_global_name); - for (int i = 0; i < nparam_full; i++) - myVarsFull[i] = full_params[i]; - hin.pop(); + hin.push("rotation_params", false); + std::string rot_param_name = + std::string("rotation_params_") + SPOSetT::getName(); - applyFullRotation(full_params, true); - } - else if (grp_hist_exists) - { - hin.push("rotation_history", false); - std::string rot_hist_name = std::string("rotation_history_") + SPOSetT::getName(); - std::vector sizes(2); - if (!hin.getShape(rot_hist_name, sizes)) - throw std::runtime_error("Failed to read rotation history in VP file"); - - int rows = sizes[0]; - int cols = sizes[1]; - history_params_.resize(rows); - Matrix tmp(rows, cols); - hin.read(tmp, rot_hist_name); - for (size_t i = 0; i < rows; i++) - { - history_params_[i].resize(cols); - for (size_t j = 0; j < cols; j++) - history_params_[i][j] = tmp(i, j); + std::vector sizes(1); + if (!hin.getShape(rot_param_name, sizes)) + throw std::runtime_error("Failed to read rotation_params in VP file"); + + int nparam_actual = sizes[0]; + int nparam = this->myVars.size(); + if (nparam != nparam_actual) { + std::ostringstream tmp_err; + tmp_err << "Expected number of rotation parameters (" << nparam + << ") does not match number in file (" << nparam_actual << ")"; + throw std::runtime_error(tmp_err.str()); } + std::vector params(nparam); + hin.read(params, rot_param_name); + for (int i = 0; i < nparam; i++) + this->myVars[i] = params[i]; + hin.pop(); - applyRotationHistory(); - } + hin.pop(); +} - hin.push("rotation_params", false); - std::string rot_param_name = std::string("rotation_params_") + SPOSetT::getName(); +template +void +RotatedSPOsT::buildOptVariables(const size_t nel) +{ +#if !defined(QMC_COMPLEX) + /* Only rebuild 
optimized variables if more after-rotation orbitals are + * needed Consider ROHF, there is only one set of SPO for both spin up and + * down Nup > Ndown. nel_major_ will be set Nup. + * + * Use the size of myVars as a flag to avoid building the rotation + * parameters again when a clone is made (the DiracDeterminant constructor + * calls buildOptVariables) + */ + if (nel > nel_major_ && this->myVars.size() == 0) { + nel_major_ = nel; - std::vector sizes(1); - if (!hin.getShape(rot_param_name, sizes)) - throw std::runtime_error("Failed to read rotation_params in VP file"); + const size_t nmo = Phi->getOrbitalSetSize(); - int nparam_actual = sizes[0]; - int nparam = this->myVars.size(); - if (nparam != nparam_actual) - { - std::ostringstream tmp_err; - tmp_err << "Expected number of rotation parameters (" << nparam << ") does not match number in file (" - << nparam_actual << ")"; - throw std::runtime_error(tmp_err.str()); - } + // create active rotation parameter indices + RotationIndices created_m_act_rot_inds; - std::vector params(nparam); - hin.read(params, rot_param_name); - for (int i = 0; i < nparam; i++) - this->myVars[i] = params[i]; + RotationIndices created_full_rot_inds; + if (use_global_rot_) + createRotationIndicesFull(nel, nmo, created_full_rot_inds); - hin.pop(); + createRotationIndices(nel, nmo, created_m_act_rot_inds); - hin.pop(); + buildOptVariables(created_m_act_rot_inds, created_full_rot_inds); + } +#endif } -template -void RotatedSPOsT::buildOptVariables(const size_t nel) +template +void +RotatedSPOsT::buildOptVariables( + const RotationIndices& rotations, const RotationIndices& full_rotations) { #if !defined(QMC_COMPLEX) - /* Only rebuild optimized variables if more after-rotation orbitals are - * needed Consider ROHF, there is only one set of SPO for both spin up and - * down Nup > Ndown. nel_major_ will be set Nup. 
- * - * Use the size of myVars as a flag to avoid building the rotation - * parameters again when a clone is made (the DiracDeterminant constructor - * calls buildOptVariables) - */ - if (nel > nel_major_ && this->myVars.size() == 0) - { - nel_major_ = nel; - const size_t nmo = Phi->getOrbitalSetSize(); - // create active rotation parameter indices - RotationIndices created_m_act_rot_inds; + // create active rotations + m_act_rot_inds = rotations; - RotationIndices created_full_rot_inds; if (use_global_rot_) - createRotationIndicesFull(nel, nmo, created_full_rot_inds); + m_full_rot_inds = full_rotations; - createRotationIndices(nel, nmo, created_m_act_rot_inds); + if (use_global_rot_) + app_log() << "Orbital rotation using global rotation" << std::endl; + else + app_log() << "Orbital rotation using history" << std::endl; - buildOptVariables(created_m_act_rot_inds, created_full_rot_inds); - } -#endif -} + // This will add the orbital rotation parameters to myVars + // and will also read in initial parameter values supplied in input file + int p, q; + int nparams_active = m_act_rot_inds.size(); -template -void RotatedSPOsT::buildOptVariables(const RotationIndices& rotations, const RotationIndices& full_rotations) -{ -#if !defined(QMC_COMPLEX) - const size_t nmo = Phi->getOrbitalSetSize(); - - // create active rotations - m_act_rot_inds = rotations; - - if (use_global_rot_) - m_full_rot_inds = full_rotations; - - if (use_global_rot_) - app_log() << "Orbital rotation using global rotation" << std::endl; - else - app_log() << "Orbital rotation using history" << std::endl; - - // This will add the orbital rotation parameters to myVars - // and will also read in initial parameter values supplied in input file - int p, q; - int nparams_active = m_act_rot_inds.size(); - - app_log() << "nparams_active: " << nparams_active << " params2.size(): " << params.size() << std::endl; - if (params_supplied) - if (nparams_active != params.size()) - throw std::runtime_error("The number 
of supplied orbital rotation parameters does not " - "match number prdouced by the slater " - "expansion. \n"); - - this->myVars.clear(); - for (int i = 0; i < nparams_active; i++) - { - p = m_act_rot_inds[i].first; - q = m_act_rot_inds[i].second; - std::stringstream sstr; - sstr << SPOSetT::getName() << "_orb_rot_" << (p < 10 ? "0" : "") << (p < 100 ? "0" : "") << (p < 1000 ? "0" : "") - << p << "_" << (q < 10 ? "0" : "") << (q < 100 ? "0" : "") << (q < 1000 ? "0" : "") << q; - - // If the user input parameters, use those. Otherwise, initialize the - // parameters to zero + app_log() << "nparams_active: " << nparams_active + << " params2.size(): " << params.size() << std::endl; if (params_supplied) - { - this->myVars.insert(sstr.str(), params[i]); + if (nparams_active != params.size()) + throw std::runtime_error( + "The number of supplied orbital rotation parameters does not " + "match number prdouced by the slater " + "expansion. \n"); + + this->myVars.clear(); + for (int i = 0; i < nparams_active; i++) { + p = m_act_rot_inds[i].first; + q = m_act_rot_inds[i].second; + std::stringstream sstr; + sstr << SPOSetT::getName() << "_orb_rot_" << (p < 10 ? "0" : "") + << (p < 100 ? "0" : "") << (p < 1000 ? "0" : "") << p << "_" + << (q < 10 ? "0" : "") << (q < 100 ? "0" : "") + << (q < 1000 ? "0" : "") << q; + + // If the user input parameters, use those. Otherwise, initialize the + // parameters to zero + if (params_supplied) { + this->myVars.insert(sstr.str(), params[i]); + } + else { + this->myVars.insert(sstr.str(), 0.0); + } } - else - { - this->myVars.insert(sstr.str(), 0.0); + + if (use_global_rot_) { + myVarsFull.clear(); + for (int i = 0; i < m_full_rot_inds.size(); i++) { + p = m_full_rot_inds[i].first; + q = m_full_rot_inds[i].second; + std::stringstream sstr; + sstr << SPOSetT::getName() << "_orb_rot_" << (p < 10 ? "0" : "") + << (p < 100 ? "0" : "") << (p < 1000 ? "0" : "") << p << "_" + << (q < 10 ? "0" : "") << (q < 100 ? "0" : "") + << (q < 1000 ? 
"0" : "") << q; + + if (params_supplied && i < m_act_rot_inds.size()) + myVarsFull.insert(sstr.str(), params[i]); + else + myVarsFull.insert(sstr.str(), 0.0); + } + } + + // Printing the parameters + if (true) { + app_log() << std::string(16, ' ') << "Parameter name" + << std::string(15, ' ') << "Value\n"; + this->myVars.print(app_log()); } - } - - if (use_global_rot_) - { - myVarsFull.clear(); - for (int i = 0; i < m_full_rot_inds.size(); i++) - { - p = m_full_rot_inds[i].first; - q = m_full_rot_inds[i].second; - std::stringstream sstr; - sstr << SPOSetT::getName() << "_orb_rot_" << (p < 10 ? "0" : "") << (p < 100 ? "0" : "") - << (p < 1000 ? "0" : "") << p << "_" << (q < 10 ? "0" : "") << (q < 100 ? "0" : "") << (q < 1000 ? "0" : "") - << q; - - if (params_supplied && i < m_act_rot_inds.size()) - myVarsFull.insert(sstr.str(), params[i]); - else - myVarsFull.insert(sstr.str(), 0.0); + + if (params_supplied) { + std::vector param(m_act_rot_inds.size()); + for (int i = 0; i < m_act_rot_inds.size(); i++) + param[i] = this->myVars[i]; + apply_rotation(param, false); } - } - - // Printing the parameters - if (true) - { - app_log() << std::string(16, ' ') << "Parameter name" << std::string(15, ' ') << "Value\n"; - this->myVars.print(app_log()); - } - - if (params_supplied) - { - std::vector param(m_act_rot_inds.size()); - for (int i = 0; i < m_act_rot_inds.size(); i++) - param[i] = this->myVars[i]; - apply_rotation(param, false); - } #endif } -template -void RotatedSPOsT::apply_rotation(const std::vector& param, bool use_stored_copy) +template +void +RotatedSPOsT::apply_rotation( + const std::vector& param, bool use_stored_copy) { - assert(param.size() == m_act_rot_inds.size()); + assert(param.size() == m_act_rot_inds.size()); - const size_t nmo = Phi->getOrbitalSetSize(); - ValueMatrix rot_mat(nmo, nmo); + const size_t nmo = Phi->getOrbitalSetSize(); + ValueMatrix rot_mat(nmo, nmo); - constructAntiSymmetricMatrix(m_act_rot_inds, param, rot_mat); + 
constructAntiSymmetricMatrix(m_act_rot_inds, param, rot_mat); - /* - rot_mat is now an anti-hermitian matrix. Now we convert - it into a unitary matrix via rot_mat = exp(-rot_mat). - Finally, apply unitary matrix to orbs. - */ - exponentiate_antisym_matrix(rot_mat); - Phi->applyRotation(rot_mat, use_stored_copy); + /* + rot_mat is now an anti-hermitian matrix. Now we convert + it into a unitary matrix via rot_mat = exp(-rot_mat). + Finally, apply unitary matrix to orbs. + */ + exponentiate_antisym_matrix(rot_mat); + Phi->applyRotation(rot_mat, use_stored_copy); } -template -void RotatedSPOsT::applyDeltaRotation(const std::vector& delta_param, - const std::vector& old_param, - std::vector& new_param) +template +void +RotatedSPOsT::applyDeltaRotation(const std::vector& delta_param, + const std::vector& old_param, std::vector& new_param) { - const size_t nmo = Phi->getOrbitalSetSize(); - ValueMatrix new_rot_mat(nmo, nmo); - constructDeltaRotation(delta_param, old_param, m_act_rot_inds, m_full_rot_inds, new_param, new_rot_mat); + const size_t nmo = Phi->getOrbitalSetSize(); + ValueMatrix new_rot_mat(nmo, nmo); + constructDeltaRotation(delta_param, old_param, m_act_rot_inds, + m_full_rot_inds, new_param, new_rot_mat); - Phi->applyRotation(new_rot_mat, true); + Phi->applyRotation(new_rot_mat, true); } -template -void RotatedSPOsT::constructDeltaRotation(const std::vector& delta_param, - const std::vector& old_param, - const RotationIndices& act_rot_inds, - const RotationIndices& full_rot_inds, - std::vector& new_param, - ValueMatrix& new_rot_mat) +template +void +RotatedSPOsT::constructDeltaRotation( + const std::vector& delta_param, + const std::vector& old_param, const RotationIndices& act_rot_inds, + const RotationIndices& full_rot_inds, std::vector& new_param, + ValueMatrix& new_rot_mat) { - assert(delta_param.size() == act_rot_inds.size()); - assert(old_param.size() == full_rot_inds.size()); - assert(new_param.size() == full_rot_inds.size()); + 
assert(delta_param.size() == act_rot_inds.size()); + assert(old_param.size() == full_rot_inds.size()); + assert(new_param.size() == full_rot_inds.size()); - const size_t nmo = new_rot_mat.rows(); - assert(new_rot_mat.rows() == new_rot_mat.cols()); + const size_t nmo = new_rot_mat.rows(); + assert(new_rot_mat.rows() == new_rot_mat.cols()); - ValueMatrix old_rot_mat(nmo, nmo); + ValueMatrix old_rot_mat(nmo, nmo); - constructAntiSymmetricMatrix(full_rot_inds, old_param, old_rot_mat); - exponentiate_antisym_matrix(old_rot_mat); + constructAntiSymmetricMatrix(full_rot_inds, old_param, old_rot_mat); + exponentiate_antisym_matrix(old_rot_mat); - ValueMatrix delta_rot_mat(nmo, nmo); + ValueMatrix delta_rot_mat(nmo, nmo); - constructAntiSymmetricMatrix(act_rot_inds, delta_param, delta_rot_mat); - exponentiate_antisym_matrix(delta_rot_mat); + constructAntiSymmetricMatrix(act_rot_inds, delta_param, delta_rot_mat); + exponentiate_antisym_matrix(delta_rot_mat); - // Apply delta rotation to old rotation. - BLAS::gemm('N', 'N', nmo, nmo, nmo, 1.0, delta_rot_mat.data(), nmo, old_rot_mat.data(), nmo, 0.0, new_rot_mat.data(), - nmo); + // Apply delta rotation to old rotation. 
+ BLAS::gemm('N', 'N', nmo, nmo, nmo, 1.0, delta_rot_mat.data(), nmo, + old_rot_mat.data(), nmo, 0.0, new_rot_mat.data(), nmo); - ValueMatrix log_rot_mat(nmo, nmo); - log_antisym_matrix(new_rot_mat, log_rot_mat); - extractParamsFromAntiSymmetricMatrix(full_rot_inds, log_rot_mat, new_param); + ValueMatrix log_rot_mat(nmo, nmo); + log_antisym_matrix(new_rot_mat, log_rot_mat); + extractParamsFromAntiSymmetricMatrix(full_rot_inds, log_rot_mat, new_param); } -template -void RotatedSPOsT::applyFullRotation(const std::vector& full_param, bool use_stored_copy) +template +void +RotatedSPOsT::applyFullRotation( + const std::vector& full_param, bool use_stored_copy) { - assert(full_param.size() == m_full_rot_inds.size()); - - const size_t nmo = Phi->getOrbitalSetSize(); - ValueMatrix rot_mat(nmo, nmo); - rot_mat = T(0); + assert(full_param.size() == m_full_rot_inds.size()); - constructAntiSymmetricMatrix(m_full_rot_inds, full_param, rot_mat); - - /* - rot_mat is now an anti-hermitian matrix. Now we convert - it into a unitary matrix via rot_mat = exp(-rot_mat). - Finally, apply unitary matrix to orbs. - */ - exponentiate_antisym_matrix(rot_mat); - Phi->applyRotation(rot_mat, use_stored_copy); + const size_t nmo = Phi->getOrbitalSetSize(); + ValueMatrix rot_mat(nmo, nmo); + rot_mat = T(0); + + constructAntiSymmetricMatrix(m_full_rot_inds, full_param, rot_mat); + + /* + rot_mat is now an anti-hermitian matrix. Now we convert + it into a unitary matrix via rot_mat = exp(-rot_mat). + Finally, apply unitary matrix to orbs. 
+ */ + exponentiate_antisym_matrix(rot_mat); + Phi->applyRotation(rot_mat, use_stored_copy); } -template -void RotatedSPOsT::applyRotationHistory() +template +void +RotatedSPOsT::applyRotationHistory() { - for (auto delta_param : history_params_) - { - apply_rotation(delta_param, false); - } + for (auto delta_param : history_params_) { + apply_rotation(delta_param, false); + } } // compute exponential of a real, antisymmetric matrix by diagonalizing and // exponentiating eigenvalues -template -void RotatedSPOsT::exponentiate_antisym_matrix(ValueMatrix& mat) +template +void +RotatedSPOsT::exponentiate_antisym_matrix(ValueMatrix& mat) { - const int n = mat.rows(); - std::vector> mat_h(n * n, 0); - std::vector eval(n, 0); - std::vector> work(2 * n, 0); - std::vector rwork(3 * n, 0); - std::vector> mat_d(n * n, 0); - std::vector> mat_t(n * n, 0); - // exponentiating e^X = e^iY (Y hermitian) - // i(-iX) = X, so -iX is hermitian - // diagonalize -iX = UDU^T, exponentiate e^iD, and return U e^iD U^T - // construct hermitian analogue of mat by multiplying by -i - for (int i = 0; i < n; ++i) - { - for (int j = i; j < n; ++j) - { - mat_h[i + n * j] = std::complex(0, -1.0 * mat[j][i]); - mat_h[j + n * i] = std::complex(0, 1.0 * mat[j][i]); + const int n = mat.rows(); + std::vector> mat_h(n * n, 0); + std::vector eval(n, 0); + std::vector> work(2 * n, 0); + std::vector rwork(3 * n, 0); + std::vector> mat_d(n * n, 0); + std::vector> mat_t(n * n, 0); + // exponentiating e^X = e^iY (Y hermitian) + // i(-iX) = X, so -iX is hermitian + // diagonalize -iX = UDU^T, exponentiate e^iD, and return U e^iD U^T + // construct hermitian analogue of mat by multiplying by -i + for (int i = 0; i < n; ++i) { + for (int j = i; j < n; ++j) { + mat_h[i + n * j] = std::complex(0, -1.0 * mat[j][i]); + mat_h[j + n * i] = std::complex(0, 1.0 * mat[j][i]); + } } - } - // diagonalize the matrix - char JOBZ('V'); - char UPLO('U'); - int N(n); - int LDA(n); - int LWORK(2 * n); - int info = 0; - 
LAPACK::heev(JOBZ, UPLO, N, &mat_h.at(0), LDA, &eval.at(0), &work.at(0), LWORK, &rwork.at(0), info); - if (info != 0) - { - std::ostringstream msg; - msg << "heev failed with info = " << info << " in RotatedSPOsT::exponentiate_antisym_matrix"; - throw std::runtime_error(msg.str()); - } - // iterate through diagonal matrix, exponentiate terms - for (int i = 0; i < n; ++i) - { - for (int j = 0; j < n; ++j) - { - mat_d[i + j * n] = (i == j) ? std::exp(std::complex(0.0, eval[i])) : std::complex(0.0, 0.0); + // diagonalize the matrix + char JOBZ('V'); + char UPLO('U'); + int N(n); + int LDA(n); + int LWORK(2 * n); + int info = 0; + LAPACK::heev(JOBZ, UPLO, N, &mat_h.at(0), LDA, &eval.at(0), &work.at(0), + LWORK, &rwork.at(0), info); + if (info != 0) { + std::ostringstream msg; + msg << "heev failed with info = " << info + << " in RotatedSPOsT::exponentiate_antisym_matrix"; + throw std::runtime_error(msg.str()); } - } - // perform matrix multiplication - // assume row major - BLAS::gemm('N', 'C', n, n, n, std::complex(1.0, 0), &mat_d.at(0), n, &mat_h.at(0), n, - std::complex(0.0, 0.0), &mat_t.at(0), n); - BLAS::gemm('N', 'N', n, n, n, std::complex(1.0, 0), &mat_h.at(0), n, &mat_t.at(0), n, - std::complex(0.0, 0.0), &mat_d.at(0), n); - for (int i = 0; i < n; ++i) - for (int j = 0; j < n; ++j) - { - if (mat_d[i + n * j].imag() > 1e-12) - { - app_log() << "warning: large imaginary value in orbital " - "rotation matrix: (i,j) = (" - << i << "," << j << "), im = " << mat_d[i + n * j].imag() << std::endl; - } - mat[j][i] = mat_d[i + n * j].real(); + // iterate through diagonal matrix, exponentiate terms + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + mat_d[i + j * n] = (i == j) ? 
+ std::exp(std::complex(0.0, eval[i])) : + std::complex(0.0, 0.0); + } } + // perform matrix multiplication + // assume row major + BLAS::gemm('N', 'C', n, n, n, std::complex(1.0, 0), &mat_d.at(0), + n, &mat_h.at(0), n, std::complex(0.0, 0.0), &mat_t.at(0), n); + BLAS::gemm('N', 'N', n, n, n, std::complex(1.0, 0), &mat_h.at(0), + n, &mat_t.at(0), n, std::complex(0.0, 0.0), &mat_d.at(0), n); + for (int i = 0; i < n; ++i) + for (int j = 0; j < n; ++j) { + if (mat_d[i + n * j].imag() > 1e-12) { + app_log() << "warning: large imaginary value in orbital " + "rotation matrix: (i,j) = (" + << i << "," << j + << "), im = " << mat_d[i + n * j].imag() << std::endl; + } + mat[j][i] = mat_d[i + n * j].real(); + } } -template -void RotatedSPOsT::log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output) +template +void +RotatedSPOsT::log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output) { - const int n = mat.rows(); - std::vector mat_h(n * n, 0); - std::vector eval_r(n, 0); - std::vector eval_i(n, 0); - std::vector mat_l(n * n, 0); - std::vector work(4 * n, 0); - - std::vector> mat_cd(n * n, 0); - std::vector> mat_cl(n * n, 0); - std::vector> mat_ch(n * n, 0); - - for (int i = 0; i < n; ++i) - for (int j = 0; j < n; ++j) - mat_h[i + n * j] = mat[i][j]; - - // diagonalize the matrix - char JOBL('V'); - char JOBR('N'); - int N(n); - int LDA(n); - int LWORK(4 * n); - int info = 0; - LAPACK::geev(&JOBL, &JOBR, &N, &mat_h.at(0), &LDA, &eval_r.at(0), &eval_i.at(0), &mat_l.at(0), &LDA, nullptr, &LDA, - &work.at(0), &LWORK, &info); - if (info != 0) - { - std::ostringstream msg; - msg << "heev failed with info = " << info << " in RotatedSPOsT::log_antisym_matrix"; - throw std::runtime_error(msg.str()); - } - - // iterate through diagonal matrix, take log - for (int i = 0; i < n; ++i) - { - for (int j = 0; j < n; ++j) - { - auto tmp = (i == j) ? 
std::log(std::complex(eval_r[i], eval_i[i])) : std::complex(0.0, 0.0); - mat_cd[i + j * n] = tmp; - - if (eval_i[j] > 0.0) - { - mat_cl[i + j * n] = std::complex(mat_l[i + j * n], mat_l[i + (j + 1) * n]); - mat_cl[i + (j + 1) * n] = std::complex(mat_l[i + j * n], -mat_l[i + (j + 1) * n]); - } - else if (!(eval_i[j] < 0.0)) - { - mat_cl[i + j * n] = std::complex(mat_l[i + j * n], 0.0); - } + const int n = mat.rows(); + std::vector mat_h(n * n, 0); + std::vector eval_r(n, 0); + std::vector eval_i(n, 0); + std::vector mat_l(n * n, 0); + std::vector work(4 * n, 0); + + std::vector> mat_cd(n * n, 0); + std::vector> mat_cl(n * n, 0); + std::vector> mat_ch(n * n, 0); + + for (int i = 0; i < n; ++i) + for (int j = 0; j < n; ++j) + mat_h[i + n * j] = mat[i][j]; + + // diagonalize the matrix + char JOBL('V'); + char JOBR('N'); + int N(n); + int LDA(n); + int LWORK(4 * n); + int info = 0; + LAPACK::geev(&JOBL, &JOBR, &N, &mat_h.at(0), &LDA, &eval_r.at(0), + &eval_i.at(0), &mat_l.at(0), &LDA, nullptr, &LDA, &work.at(0), &LWORK, + &info); + if (info != 0) { + std::ostringstream msg; + msg << "heev failed with info = " << info + << " in RotatedSPOsT::log_antisym_matrix"; + throw std::runtime_error(msg.str()); } - } - - RealType one(1.0); - RealType zero(0.0); - BLAS::gemm('N', 'N', n, n, n, one, &mat_cl.at(0), n, &mat_cd.at(0), n, zero, &mat_ch.at(0), n); - BLAS::gemm('N', 'C', n, n, n, one, &mat_ch.at(0), n, &mat_cl.at(0), n, zero, &mat_cd.at(0), n); - - for (int i = 0; i < n; ++i) - for (int j = 0; j < n; ++j) - { - if (mat_cd[i + n * j].imag() > 1e-12) - { - app_log() << "warning: large imaginary value in antisymmetric " - "matrix: (i,j) = (" - << i << "," << j << "), im = " << mat_cd[i + n * j].imag() << std::endl; - } - output[i][j] = mat_cd[i + n * j].real(); + + // iterate through diagonal matrix, take log + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + auto tmp = (i == j) ? 
+ std::log(std::complex(eval_r[i], eval_i[i])) : + std::complex(0.0, 0.0); + mat_cd[i + j * n] = tmp; + + if (eval_i[j] > 0.0) { + mat_cl[i + j * n] = std::complex( + mat_l[i + j * n], mat_l[i + (j + 1) * n]); + mat_cl[i + (j + 1) * n] = std::complex( + mat_l[i + j * n], -mat_l[i + (j + 1) * n]); + } + else if (!(eval_i[j] < 0.0)) { + mat_cl[i + j * n] = + std::complex(mat_l[i + j * n], 0.0); + } + } } + + RealType one(1.0); + RealType zero(0.0); + BLAS::gemm('N', 'N', n, n, n, one, &mat_cl.at(0), n, &mat_cd.at(0), n, zero, + &mat_ch.at(0), n); + BLAS::gemm('N', 'C', n, n, n, one, &mat_ch.at(0), n, &mat_cl.at(0), n, zero, + &mat_cd.at(0), n); + + for (int i = 0; i < n; ++i) + for (int j = 0; j < n; ++j) { + if (mat_cd[i + n * j].imag() > 1e-12) { + app_log() << "warning: large imaginary value in antisymmetric " + "matrix: (i,j) = (" + << i << "," << j + << "), im = " << mat_cd[i + n * j].imag() + << std::endl; + } + output[i][j] = mat_cd[i + n * j].real(); + } } -template -void RotatedSPOsT::evaluateDerivRatios(const VirtualParticleSet& VP, - const opt_variables_type& optvars, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios, - Matrix& dratios, - int FirstIndex, - int LastIndex) +template +void +RotatedSPOsT::evaluateDerivRatios(const VirtualParticleSetT& VP, + const OptVariablesType& optvars, ValueVector& psi, + const ValueVector& psiinv, std::vector& ratios, Matrix& dratios, + int FirstIndex, int LastIndex) { - Phi->evaluateDetRatios(VP, psi, psiinv, ratios); + Phi->evaluateDetRatios(VP, psi, psiinv, ratios); + + const size_t nel = LastIndex - FirstIndex; + const size_t nmo = Phi->getOrbitalSetSize(); + + psiM_inv.resize(nel, nel); + psiM_all.resize(nel, nmo); + dpsiM_all.resize(nel, nmo); + d2psiM_all.resize(nel, nmo); + + psiM_inv = 0; + psiM_all = 0; + dpsiM_all = 0; + d2psiM_all = 0; + + const ParticleSetT& P = VP.getRefPS(); + int iel = VP.refPtcl; + + Phi->evaluate_notranspose( + P, FirstIndex, LastIndex, psiM_all, dpsiM_all, 
d2psiM_all); + + for (int i = 0; i < nel; i++) + for (int j = 0; j < nel; j++) + psiM_inv(i, j) = psiM_all(i, j); + + Invert(psiM_inv.data(), nel, nel); - const size_t nel = LastIndex - FirstIndex; - const size_t nmo = Phi->getOrbitalSetSize(); + const T* const A(psiM_all.data()); + const T* const Ainv(psiM_inv.data()); + ValueMatrix T_orig; + T_orig.resize(nel, nmo); + + BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), + T_orig.data(), nmo); + + ValueMatrix T_mat; + T_mat.resize(nel, nmo); - psiM_inv.resize(nel, nel); - psiM_all.resize(nel, nmo); - dpsiM_all.resize(nel, nmo); - d2psiM_all.resize(nel, nmo); + ValueVector tmp_psi; + tmp_psi.resize(nmo); - psiM_inv = 0; - psiM_all = 0; - dpsiM_all = 0; - d2psiM_all = 0; + for (int iat = 0; iat < VP.getTotalNum(); iat++) { + Phi->evaluateValue(VP, iat, tmp_psi); - const ParticleSet& P = VP.getRefPS(); - int iel = VP.refPtcl; + for (int j = 0; j < nmo; j++) + psiM_all(iel - FirstIndex, j) = tmp_psi[j]; - Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all); + for (int i = 0; i < nel; i++) + for (int j = 0; j < nel; j++) + psiM_inv(i, j) = psiM_all(i, j); - for (int i = 0; i < nel; i++) - for (int j = 0; j < nel; j++) - psiM_inv(i, j) = psiM_all(i, j); + Invert(psiM_inv.data(), nel, nel); - Invert(psiM_inv.data(), nel, nel); + const T* const A(psiM_all.data()); + const T* const Ainv(psiM_inv.data()); - const T* const A(psiM_all.data()); - const T* const Ainv(psiM_inv.data()); - ValueMatrix T_orig; - T_orig.resize(nel, nmo); + // The matrix A is rectangular. Ainv is the inverse of the square part + // of the matrix. The multiply of Ainv and the square part of A is just + // the identity. This multiply could be reduced to Ainv and the + // non-square part of A. 
+ BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), + T_mat.data(), nmo); + + for (int i = 0; i < m_act_rot_inds.size(); i++) { + int kk = this->myVars.where(i); + if (kk >= 0) { + const int p = m_act_rot_inds.at(i).first; + const int q = m_act_rot_inds.at(i).second; + dratios(iat, kk) = T_mat(p, q) - + T_orig(p, q); // dratio size is (nknot, num_vars) + } + } + } +} - BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_orig.data(), nmo); +template +void +RotatedSPOsT::evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesType& optvars, Vector& dlogpsi, int FirstIndex, + int LastIndex) +{ + const size_t nel = LastIndex - FirstIndex; + const size_t nmo = Phi->getOrbitalSetSize(); - ValueMatrix T_mat; - T_mat.resize(nel, nmo); + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1 - ValueVector tmp_psi; - tmp_psi.resize(nmo); + psiM_inv.resize(nel, nel); + psiM_all.resize(nel, nmo); + dpsiM_all.resize(nel, nmo); + d2psiM_all.resize(nel, nmo); - for (int iat = 0; iat < VP.getTotalNum(); iat++) - { - Phi->evaluateValue(VP, iat, tmp_psi); + psiM_inv = 0; + psiM_all = 0; + dpsiM_all = 0; + d2psiM_all = 0; - for (int j = 0; j < nmo; j++) - psiM_all(iel - FirstIndex, j) = tmp_psi[j]; + Phi->evaluate_notranspose( + P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all); for (int i = 0; i < nel; i++) - for (int j = 0; j < nel; j++) - psiM_inv(i, j) = psiM_all(i, j); + for (int j = 0; j < nel; j++) + psiM_inv(i, j) = psiM_all(i, j); Invert(psiM_inv.data(), nel, nel); + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2 const T* const A(psiM_all.data()); const T* const Ainv(psiM_inv.data()); - - // The matrix A is rectangular. Ainv is the inverse of the square part - // of the matrix. The multiply of Ainv and the square part of A is just - // the identity. This multiply could be reduced to Ainv and the - // non-square part of A. 
- BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_mat.data(), nmo); - - for (int i = 0; i < m_act_rot_inds.size(); i++) - { - int kk = this->myVars.where(i); - if (kk >= 0) - { - const int p = m_act_rot_inds.at(i).first; - const int q = m_act_rot_inds.at(i).second; - dratios(iat, kk) = T_mat(p, q) - T_orig(p, q); // dratio size is (nknot, num_vars) - } + ValueMatrix T_mat; + T_mat.resize(nel, nmo); + + BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), + T_mat.data(), nmo); + + for (int i = 0; i < m_act_rot_inds.size(); i++) { + int kk = this->myVars.where(i); + if (kk >= 0) { + const int p = m_act_rot_inds.at(i).first; + const int q = m_act_rot_inds.at(i).second; + dlogpsi[kk] = T_mat(p, q); + } } - } } -template -void RotatedSPOsT::evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - int FirstIndex, - int LastIndex) +template +void +RotatedSPOsT::evaluateDerivatives(ParticleSetT& P, + const OptVariablesType& optvars, Vector& dlogpsi, + Vector& dhpsioverpsi, const int& FirstIndex, const int& LastIndex) { - const size_t nel = LastIndex - FirstIndex; - const size_t nmo = Phi->getOrbitalSetSize(); - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1 + const size_t nel = LastIndex - FirstIndex; + const size_t nmo = Phi->getOrbitalSetSize(); - psiM_inv.resize(nel, nel); - psiM_all.resize(nel, nmo); - dpsiM_all.resize(nel, nmo); - d2psiM_all.resize(nel, nmo); + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1 + myG_temp.resize(nel); + myG_J.resize(nel); + myL_temp.resize(nel); + myL_J.resize(nel); - psiM_inv = 0; - psiM_all = 0; - dpsiM_all = 0; - d2psiM_all = 0; + myG_temp = 0; + myG_J = 0; + myL_temp = 0; + myL_J = 0; - Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all); + Bbar.resize(nel, nmo); + psiM_inv.resize(nel, nel); + psiM_all.resize(nel, nmo); + dpsiM_all.resize(nel, nmo); + d2psiM_all.resize(nel, nmo); - for (int i = 0; i < nel; i++) - for (int j = 0; j 
< nel; j++) - psiM_inv(i, j) = psiM_all(i, j); + Bbar = 0; + psiM_inv = 0; + psiM_all = 0; + dpsiM_all = 0; + d2psiM_all = 0; - Invert(psiM_inv.data(), nel, nel); + Phi->evaluate_notranspose( + P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all); - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2 - const T* const A(psiM_all.data()); - const T* const Ainv(psiM_inv.data()); - ValueMatrix T_mat; - T_mat.resize(nel, nmo); + for (int i = 0; i < nel; i++) + for (int j = 0; j < nel; j++) + psiM_inv(i, j) = psiM_all(i, j); - BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_mat.data(), nmo); + Invert(psiM_inv.data(), nel, nel); - for (int i = 0; i < m_act_rot_inds.size(); i++) - { - int kk = this->myVars.where(i); - if (kk >= 0) - { - const int p = m_act_rot_inds.at(i).first; - const int q = m_act_rot_inds.at(i).second; - dlogpsi[kk] = T_mat(p, q); + // current value of Gradient and Laplacian + // gradient components + for (int a = 0; a < nel; a++) + for (int i = 0; i < nel; i++) + for (int k = 0; k < 3; k++) + myG_temp[a][k] += psiM_inv(i, a) * dpsiM_all(a, i)[k]; + // laplacian components + for (int a = 0; a < nel; a++) { + for (int i = 0; i < nel; i++) + myL_temp[a] += psiM_inv(i, a) * d2psiM_all(a, i); } - } -} -template -void RotatedSPOsT::evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const int& FirstIndex, - const int& LastIndex) -{ - const size_t nel = LastIndex - FirstIndex; - const size_t nmo = Phi->getOrbitalSetSize(); - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1 - myG_temp.resize(nel); - myG_J.resize(nel); - myL_temp.resize(nel); - myL_J.resize(nel); - - myG_temp = 0; - myG_J = 0; - myL_temp = 0; - myL_J = 0; - - Bbar.resize(nel, nmo); - psiM_inv.resize(nel, nel); - psiM_all.resize(nel, nmo); - dpsiM_all.resize(nel, nmo); - d2psiM_all.resize(nel, nmo); - - Bbar = 0; - psiM_inv = 0; - psiM_all = 0; - dpsiM_all = 0; - d2psiM_all = 0; - - Phi->evaluate_notranspose(P, 
FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all); - - for (int i = 0; i < nel; i++) - for (int j = 0; j < nel; j++) - psiM_inv(i, j) = psiM_all(i, j); - - Invert(psiM_inv.data(), nel, nel); - - // current value of Gradient and Laplacian - // gradient components - for (int a = 0; a < nel; a++) - for (int i = 0; i < nel; i++) - for (int k = 0; k < 3; k++) - myG_temp[a][k] += psiM_inv(i, a) * dpsiM_all(a, i)[k]; - // laplacian components - for (int a = 0; a < nel; a++) - { + // calculation of myG_J which will be used to represent + // \frac{\nabla\psi_{J}}{\psi_{J}} calculation of myL_J will be used to + // represent \frac{\nabla^2\psi_{J}}{\psi_{J}} IMPORTANT NOTE: The value of + // P.L holds \nabla^2 ln[\psi] but we need \frac{\nabla^2 \psi}{\psi} and + // this is what myL_J will hold + for (int a = 0, iat = FirstIndex; a < nel; a++, iat++) { + myG_J[a] = (P.G[iat] - myG_temp[a]); + myL_J[a] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[a]); + } + // possibly replace wit BLAS calls for (int i = 0; i < nel; i++) - myL_temp[a] += psiM_inv(i, a) * d2psiM_all(a, i); - } - - // calculation of myG_J which will be used to represent - // \frac{\nabla\psi_{J}}{\psi_{J}} calculation of myL_J will be used to - // represent \frac{\nabla^2\psi_{J}}{\psi_{J}} IMPORTANT NOTE: The value of - // P.L holds \nabla^2 ln[\psi] but we need \frac{\nabla^2 \psi}{\psi} and - // this is what myL_J will hold - for (int a = 0, iat = FirstIndex; a < nel; a++, iat++) - { - myG_J[a] = (P.G[iat] - myG_temp[a]); - myL_J[a] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[a]); - } - // possibly replace wit BLAS calls - for (int i = 0; i < nel; i++) - for (int j = 0; j < nmo; j++) - Bbar(i, j) = d2psiM_all(i, j) + 2 * dot(myG_J[i], dpsiM_all(i, j)) + myL_J[i] * psiM_all(i, j); - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2 - const T* const A(psiM_all.data()); - const T* const Ainv(psiM_inv.data()); - const T* const B(Bbar.data()); - ValueMatrix T_mat; - ValueMatrix Y1; - ValueMatrix Y2; - 
ValueMatrix Y3; - ValueMatrix Y4; - T_mat.resize(nel, nmo); - Y1.resize(nel, nel); - Y2.resize(nel, nmo); - Y3.resize(nel, nmo); - Y4.resize(nel, nmo); - - BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_mat.data(), nmo); - BLAS::gemm('N', 'N', nel, nel, nel, T(1.0), B, nmo, Ainv, nel, T(0.0), Y1.data(), nel); - BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), T_mat.data(), nmo, Y1.data(), nel, T(0.0), Y2.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), B, nmo, Ainv, nel, T(0.0), Y3.data(), nmo); - - // possibly replace with BLAS call - Y4 = Y3 - Y2; - - for (int i = 0; i < m_act_rot_inds.size(); i++) - { - int kk = this->myVars.where(i); - if (kk >= 0) - { - const int p = m_act_rot_inds.at(i).first; - const int q = m_act_rot_inds.at(i).second; - dlogpsi[kk] += T_mat(p, q); - dhpsioverpsi[kk] += T(-0.5) * Y4(p, q); + for (int j = 0; j < nmo; j++) + Bbar(i, j) = d2psiM_all(i, j) + + 2.0 * dot(myG_J[i], dpsiM_all(i, j)) + + myL_J[i] * psiM_all(i, j); + + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2 + const T* const A(psiM_all.data()); + const T* const Ainv(psiM_inv.data()); + const T* const B(Bbar.data()); + ValueMatrix T_mat; + ValueMatrix Y1; + ValueMatrix Y2; + ValueMatrix Y3; + ValueMatrix Y4; + T_mat.resize(nel, nmo); + Y1.resize(nel, nel); + Y2.resize(nel, nmo); + Y3.resize(nel, nmo); + Y4.resize(nel, nmo); + + BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), + T_mat.data(), nmo); + BLAS::gemm('N', 'N', nel, nel, nel, T(1.0), B, nmo, Ainv, nel, T(0.0), + Y1.data(), nel); + BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), T_mat.data(), nmo, Y1.data(), + nel, T(0.0), Y2.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), B, nmo, Ainv, nel, T(0.0), + Y3.data(), nmo); + + // possibly replace with BLAS call + Y4 = Y3 - Y2; + + for (int i = 0; i < m_act_rot_inds.size(); i++) { + int kk = this->myVars.where(i); + if (kk >= 0) { + const int p = m_act_rot_inds.at(i).first; + const int q = m_act_rot_inds.at(i).second; + 
dlogpsi[kk] += T_mat(p, q); + dhpsioverpsi[kk] += T(-0.5) * Y4(p, q); + } } - } } -template -void RotatedSPOsT::evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const T& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const GradMatrix& grads_up, - const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, - const ValueMatrix& B_lapl, - const std::vector& detData_up, - const size_t N1, - const size_t N2, - const size_t NP1, - const size_t NP2, - const std::vector>& lookup_tbl) +template +void +RotatedSPOsT::evaluateDerivatives(ParticleSetT& P, + const OptVariablesType& optvars, Vector& dlogpsi, + Vector& dhpsioverpsi, const T& psiCurrent, const std::vector& Coeff, + const std::vector& C2node_up, const std::vector& C2node_dn, + const ValueVector& detValues_up, const ValueVector& detValues_dn, + const GradMatrix& grads_up, const GradMatrix& grads_dn, + const ValueMatrix& lapls_up, const ValueMatrix& lapls_dn, + const ValueMatrix& M_up, const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn, + const GradMatrix& B_grad, const ValueMatrix& B_lapl, + const std::vector& detData_up, const size_t N1, const size_t N2, + const size_t NP1, const size_t NP2, + const std::vector>& lookup_tbl) { - bool recalculate(false); - for (int k = 0; k < this->myVars.size(); ++k) - { - int kk = this->myVars.where(k); - if (kk < 0) - continue; - if (optvars.recompute(kk)) - recalculate = true; - } - if (recalculate) - { - ParticleSet::ParticleGradient myG_temp, myG_J; - ParticleSet::ParticleLaplacian myL_temp, myL_J; - const int NP = P.getTotalNum(); - myG_temp.resize(NP); - myG_temp = 0.0; - 
myL_temp.resize(NP); - myL_temp = 0.0; - myG_J.resize(NP); - myG_J = 0.0; - myL_J.resize(NP); - myL_J = 0.0; - const size_t nmo = Phi->getOrbitalSetSize(); - const size_t nel = P.last(0) - P.first(0); - - const T* restrict C_p = Coeff.data(); - for (int i = 0; i < Coeff.size(); i++) - { - const size_t upC = C2node_up[i]; - const size_t dnC = C2node_dn[i]; - const T tmp1 = C_p[i] * detValues_dn[dnC]; - const T tmp2 = C_p[i] * detValues_up[upC]; - for (size_t k = 0, j = N1; k < NP1; k++, j++) - { - myG_temp[j] += tmp1 * grads_up(upC, k); - myL_temp[j] += tmp1 * lapls_up(upC, k); - } - for (size_t k = 0, j = N2; k < NP2; k++, j++) - { - myG_temp[j] += tmp2 * grads_dn(dnC, k); - myL_temp[j] += tmp2 * lapls_dn(dnC, k); - } + bool recalculate(false); + for (int k = 0; k < this->myVars.size(); ++k) { + int kk = this->myVars.where(k); + if (kk < 0) + continue; + if (optvars.recompute(kk)) + recalculate = true; } + if (recalculate) { + typename ParticleSetT::ParticleGradient myG_temp, myG_J; + typename ParticleSetT::ParticleLaplacian myL_temp, myL_J; + const int NP = P.getTotalNum(); + myG_temp.resize(NP); + myG_temp = 0.0; + myL_temp.resize(NP); + myL_temp = 0.0; + myG_J.resize(NP); + myG_J = 0.0; + myL_J.resize(NP); + myL_J = 0.0; + const size_t nmo = Phi->getOrbitalSetSize(); + const size_t nel = P.last(0) - P.first(0); + + const T* restrict C_p = Coeff.data(); + for (int i = 0; i < Coeff.size(); i++) { + const size_t upC = C2node_up[i]; + const size_t dnC = C2node_dn[i]; + const T tmp1 = C_p[i] * detValues_dn[dnC]; + const T tmp2 = C_p[i] * detValues_up[upC]; + for (size_t k = 0, j = N1; k < NP1; k++, j++) { + myG_temp[j] += tmp1 * grads_up(upC, k); + myL_temp[j] += tmp1 * lapls_up(upC, k); + } + for (size_t k = 0, j = N2; k < NP2; k++, j++) { + myG_temp[j] += tmp2 * grads_dn(dnC, k); + myL_temp[j] += tmp2 * lapls_dn(dnC, k); + } + } - myG_temp *= (1 / psiCurrent); - myL_temp *= (1 / psiCurrent); + myG_temp *= (1 / psiCurrent); + myL_temp *= (1 / psiCurrent); + + // 
calculation of myG_J which will be used to represent + // \frac{\nabla\psi_{J}}{\psi_{J}} calculation of myL_J will be used to + // represent \frac{\nabla^2\psi_{J}}{\psi_{J}} IMPORTANT NOTE: The + // value of P.L holds \nabla^2 ln[\psi] but we need \frac{\nabla^2 + // \psi}{\psi} and this is what myL_J will hold + for (int iat = 0; iat < (myL_temp.size()); iat++) { + myG_J[iat] = (P.G[iat] - myG_temp[iat]); + myL_J[iat] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[iat]); + } - // calculation of myG_J which will be used to represent - // \frac{\nabla\psi_{J}}{\psi_{J}} calculation of myL_J will be used to - // represent \frac{\nabla^2\psi_{J}}{\psi_{J}} IMPORTANT NOTE: The - // value of P.L holds \nabla^2 ln[\psi] but we need \frac{\nabla^2 - // \psi}{\psi} and this is what myL_J will hold - for (int iat = 0; iat < (myL_temp.size()); iat++) - { - myG_J[iat] = (P.G[iat] - myG_temp[iat]); - myL_J[iat] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[iat]); + table_method_eval(dlogpsi, dhpsioverpsi, myL_J, myG_J, nel, nmo, + psiCurrent, Coeff, C2node_up, C2node_dn, detValues_up, detValues_dn, + grads_up, grads_dn, lapls_up, lapls_dn, M_up, M_dn, Minv_up, + Minv_dn, B_grad, B_lapl, detData_up, N1, N2, NP1, NP2, lookup_tbl); } - - table_method_eval(dlogpsi, dhpsioverpsi, myL_J, myG_J, nel, nmo, psiCurrent, Coeff, C2node_up, C2node_dn, - detValues_up, detValues_dn, grads_up, grads_dn, lapls_up, lapls_dn, M_up, M_dn, Minv_up, Minv_dn, - B_grad, B_lapl, detData_up, N1, N2, NP1, NP2, lookup_tbl); - } } -template -void RotatedSPOsT::evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - const ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const 
std::vector>& lookup_tbl) +template +void +RotatedSPOsT::evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesType& optvars, Vector& dlogpsi, + const ValueType& psiCurrent, const std::vector& Coeff, + const std::vector& C2node_up, const std::vector& C2node_dn, + const ValueVector& detValues_up, const ValueVector& detValues_dn, + const ValueMatrix& M_up, const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn, + const std::vector& detData_up, + const std::vector>& lookup_tbl) { - bool recalculate(false); - for (int k = 0; k < this->myVars.size(); ++k) - { - int kk = this->myVars.where(k); - if (kk < 0) - continue; - if (optvars.recompute(kk)) - recalculate = true; - } - if (recalculate) - { - const size_t nmo = Phi->getOrbitalSetSize(); - const size_t nel = P.last(0) - P.first(0); + bool recalculate(false); + for (int k = 0; k < this->myVars.size(); ++k) { + int kk = this->myVars.where(k); + if (kk < 0) + continue; + if (optvars.recompute(kk)) + recalculate = true; + } + if (recalculate) { + const size_t nmo = Phi->getOrbitalSetSize(); + const size_t nel = P.last(0) - P.first(0); - table_method_evalWF(dlogpsi, nel, nmo, psiCurrent, Coeff, C2node_up, C2node_dn, detValues_up, detValues_dn, M_up, - M_dn, Minv_up, Minv_dn, detData_up, lookup_tbl); - } + table_method_evalWF(dlogpsi, nel, nmo, psiCurrent, Coeff, C2node_up, + C2node_dn, detValues_up, detValues_dn, M_up, M_dn, Minv_up, Minv_dn, + detData_up, lookup_tbl); + } } -template -void RotatedSPOsT::table_method_eval(Vector& dlogpsi, - Vector& dhpsioverpsi, - const ParticleSet::ParticleLaplacian& myL_J, - const ParticleSet::ParticleGradient& myG_J, - const size_t nel, - const size_t nmo, - const T& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const GradMatrix& grads_up, - const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, - const ValueMatrix& 
lapls_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, - const ValueMatrix& B_lapl, - const std::vector& detData_up, - const size_t N1, - const size_t N2, - const size_t NP1, - const size_t NP2, - const std::vector>& lookup_tbl) +template +void +RotatedSPOsT::table_method_eval(Vector& dlogpsi, Vector& dhpsioverpsi, + const typename ParticleSetT::ParticleLaplacian& myL_J, + const typename ParticleSetT::ParticleGradient& myG_J, const size_t nel, + const size_t nmo, const T& psiCurrent, const std::vector& Coeff, + const std::vector& C2node_up, const std::vector& C2node_dn, + const ValueVector& detValues_up, const ValueVector& detValues_dn, + const GradMatrix& grads_up, const GradMatrix& grads_dn, + const ValueMatrix& lapls_up, const ValueMatrix& lapls_dn, + const ValueMatrix& M_up, const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn, + const GradMatrix& B_grad, const ValueMatrix& B_lapl, + const std::vector& detData_up, const size_t N1, const size_t N2, + const size_t NP1, const size_t NP2, + const std::vector>& lookup_tbl) /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GUIDE TO THE MATICES BEING BUILT ---------------------------------------------- @@ -1045,8 +1029,8 @@ determiant the table method is employed to calculate the contributions to the parameter derivatives (dhpsioverpsi/dlogpsi) loop through unquie determinants - loop through parameters - evaluate contributaion to dlogpsi and dhpsioverpsi + loop through parameters + evaluate contributaion to dlogpsi and dhpsioverpsi \noindent BLAS GUIDE for matrix multiplication of [ alpha * A.B + beta * C = C ] @@ -1062,16 +1046,16 @@ This notation is inspired by http://dx.doi.org/10.1063/1.4948778 \newline \hfill\break $ - A_{i,j}=\phi_j(r_{i}) \\ - T = A^{-1} \widetilde{A} \\ - B_{i,j} =\nabla^2 \phi_{j}(r_i) + \frac{\nabla_{i}J}{J} \cdot \nabla + A_{i,j}=\phi_j(r_{i}) \\ + T = 
A^{-1} \widetilde{A} \\ + B_{i,j} =\nabla^2 \phi_{j}(r_i) + \frac{\nabla_{i}J}{J} \cdot \nabla \phi_{j}(r_{i}) + \frac{\nabla^2_i J}{J} \phi_{j}(r_{i}) \\ - \hat{O_{I}} = \hat{O}D_{I} \\ - D_{I}=det(A_{I}) \newline - \psi_{MS} = \sum_{I=0} C_{I} D_{I\uparrow}D_{I\downarrow} \\ - \Psi_{total} = \psi_{J}\psi_{MS} \\ - \alpha_{I} = P^{T}_{I}TQ_{I} \\ - M_{I} = P^{T}_{I} \widetilde{M} Q_{I} = P^{T}_{I} (A^{-1}\widetilde{B} - + \hat{O_{I}} = \hat{O}D_{I} \\ + D_{I}=det(A_{I}) \newline + \psi_{MS} = \sum_{I=0} C_{I} D_{I\uparrow}D_{I\downarrow} \\ + \Psi_{total} = \psi_{J}\psi_{MS} \\ + \alpha_{I} = P^{T}_{I}TQ_{I} \\ + M_{I} = P^{T}_{I} \widetilde{M} Q_{I} = P^{T}_{I} (A^{-1}\widetilde{B} - A^{-1} B A^{-1}\widetilde{A} )Q_{I} \\ $ \newline @@ -1090,10 +1074,10 @@ Tr[\alpha_{I}^{-1}M_{I}]*det(\alpha_{I}) \\ Below is a translation of the shorthand I use to represent matrices independent of ``excitation matrix". \newline \hfill\break $ - Y_{1} = A^{-1}B \\ - Y_{2} = A^{-1}BA^{-1}\widetilde{A} \\ - Y_{3} = A^{-1}\widetilde{B} \\ - Y_{4} = \widetilde{M} = (A^{-1}\widetilde{B} - A^{-1} B A^{-1}\widetilde{A} + Y_{1} = A^{-1}B \\ + Y_{2} = A^{-1}BA^{-1}\widetilde{A} \\ + Y_{3} = A^{-1}\widetilde{B} \\ + Y_{4} = \widetilde{M} = (A^{-1}\widetilde{B} - A^{-1} B A^{-1}\widetilde{A} )\\ $ \newline @@ -1106,14 +1090,14 @@ reference matrix is always $A_{0}$ and is always the Hartree Fock Matrix. 
\newline \hfill\break $ - Y_{5} = TQ \\ - Y_{6} = (P^{T}TQ)^{-1} = \alpha_{I}^{-1}\\ - Y_{7} = \alpha_{I}^{-1} P^{T} \\ - Y_{11} = \widetilde{M}Q \\ - Y_{23} = P^{T}\widetilde{M}Q \\ - Y_{24} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q \\ - Y_{25} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q\alpha_{I}^{-1} \\ - Y_{26} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q\alpha_{I}^{-1}P^{T}\\ + Y_{5} = TQ \\ + Y_{6} = (P^{T}TQ)^{-1} = \alpha_{I}^{-1}\\ + Y_{7} = \alpha_{I}^{-1} P^{T} \\ + Y_{11} = \widetilde{M}Q \\ + Y_{23} = P^{T}\widetilde{M}Q \\ + Y_{24} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q \\ + Y_{25} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q\alpha_{I}^{-1} \\ + Y_{26} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q\alpha_{I}^{-1}P^{T}\\ $ \newline So far you will notice that I have not included up or down arrows to specify @@ -1125,14 +1109,14 @@ derivatives. Of course the down spin expression can be retrieved by swapping the up and down arrows. I have dubbed any expression with lowercase p prefix as a "precursor" to an expression actually used... 
\newline \hfill\break $ - \dot{C_{I}} = C_{I}*det(A_{I\downarrow})\\ - \ddot{C_{I}} = C_{I}*\hat{O}det(A_{I\downarrow}) \\ - pK1 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) Tr[\alpha_{I}^{-1}M_{I}] + \dot{C_{I}} = C_{I}*det(A_{I\downarrow})\\ + \ddot{C_{I}} = C_{I}*\hat{O}det(A_{I\downarrow}) \\ + pK1 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) Tr[\alpha_{I}^{-1}M_{I}] (Q\alpha_{I}^{-1}P^{T}) \\ - pK2 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}) \\ - pK3 = \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}) \\ - pK4 = \sum_{I=1} \dot{C_{I}} det(A_{I}) (Q\alpha_{I}^{-1}P^{T}) \\ - pK5 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1} M_{I} + pK2 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}) \\ + pK3 = \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}) \\ + pK4 = \sum_{I=1} \dot{C_{I}} det(A_{I}) (Q\alpha_{I}^{-1}P^{T}) \\ + pK5 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1} M_{I} \alpha_{I}^{-1}P^{T}) \\ $ \newline @@ -1140,34 +1124,34 @@ Now these p matrices will be used to make various expressions via BLAS commands. 
\newline \hfill\break $ - K1T = const0^{-1}*pK1.T =const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) + K1T = const0^{-1}*pK1.T =const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) Tr[\alpha_{I}^{-1}M_{I}] (Q\alpha_{I}^{-1}P^{T}T) \\ - TK1T = T.K1T = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) + TK1T = T.K1T = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) Tr[\alpha_{I}^{-1}M_{I}] (TQ\alpha_{I}^{-1}P^{T}T)\\ \\ - K2AiB = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) + K2AiB = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}A^{-1}\widetilde{B})\\ - TK2AiB = T.K2AiB = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) + TK2AiB = T.K2AiB = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (TQ\alpha_{I}^{-1}P^{T}A^{-1}\widetilde{B})\\ - K2XA = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) + K2XA = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}X\widetilde{A})\\ - TK2XA = T.K2XA = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) + TK2XA = T.K2XA = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (TQ\alpha_{I}^{-1}P^{T}X\widetilde{A})\\ \\ - K2T = \frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) + K2T = \frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}T) \\ - TK2T = T.K2T =\frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}} + TK2T = T.K2T =\frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (TQ\alpha_{I}^{-1}P^{T}T) \\ - MK2T = \frac{const0}{const1} Y_{4}.K2T= const0^{-1} \sum_{I=1} \dot{C_{I}} + MK2T = \frac{const0}{const1} Y_{4}.K2T= const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (\widetilde{M}Q\alpha_{I}^{-1}P^{T}T)\\ \\ - K3T = const0^{-1} \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) + K3T = const0^{-1} \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}T) \\ - TK3T = T.K3T = const0^{-1} \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) + TK3T = T.K3T = const0^{-1} \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) 
(TQ\alpha_{I}^{-1}P^{T}T)\\ \\ - K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (Q\alpha_{I}^{-1}P^{T}T) \\ - TK4T = T.K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (TQ\alpha_{I}^{-1}P^{T}T) + K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (Q\alpha_{I}^{-1}P^{T}T) \\ + TK4T = T.K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (TQ\alpha_{I}^{-1}P^{T}T) \\ \\ - K5T = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1} + K5T = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1} M_{I} \alpha_{I}^{-1}P^{T} T) \\ - TK5T = T.K5T = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (T Q\alpha_{I}^{-1} + TK5T = T.K5T = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (T Q\alpha_{I}^{-1} M_{I} \alpha_{I}^{-1}P^{T} T) \\ $ \newline @@ -1187,500 +1171,521 @@ to each element will be called B_bar $ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ { - ValueMatrix Table; - ValueMatrix Bbar; - ValueMatrix Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y11, Y23, Y24, Y25, Y26; - ValueMatrix pK1, K1T, TK1T, pK2, K2AiB, TK2AiB, K2XA, TK2XA, K2T, TK2T, MK2T, pK3, K3T, TK3T, pK5, K5T, TK5T; - - Table.resize(nel, nmo); - - Bbar.resize(nel, nmo); - - Y1.resize(nel, nel); - Y2.resize(nel, nmo); - Y3.resize(nel, nmo); - Y4.resize(nel, nmo); - - pK1.resize(nmo, nel); - K1T.resize(nmo, nmo); - TK1T.resize(nel, nmo); - - pK2.resize(nmo, nel); - K2AiB.resize(nmo, nmo); - TK2AiB.resize(nel, nmo); - K2XA.resize(nmo, nmo); - TK2XA.resize(nel, nmo); - K2T.resize(nmo, nmo); - TK2T.resize(nel, nmo); - MK2T.resize(nel, nmo); - - pK3.resize(nmo, nel); - K3T.resize(nmo, nmo); - TK3T.resize(nel, nmo); - - pK5.resize(nmo, nel); - K5T.resize(nmo, nmo); - TK5T.resize(nel, nmo); - - const int parameters_size(m_act_rot_inds.size()); - const int parameter_start_index(0); - - const size_t num_unique_up_dets(detValues_up.size()); - const size_t num_unique_dn_dets(detValues_dn.size()); - - const T* restrict cptr = Coeff.data(); - const size_t nc = Coeff.size(); - const size_t* restrict upC(C2node_up.data()); - const size_t* restrict 
dnC(C2node_dn.data()); - // B_grad holds the gradient operator - // B_lapl holds the laplacian operator - // B_bar will hold our special O operator - - const int offset1(N1); - const int offset2(N2); - const int NPother(NP2); - - T* T_(Table.data()); - - // possibly replace wit BLAS calls - for (int i = 0; i < nel; i++) - for (int j = 0; j < nmo; j++) - Bbar(i, j) = B_lapl(i, j) + 2 * dot(myG_J[i + offset1], B_grad(i, j)) + myL_J[i + offset1] * M_up(i, j); - - const T* restrict B(Bbar.data()); - const T* restrict A(M_up.data()); - const T* restrict Ainv(Minv_up.data()); - // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF - // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR - // THIS CASE - // The T matrix should be calculated and stored for use - // T = A^{-1} \widetilde A - // REMINDER: that the ValueMatrix "matrix" stores data in a row major order - // and that BLAS commands assume column major - BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, RealType(0.0), T_, nmo); - - BLAS::gemm('N', 'N', nel, nel, nel, T(1.0), B, nmo, Ainv, nel, RealType(0.0), Y1.data(), nel); - BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), T_, nmo, Y1.data(), nel, RealType(0.0), Y2.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), B, nmo, Ainv, nel, RealType(0.0), Y3.data(), nmo); - - // possibly replace with BLAS call - Y4 = Y3 - Y2; - - // Need to create the constants: (Oi, const0, const1, const2)to take - // advantage of minimal BLAS commands; Oi is the special operator applied to - // the slater matrix "A subscript i" from the total CI expansion \hat{O_{i}} - //= \hat{O}D_{i} with D_{i}=det(A_{i}) and Multi-Slater component defined as - //\sum_{i=0} C_{i} D_{i\uparrow}D_{i\downarrow} - std::vector Oi(num_unique_dn_dets); - - for (int index = 0; index < num_unique_dn_dets; index++) - for (int iat = 0; iat < NPother; iat++) - Oi[index] += lapls_dn(index, iat) + 2 * dot(grads_dn(index, iat), myG_J[offset2 + iat]) + - 
myL_J[offset2 + iat] * detValues_dn[index]; - - // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1} - // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow}) const1 = - // C_{0}*\hat{O} det(A_{0\downarrow})+\sum_{i=1} - // C_{i}*\hat{O}det(A_{i\downarrow})* det(\alpha_{i\uparrow}) const2 = - // \sum_{i=1} C_{i}*det(A_{i\downarrow})* - // Tr[\alpha_{i}^{-1}M_{i}]*det(\alpha_{i}) - RealType const0(0.0), const1(0.0), const2(0.0); - for (size_t i = 0; i < nc; ++i) - { - const RealType c = cptr[i]; - const size_t up = upC[i]; - const size_t down = dnC[i]; - - const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]); - const1 += c * Oi[down] * (detValues_up[up] / detValues_up[0]); - } - - std::fill(pK1.begin(), pK1.end(), 0.0); - std::fill(pK2.begin(), pK2.end(), 0.0); - std::fill(pK3.begin(), pK3.end(), 0.0); - std::fill(pK5.begin(), pK5.end(), 0.0); - - // Now we are going to loop through all unique determinants. - // The few lines above are for the reference matrix contribution. 
- // Although I start the loop below from index 0, the loop only performs - // actions when the index is >= 1 the detData object contains all the - // information about the P^T and Q matrices (projection matrices) needed in - // the table method - const int* restrict data_it = detData_up.data(); - for (int index = 0, datum = 0; index < num_unique_up_dets; index++) - { - const int k = data_it[datum]; - - if (k == 0) - { - datum += 3 * k + 1; + ValueMatrix Table; + ValueMatrix Bbar; + ValueMatrix Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y11, Y23, Y24, Y25, Y26; + ValueMatrix pK1, K1T, TK1T, pK2, K2AiB, TK2AiB, K2XA, TK2XA, K2T, TK2T, + MK2T, pK3, K3T, TK3T, pK5, K5T, TK5T; + + Table.resize(nel, nmo); + + Bbar.resize(nel, nmo); + + Y1.resize(nel, nel); + Y2.resize(nel, nmo); + Y3.resize(nel, nmo); + Y4.resize(nel, nmo); + + pK1.resize(nmo, nel); + K1T.resize(nmo, nmo); + TK1T.resize(nel, nmo); + + pK2.resize(nmo, nel); + K2AiB.resize(nmo, nmo); + TK2AiB.resize(nel, nmo); + K2XA.resize(nmo, nmo); + TK2XA.resize(nel, nmo); + K2T.resize(nmo, nmo); + TK2T.resize(nel, nmo); + MK2T.resize(nel, nmo); + + pK3.resize(nmo, nel); + K3T.resize(nmo, nmo); + TK3T.resize(nel, nmo); + + pK5.resize(nmo, nel); + K5T.resize(nmo, nmo); + TK5T.resize(nel, nmo); + + const int parameters_size(m_act_rot_inds.size()); + const int parameter_start_index(0); + + const size_t num_unique_up_dets(detValues_up.size()); + const size_t num_unique_dn_dets(detValues_dn.size()); + + const T* restrict cptr = Coeff.data(); + const size_t nc = Coeff.size(); + const size_t* restrict upC(C2node_up.data()); + const size_t* restrict dnC(C2node_dn.data()); + // B_grad holds the gradient operator + // B_lapl holds the laplacian operator + // B_bar will hold our special O operator + + const int offset1(N1); + const int offset2(N2); + const int NPother(NP2); + + T* T_(Table.data()); + + // possibly replace wit BLAS calls + for (int i = 0; i < nel; i++) + for (int j = 0; j < nmo; j++) + Bbar(i, j) = B_lapl(i, j) + + 2.0 * 
dot(myG_J[i + offset1], B_grad(i, j)) + + myL_J[i + offset1] * M_up(i, j); + + const T* restrict B(Bbar.data()); + const T* restrict A(M_up.data()); + const T* restrict Ainv(Minv_up.data()); + // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF + // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR + // THIS CASE + // The T matrix should be calculated and stored for use + // T = A^{-1} \widetilde A + // REMINDER: that the ValueMatrix "matrix" stores data in a row major order + // and that BLAS commands assume column major + BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, + RealType(0.0), T_, nmo); + + BLAS::gemm('N', 'N', nel, nel, nel, T(1.0), B, nmo, Ainv, nel, + RealType(0.0), Y1.data(), nel); + BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), T_, nmo, Y1.data(), nel, + RealType(0.0), Y2.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), B, nmo, Ainv, nel, + RealType(0.0), Y3.data(), nmo); + + // possibly replace with BLAS call + Y4 = Y3 - Y2; + + // Need to create the constants: (Oi, const0, const1, const2)to take + // advantage of minimal BLAS commands; Oi is the special operator applied to + // the slater matrix "A subscript i" from the total CI expansion \hat{O_{i}} + //= \hat{O}D_{i} with D_{i}=det(A_{i}) and Multi-Slater component defined as + //\sum_{i=0} C_{i} D_{i\uparrow}D_{i\downarrow} + std::vector Oi(num_unique_dn_dets); + + for (int index = 0; index < num_unique_dn_dets; index++) + for (int iat = 0; iat < NPother; iat++) + Oi[index] += lapls_dn(index, iat) + + 2.0 * dot(grads_dn(index, iat), myG_J[offset2 + iat]) + + myL_J[offset2 + iat] * detValues_dn[index]; + + // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1} + // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow}) const1 = + // C_{0}*\hat{O} det(A_{0\downarrow})+\sum_{i=1} + // C_{i}*\hat{O}det(A_{i\downarrow})* det(\alpha_{i\uparrow}) const2 = + // \sum_{i=1} C_{i}*det(A_{i\downarrow})* + // 
Tr[\alpha_{i}^{-1}M_{i}]*det(\alpha_{i}) + RealType const0(0.0), const1(0.0), const2(0.0); + for (size_t i = 0; i < nc; ++i) { + const RealType c = cptr[i]; + const size_t up = upC[i]; + const size_t down = dnC[i]; + + const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]); + const1 += c * Oi[down] * (detValues_up[up] / detValues_up[0]); } - else - { - // Number of rows and cols of P^T - const int prows = k; - const int pcols = nel; - // Number of rows and cols of Q - const int qrows = nmo; - const int qcols = k; - - Y5.resize(nel, k); - Y6.resize(k, k); - - // Any matrix multiplication of P^T or Q is simply a projection - // Explicit matrix multiplication can be avoided; instead column or - // row copying can be done BlAS::copy(size of col/row being copied, - // Matrix pointer + place to begin copying, - // storage spacing (number of elements btw next row/col - // element), Pointer to resultant matrix + place to begin - // pasting, storage spacing of resultant matrix) - // For example the next 4 lines is the matrix multiplication of T*Q - // = Y5 - std::fill(Y5.begin(), Y5.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo, Y5.data() + i, k); - } - - std::fill(Y6.begin(), Y6.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, (Y6.data() + i * k), 1); - } - - Vector WS; - Vector Piv; - WS.resize(k); - Piv.resize(k); - std::complex logdet = 0.0; - InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet); - - Y11.resize(nel, k); - Y23.resize(k, k); - Y24.resize(k, k); - Y25.resize(k, k); - Y26.resize(k, nel); - - std::fill(Y11.begin(), Y11.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(nel, Y4.data() + (data_it[datum + 1 + k + i]), nmo, Y11.data() + i, k); - } - - std::fill(Y23.begin(), Y23.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(k, Y11.data() + (data_it[datum + 1 + i]) * k, 1, (Y23.data() + i * k), 1); - } 
- - BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y23.data(), k, Y6.data(), k, RealType(0.0), Y24.data(), k); - BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y6.data(), k, Y24.data(), k, RealType(0.0), Y25.data(), k); - - Y26.resize(k, nel); - - std::fill(Y26.begin(), Y26.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(k, Y25.data() + i, k, Y26.data() + (data_it[datum + 1 + i]), nel); - } - - Y7.resize(k, nel); - - std::fill(Y7.begin(), Y7.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(k, Y6.data() + i, k, Y7.data() + (data_it[datum + 1 + i]), nel); - } - - // c_Tr_AlphaI_MI is a constant contributing to constant const2 - // c_Tr_AlphaI_MI = Tr[\alpha_{I}^{-1}(P^{T}\widetilde{M} Q)] - RealType c_Tr_AlphaI_MI = 0.0; - for (int i = 0; i < k; i++) - { - c_Tr_AlphaI_MI += Y24(i, i); - } - - for (int p = 0; p < lookup_tbl[index].size(); p++) - { - // el_p is the element position that contains information about - // the CI coefficient, and det up/dn values associated with the - // current unique determinant - const int el_p(lookup_tbl[index][p]); - const RealType c = cptr[el_p]; - const size_t up = upC[el_p]; - const size_t down = dnC[el_p]; - - const RealType alpha_1(c * detValues_dn[down] * detValues_up[up] / detValues_up[0] * c_Tr_AlphaI_MI); - const RealType alpha_2(c * detValues_dn[down] * detValues_up[up] / detValues_up[0]); - const RealType alpha_3(c * Oi[down] * detValues_up[up] / detValues_up[0]); - - const2 += alpha_1; - - for (int i = 0; i < k; i++) - { - BLAS::axpy(nel, alpha_1, Y7.data() + i * nel, 1, pK1.data() + (data_it[datum + 1 + k + i]) * nel, 1); - BLAS::axpy(nel, alpha_2, Y7.data() + i * nel, 1, pK2.data() + (data_it[datum + 1 + k + i]) * nel, 1); - BLAS::axpy(nel, alpha_3, Y7.data() + i * nel, 1, pK3.data() + (data_it[datum + 1 + k + i]) * nel, 1); - BLAS::axpy(nel, alpha_2, Y26.data() + i * nel, 1, pK5.data() + (data_it[datum + 1 + k + i]) * nel, 1); + std::fill(pK1.begin(), pK1.end(), 0.0); + std::fill(pK2.begin(), 
pK2.end(), 0.0); + std::fill(pK3.begin(), pK3.end(), 0.0); + std::fill(pK5.begin(), pK5.end(), 0.0); + + // Now we are going to loop through all unique determinants. + // The few lines above are for the reference matrix contribution. + // Although I start the loop below from index 0, the loop only performs + // actions when the index is >= 1 the detData object contains all the + // information about the P^T and Q matrices (projection matrices) needed in + // the table method + const int* restrict data_it = detData_up.data(); + for (int index = 0, datum = 0; index < num_unique_up_dets; index++) { + const int k = data_it[datum]; + + if (k == 0) { + datum += 3 * k + 1; + } + + else { + // Number of rows and cols of P^T + const int prows = k; + const int pcols = nel; + // Number of rows and cols of Q + const int qrows = nmo; + const int qcols = k; + + Y5.resize(nel, k); + Y6.resize(k, k); + + // Any matrix multiplication of P^T or Q is simply a projection + // Explicit matrix multiplication can be avoided; instead column or + // row copying can be done BlAS::copy(size of col/row being copied, + // Matrix pointer + place to begin copying, + // storage spacing (number of elements btw next row/col + // element), Pointer to resultant matrix + place to begin + // pasting, storage spacing of resultant matrix) + // For example the next 4 lines is the matrix multiplication of T*Q + // = Y5 + std::fill(Y5.begin(), Y5.end(), 0.0); + for (int i = 0; i < k; i++) { + BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo, + Y5.data() + i, k); + } + + std::fill(Y6.begin(), Y6.end(), 0.0); + for (int i = 0; i < k; i++) { + BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, + (Y6.data() + i * k), 1); + } + + Vector WS; + Vector Piv; + WS.resize(k); + Piv.resize(k); + std::complex logdet = 0.0; + InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet); + + Y11.resize(nel, k); + Y23.resize(k, k); + Y24.resize(k, k); + Y25.resize(k, k); + Y26.resize(k, nel); + + 
std::fill(Y11.begin(), Y11.end(), 0.0); + for (int i = 0; i < k; i++) { + BLAS::copy(nel, Y4.data() + (data_it[datum + 1 + k + i]), nmo, + Y11.data() + i, k); + } + + std::fill(Y23.begin(), Y23.end(), 0.0); + for (int i = 0; i < k; i++) { + BLAS::copy(k, Y11.data() + (data_it[datum + 1 + i]) * k, 1, + (Y23.data() + i * k), 1); + } + + BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y23.data(), k, + Y6.data(), k, RealType(0.0), Y24.data(), k); + BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y6.data(), k, + Y24.data(), k, RealType(0.0), Y25.data(), k); + + Y26.resize(k, nel); + + std::fill(Y26.begin(), Y26.end(), 0.0); + for (int i = 0; i < k; i++) { + BLAS::copy(k, Y25.data() + i, k, + Y26.data() + (data_it[datum + 1 + i]), nel); + } + + Y7.resize(k, nel); + + std::fill(Y7.begin(), Y7.end(), 0.0); + for (int i = 0; i < k; i++) { + BLAS::copy(k, Y6.data() + i, k, + Y7.data() + (data_it[datum + 1 + i]), nel); + } + + // c_Tr_AlphaI_MI is a constant contributing to constant const2 + // c_Tr_AlphaI_MI = Tr[\alpha_{I}^{-1}(P^{T}\widetilde{M} Q)] + RealType c_Tr_AlphaI_MI = 0.0; + for (int i = 0; i < k; i++) { + c_Tr_AlphaI_MI += Y24(i, i); + } + + for (int p = 0; p < lookup_tbl[index].size(); p++) { + // el_p is the element position that contains information about + // the CI coefficient, and det up/dn values associated with the + // current unique determinant + const int el_p(lookup_tbl[index][p]); + const RealType c = cptr[el_p]; + const size_t up = upC[el_p]; + const size_t down = dnC[el_p]; + + const RealType alpha_1(c * detValues_dn[down] * + detValues_up[up] / detValues_up[0] * c_Tr_AlphaI_MI); + const RealType alpha_2(c * detValues_dn[down] * + detValues_up[up] / detValues_up[0]); + const RealType alpha_3( + c * Oi[down] * detValues_up[up] / detValues_up[0]); + + const2 += alpha_1; + + for (int i = 0; i < k; i++) { + BLAS::axpy(nel, alpha_1, Y7.data() + i * nel, 1, + pK1.data() + (data_it[datum + 1 + k + i]) * nel, 1); + BLAS::axpy(nel, alpha_2, Y7.data() + i * nel, 
1, + pK2.data() + (data_it[datum + 1 + k + i]) * nel, 1); + BLAS::axpy(nel, alpha_3, Y7.data() + i * nel, 1, + pK3.data() + (data_it[datum + 1 + k + i]) * nel, 1); + BLAS::axpy(nel, alpha_2, Y26.data() + i * nel, 1, + pK5.data() + (data_it[datum + 1 + k + i]) * nel, 1); + } + } + datum += 3 * k + 1; } - } - datum += 3 * k + 1; } - } - - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK1.data(), nel, RealType(0.0), K1T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K1T.data(), nmo, T_, nmo, RealType(0.0), TK1T.data(), nmo); - - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y3.data(), nmo, pK2.data(), nel, RealType(0.0), K2AiB.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2AiB.data(), nmo, T_, nmo, RealType(0.0), TK2AiB.data(), nmo); - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y2.data(), nmo, pK2.data(), nel, RealType(0.0), K2XA.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2XA.data(), nmo, T_, nmo, RealType(0.0), TK2XA.data(), nmo); - - BLAS::gemm('N', 'N', nmo, nmo, nel, const1 / (const0 * const0), T_, nmo, pK2.data(), nel, RealType(0.0), K2T.data(), - nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2T.data(), nmo, T_, nmo, RealType(0.0), TK2T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, const0 / const1, K2T.data(), nmo, Y4.data(), nmo, RealType(0.0), MK2T.data(), - nmo); - - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK3.data(), nel, RealType(0.0), K3T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K3T.data(), nmo, T_, nmo, RealType(0.0), TK3T.data(), nmo); - - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK5.data(), nel, RealType(0.0), K5T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K5T.data(), nmo, T_, nmo, RealType(0.0), TK5T.data(), nmo); - - for (int mu = 0, k = parameter_start_index; k < (parameter_start_index + parameters_size); k++, mu++) - { - int kk = this->myVars.where(k); - 
if (kk >= 0) - { - const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second); - if (i <= nel - 1 && j > nel - 1) - { - dhpsioverpsi[kk] += - T(-0.5 * Y4(i, j) - - 0.5 * - (-K5T(i, j) + K5T(j, i) + TK5T(i, j) + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) - K2XA(i, j) + - K2XA(j, i) + TK2XA(i, j) - MK2T(i, j) + K1T(i, j) - K1T(j, i) - TK1T(i, j) - - const2 / const1 * K2T(i, j) + const2 / const1 * K2T(j, i) + const2 / const1 * TK2T(i, j) + - K3T(i, j) - K3T(j, i) - TK3T(i, j) - K2T(i, j) + K2T(j, i) + TK2T(i, j))); - } - else if (i <= nel - 1 && j <= nel - 1) - { - dhpsioverpsi[kk] += - T(-0.5 * (Y4(i, j) - Y4(j, i)) - - 0.5 * - (-K5T(i, j) + K5T(j, i) + TK5T(i, j) - TK5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) + - TK2AiB(j, i) - K2XA(i, j) + K2XA(j, i) + TK2XA(i, j) - TK2XA(j, i) - MK2T(i, j) + MK2T(j, i) + - K1T(i, j) - K1T(j, i) - TK1T(i, j) + TK1T(j, i) - const2 / const1 * K2T(i, j) + - const2 / const1 * K2T(j, i) + const2 / const1 * TK2T(i, j) - const2 / const1 * TK2T(j, i) + - K3T(i, j) - K3T(j, i) - TK3T(i, j) + TK3T(j, i) - K2T(i, j) + K2T(j, i) + TK2T(i, j) - TK2T(j, i))); - } - else - { - dhpsioverpsi[kk] += T(-0.5 * - (-K5T(i, j) + K5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - K2XA(i, j) + K2XA(j, i) - - + K1T(i, j) - K1T(j, i) - const2 / const1 * K2T(i, j) + const2 / const1 * K2T(j, i) + - K3T(i, j) - K3T(j, i) - K2T(i, j) + K2T(j, i))); - } + + BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK1.data(), nel, + RealType(0.0), K1T.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K1T.data(), nmo, T_, nmo, + RealType(0.0), TK1T.data(), nmo); + + BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y3.data(), nmo, + pK2.data(), nel, RealType(0.0), K2AiB.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2AiB.data(), nmo, T_, + nmo, RealType(0.0), TK2AiB.data(), nmo); + BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y2.data(), nmo, + pK2.data(), nel, RealType(0.0), K2XA.data(), nmo); + BLAS::gemm('N', 'N', 
nmo, nel, nmo, RealType(1.0), K2XA.data(), nmo, T_, + nmo, RealType(0.0), TK2XA.data(), nmo); + + BLAS::gemm('N', 'N', nmo, nmo, nel, const1 / (const0 * const0), T_, nmo, + pK2.data(), nel, RealType(0.0), K2T.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2T.data(), nmo, T_, nmo, + RealType(0.0), TK2T.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nmo, const0 / const1, K2T.data(), nmo, + Y4.data(), nmo, RealType(0.0), MK2T.data(), nmo); + + BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK3.data(), nel, + RealType(0.0), K3T.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K3T.data(), nmo, T_, nmo, + RealType(0.0), TK3T.data(), nmo); + + BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK5.data(), nel, + RealType(0.0), K5T.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K5T.data(), nmo, T_, nmo, + RealType(0.0), TK5T.data(), nmo); + + for (int mu = 0, k = parameter_start_index; + k < (parameter_start_index + parameters_size); k++, mu++) { + int kk = this->myVars.where(k); + if (kk >= 0) { + const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second); + if (i <= nel - 1 && j > nel - 1) { + dhpsioverpsi[kk] += T(-0.5 * Y4(i, j) - + 0.5 * + (-K5T(i, j) + K5T(j, i) + TK5T(i, j) + K2AiB(i, j) - + K2AiB(j, i) - TK2AiB(i, j) - K2XA(i, j) + + K2XA(j, i) + TK2XA(i, j) - MK2T(i, j) + K1T(i, j) - + K1T(j, i) - TK1T(i, j) - + const2 / const1 * K2T(i, j) + + const2 / const1 * K2T(j, i) + + const2 / const1 * TK2T(i, j) + K3T(i, j) - + K3T(j, i) - TK3T(i, j) - K2T(i, j) + K2T(j, i) + + TK2T(i, j))); + } + else if (i <= nel - 1 && j <= nel - 1) { + dhpsioverpsi[kk] += T(-0.5 * (Y4(i, j) - Y4(j, i)) - + 0.5 * + (-K5T(i, j) + K5T(j, i) + TK5T(i, j) - TK5T(j, i) + + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) + + TK2AiB(j, i) - K2XA(i, j) + K2XA(j, i) + + TK2XA(i, j) - TK2XA(j, i) - MK2T(i, j) + + MK2T(j, i) + K1T(i, j) - K1T(j, i) - TK1T(i, j) + + TK1T(j, i) - const2 / const1 * K2T(i, j) + + const2 / 
const1 * K2T(j, i) + + const2 / const1 * TK2T(i, j) - + const2 / const1 * TK2T(j, i) + K3T(i, j) - + K3T(j, i) - TK3T(i, j) + TK3T(j, i) - K2T(i, j) + + K2T(j, i) + TK2T(i, j) - TK2T(j, i))); + } + else { + dhpsioverpsi[kk] += T(-0.5 * + (-K5T(i, j) + K5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - + K2XA(i, j) + K2XA(j, i) + + + K1T(i, j) - K1T(j, i) - const2 / const1 * K2T(i, j) + + const2 / const1 * K2T(j, i) + K3T(i, j) - K3T(j, i) - + K2T(i, j) + K2T(j, i))); + } + } } - } } -template -void RotatedSPOsT::table_method_evalWF(Vector& dlogpsi, - const size_t nel, - const size_t nmo, - const T& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl) +template +void +RotatedSPOsT::table_method_evalWF(Vector& dlogpsi, const size_t nel, + const size_t nmo, const T& psiCurrent, const std::vector& Coeff, + const std::vector& C2node_up, const std::vector& C2node_dn, + const ValueVector& detValues_up, const ValueVector& detValues_dn, + const ValueMatrix& M_up, const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn, + const std::vector& detData_up, + const std::vector>& lookup_tbl) { - ValueMatrix Table; - ValueMatrix Y5, Y6, Y7; - ValueMatrix pK4, K4T, TK4T; - - Table.resize(nel, nmo); - - Bbar.resize(nel, nmo); - - pK4.resize(nmo, nel); - K4T.resize(nmo, nmo); - TK4T.resize(nel, nmo); - - const int parameters_size(m_act_rot_inds.size()); - const int parameter_start_index(0); - - const size_t num_unique_up_dets(detValues_up.size()); - const size_t num_unique_dn_dets(detValues_dn.size()); - - const T* restrict cptr = Coeff.data(); - const size_t nc = Coeff.size(); - const size_t* restrict upC(C2node_up.data()); - const size_t* restrict dnC(C2node_dn.data()); - 
- T* T_(Table.data()); - - const T* restrict A(M_up.data()); - const T* restrict Ainv(Minv_up.data()); - // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF - // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR - // THIS CASE - // The T matrix should be calculated and stored for use - // T = A^{-1} \widetilde A - // REMINDER: that the ValueMatrix "matrix" stores data in a row major order - // and that BLAS commands assume column major - BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), A, nmo, Ainv, nel, RealType(0.0), T_, nmo); - - // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1} - // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow}) - RealType const0(0.0), const1(0.0), const2(0.0); - for (size_t i = 0; i < nc; ++i) - { - const RealType c = cptr[i]; - const size_t up = upC[i]; - const size_t down = dnC[i]; - - const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]); - } - - std::fill(pK4.begin(), pK4.end(), 0.0); - - // Now we are going to loop through all unique determinants. - // The few lines above are for the reference matrix contribution. 
- // Although I start the loop below from index 0, the loop only performs - // actions when the index is >= 1 the detData object contains all the - // information about the P^T and Q matrices (projection matrices) needed in - // the table method - const int* restrict data_it = detData_up.data(); - for (int index = 0, datum = 0; index < num_unique_up_dets; index++) - { - const int k = data_it[datum]; - - if (k == 0) - { - datum += 3 * k + 1; + ValueMatrix Table; + ValueMatrix Y5, Y6, Y7; + ValueMatrix pK4, K4T, TK4T; + + Table.resize(nel, nmo); + + Bbar.resize(nel, nmo); + + pK4.resize(nmo, nel); + K4T.resize(nmo, nmo); + TK4T.resize(nel, nmo); + + const int parameters_size(m_act_rot_inds.size()); + const int parameter_start_index(0); + + const size_t num_unique_up_dets(detValues_up.size()); + const size_t num_unique_dn_dets(detValues_dn.size()); + + const T* restrict cptr = Coeff.data(); + const size_t nc = Coeff.size(); + const size_t* restrict upC(C2node_up.data()); + const size_t* restrict dnC(C2node_dn.data()); + + T* T_(Table.data()); + + const T* restrict A(M_up.data()); + const T* restrict Ainv(Minv_up.data()); + // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF + // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. 
NEED A FIX FOR + // THIS CASE + // The T matrix should be calculated and stored for use + // T = A^{-1} \widetilde A + // REMINDER: that the ValueMatrix "matrix" stores data in a row major order + // and that BLAS commands assume column major + BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), A, nmo, Ainv, nel, + RealType(0.0), T_, nmo); + + // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1} + // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow}) + RealType const0(0.0), const1(0.0), const2(0.0); + for (size_t i = 0; i < nc; ++i) { + const RealType c = cptr[i]; + const size_t up = upC[i]; + const size_t down = dnC[i]; + + const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]); } - else - { - // Number of rows and cols of P^T - const int prows = k; - const int pcols = nel; - // Number of rows and cols of Q - const int qrows = nmo; - const int qcols = k; - - Y5.resize(nel, k); - Y6.resize(k, k); - - // Any matrix multiplication of P^T or Q is simply a projection - // Explicit matrix multiplication can be avoided; instead column or - // row copying can be done BlAS::copy(size of col/row being copied, - // Matrix pointer + place to begin copying, - // storage spacing (number of elements btw next row/col - // element), Pointer to resultant matrix + place to begin - // pasting, storage spacing of resultant matrix) - // For example the next 4 lines is the matrix multiplication of T*Q - // = Y5 - std::fill(Y5.begin(), Y5.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo, Y5.data() + i, k); - } - - std::fill(Y6.begin(), Y6.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, (Y6.data() + i * k), 1); - } - - Vector WS; - Vector Piv; - WS.resize(k); - Piv.resize(k); - std::complex logdet = 0.0; - InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet); - - Y7.resize(k, nel); - - std::fill(Y7.begin(), Y7.end(), 0.0); - for (int i = 0; i < k; i++) - 
{ - BLAS::copy(k, Y6.data() + i, k, Y7.data() + (data_it[datum + 1 + i]), nel); - } - - for (int p = 0; p < lookup_tbl[index].size(); p++) - { - // el_p is the element position that contains information about - // the CI coefficient, and det up/dn values associated with the - // current unique determinant - const int el_p(lookup_tbl[index][p]); - const RealType c = cptr[el_p]; - const size_t up = upC[el_p]; - const size_t down = dnC[el_p]; - - const RealType alpha_4(c * detValues_dn[down] * detValues_up[up] * (1 / psiCurrent)); - - for (int i = 0; i < k; i++) - { - BLAS::axpy(nel, alpha_4, Y7.data() + i * nel, 1, pK4.data() + (data_it[datum + 1 + k + i]) * nel, 1); + std::fill(pK4.begin(), pK4.end(), 0.0); + + // Now we are going to loop through all unique determinants. + // The few lines above are for the reference matrix contribution. + // Although I start the loop below from index 0, the loop only performs + // actions when the index is >= 1 the detData object contains all the + // information about the P^T and Q matrices (projection matrices) needed in + // the table method + const int* restrict data_it = detData_up.data(); + for (int index = 0, datum = 0; index < num_unique_up_dets; index++) { + const int k = data_it[datum]; + + if (k == 0) { + datum += 3 * k + 1; + } + + else { + // Number of rows and cols of P^T + const int prows = k; + const int pcols = nel; + // Number of rows and cols of Q + const int qrows = nmo; + const int qcols = k; + + Y5.resize(nel, k); + Y6.resize(k, k); + + // Any matrix multiplication of P^T or Q is simply a projection + // Explicit matrix multiplication can be avoided; instead column or + // row copying can be done BlAS::copy(size of col/row being copied, + // Matrix pointer + place to begin copying, + // storage spacing (number of elements btw next row/col + // element), Pointer to resultant matrix + place to begin + // pasting, storage spacing of resultant matrix) + // For example the next 4 lines is the matrix multiplication 
of T*Q + // = Y5 + std::fill(Y5.begin(), Y5.end(), 0.0); + for (int i = 0; i < k; i++) { + BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo, + Y5.data() + i, k); + } + + std::fill(Y6.begin(), Y6.end(), 0.0); + for (int i = 0; i < k; i++) { + BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, + (Y6.data() + i * k), 1); + } + + Vector WS; + Vector Piv; + WS.resize(k); + Piv.resize(k); + std::complex logdet = 0.0; + InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet); + + Y7.resize(k, nel); + + std::fill(Y7.begin(), Y7.end(), 0.0); + for (int i = 0; i < k; i++) { + BLAS::copy(k, Y6.data() + i, k, + Y7.data() + (data_it[datum + 1 + i]), nel); + } + + for (int p = 0; p < lookup_tbl[index].size(); p++) { + // el_p is the element position that contains information about + // the CI coefficient, and det up/dn values associated with the + // current unique determinant + const int el_p(lookup_tbl[index][p]); + const RealType c = cptr[el_p]; + const size_t up = upC[el_p]; + const size_t down = dnC[el_p]; + + const RealType alpha_4(c * detValues_dn[down] * + detValues_up[up] * (1 / psiCurrent)); + + for (int i = 0; i < k; i++) { + BLAS::axpy(nel, alpha_4, Y7.data() + i * nel, 1, + pK4.data() + (data_it[datum + 1 + k + i]) * nel, 1); + } + } + datum += 3 * k + 1; } - } - datum += 3 * k + 1; } - } - - BLAS::gemm('N', 'N', nmo, nmo, nel, RealType(1.0), T_, nmo, pK4.data(), nel, RealType(0.0), K4T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K4T.data(), nmo, T_, nmo, RealType(0.0), TK4T.data(), nmo); - - for (int mu = 0, k = parameter_start_index; k < (parameter_start_index + parameters_size); k++, mu++) - { - int kk = this->myVars.where(k); - if (kk >= 0) - { - const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second); - if (i <= nel - 1 && j > nel - 1) - { - dlogpsi[kk] += - T(detValues_up[0] * (Table(i, j)) * const0 * (1 / psiCurrent) + (K4T(i, j) - K4T(j, i) - TK4T(i, j))); - } - else if (i <= nel - 1 && j <= nel - 1) - { - 
dlogpsi[kk] += T(detValues_up[0] * (Table(i, j) - Table(j, i)) * const0 * (1 / psiCurrent) + - (K4T(i, j) - TK4T(i, j) - K4T(j, i) + TK4T(j, i))); - } - else - { - dlogpsi[kk] += T((K4T(i, j) - K4T(j, i))); - } + + BLAS::gemm('N', 'N', nmo, nmo, nel, RealType(1.0), T_, nmo, pK4.data(), nel, + RealType(0.0), K4T.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K4T.data(), nmo, T_, nmo, + RealType(0.0), TK4T.data(), nmo); + + for (int mu = 0, k = parameter_start_index; + k < (parameter_start_index + parameters_size); k++, mu++) { + int kk = this->myVars.where(k); + if (kk >= 0) { + const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second); + if (i <= nel - 1 && j > nel - 1) { + dlogpsi[kk] += T(detValues_up[0] * (Table(i, j)) * const0 * + (1 / psiCurrent) + + (K4T(i, j) - K4T(j, i) - TK4T(i, j))); + } + else if (i <= nel - 1 && j <= nel - 1) { + dlogpsi[kk] += T(detValues_up[0] * (Table(i, j) - Table(j, i)) * + const0 * (1 / psiCurrent) + + (K4T(i, j) - TK4T(i, j) - K4T(j, i) + TK4T(j, i))); + } + else { + dlogpsi[kk] += T((K4T(i, j) - K4T(j, i))); + } + } } - } } -template -std::unique_ptr> RotatedSPOsT::makeClone() const +template +std::unique_ptr> +RotatedSPOsT::makeClone() const { - auto myclone = std::make_unique(SPOSetT::getName(), std::unique_ptr>(Phi->makeClone())); - - myclone->params = this->params; - myclone->params_supplied = this->params_supplied; - myclone->m_act_rot_inds = this->m_act_rot_inds; - myclone->m_full_rot_inds = this->m_full_rot_inds; - myclone->myVars = this->myVars; - myclone->myVarsFull = this->myVarsFull; - myclone->history_params_ = this->history_params_; - myclone->use_global_rot_ = this->use_global_rot_; - return myclone; + auto myclone = std::make_unique( + SPOSetT::getName(), std::unique_ptr>(Phi->makeClone())); + + myclone->params = this->params; + myclone->params_supplied = this->params_supplied; + myclone->m_act_rot_inds = this->m_act_rot_inds; + myclone->m_full_rot_inds = this->m_full_rot_inds; + 
myclone->myVars = this->myVars; + myclone->myVarsFull = this->myVarsFull; + myclone->history_params_ = this->history_params_; + myclone->use_global_rot_ = this->use_global_rot_; + return myclone; } // Class concrete types from ValueType diff --git a/src/QMCWaveFunctions/RotatedSPOsT.h b/src/QMCWaveFunctions/RotatedSPOsT.h index 77daf7fd92..971d2528b3 100644 --- a/src/QMCWaveFunctions/RotatedSPOsT.h +++ b/src/QMCWaveFunctions/RotatedSPOsT.h @@ -19,401 +19,410 @@ namespace qmcplusplus { -template +template class RotatedSPOsT; namespace testing { -opt_variables_type& getMyVarsFull(RotatedSPOsT& rot); -opt_variables_type& getMyVarsFull(RotatedSPOsT& rot); -std::vector>& getHistoryParams(RotatedSPOsT& rot); -std::vector>& getHistoryParams(RotatedSPOsT& rot); +OptVariablesType& +getMyVarsFull(RotatedSPOsT& rot); +OptVariablesType& +getMyVarsFull(RotatedSPOsT& rot); +std::vector>& +getHistoryParams(RotatedSPOsT& rot); +std::vector>& +getHistoryParams(RotatedSPOsT& rot); } // namespace testing -template -class RotatedSPOsT : public SPOSetT, public OptimizableObject +template +class RotatedSPOsT : public SPOSetT, public OptimizableObjectT { public: - using IndexType = typename SPOSetT::IndexType; - using RealType = typename SPOSetT::RealType; - using ValueType = typename SPOSetT::ValueType; - using FullRealType = typename SPOSetT::FullRealType; - using ValueVector = typename SPOSetT::ValueVector; - using ValueMatrix = typename SPOSetT::ValueMatrix; - using GradVector = typename SPOSetT::GradVector; - using GradMatrix = typename SPOSetT::GradMatrix; - using HessVector = typename SPOSetT::HessVector; - using HessMatrix = typename SPOSetT::HessMatrix; - using GGGVector = typename SPOSetT::GGGVector; - using GGGMatrix = typename SPOSetT::GGGMatrix; - - // constructor - RotatedSPOsT(const std::string& my_name, std::unique_ptr>&& spos); - // destructor - ~RotatedSPOsT() override; - - std::string getClassName() const override { return "RotatedSPOsT"; } - bool isOptimizable() const 
override { return true; } - bool isOMPoffload() const override { return Phi->isOMPoffload(); } - bool hasIonDerivs() const override { return Phi->hasIonDerivs(); } - - // Vector of rotation matrix indices - using RotationIndices = std::vector>; - - // Active orbital rotation parameter indices - RotationIndices m_act_rot_inds; - - // Full set of rotation values for global rotation - RotationIndices m_full_rot_inds; - - // Construct a list of the matrix indices for non-zero rotation parameters. - // (The structure for a sparse representation of the matrix) - // Only core->active rotations are created. - static void createRotationIndices(int nel, int nmo, RotationIndices& rot_indices); - - // Construct a list for all the matrix indices, including core->active, - // core->core and active->active - static void createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices); - - // Fill in antisymmetric matrix from the list of rotation parameter indices - // and a list of parameter values. - // This function assumes rot_mat is properly sized upon input and is set to - // zero. - static void constructAntiSymmetricMatrix(const RotationIndices& rot_indices, - const std::vector& param, - ValueMatrix& rot_mat); - - // Extract the list of rotation parameters from the entries in an - // antisymmetric matrix This function expects rot_indices and param are the - // same length. - static void extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices, - const ValueMatrix& rot_mat, - std::vector& param); - - // function to perform orbital rotations - void apply_rotation(const std::vector& param, bool use_stored_copy); - - // For global rotation, inputs are the old parameters and the delta - // parameters. The corresponding rotation matrices are constructed, - // multiplied together, and the new parameters extracted. 
The new rotation - // is applied to the underlying SPO coefficients - void applyDeltaRotation(const std::vector& delta_param, - const std::vector& old_param, - std::vector& new_param); - - // Perform the construction of matrices and extraction of parameters for a - // delta rotation. Split out and made static for testing. - static void constructDeltaRotation(const std::vector& delta_param, - const std::vector& old_param, - const RotationIndices& act_rot_inds, - const RotationIndices& full_rot_inds, - std::vector& new_param, - ValueMatrix& new_rot_mat); - - // When initializing the rotation from VP files - // This function applies the rotation history - void applyRotationHistory(); - - // This function applies the global rotation (similar to apply_rotation, but - // for the full set of rotation parameters) - void applyFullRotation(const std::vector& full_param, bool use_stored_copy); - - // Compute matrix exponential of an antisymmetric matrix (result is rotation - // matrix) - static void exponentiate_antisym_matrix(ValueMatrix& mat); - - // Compute matrix log of rotation matrix to produce antisymmetric matrix - static void log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output); - - // A particular SPOSet used for Orbitals - std::unique_ptr> Phi; - - /// Set the rotation parameters (usually from input file) - void setRotationParameters(const std::vector& param_list); - - /// the number of electrons of the majority spin - size_t nel_major_; - - std::unique_ptr> makeClone() const override; - - // myG_temp (myL_temp) is the Gradient (Laplacian) value of of the - // Determinant part of the wfn myG_J is the Gradient of the all other parts - // of the wavefunction (typically just the Jastrow). - // It represents \frac{\nabla\psi_{J}}{\psi_{J}} - // myL_J will be used to represent \frac{\nabla^2\psi_{J}}{\psi_{J}} . 
The - // Laplacian portion IMPORTANT NOTE: The value of P.L holds \nabla^2 - // ln[\psi] but we need \frac{\nabla^2 \psi}{\psi} and this is what myL_J - // will hold - ParticleSet::ParticleGradient myG_temp, myG_J; - ParticleSet::ParticleLaplacian myL_temp, myL_J; - - ValueMatrix Bbar; - ValueMatrix psiM_inv; - ValueMatrix psiM_all; - GradMatrix dpsiM_all; - ValueMatrix d2psiM_all; - - // Single Slater creation - void buildOptVariables(size_t nel); - - // For the MSD case rotations must be created in MultiSlaterDetTableMethod - // class - void buildOptVariables(const RotationIndices& rotations, const RotationIndices& full_rotations); - - void evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const int& FirstIndex, - const int& LastIndex) override; - - void evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - int FirstIndex, - int LastIndex) override; - - void evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const T& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const GradMatrix& grads_up, - const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, - const ValueMatrix& B_lapl, - const std::vector& detData_up, - const size_t N1, - const size_t N2, - const size_t NP1, - const size_t NP2, - const std::vector>& lookup_tbl) override; - - void evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - const ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - 
const ValueVector& detValues_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl) override; - - // helper function to evaluatederivative; evaluate orbital rotation - // parameter derivative using table method - void table_method_eval(Vector& dlogpsi, - Vector& dhpsioverpsi, - const ParticleSet::ParticleLaplacian& myL_J, - const ParticleSet::ParticleGradient& myG_J, - const size_t nel, - const size_t nmo, - const T& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const GradMatrix& grads_up, - const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, - const ValueMatrix& B_lapl, - const std::vector& detData_up, - const size_t N1, - const size_t N2, - const size_t NP1, - const size_t NP2, - const std::vector>& lookup_tbl); - - void table_method_evalWF(Vector& dlogpsi, - const size_t nel, - const size_t nmo, - const T& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl); - - void extractOptimizableObjectRefs(UniqueOptObjRefs& opt_obj_refs) override { opt_obj_refs.push_back(*this); } - - void checkInVariablesExclusive(opt_variables_type& active) override - { - if (this->myVars.size()) - active.insertFrom(this->myVars); - } - - void checkOutVariables(const opt_variables_type& active) override { this->myVars.getIndex(active); } - - /// reset - void 
resetParametersExclusive(const opt_variables_type& active) override; - - void writeVariationalParameters(hdf_archive& hout) override; - - void readVariationalParameters(hdf_archive& hin) override; - - //********************************************************************************* - // the following functions simply call Phi's corresponding functions - void setOrbitalSetSize(int norbs) override { Phi->setOrbitalSetSize(norbs); } - - void checkObject() const override { Phi->checkObject(); } - - void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override - { - assert(psi.size() <= this->OrbitalSetSize); - Phi->evaluateValue(P, iat, psi); - } - - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override - { - assert(psi.size() <= this->OrbitalSetSize); - Phi->evaluateVGL(P, iat, psi, dpsi, d2psi); - } - - void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override - { - Phi->evaluateDetRatios(VP, psi, psiinv, ratios); - } - - void evaluateDerivRatios(const VirtualParticleSet& VP, - const opt_variables_type& optvars, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios, - Matrix& dratios, - int FirstIndex, - int LastIndex) override; - - void evaluateVGH(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override - { - assert(psi.size() <= this->OrbitalSetSize); - Phi->evaluateVGH(P, iat, psi, dpsi, grad_grad_psi); - } - - void evaluateVGHGH(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override - { - Phi->evaluateVGHGH(P, iat, psi, dpsi, grad_grad_psi, grad_grad_grad_psi); - } - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override - { - Phi->evaluate_notranspose(P, 
first, last, logdet, dlogdet, d2logdet); - } - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) override - { - Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, grad_grad_logdet); - } - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) override - { - Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet); - } - - void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi) override - { - Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi); - } - - void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, - GradMatrix& grad_lapl_phi) override - { - Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi, grad_grad_phi, grad_lapl_phi); - } - - // void evaluateThirdDeriv(const ParticleSet& P, int first, int last, - // GGGMatrix& grad_grad_grad_logdet) {Phi->evaluateThridDeriv(P, first, - // last, grad_grad_grad_logdet); } - - /// Use history list (false) or global rotation (true) - void set_use_global_rotation(bool use_global_rotation) { use_global_rot_ = use_global_rotation; } + using IndexType = typename SPOSetT::IndexType; + using RealType = typename SPOSetT::RealType; + using ValueType = typename SPOSetT::ValueType; + using FullRealType = typename SPOSetT::FullRealType; + using ValueVector = typename SPOSetT::ValueVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradVector = typename SPOSetT::GradVector; + using GradMatrix = typename SPOSetT::GradMatrix; + using HessVector = typename SPOSetT::HessVector; + using HessMatrix = typename 
SPOSetT::HessMatrix; + using GGGVector = typename SPOSetT::GGGVector; + using GGGMatrix = typename SPOSetT::GGGMatrix; + + // constructor + RotatedSPOsT( + const std::string& my_name, std::unique_ptr>&& spos); + // destructor + ~RotatedSPOsT() override; + + std::string + getClassName() const override + { + return "RotatedSPOsT"; + } + bool + isOptimizable() const override + { + return true; + } + bool + isOMPoffload() const override + { + return Phi->isOMPoffload(); + } + bool + hasIonDerivs() const override + { + return Phi->hasIonDerivs(); + } + + // Vector of rotation matrix indices + using RotationIndices = std::vector>; + + // Active orbital rotation parameter indices + RotationIndices m_act_rot_inds; + + // Full set of rotation values for global rotation + RotationIndices m_full_rot_inds; + + // Construct a list of the matrix indices for non-zero rotation parameters. + // (The structure for a sparse representation of the matrix) + // Only core->active rotations are created. + static void + createRotationIndices(int nel, int nmo, RotationIndices& rot_indices); + + // Construct a list for all the matrix indices, including core->active, + // core->core and active->active + static void + createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices); + + // Fill in antisymmetric matrix from the list of rotation parameter indices + // and a list of parameter values. + // This function assumes rot_mat is properly sized upon input and is set to + // zero. + static void + constructAntiSymmetricMatrix(const RotationIndices& rot_indices, + const std::vector& param, ValueMatrix& rot_mat); + + // Extract the list of rotation parameters from the entries in an + // antisymmetric matrix This function expects rot_indices and param are the + // same length. 
+ static void + extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices, + const ValueMatrix& rot_mat, std::vector& param); + + // function to perform orbital rotations + void + apply_rotation(const std::vector& param, bool use_stored_copy); + + // For global rotation, inputs are the old parameters and the delta + // parameters. The corresponding rotation matrices are constructed, + // multiplied together, and the new parameters extracted. The new rotation + // is applied to the underlying SPO coefficients + void + applyDeltaRotation(const std::vector& delta_param, + const std::vector& old_param, + std::vector& new_param); + + // Perform the construction of matrices and extraction of parameters for a + // delta rotation. Split out and made static for testing. + static void + constructDeltaRotation(const std::vector& delta_param, + const std::vector& old_param, + const RotationIndices& act_rot_inds, + const RotationIndices& full_rot_inds, std::vector& new_param, + ValueMatrix& new_rot_mat); + + // When initializing the rotation from VP files + // This function applies the rotation history + void + applyRotationHistory(); + + // This function applies the global rotation (similar to apply_rotation, but + // for the full set of rotation parameters) + void + applyFullRotation( + const std::vector& full_param, bool use_stored_copy); + + // Compute matrix exponential of an antisymmetric matrix (result is rotation + // matrix) + static void + exponentiate_antisym_matrix(ValueMatrix& mat); + + // Compute matrix log of rotation matrix to produce antisymmetric matrix + static void + log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output); + + // A particular SPOSet used for Orbitals + std::unique_ptr> Phi; + + /// Set the rotation parameters (usually from input file) + void + setRotationParameters(const std::vector& param_list); + + /// the number of electrons of the majority spin + size_t nel_major_; + + std::unique_ptr> + makeClone() const override; 
+ + // myG_temp (myL_temp) is the Gradient (Laplacian) value of of the + // Determinant part of the wfn myG_J is the Gradient of the all other parts + // of the wavefunction (typically just the Jastrow). + // It represents \frac{\nabla\psi_{J}}{\psi_{J}} + // myL_J will be used to represent \frac{\nabla^2\psi_{J}}{\psi_{J}} . The + // Laplacian portion IMPORTANT NOTE: The value of P.L holds \nabla^2 + // ln[\psi] but we need \frac{\nabla^2 \psi}{\psi} and this is what myL_J + // will hold + typename ParticleSetT::ParticleGradient myG_temp, myG_J; + typename ParticleSetT::ParticleLaplacian myL_temp, myL_J; + + ValueMatrix Bbar; + ValueMatrix psiM_inv; + ValueMatrix psiM_all; + GradMatrix dpsiM_all; + ValueMatrix d2psiM_all; + + // Single Slater creation + void + buildOptVariables(size_t nel); + + // For the MSD case rotations must be created in MultiSlaterDetTableMethod + // class + void + buildOptVariables(const RotationIndices& rotations, + const RotationIndices& full_rotations); + + void + evaluateDerivatives(ParticleSetT& P, const OptVariablesType& optvars, + Vector& dlogpsi, Vector& dhpsioverpsi, const int& FirstIndex, + const int& LastIndex) override; + + void + evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesType& optvars, Vector& dlogpsi, int FirstIndex, + int LastIndex) override; + + void + evaluateDerivatives(ParticleSetT& P, const OptVariablesType& optvars, + Vector& dlogpsi, Vector& dhpsioverpsi, const T& psiCurrent, + const std::vector& Coeff, const std::vector& C2node_up, + const std::vector& C2node_dn, const ValueVector& detValues_up, + const ValueVector& detValues_dn, const GradMatrix& grads_up, + const GradMatrix& grads_dn, const ValueMatrix& lapls_up, + const ValueMatrix& lapls_dn, const ValueMatrix& M_up, + const ValueMatrix& M_dn, const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, const GradMatrix& B_grad, + const ValueMatrix& B_lapl, const std::vector& detData_up, + const size_t N1, const size_t N2, const size_t NP1, const 
size_t NP2, + const std::vector>& lookup_tbl) override; + + void + evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesType& optvars, Vector& dlogpsi, + const ValueType& psiCurrent, const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, const ValueVector& detValues_up, + const ValueVector& detValues_dn, const ValueMatrix& M_up, + const ValueMatrix& M_dn, const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, const std::vector& detData_up, + const std::vector>& lookup_tbl) override; + + // helper function to evaluatederivative; evaluate orbital rotation + // parameter derivative using table method + void + table_method_eval(Vector& dlogpsi, Vector& dhpsioverpsi, + const typename ParticleSetT::ParticleLaplacian& myL_J, + const typename ParticleSetT::ParticleGradient& myG_J, + const size_t nel, const size_t nmo, const T& psiCurrent, + const std::vector& Coeff, const std::vector& C2node_up, + const std::vector& C2node_dn, const ValueVector& detValues_up, + const ValueVector& detValues_dn, const GradMatrix& grads_up, + const GradMatrix& grads_dn, const ValueMatrix& lapls_up, + const ValueMatrix& lapls_dn, const ValueMatrix& M_up, + const ValueMatrix& M_dn, const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, const GradMatrix& B_grad, + const ValueMatrix& B_lapl, const std::vector& detData_up, + const size_t N1, const size_t N2, const size_t NP1, const size_t NP2, + const std::vector>& lookup_tbl); + + void + table_method_evalWF(Vector& dlogpsi, const size_t nel, const size_t nmo, + const T& psiCurrent, const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, const ValueVector& detValues_up, + const ValueVector& detValues_dn, const ValueMatrix& M_up, + const ValueMatrix& M_dn, const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, const std::vector& detData_up, + const std::vector>& lookup_tbl); + + void + extractOptimizableObjectRefs(UniqueOptObjRefsT& opt_obj_refs) override + { + 
opt_obj_refs.push_back(*this); + } + + void + checkInVariablesExclusive(OptVariablesType& active) override + { + if (this->myVars.size()) + active.insertFrom(this->myVars); + } + + void + checkOutVariables(const OptVariablesType& active) override + { + this->myVars.getIndex(active); + } + + /// reset + void + resetParametersExclusive(const OptVariablesType& active) override; + + void + writeVariationalParameters(hdf_archive& hout) override; + + void + readVariationalParameters(hdf_archive& hin) override; + + //********************************************************************************* + // the following functions simply call Phi's corresponding functions + void + setOrbitalSetSize(int norbs) override + { + Phi->setOrbitalSetSize(norbs); + } + + void + checkObject() const override + { + Phi->checkObject(); + } + + void + evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override + { + assert(psi.size() <= this->OrbitalSetSize); + Phi->evaluateValue(P, iat, psi); + } + + void + evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, + GradVector& dpsi, ValueVector& d2psi) override + { + assert(psi.size() <= this->OrbitalSetSize); + Phi->evaluateVGL(P, iat, psi, dpsi, d2psi); + } + + void + evaluateDetRatios(const VirtualParticleSetT& VP, ValueVector& psi, + const ValueVector& psiinv, std::vector& ratios) override + { + Phi->evaluateDetRatios(VP, psi, psiinv, ratios); + } + + void + evaluateDerivRatios(const VirtualParticleSetT& VP, + const OptVariablesType& optvars, ValueVector& psi, + const ValueVector& psiinv, std::vector& ratios, Matrix& dratios, + int FirstIndex, int LastIndex) override; + + void + evaluateVGH(const ParticleSetT& P, int iat, ValueVector& psi, + GradVector& dpsi, HessVector& grad_grad_psi) override + { + assert(psi.size() <= this->OrbitalSetSize); + Phi->evaluateVGH(P, iat, psi, dpsi, grad_grad_psi); + } + + void + evaluateVGHGH(const ParticleSetT& P, int iat, ValueVector& psi, + GradVector& dpsi, HessVector& 
grad_grad_psi, + GGGVector& grad_grad_grad_psi) override + { + Phi->evaluateVGHGH( + P, iat, psi, dpsi, grad_grad_psi, grad_grad_grad_psi); + } + + void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, + ValueMatrix& d2logdet) override + { + Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet); + } + + void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet) override + { + Phi->evaluate_notranspose( + P, first, last, logdet, dlogdet, grad_grad_logdet); + } + + void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet, + GGGMatrix& grad_grad_grad_logdet) override + { + Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, + grad_grad_logdet, grad_grad_grad_logdet); + } + + void + evaluateGradSource(const ParticleSetT& P, int first, int last, + const ParticleSetT& source, int iat_src, + GradMatrix& grad_phi) override + { + Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi); + } + + void + evaluateGradSource(const ParticleSetT& P, int first, int last, + const ParticleSetT& source, int iat_src, GradMatrix& grad_phi, + HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi) override + { + Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi, + grad_grad_phi, grad_lapl_phi); + } + + // void evaluateThirdDeriv(const ParticleSet& P, int first, int last, + // GGGMatrix& grad_grad_grad_logdet) {Phi->evaluateThridDeriv(P, first, + // last, grad_grad_grad_logdet); } + + /// Use history list (false) or global rotation (true) + void + set_use_global_rotation(bool use_global_rotation) + { + use_global_rot_ = use_global_rotation; + } private: - /// true if SPO parameters (orbital rotation parameters) have been supplied - /// by input - bool params_supplied; - /// list of supplied orbital rotation 
parameters - std::vector params; - - /// Full set of rotation matrix parameters for use in global rotation method - opt_variables_type myVarsFull; - - /// List of previously applied parameters - std::vector> history_params_; - - /// Use global rotation or history list - bool use_global_rot_ = true; - - friend opt_variables_type& testing::getMyVarsFull(RotatedSPOsT& rot); - friend opt_variables_type& testing::getMyVarsFull(RotatedSPOsT& rot); - friend std::vector>& testing::getHistoryParams(RotatedSPOsT& rot); - friend std::vector>& testing::getHistoryParams(RotatedSPOsT& rot); + /// true if SPO parameters (orbital rotation parameters) have been supplied + /// by input + bool params_supplied; + /// list of supplied orbital rotation parameters + std::vector params; + + /// Full set of rotation matrix parameters for use in global rotation method + OptVariablesType myVarsFull; + + /// List of previously applied parameters + std::vector> history_params_; + + /// Use global rotation or history list + bool use_global_rot_ = true; + + friend OptVariablesType& + testing::getMyVarsFull(RotatedSPOsT& rot); + friend OptVariablesType& + testing::getMyVarsFull(RotatedSPOsT& rot); + friend std::vector>& + testing::getHistoryParams(RotatedSPOsT& rot); + friend std::vector>& + testing::getHistoryParams(RotatedSPOsT& rot); }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp index b98952f779..12148277a0 100644 --- a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp +++ b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp @@ -26,11 +26,10 @@ #include "QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h" #include "QMCWaveFunctions/SPOSetScannerT.h" #if OHMMS_DIM == 3 +#include "QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h" #include "QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h" - #if defined(QMC_COMPLEX) #include "QMCWaveFunctions/EinsplineSpinorSetBuilder.h" -#include 
"QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h" #endif #if defined(HAVE_EINSPLINE) @@ -45,6 +44,29 @@ namespace qmcplusplus { +template +struct LCAOSpinorBuilderMaker +{ + template + std::unique_ptr> + operator()(TArgs&&...) const + { + throw std::runtime_error( + "lcao spinors not compatible with non-complex value types"); + } +}; + +template +struct LCAOSpinorBuilderMaker> +{ + template + std::unique_ptr>> + operator()(TArgs&&... args) const + { + return std::make_unique>>( + std::forward(args)...); + } +}; template const SPOSetT* @@ -67,7 +89,7 @@ SPOSetBuilderFactoryT::getSPOSet(const std::string& name) const */ template SPOSetBuilderFactoryT::SPOSetBuilderFactoryT( - Communicate* comm, ParticleSet& els, const PSetMap& psets) : + Communicate* comm, ParticleSetT& els, const PSetMap& psets) : MPIObjectBase(comm), targetPtcl(els), ptclPool(psets) @@ -145,21 +167,22 @@ SPOSetBuilderFactoryT::createSPOSetBuilder(xmlNodePtr rootNode) } } else if (type == "molecularorbital" || type == "mo") { - ParticleSet* ions = nullptr; + ParticleSetT* ions = nullptr; // initialize with the source tag auto pit(ptclPool.find(sourceOpt)); if (pit == ptclPool.end()) PRE.error("Missing basisset/@source.", true); else ions = pit->second.get(); - if (targetPtcl.isSpinor()) -#ifdef QMC_COMPLEX - bb = std::make_unique>( - targetPtcl, *ions, myComm, rootNode); -#else - PRE.error("Use of lcao spinors requires QMC_COMPLEX=1. 
Rebuild " - "with this option"); -#endif + if (targetPtcl.isSpinor()) { + try { + bb = LCAOSpinorBuilderMaker{}( + targetPtcl, *ions, myComm, rootNode); + } + catch (const std::exception& e) { + PRE.error(e.what()); + } + } else bb = std::make_unique>( targetPtcl, *ions, myComm, rootNode); @@ -253,11 +276,8 @@ SPOSetBuilderFactoryT::addSPOSet(std::unique_ptr> spo) template std::string SPOSetBuilderFactoryT::basisset_tag = "basisset"; -#ifdef QMC_COMPLEX template class SPOSetBuilderFactoryT>; template class SPOSetBuilderFactoryT>; -#else template class SPOSetBuilderFactoryT; template class SPOSetBuilderFactoryT; -#endif } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.h b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.h index ce1e9b89da..9841988d00 100644 --- a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.h +++ b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.h @@ -29,7 +29,8 @@ class SPOSetBuilderFactoryT : public MPIObjectBase { public: using SPOMap = typename SPOSetT::SPOMap; - using PSetMap = std::map>; + using PSetMap = + std::map>>; /** constructor * \param comm communicator @@ -37,7 +38,7 @@ class SPOSetBuilderFactoryT : public MPIObjectBase * \param ions reference to the ions */ SPOSetBuilderFactoryT( - Communicate* comm, ParticleSet& els, const PSetMap& psets); + Communicate* comm, ParticleSetT& els, const PSetMap& psets); ~SPOSetBuilderFactoryT(); @@ -74,7 +75,7 @@ class SPOSetBuilderFactoryT : public MPIObjectBase private: /// reference to the target particle - ParticleSet& targetPtcl; + ParticleSetT& targetPtcl; /// reference to the particle pool const PSetMap& ptclPool; diff --git a/src/QMCWaveFunctions/SPOSetScannerT.h b/src/QMCWaveFunctions/SPOSetScannerT.h index 9a3bb418a1..e4841b90bb 100644 --- a/src/QMCWaveFunctions/SPOSetScannerT.h +++ b/src/QMCWaveFunctions/SPOSetScannerT.h @@ -1,6 +1,6 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the 
University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. // @@ -9,207 +9,276 @@ // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory ////////////////////////////////////////////////////////////////////////////////////// - #ifndef QMCPLUSPLUS_SPOSET_SCANNERT_H #define QMCPLUSPLUS_SPOSET_SCANNERT_H -#include "Particle/ParticleSet.h" +#include "OhmmsData/AttributeSet.h" +#include "Particle/ParticleSetT.h" #include "QMCWaveFunctions/OrbitalSetTraits.h" #include "QMCWaveFunctions/SPOSetT.h" -#include "OhmmsData/AttributeSet.h" namespace qmcplusplus { +template +struct OutputReportMakerBase +{ + using ValueVector = typename SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; + + const ValueVector& SPO_v_avg; + const ValueVector& SPO_l_avg; + const GradVector& SPO_g_avg; + int nknots; +}; + +template +struct OutputReportMaker : OutputReportMakerBase +{ + using RealType = typename SPOSetT::RealType; + + void + operator()(std::ofstream& output_report) const + { + output_report + << "# Report: Orb Value_avg Gradients_avg Laplacian_avg" + << std::endl; + for (int iorb = 0; iorb < this->SPO_v_avg.size(); iorb++) { + auto one_over_nknots = static_cast(1.0 / this->nknots); + output_report << "\t" << iorb << " " << std::scientific + << this->SPO_v_avg[iorb] * one_over_nknots << " " + << this->SPO_g_avg[iorb][0] * one_over_nknots << " " + << this->SPO_g_avg[iorb][1] * one_over_nknots << " " + << this->SPO_g_avg[iorb][2] * one_over_nknots << " " + << this->SPO_l_avg[iorb] * one_over_nknots + << std::fixed << std::endl; + } + } +}; + +template +struct OutputReportMaker> : + OutputReportMakerBase> +{ + using RealType = typename SPOSetT::RealType; + + void + operator()(std::ofstream& output_report) const + 
{ + output_report + << "# Report: Orb Value_avg I/R Gradients_avg Laplacian_avg" + << std::endl; + for (int iorb = 0; iorb < this->SPO_v_avg.size(); iorb++) { + auto one_over_nknots = static_cast(1.0 / this->nknots); + output_report << "\t" << iorb << " " << std::scientific + << this->SPO_v_avg[iorb] * one_over_nknots << " " + << this->SPO_v_avg[iorb].imag() / + this->SPO_v_avg[iorb].real() + << " " << this->SPO_g_avg[iorb][0] * one_over_nknots + << " " << this->SPO_g_avg[iorb][1] * one_over_nknots + << " " << this->SPO_g_avg[iorb][2] * one_over_nknots + << " " << this->SPO_l_avg[iorb] * one_over_nknots + << std::fixed << std::endl; + } + } +}; + /** a scanner for all the SPO sets. - */ + */ template class SPOSetScannerT { public: - using PtclPool = std::map>; - using SPOSetMap = typename SPOSetT::SPOMap; - using RealType = typename SPOSetT::RealType; - using ValueVector = typename SPOSetT::ValueVector; - using GradVector = typename SPOSetT::GradVector; - using HessVector = typename SPOSetT::HessVector; - - RealType myfabs(RealType s) { return std::fabs(s); } - template - std::complex myfabs(std::complex& s) - { - return std::complex(myfabs(s.real()), myfabs(s.imag())); - } - template - TinyVector myfabs(TinyVector& s) - { - return TinyVector(myfabs(s[0]), myfabs(s[1]), myfabs(s[2])); - } - - const SPOSetMap& sposets; - ParticleSet& target; - const PtclPool& ptcl_pool_; - ParticleSet* ions; - - // construction/destruction - SPOSetScannerT(const SPOSetMap& sposets_in, ParticleSet& targetPtcl, const PtclPool& psets) - : sposets(sposets_in), target(targetPtcl), ptcl_pool_(psets), ions(0){}; - //~SPOSetScannerT(){}; - - // processing scanning - void put(xmlNodePtr cur) - { - app_log() << "Entering the SPO set scanner!" << std::endl; - // check in the source particle set and search for it in the pool. 
- std::string sourcePtcl("ion0"); - OhmmsAttributeSet aAttrib; - aAttrib.add(sourcePtcl, "source"); - aAttrib.put(cur); - auto pit(ptcl_pool_.find(sourcePtcl)); - if (pit == ptcl_pool_.end()) - app_log() << "Source particle set not found. Can not be used as reference point." << std::endl; - else - ions = pit->second.get(); - - // scanning the SPO sets - xmlNodePtr cur_save = cur; - for (const auto& [name, sposet] : sposets) + using PtclPool = + std::map>>; + using SPOSetMap = typename SPOSetT::SPOMap; + using RealType = typename SPOSetT::RealType; + using ValueVector = typename SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; + using HessVector = typename SPOSetT::HessVector; + + RealType + myfabs(RealType s) + { + return std::fabs(s); + } + template + std::complex + myfabs(std::complex& s) + { + return std::complex(myfabs(s.real()), myfabs(s.imag())); + } + template + TinyVector + myfabs(TinyVector& s) + { + return TinyVector( + myfabs(s[0]), myfabs(s[1]), myfabs(s[2])); + } + + const SPOSetMap& sposets; + ParticleSetT& target; + const PtclPool& ptcl_pool_; + ParticleSetT* ions; + + // construction/destruction + SPOSetScannerT(const SPOSetMap& sposets_in, ParticleSetT& targetPtcl, + const PtclPool& psets) : + sposets(sposets_in), + target(targetPtcl), + ptcl_pool_(psets), + ions(0){}; + //~SPOSetScannerT(){}; + + // processing scanning + void + put(xmlNodePtr cur) { - app_log() << " Processing SPO " << sposet->getName() << std::endl; - // scanning the paths - cur = cur_save->children; - while (cur != NULL) - { - std::string trace_name("no name"); + app_log() << "Entering the SPO set scanner!" << std::endl; + // check in the source particle set and search for it in the pool. 
+ std::string sourcePtcl("ion0"); OhmmsAttributeSet aAttrib; - aAttrib.add(trace_name, "name"); + aAttrib.add(sourcePtcl, "source"); aAttrib.put(cur); - std::string cname(getNodeName(cur)); - std::string prefix(sposet->getName() + "_" + cname + "_" + trace_name); - if (cname == "path") - { - app_log() << " Scanning a " << cname << " called " << trace_name << " and writing to " - << prefix + "_v/g/l/report.dat" << std::endl; - auto spo = sposet->makeClone(); - scan_path(cur, *spo, prefix); - } + auto pit(ptcl_pool_.find(sourcePtcl)); + if (pit == ptcl_pool_.end()) + app_log() << "Source particle set not found. Can not be used as " + "reference point." + << std::endl; else - { - if (cname != "text" && cname != "comment") - app_log() << " Unknown type of scanning " << cname << std::endl; + ions = pit->second.get(); + + // scanning the SPO sets + xmlNodePtr cur_save = cur; + for (const auto& [name, sposet] : sposets) { + app_log() << " Processing SPO " << sposet->getName() << std::endl; + // scanning the paths + cur = cur_save->children; + while (cur != NULL) { + std::string trace_name("no name"); + OhmmsAttributeSet aAttrib; + aAttrib.add(trace_name, "name"); + aAttrib.put(cur); + std::string cname(getNodeName(cur)); + std::string prefix( + sposet->getName() + "_" + cname + "_" + trace_name); + if (cname == "path") { + app_log() << " Scanning a " << cname << " called " + << trace_name << " and writing to " + << prefix + "_v/g/l/report.dat" << std::endl; + auto spo = sposet->makeClone(); + scan_path(cur, *spo, prefix); + } + else { + if (cname != "text" && cname != "comment") + app_log() << " Unknown type of scanning " << cname + << std::endl; + } + cur = cur->next; + } } - cur = cur->next; - } - } - app_log() << "Exiting the SPO set scanner!" 
<< std::endl << std::endl; - } - - // scanning a path - void scan_path(xmlNodePtr cur, SPOSetT& sposet, std::string prefix) - { - std::string file_name; - file_name = prefix + "_v.dat"; - std::ofstream output_v(file_name.c_str()); - file_name = prefix + "_g.dat"; - std::ofstream output_g(file_name.c_str()); - file_name = prefix + "_l.dat"; - std::ofstream output_l(file_name.c_str()); - file_name = prefix + "_report.dat"; - std::ofstream output_report(file_name.c_str()); - - int nknots(2); - int from_atom(-1); - int to_atom(-1); - TinyVector from_pos(0.0, 0.0, 0.0); - TinyVector to_pos(0.0, 0.0, 0.0); - - OhmmsAttributeSet aAttrib; - aAttrib.add(nknots, "nknots"); - aAttrib.add(from_atom, "from_atom"); - aAttrib.add(to_atom, "to_atom"); - aAttrib.add(from_pos, "from_pos"); - aAttrib.add(to_pos, "to_pos"); - aAttrib.put(cur); - - // sanity check - if (nknots < 2) - nknots = 2; - // check out the reference atom coordinates - if (ions) - { - if (from_atom >= 0 && from_atom < ions->R.size()) - from_pos = ions->R[from_atom]; - if (to_atom >= 0 && to_atom < ions->R.size()) - to_pos = ions->R[to_atom]; + app_log() << "Exiting the SPO set scanner!" 
<< std::endl << std::endl; } - // prepare a fake particle set - ValueVector SPO_v, SPO_l, SPO_v_avg, SPO_l_avg; - GradVector SPO_g, SPO_g_avg; - int OrbitalSize(sposet.size()); - SPO_v.resize(OrbitalSize); - SPO_g.resize(OrbitalSize); - SPO_l.resize(OrbitalSize); - SPO_v_avg.resize(OrbitalSize); - SPO_g_avg.resize(OrbitalSize); - SPO_l_avg.resize(OrbitalSize); - SPO_v_avg = 0.0; - SPO_g_avg = 0.0; - SPO_l_avg = 0.0; - double Delta = 1.0 / (nknots - 1); - int elec_count = target.R.size(); - auto R_saved = target.R; - ParticleSet::SingleParticlePos zero_pos(0.0, 0.0, 0.0); - for (int icount = 0, ind = 0; icount < nknots; icount++, ind++) + // scanning a path + void + scan_path(xmlNodePtr cur, SPOSetT& sposet, std::string prefix) { - if (ind == elec_count) - ind = 0; - target.R[ind][0] = (to_pos[0] - from_pos[0]) * Delta * icount + from_pos[0]; - target.R[ind][1] = (to_pos[1] - from_pos[1]) * Delta * icount + from_pos[1]; - target.R[ind][2] = (to_pos[2] - from_pos[2]) * Delta * icount + from_pos[2]; - target.makeMove(ind, zero_pos); - sposet.evaluateVGL(target, ind, SPO_v, SPO_g, SPO_l); - std::ostringstream o; - o << "x_y_z " << std::fixed << std::setprecision(7) << target.R[ind][0] << " " << target.R[ind][1] << " " - << target.R[ind][2]; - output_v << o.str() << " : " << std::scientific << std::setprecision(12); - output_g << o.str() << " : " << std::scientific << std::setprecision(12); - output_l << o.str() << " : " << std::scientific << std::setprecision(12); - for (int iorb = 0; iorb < OrbitalSize; iorb++) - { - SPO_v_avg[iorb] += myfabs(SPO_v[iorb]); - SPO_g_avg[iorb] += myfabs(SPO_g[iorb]); - SPO_l_avg[iorb] += myfabs(SPO_l[iorb]); - output_v << SPO_v[iorb] << " "; - output_g << SPO_g[iorb][0] << " " << SPO_g[iorb][1] << " " << SPO_g[iorb][2] << " "; - output_l << SPO_l[iorb] << " "; - } - output_v << std::endl; - output_g << std::endl; - output_l << std::endl; + std::string file_name; + file_name = prefix + "_v.dat"; + std::ofstream 
output_v(file_name.c_str()); + file_name = prefix + "_g.dat"; + std::ofstream output_g(file_name.c_str()); + file_name = prefix + "_l.dat"; + std::ofstream output_l(file_name.c_str()); + file_name = prefix + "_report.dat"; + std::ofstream output_report(file_name.c_str()); + + int nknots(2); + int from_atom(-1); + int to_atom(-1); + TinyVector from_pos(0.0, 0.0, 0.0); + TinyVector to_pos(0.0, 0.0, 0.0); + + OhmmsAttributeSet aAttrib; + aAttrib.add(nknots, "nknots"); + aAttrib.add(from_atom, "from_atom"); + aAttrib.add(to_atom, "to_atom"); + aAttrib.add(from_pos, "from_pos"); + aAttrib.add(to_pos, "to_pos"); + aAttrib.put(cur); + + // sanity check + if (nknots < 2) + nknots = 2; + // check out the reference atom coordinates + if (ions) { + if (from_atom >= 0 && from_atom < ions->R.size()) + from_pos = ions->R[from_atom]; + if (to_atom >= 0 && to_atom < ions->R.size()) + to_pos = ions->R[to_atom]; + } + + // prepare a fake particle set + ValueVector SPO_v, SPO_l, SPO_v_avg, SPO_l_avg; + GradVector SPO_g, SPO_g_avg; + int OrbitalSize(sposet.size()); + SPO_v.resize(OrbitalSize); + SPO_g.resize(OrbitalSize); + SPO_l.resize(OrbitalSize); + SPO_v_avg.resize(OrbitalSize); + SPO_g_avg.resize(OrbitalSize); + SPO_l_avg.resize(OrbitalSize); + SPO_v_avg = 0.0; + SPO_g_avg = 0.0; + SPO_l_avg = 0.0; + double Delta = 1.0 / (nknots - 1); + int elec_count = target.R.size(); + auto R_saved = target.R; + typename ParticleSetT::SingleParticlePos zero_pos(0.0, 0.0, 0.0); + for (int icount = 0, ind = 0; icount < nknots; icount++, ind++) { + if (ind == elec_count) + ind = 0; + target.R[ind][0] = + (to_pos[0] - from_pos[0]) * Delta * icount + from_pos[0]; + target.R[ind][1] = + (to_pos[1] - from_pos[1]) * Delta * icount + from_pos[1]; + target.R[ind][2] = + (to_pos[2] - from_pos[2]) * Delta * icount + from_pos[2]; + target.makeMove(ind, zero_pos); + sposet.evaluateVGL(target, ind, SPO_v, SPO_g, SPO_l); + std::ostringstream o; + o << "x_y_z " << std::fixed << std::setprecision(7) + << 
target.R[ind][0] << " " << target.R[ind][1] << " " + << target.R[ind][2]; + output_v << o.str() << " : " << std::scientific + << std::setprecision(12); + output_g << o.str() << " : " << std::scientific + << std::setprecision(12); + output_l << o.str() << " : " << std::scientific + << std::setprecision(12); + for (int iorb = 0; iorb < OrbitalSize; iorb++) { + SPO_v_avg[iorb] += myfabs(SPO_v[iorb]); + SPO_g_avg[iorb] += myfabs(SPO_g[iorb]); + SPO_l_avg[iorb] += myfabs(SPO_l[iorb]); + output_v << SPO_v[iorb] << " "; + output_g << SPO_g[iorb][0] << " " << SPO_g[iorb][1] << " " + << SPO_g[iorb][2] << " "; + output_l << SPO_l[iorb] << " "; + } + output_v << std::endl; + output_g << std::endl; + output_l << std::endl; + } + // restore the whole target. + target.R = R_saved; + target.update(); + OutputReportMaker{SPO_v_avg, SPO_l_avg, SPO_g_avg, nknots}( + output_report); + output_v.close(); + output_g.close(); + output_l.close(); + output_report.close(); } - // restore the whole target. - target.R = R_saved; - target.update(); -#ifdef QMC_COMPLEX - output_report << "# Report: Orb Value_avg I/R Gradients_avg Laplacian_avg" << std::endl; -#else - output_report << "# Report: Orb Value_avg Gradients_avg Laplacian_avg" << std::endl; -#endif - for (int iorb = 0; iorb < OrbitalSize; iorb++) - output_report << "\t" << iorb << " " << std::scientific - << SPO_v_avg[iorb] * static_cast(1.0 / nknots) << " " -#ifdef QMC_COMPLEX - << SPO_v_avg[iorb].imag() / SPO_v_avg[iorb].real() << " " -#endif - << SPO_g_avg[iorb][0] * static_cast(1.0 / nknots) << " " - << SPO_g_avg[iorb][1] * static_cast(1.0 / nknots) << " " - << SPO_g_avg[iorb][2] * static_cast(1.0 / nknots) << " " - << SPO_l_avg[iorb] * static_cast(1.0 / nknots) << std::fixed << std::endl; - output_v.close(); - output_g.close(); - output_l.close(); - output_report.close(); - } }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/SPOSetT.cpp b/src/QMCWaveFunctions/SPOSetT.cpp index c20bda6513..2e6521e115 100644 --- 
a/src/QMCWaveFunctions/SPOSetT.cpp +++ b/src/QMCWaveFunctions/SPOSetT.cpp @@ -1,20 +1,25 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. // -// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign -// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory -// Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Ying Wai Li, yingwaili@ornl.gov, Oak Ridge National Laboratory -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// William F. Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory +// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at +// Urbana-Champaign +// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore +// National Laboratory Raymond Clay III, +// j.k.rofling@gmail.com, Lawrence Livermore National +// Laboratory Jeremy McMinnis, jmcminis@gmail.com, University +// of Illinois at Urbana-Champaign Jaron T. Krogel, +// krogeljt@ornl.gov, Oak Ridge National Laboratory Jeongnim +// Kim, jeongnim.kim@gmail.com, University of Illinois at +// Urbana-Champaign Ying Wai Li, yingwaili@ornl.gov, Oak +// Ridge National Laboratory Mark A. Berrill, +// berrillma@ornl.gov, Oak Ridge National Laboratory William +// F. 
Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory // -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// #include "SPOSetT.h" @@ -24,409 +29,382 @@ namespace qmcplusplus { -template -SPOSetT::SPOSetT(const std::string& my_name) : my_name_(my_name), OrbitalSetSize(0) -{} - -template -void SPOSetT::extractOptimizableObjectRefs(UniqueOptObjRefs&) +template +SPOSetT::SPOSetT(const std::string& my_name) : + my_name_(my_name), + OrbitalSetSize(0) { - if (isOptimizable()) - throw std::logic_error("Bug!! " + getClassName() + - "::extractOptimizableObjectRefs " - "must be overloaded when the SPOSet is optimizable."); } -template -void SPOSetT::checkOutVariables(const opt_variables_type& active) +template +void +SPOSetT::extractOptimizableObjectRefs(UniqueOptObjRefsT&) { - if (isOptimizable()) - throw std::logic_error("Bug!! " + getClassName() + - "::checkOutVariables " - "must be overloaded when the SPOSet is optimizable."); + if (isOptimizable()) + throw std::logic_error("Bug!! " + getClassName() + + "::extractOptimizableObjectRefs " + "must be overloaded when the SPOSet is optimizable."); } -template -void SPOSetT::evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) +template +void +SPOSetT::checkOutVariables(const OptVariablesType& active) { - assert(psi.size() == psiinv.size()); - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - evaluateValue(VP, iat, psi); - ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size()); - } + if (isOptimizable()) + throw std::logic_error("Bug!! 
" + getClassName() + + "::checkOutVariables " + "must be overloaded when the SPOSet is optimizable."); } - -template -void SPOSetT::mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const +template +void +SPOSetT::evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, const ValueVector& psiinv, std::vector& ratios) { - assert(this == &spo_list.getLeader()); - for (int iw = 0; iw < spo_list.size(); iw++) - { - Vector invRow(const_cast(invRow_ptr_list[iw]), psi_list[iw].get().size()); - spo_list[iw].evaluateDetRatios(vp_list[iw], psi_list[iw], invRow, ratios_list[iw]); - } + assert(psi.size() == psiinv.size()); + for (int iat = 0; iat < VP.getTotalNum(); ++iat) { + evaluateValue(VP, iat, psi); + ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size()); + } } -template -void SPOSetT::evaluateVGL_spin(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi, - ValueVector& dspin) +template +void +SPOSetT::mw_evaluateDetRatios( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const { - throw std::runtime_error("Need specialization of SPOSet::evaluateVGL_spin"); + assert(this == &spo_list.getLeader()); + for (int iw = 0; iw < spo_list.size(); iw++) { + Vector invRow( + const_cast(invRow_ptr_list[iw]), psi_list[iw].get().size()); + spo_list[iw].evaluateDetRatios( + vp_list[iw], psi_list[iw], invRow, ratios_list[iw]); + } } -template -void SPOSetT::mw_evaluateVGL(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const +template +void +SPOSetT::evaluateVGL_spin(const ParticleSetT& P, int iat, + ValueVector& psi, GradVector& 
dpsi, ValueVector& d2psi, ValueVector& dspin) { - assert(this == &spo_list.getLeader()); - for (int iw = 0; iw < spo_list.size(); iw++) - spo_list[iw].evaluateVGL(P_list[iw], iat, psi_v_list[iw], dpsi_v_list[iw], d2psi_v_list[iw]); + throw std::runtime_error("Need specialization of SPOSet::evaluateVGL_spin"); } -template -void SPOSetT::mw_evaluateValue(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list) const +template +void +SPOSetT::mw_evaluateVGL(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const { - assert(this == &spo_list.getLeader()); - for (int iw = 0; iw < spo_list.size(); iw++) - spo_list[iw].evaluateValue(P_list[iw], iat, psi_v_list[iw]); + assert(this == &spo_list.getLeader()); + for (int iw = 0; iw < spo_list.size(); iw++) + spo_list[iw].evaluateVGL( + P_list[iw], iat, psi_v_list[iw], dpsi_v_list[iw], d2psi_v_list[iw]); } -template -void SPOSetT::mw_evaluateVGLWithSpin(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list, - OffloadMatrix& mw_dspin) const +template +void +SPOSetT::mw_evaluateValue(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const RefVector& psi_v_list) const { - throw std::runtime_error(getClassName() + "::mw_evaluateVGLWithSpin() is not supported. 
\n"); + assert(this == &spo_list.getLeader()); + for (int iw = 0; iw < spo_list.size(); iw++) + spo_list[iw].evaluateValue(P_list[iw], iat, psi_v_list[iw]); } +template +void +SPOSetT::mw_evaluateVGLWithSpin( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list, + OffloadMatrix& mw_dspin) const +{ + throw std::runtime_error( + getClassName() + "::mw_evaluateVGLWithSpin() is not supported. \n"); +} -template -void SPOSetT::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const +template +void +SPOSetT::mw_evaluateVGLandDetRatioGrads( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const std::vector& invRow_ptr_list, OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, std::vector& grads) const { - assert(this == &spo_list.getLeader()); - assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL); - assert(phi_vgl_v.size(1) == spo_list.size()); - const size_t nw = spo_list.size(); - const size_t norb_requested = phi_vgl_v.size(2); - GradVector dphi_v(norb_requested); - for (int iw = 0; iw < nw; iw++) - { - ValueVector phi_v(phi_vgl_v.data_at(0, iw, 0), norb_requested); - ValueVector d2phi_v(phi_vgl_v.data_at(4, iw, 0), norb_requested); - spo_list[iw].evaluateVGL(P_list[iw], iat, phi_v, dphi_v, d2phi_v); - - ratios[iw] = simd::dot(invRow_ptr_list[iw], phi_v.data(), norb_requested); - grads[iw] = simd::dot(invRow_ptr_list[iw], dphi_v.data(), norb_requested) / ratios[iw]; - - // transpose the array of gradients to SoA in phi_vgl_v - for (size_t idim = 0; idim < DIM; idim++) - { - T* phi_g = phi_vgl_v.data_at(idim + 1, iw, 0); - for (size_t iorb = 0; iorb < norb_requested; iorb++) - phi_g[iorb] = dphi_v[iorb][idim]; + assert(this == 
&spo_list.getLeader()); + assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL); + assert(phi_vgl_v.size(1) == spo_list.size()); + const size_t nw = spo_list.size(); + const size_t norb_requested = phi_vgl_v.size(2); + GradVector dphi_v(norb_requested); + for (int iw = 0; iw < nw; iw++) { + ValueVector phi_v(phi_vgl_v.data_at(0, iw, 0), norb_requested); + ValueVector d2phi_v(phi_vgl_v.data_at(4, iw, 0), norb_requested); + spo_list[iw].evaluateVGL(P_list[iw], iat, phi_v, dphi_v, d2phi_v); + + ratios[iw] = + simd::dot(invRow_ptr_list[iw], phi_v.data(), norb_requested); + grads[iw] = + simd::dot(invRow_ptr_list[iw], dphi_v.data(), norb_requested) / + ratios[iw]; + + // transpose the array of gradients to SoA in phi_vgl_v + for (size_t idim = 0; idim < DIM; idim++) { + T* phi_g = phi_vgl_v.data_at(idim + 1, iw, 0); + for (size_t iorb = 0; iorb < norb_requested; iorb++) + phi_g[iorb] = dphi_v[iorb][idim]; + } } - } - phi_vgl_v.updateTo(); + phi_vgl_v.updateTo(); } -template -void SPOSetT::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads, - std::vector& spingrads) const +template +void +SPOSetT::mw_evaluateVGLandDetRatioGradsWithSpin( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const std::vector& invRow_ptr_list, OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, std::vector& grads, + std::vector& spingrads) const { - throw std::runtime_error("Need specialization of " + getClassName() + - "::mw_evaluateVGLandDetRatioGradsWithSpin(). \n"); + throw std::runtime_error("Need specialization of " + getClassName() + + "::mw_evaluateVGLandDetRatioGradsWithSpin(). 
\n"); } -template -void SPOSetT::evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& grad_grad_grad_logdet) +template +void +SPOSetT::evaluateThirdDeriv(const ParticleSetT& P, int first, int last, + GGGMatrix& grad_grad_grad_logdet) { - throw std::runtime_error("Need specialization of SPOSet::evaluateThirdDeriv(). \n"); + throw std::runtime_error( + "Need specialization of SPOSet::evaluateThirdDeriv(). \n"); } -template -void SPOSetT::evaluate_notranspose_spin(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet, - ValueMatrix& dspinlogdet) +template +void +SPOSetT::evaluate_notranspose_spin(const ParticleSetT& P, int first, + int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet, + ValueMatrix& dspinlogdet) { - throw std::runtime_error("Need specialization of " + getClassName() + - "::evaluate_notranspose_spin(P,iat,psi,dpsi,d2logdet, dspin_logdet) (vector quantities)\n"); + throw std::runtime_error("Need specialization of " + getClassName() + + "::evaluate_notranspose_spin(P,iat,psi,dpsi,d2logdet, dspin_logdet) " + "(vector quantities)\n"); } -template -void SPOSetT::mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int first, - int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const +template +void +SPOSetT::mw_evaluate_notranspose( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int first, int last, + const RefVector& logdet_list, + const RefVector& dlogdet_list, + const RefVector& d2logdet_list) const { - assert(this == &spo_list.getLeader()); - for (int iw = 0; iw < spo_list.size(); iw++) - spo_list[iw].evaluate_notranspose(P_list[iw], first, last, logdet_list[iw], dlogdet_list[iw], d2logdet_list[iw]); + assert(this == &spo_list.getLeader()); + for (int iw = 0; iw < spo_list.size(); iw++) + 
spo_list[iw].evaluate_notranspose(P_list[iw], first, last, + logdet_list[iw], dlogdet_list[iw], d2logdet_list[iw]); } -template -void SPOSetT::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) +template +void +SPOSetT::evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet) { - throw std::runtime_error("Need specialization of SPOSet::evaluate_notranspose() for grad_grad_logdet. \n"); + throw std::runtime_error( + "Need specialization of SPOSet::evaluate_notranspose() for " + "grad_grad_logdet. \n"); } -template -void SPOSetT::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) +template +void +SPOSetT::evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet, + GGGMatrix& grad_grad_grad_logdet) { - throw std::runtime_error("Need specialization of SPOSet::evaluate_notranspose() for grad_grad_grad_logdet. \n"); + throw std::runtime_error( + "Need specialization of SPOSet::evaluate_notranspose() for " + "grad_grad_grad_logdet. 
\n"); } -template -std::unique_ptr> SPOSetT::makeClone() const +template +std::unique_ptr> +SPOSetT::makeClone() const { - throw std::runtime_error("Missing SPOSet::makeClone for " + getClassName()); + throw std::runtime_error( + "Missing SPOSet::makeClone for " + getClassName()); } -template -void SPOSetT::basic_report(const std::string& pad) const +template +void +SPOSetT::basic_report(const std::string& pad) const { - app_log() << pad << "size = " << size() << std::endl; - app_log() << pad << "state info:" << std::endl; - //states.report(pad+" "); - app_log().flush(); + app_log() << pad << "size = " << size() << std::endl; + app_log() << pad << "state info:" << std::endl; + // states.report(pad+" "); + app_log().flush(); } -template -void SPOSetT::evaluateVGH(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) +template +void +SPOSetT::evaluateVGH(const ParticleSetT& P, int iat, ValueVector& psi, + GradVector& dpsi, HessVector& grad_grad_psi) { - throw std::runtime_error("Need specialization of " + getClassName() + - "::evaluate(P,iat,psi,dpsi,dhpsi) (vector quantities)\n"); + throw std::runtime_error("Need specialization of " + getClassName() + + "::evaluate(P,iat,psi,dpsi,dhpsi) (vector quantities)\n"); } -template -void SPOSetT::evaluateVGHGH(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) +template +void +SPOSetT::evaluateVGHGH(const ParticleSetT& P, int iat, ValueVector& psi, + GradVector& dpsi, HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi) { - throw std::runtime_error("Need specialization of " + getClassName() + - "::evaluate(P,iat,psi,dpsi,dhpsi,dghpsi) (vector quantities)\n"); + throw std::runtime_error("Need specialization of " + getClassName() + + "::evaluate(P,iat,psi,dpsi,dhpsi,dghpsi) (vector quantities)\n"); } -template -void SPOSetT::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) 
+template +void +SPOSetT::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) { - if (isRotationSupported()) - throw std::logic_error("Bug!! " + getClassName() + - "::applyRotation " - "must be overloaded when the SPOSet supports rotation."); + if (isRotationSupported()) + throw std::logic_error("Bug!! " + getClassName() + + "::applyRotation " + "must be overloaded when the SPOSet supports rotation."); } -template -void SPOSetT::evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const int& FirstIndex, - const int& LastIndex) +template +void +SPOSetT::evaluateDerivatives(ParticleSetT& P, + const OptVariablesType& optvars, Vector& dlogpsi, + Vector& dhpsioverpsi, const int& FirstIndex, const int& LastIndex) { - if (isOptimizable()) - throw std::logic_error("Bug!! " + getClassName() + - "::evaluateDerivatives " - "must be overloaded when the SPOSet is optimizable."); + if (isOptimizable()) + throw std::logic_error("Bug!! " + getClassName() + + "::evaluateDerivatives " + "must be overloaded when the SPOSet is optimizable."); } -template -void SPOSetT::evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - int FirstIndex, - int LastIndex) +template +void +SPOSetT::evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesType& optvars, Vector& dlogpsi, int FirstIndex, + int LastIndex) { - if (isOptimizable()) - throw std::logic_error("Bug!! " + getClassName() + - "::evaluateDerivativesWF " - "must be overloaded when the SPOSet is optimizable."); + if (isOptimizable()) + throw std::logic_error("Bug!! 
" + getClassName() + + "::evaluateDerivativesWF " + "must be overloaded when the SPOSet is optimizable."); } -template -void SPOSetT::evaluateDerivRatios(const VirtualParticleSet& VP, - const opt_variables_type& optvars, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios, - Matrix& dratios, - int FirstIndex, - int LastIndex) +template +void +SPOSetT::evaluateDerivRatios(const VirtualParticleSetT& VP, + const OptVariablesType& optvars, ValueVector& psi, + const ValueVector& psiinv, std::vector& ratios, Matrix& dratios, + int FirstIndex, int LastIndex) { - // Match the fallback in WaveFunctionComponent that evaluates just the ratios - evaluateDetRatios(VP, psi, psiinv, ratios); - - if (isOptimizable()) - throw std::logic_error("Bug!! " + getClassName() + - "::evaluateDerivRatios " - "must be overloaded when the SPOSet is optimizable."); + // Match the fallback in WaveFunctionComponent that evaluates just the + // ratios + evaluateDetRatios(VP, psi, psiinv, ratios); + + if (isOptimizable()) + throw std::logic_error("Bug!! 
" + getClassName() + + "::evaluateDerivRatios " + "must be overloaded when the SPOSet is optimizable."); } -template -void SPOSetT::evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const T& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const GradMatrix& grads_up, - const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, - const ValueMatrix& B_lapl, - const std::vector& detData_up, - const size_t N1, - const size_t N2, - const size_t NP1, - const size_t NP2, - const std::vector>& lookup_tbl) +template +void +SPOSetT::evaluateDerivatives(ParticleSetT& P, + const OptVariablesType& optvars, Vector& dlogpsi, + Vector& dhpsioverpsi, const T& psiCurrent, const std::vector& Coeff, + const std::vector& C2node_up, const std::vector& C2node_dn, + const ValueVector& detValues_up, const ValueVector& detValues_dn, + const GradMatrix& grads_up, const GradMatrix& grads_dn, + const ValueMatrix& lapls_up, const ValueMatrix& lapls_dn, + const ValueMatrix& M_up, const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn, + const GradMatrix& B_grad, const ValueMatrix& B_lapl, + const std::vector& detData_up, const size_t N1, const size_t N2, + const size_t NP1, const size_t NP2, + const std::vector>& lookup_tbl) { - if (isOptimizable()) - throw std::logic_error("Bug!! " + getClassName() + - "::evaluateDerivatives " - "must be overloaded when the SPOSet is optimizable."); + if (isOptimizable()) + throw std::logic_error("Bug!! 
" + getClassName() + + "::evaluateDerivatives " + "must be overloaded when the SPOSet is optimizable."); } -template -void SPOSetT::evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - const ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl) +template +void +SPOSetT::evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesType& optvars, Vector& dlogpsi, + const ValueType& psiCurrent, const std::vector& Coeff, + const std::vector& C2node_up, const std::vector& C2node_dn, + const ValueVector& detValues_up, const ValueVector& detValues_dn, + const ValueMatrix& M_up, const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn, + const std::vector& detData_up, + const std::vector>& lookup_tbl) { - if (isOptimizable()) - throw std::logic_error("Bug!! " + getClassName() + - "::evaluateDerivativesWF " - "must be overloaded when the SPOSet is optimizable."); + if (isOptimizable()) + throw std::logic_error("Bug!! " + getClassName() + + "::evaluateDerivativesWF " + "must be overloaded when the SPOSet is optimizable."); } -template -void SPOSetT::evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi) +template +void +SPOSetT::evaluateGradSource(const ParticleSetT& P, int first, int last, + const ParticleSetT& source, int iat_src, GradMatrix& gradphi) { - if (hasIonDerivs()) - throw std::logic_error("Bug!! " + getClassName() + - "::evaluateGradSource " - "must be overloaded when the SPOSet has ion derivatives."); + if (hasIonDerivs()) + throw std::logic_error("Bug!! 
" + getClassName() + + "::evaluateGradSource " + "must be overloaded when the SPOSet has ion derivatives."); } -template -void SPOSetT::evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, - GradMatrix& grad_lapl_phi) +template +void +SPOSetT::evaluateGradSource(const ParticleSetT& P, int first, int last, + const ParticleSetT& source, int iat_src, GradMatrix& grad_phi, + HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi) { - if (hasIonDerivs()) - throw std::logic_error("Bug!! " + getClassName() + - "::evaluateGradSource " - "must be overloaded when the SPOSet has ion derivatives."); + if (hasIonDerivs()) + throw std::logic_error("Bug!! " + getClassName() + + "::evaluateGradSource " + "must be overloaded when the SPOSet has ion derivatives."); } -template -void SPOSetT::evaluateGradSourceRow(const ParticleSet& P, - int iel, - const ParticleSet& source, - int iat_src, - GradVector& gradphi) +template +void +SPOSetT::evaluateGradSourceRow(const ParticleSetT& P, int iel, + const ParticleSetT& source, int iat_src, GradVector& gradphi) { - if (hasIonDerivs()) - throw std::logic_error("Bug!! " + getClassName() + - "::evaluateGradSourceRow " - "must be overloaded when the SPOSet has ion derivatives."); + if (hasIonDerivs()) + throw std::logic_error("Bug!! 
" + getClassName() + + "::evaluateGradSourceRow " + "must be overloaded when the SPOSet has ion derivatives."); } -template -void SPOSetT::evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi) +template +void +SPOSetT::evaluate_spin( + const ParticleSetT& P, int iat, ValueVector& psi, ValueVector& dpsi) { - throw std::runtime_error("Need specialization of " + getClassName() + - "::evaluate_spin(P,iat,psi,dpsi) (vector quantities)\n"); + throw std::runtime_error("Need specialization of " + getClassName() + + "::evaluate_spin(P,iat,psi,dpsi) (vector quantities)\n"); } // Class concrete types from ValueType diff --git a/src/QMCWaveFunctions/SPOSetT.h b/src/QMCWaveFunctions/SPOSetT.h index 6e12c3e929..f3fd993c5c 100644 --- a/src/QMCWaveFunctions/SPOSetT.h +++ b/src/QMCWaveFunctions/SPOSetT.h @@ -1,581 +1,647 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. // -// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign -// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory -// Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Ying Wai Li, yingwaili@ornl.gov, Oak Ridge National Laboratory -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// William F. 
Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory +// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at +// Urbana-Champaign +// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore +// National Laboratory Raymond Clay III, +// j.k.rofling@gmail.com, Lawrence Livermore National +// Laboratory Jeremy McMinnis, jmcminis@gmail.com, University +// of Illinois at Urbana-Champaign Jaron T. Krogel, +// krogeljt@ornl.gov, Oak Ridge National Laboratory Jeongnim +// Kim, jeongnim.kim@gmail.com, University of Illinois at +// Urbana-Champaign Ying Wai Li, yingwaili@ornl.gov, Oak +// Ridge National Laboratory Mark A. Berrill, +// berrillma@ornl.gov, Oak Ridge National Laboratory William +// F. Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory // -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// - #ifndef QMCPLUSPLUS_SPOSETT_H #define QMCPLUSPLUS_SPOSETT_H +#include "DualAllocatorAliases.hpp" +#include "OMPTarget/OffloadAlignedAllocators.hpp" #include "OhmmsPETE/OhmmsArray.h" -#include "Particle/ParticleSet.h" -#include "Particle/VirtualParticleSet.h" +#include "OptimizableObjectT.h" +#include "Particle/ParticleSetT.h" +#include "Particle/VirtualParticleSetT.h" #include "QMCWaveFunctions/OrbitalSetTraits.h" -#include "OptimizableObject.h" -#include "OMPTarget/OffloadAlignedAllocators.hpp" -#include "DualAllocatorAliases.hpp" namespace qmcplusplus { class ResourceCollection; -template +template class SPOSetT; namespace testing { -opt_variables_type& getMyVars(SPOSetT& spo); -opt_variables_type& getMyVars(SPOSetT& spo); -opt_variables_type& getMyVars(SPOSetT>& spo); -opt_variables_type& getMyVars(SPOSetT>& spo); +OptVariablesType& +getMyVars(SPOSetT& spo); +OptVariablesType& +getMyVars(SPOSetT& spo); 
+OptVariablesType>& +getMyVars(SPOSetT>& spo); +OptVariablesType>& +getMyVars(SPOSetT>& spo); } // namespace testing - /** base class for Single-particle orbital sets * * SPOSet stands for S(ingle)P(article)O(rbital)Set which contains - * a number of single-particle orbitals with capabilities of evaluating \f$ \psi_j({\bf r}_i)\f$ + * a number of single-particle orbitals with capabilities of evaluating \f$ + * \psi_j({\bf r}_i)\f$ */ -template +template class SPOSetT : public QMCTraits { public: - using ValueVector = typename OrbitalSetTraits::ValueVector; - using ValueMatrix = typename OrbitalSetTraits::ValueMatrix; - using GradVector = typename OrbitalSetTraits::GradVector; - using GradMatrix = typename OrbitalSetTraits::GradMatrix; - using GradType = TinyVector; - using HessVector = typename OrbitalSetTraits::HessVector; - using HessMatrix = typename OrbitalSetTraits::HessMatrix; - using GGGVector = typename OrbitalSetTraits::GradHessVector; - using GGGMatrix = typename OrbitalSetTraits::GradHessMatrix; - using SPOMap = std::map>>; - using OffloadMWVGLArray = Array>; // [VGL, walker, Orbs] - using OffloadMWVArray = Array>; // [walker, Orbs] - using PosType = typename OrbitalSetTraits::PosType; - using RealType = typename OrbitalSetTraits::RealType; - using ValueType = typename OrbitalSetTraits::ValueType; - using FullRealType = typename OrbitalSetTraits::RealType; - template - using OffloadMatrix = Matrix>; - - /** constructor */ - SPOSetT(const std::string& my_name); - - /** destructor - * - * Derived class destructor needs to pay extra attention to freeing memory shared among clones of SPOSet. 
- */ - virtual ~SPOSetT() = default; - - /** return the size of the orbital set - * Ye: this needs to be replaced by getOrbitalSetSize(); - */ - inline int size() const { return OrbitalSetSize; } - - /** print basic SPOSet information - */ - void basic_report(const std::string& pad = "") const; - - /** print SPOSet information - */ - virtual void report(const std::string& pad = "") const { basic_report(pad); } - - - /** return the size of the orbitals - */ - inline int getOrbitalSetSize() const { return OrbitalSetSize; } - - /// Query if this SPOSet is optimizable - virtual bool isOptimizable() const { return false; } - - /** extract underlying OptimizableObject references - * @param opt_obj_refs aggregated list of optimizable object references - */ - virtual void extractOptimizableObjectRefs(UniqueOptObjRefs& opt_obj_refs); - - /** check out variational optimizable variables - * @param active a super set of optimizable variables - */ - virtual void checkOutVariables(const opt_variables_type& active); - - /// Query if this SPOSet uses OpenMP offload - virtual bool isOMPoffload() const { return false; } - - /** Query if this SPOSet has an explicit ion dependence. returns true if it does. - */ - virtual bool hasIonDerivs() const { return false; } - - /// check a few key parameters before putting the SPO into a determinant - virtual void checkObject() const {} - - /// return true if this SPOSet can be wrappered by RotatedSPO - virtual bool isRotationSupported() const { return false; } - /// store parameters before getting destroyed by rotation. 
- virtual void storeParamsBeforeRotation() {} - /// apply rotation to all the orbitals - virtual void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy = false); - - /// Parameter derivatives of the wavefunction and the Laplacian of the wavefunction - virtual void evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const int& FirstIndex, - const int& LastIndex); - - /// Parameter derivatives of the wavefunction - virtual void evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - int FirstIndex, - int LastIndex); - - /** Evaluate the derivative of the optimized orbitals with respect to the parameters - * this is used only for MSD, to be refined for better serving both single and multi SD - */ - virtual void evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const T& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const GradMatrix& grads_up, - const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, - const ValueMatrix& B_lapl, - const std::vector& detData_up, - const size_t N1, - const size_t N2, - const size_t NP1, - const size_t NP2, - const std::vector>& lookup_tbl); - - /** Evaluate the derivative of the optimized orbitals with respect to the parameters - * this is used only for MSD, to be refined for better serving both single and multi SD - */ - virtual void evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - const ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const 
ValueVector& detValues_up, - const ValueVector& detValues_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl); - - /** set the OrbitalSetSize - * @param norbs number of single-particle orbitals - * Ye: I prefer to remove this interface in the future. SPOSet builders need to handle the size correctly. - * It doesn't make sense allowing to set the value at any place in the code. - * @TODO make it purely virtual - */ - virtual void setOrbitalSetSize(int norbs){}; - - /** evaluate the values of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - * @TODO make it purely virtual - */ - virtual void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi){}; - - /** evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP - * @param VP virtual particle set - * @param psi values of the SPO, used as a scratch space if needed - * @param psiinv the row of inverse slater matrix corresponding to the particle moved virtually - * @param ratios return determinant ratios - */ - virtual void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios); - - - /// Determinant ratios and parameter derivatives of the wavefunction for virtual moves - virtual void evaluateDerivRatios(const VirtualParticleSet& VP, - const opt_variables_type& optvars, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios, - Matrix& dratios, - int FirstIndex, - int LastIndex); - - - /** evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP, of multiple walkers - * @param spo_list the list of SPOSet pointers in a walker batch - * @param vp_list a list of virtual particle sets in a walker batch - * @param psi_list a list of values of the SPO, used as a scratch space if 
needed - * @param invRow_ptr_list a list of pointers to the rows of inverse slater matrix corresponding to the particles moved virtually - * @param ratios_list a list of returning determinant ratios - */ - virtual void mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const; - - /** evaluate the values, gradients and laplacians of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - * @param dpsi gradients of the SPO - * @param d2psi laplacians of the SPO - * @TODO make this purely virtual - */ - virtual void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi){}; - - /** evaluate the values, gradients and laplacians and spin gradient of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - * @param dpsi gradients of the SPO - * @param d2psi laplacians of the SPO - * @param dspin spin gradients of the SPO - */ - virtual void evaluateVGL_spin(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi, - ValueVector& dspin); - - /** evaluate the values this single-particle orbital sets of multiple walkers - * @param spo_list the list of SPOSet pointers in a walker batch - * @param P_list the list of ParticleSet pointers in a walker batch - * @param iat active particle - * @param psi_v_list the list of value vector pointers in a walker batch - */ - virtual void mw_evaluateValue(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list) const; - - /** evaluate the values, gradients and laplacians of this single-particle orbital sets of multiple walkers - * @param spo_list the list of SPOSet pointers in a walker batch - * @param P_list the 
list of ParticleSet pointers in a walker batch - * @param iat active particle - * @param psi_v_list the list of value vector pointers in a walker batch - * @param dpsi_v_list the list of gradient vector pointers in a walker batch - * @param d2psi_v_list the list of laplacian vector pointers in a walker batch - */ - virtual void mw_evaluateVGL(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const; - - /** evaluate the values, gradients and laplacians and spin gradient of this single-particle orbital sets of multiple walkers - * @param spo_list the list of SPOSet pointers in a walker batch - * @param P_list the list of ParticleSet pointers in a walker batch - * @param iat active particle - * @param psi_v_list the list of value vector pointers in a walker batch - * @param dpsi_v_list the list of gradient vector pointers in a walker batch - * @param d2psi_v_list the list of laplacian vector pointers in a walker batch - * @param mw_dspin is a dual matrix of spin gradients [nw][norb] - * Note that the device side of mw_dspin is up to date - */ - virtual void mw_evaluateVGLWithSpin(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list, - OffloadMatrix& mw_dspin) const; - - /** evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio - * and grads of multiple walkers. 
Device data of phi_vgl_v must be up-to-date upon return - * @param spo_list the list of SPOSet pointers in a walker batch - * @param P_list the list of ParticleSet pointers in a walker batch - * @param iat active particle - * @param phi_vgl_v orbital values, gradients and laplacians of all the walkers - * @param psi_ratio_grads_v determinant ratio and grads of all the walkers - */ - virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const; - - /** evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio - * and grads of multiple walkers. Device data of phi_vgl_v must be up-to-date upon return. - * Includes spin gradients - * @param spo_list the list of SPOSet pointers in a walker batch - * @param P_list the list of ParticleSet pointers in a walker batch - * @param iat active particle - * @param phi_vgl_v orbital values, gradients and laplacians of all the walkers - * @param ratios, ratios of all walkers - * @param grads, spatial gradients of all walkers - * @param spingrads, spin gradients of all walkers - */ - virtual void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads, - std::vector& spingrads) const; - - /** evaluate the values, gradients and hessians of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - * @param dpsi gradients of the SPO - * @param grad_grad_psi hessians of the SPO - */ - virtual void evaluateVGH(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi); - - /** evaluate the values, gradients, 
hessians, and grad hessians of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - * @param dpsi gradients of the SPO - * @param grad_grad_psi hessians of the SPO - * @param grad_grad_grad_psi grad hessians of the SPO - */ - virtual void evaluateVGHGH(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi); - - /** evaluate the values of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - */ - virtual void evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi); - - /** evaluate the third derivatives of this single-particle orbital set - * @param P current ParticleSet - * @param first first particle - * @param last last particle - * @param grad_grad_grad_logdet third derivatives of the SPO - */ - virtual void evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& grad_grad_grad_logdet); - - /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles - * @param[in] P current ParticleSet - * @param[in] first starting index of the particles - * @param[in] last ending index of the particles - * @param[out] logdet determinant matrix to be inverted - * @param[out] dlogdet gradients - * @param[out] d2logdet laplacians - * @TODO make this pure virtual - */ - virtual void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet){}; - - /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles, including the spin gradient - * @param P current ParticleSet - * @param first starting index of the particles - * @param last ending index of the particles - * @param logdet determinant matrix to be inverted - * @param dlogdet 
gradients - * @param d2logdet laplacians - * @param dspinlogdet, spin gradients - * - * default implementation will abort for all SPOSets except SpinorSet - * - */ - virtual void evaluate_notranspose_spin(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet, - ValueMatrix& dspinlogdet); - - virtual void mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int first, - int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const; - - /** evaluate the values, gradients and hessians of this single-particle orbital for [first,last) particles - * @param P current ParticleSet - * @param first starting index of the particles - * @param last ending index of the particles - * @param logdet determinant matrix to be inverted - * @param dlogdet gradients - * @param grad_grad_logdet hessians - * - */ - virtual void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet); - - /** evaluate the values, gradients, hessians and third derivatives of this single-particle orbital for [first,last) particles - * @param P current ParticleSet - * @param first starting index of the particles - * @param last ending index of the particles - * @param logdet determinant matrix to be inverted - * @param dlogdet gradients - * @param grad_grad_logdet hessians - * @param grad_grad_grad_logdet third derivatives - * - */ - virtual void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet); - - /** evaluate the gradients of this single-particle orbital - * for [first,last) target particles with respect to the given source particle - * @param P current ParticleSet - * @param first starting index of the particles - * @param 
last ending index of the particles - * @param iat_src source particle index - * @param gradphi gradients - * - */ - virtual void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi); - - /** evaluate the gradients of values, gradients, laplacians of this single-particle orbital - * for [first,last) target particles with respect to the given source particle - * @param P current ParticleSet - * @param first starting index of the particles - * @param last ending index of the particles - * @param iat_src source particle index - * @param gradphi gradients of values - * @param grad_grad_phi gradients of gradients - * @param grad_lapl_phi gradients of laplacians - * - */ - virtual void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, - GradMatrix& grad_lapl_phi); - - /** @brief Returns a row of d/dR_iat phi_j(r) evaluated at position r. - * - * @param[in] P particle set. - * @param[in] iel The electron at which to evaluate phi(r_iel) - * @param[in] source ion particle set. - * @param[in] iat_src ion ID w.r.t. which to take derivative. - * @param[in,out] gradphi Vector of d/dR_iat phi_j(r). 
- * @return Void - */ - virtual void evaluateGradSourceRow(const ParticleSet& P, - int iel, - const ParticleSet& source, - int iat_src, - GradVector& gradphi); - - /** access the k point related to the given orbital */ - virtual PosType get_k(int orb) { return PosType(); } - - /** initialize a shared resource and hand it to collection - */ - virtual void createResource(ResourceCollection& collection) const {} - - /** acquire a shared resource from collection - */ - virtual void acquireResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const {} - - /** return a shared resource to collection - */ - virtual void releaseResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const {} - - /** make a clone of itself - * every derived class must implement this to have threading working correctly. - */ - [[noreturn]] virtual std::unique_ptr> makeClone() const; - - /** Used only by cusp correction in AOS LCAO. - * Ye: the SoA LCAO moves all this responsibility to the builder. - * This interface should be removed with AoS. - */ - virtual bool transformSPOSet() { return true; } - - /** finalize the construction of SPOSet - * - * for example, classes serving accelerators may need to transfer data from host to device - * after the host side objects are built. 
- */ - virtual void finalizeConstruction() {} - - /// return object name - const std::string& getName() const { return my_name_; } - - /// @TODO make this purely virutal return class name - virtual std::string getClassName() const { return ""; }; + using ValueVector = typename OrbitalSetTraits::ValueVector; + using ValueMatrix = typename OrbitalSetTraits::ValueMatrix; + using GradVector = typename OrbitalSetTraits::GradVector; + using GradMatrix = typename OrbitalSetTraits::GradMatrix; + using GradType = TinyVector; + using HessVector = typename OrbitalSetTraits::HessVector; + using HessMatrix = typename OrbitalSetTraits::HessMatrix; + using GGGVector = typename OrbitalSetTraits::GradHessVector; + using GGGMatrix = typename OrbitalSetTraits::GradHessMatrix; + using SPOMap = + std::map>>; + using OffloadMWVGLArray = + Array>; // [VGL, walker, Orbs] + using OffloadMWVArray = + Array>; // [walker, Orbs] + using PosType = typename OrbitalSetTraits::PosType; + using RealType = typename OrbitalSetTraits::RealType; + using ValueType = typename OrbitalSetTraits::ValueType; + using FullRealType = typename OrbitalSetTraits::RealType; + template + using OffloadMatrix = Matrix>; + + /** constructor */ + SPOSetT(const std::string& my_name); + + /** destructor + * + * Derived class destructor needs to pay extra attention to freeing memory + * shared among clones of SPOSet. 
+ */ + virtual ~SPOSetT() = default; + + /** return the size of the orbital set + * Ye: this needs to be replaced by getOrbitalSetSize(); + */ + inline int + size() const + { + return OrbitalSetSize; + } + + /** print basic SPOSet information + */ + void + basic_report(const std::string& pad = "") const; + + /** print SPOSet information + */ + virtual void + report(const std::string& pad = "") const + { + basic_report(pad); + } + + /** return the size of the orbitals + */ + inline int + getOrbitalSetSize() const + { + return OrbitalSetSize; + } + + /// Query if this SPOSet is optimizable + virtual bool + isOptimizable() const + { + return false; + } + + /** extract underlying OptimizableObject references + * @param opt_obj_refs aggregated list of optimizable object references + */ + virtual void + extractOptimizableObjectRefs(UniqueOptObjRefsT& opt_obj_refs); + + /** check out variational optimizable variables + * @param active a superset of optimizable variables + */ + virtual void + checkOutVariables(const OptVariablesType& active); + + /// Query if this SPOSet uses OpenMP offload + virtual bool + isOMPoffload() const + { + return false; + } + + /** Query if this SPOSet has an explicit ion dependence. returns true if it + * does. + */ + virtual bool + hasIonDerivs() const + { + return false; + } + + /// check a few key parameters before putting the SPO into a determinant + virtual void + checkObject() const + { + } + + /// return true if this SPOSet can be wrapped by RotatedSPO + virtual bool + isRotationSupported() const + { + return false; + } + /// store parameters before getting destroyed by rotation. 
+ virtual void + storeParamsBeforeRotation() + { + } + /// apply rotation to all the orbitals + virtual void + applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy = false); + + /// Parameter derivatives of the wavefunction and the Laplacian of the + /// wavefunction + virtual void + evaluateDerivatives(ParticleSetT& P, const OptVariablesType& optvars, + Vector& dlogpsi, Vector& dhpsioverpsi, const int& FirstIndex, + const int& LastIndex); + + /// Parameter derivatives of the wavefunction + virtual void + evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesType& optvars, Vector& dlogpsi, int FirstIndex, + int LastIndex); + + /** Evaluate the derivative of the optimized orbitals with respect to the + * parameters this is used only for MSD, to be refined for better serving + * both single and multi SD + */ + virtual void + evaluateDerivatives(ParticleSetT& P, const OptVariablesType& optvars, + Vector& dlogpsi, Vector& dhpsioverpsi, const T& psiCurrent, + const std::vector& Coeff, const std::vector& C2node_up, + const std::vector& C2node_dn, const ValueVector& detValues_up, + const ValueVector& detValues_dn, const GradMatrix& grads_up, + const GradMatrix& grads_dn, const ValueMatrix& lapls_up, + const ValueMatrix& lapls_dn, const ValueMatrix& M_up, + const ValueMatrix& M_dn, const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, const GradMatrix& B_grad, + const ValueMatrix& B_lapl, const std::vector& detData_up, + const size_t N1, const size_t N2, const size_t NP1, const size_t NP2, + const std::vector>& lookup_tbl); + + /** Evaluate the derivative of the optimized orbitals with respect to the + * parameters this is used only for MSD, to be refined for better serving + * both single and multi SD + */ + virtual void + evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesType& optvars, Vector& dlogpsi, + const ValueType& psiCurrent, const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, const ValueVector& 
detValues_up, + const ValueVector& detValues_dn, const ValueMatrix& M_up, + const ValueMatrix& M_dn, const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, const std::vector& detData_up, + const std::vector>& lookup_tbl); + + /** set the OrbitalSetSize + * @param norbs number of single-particle orbitals + * Ye: I prefer to remove this interface in the future. SPOSet builders need + * to handle the size correctly. It doesn't make sense allowing to set the + * value at any place in the code. + * @TODO make it purely virtual + */ + virtual void + setOrbitalSetSize(int norbs){}; + + /** evaluate the values of this single-particle orbital set + * @param P current ParticleSet + * @param iat active particle + * @param psi values of the SPO + * @TODO make it purely virtual + */ + virtual void + evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi){}; + + /** evaluate determinant ratios for virtual moves, e.g., sphere move for + * nonlocalPP + * @param VP virtual particle set + * @param psi values of the SPO, used as a scratch space if needed + * @param psiinv the row of inverse slater matrix corresponding to the + * particle moved virtually + * @param ratios return determinant ratios + */ + virtual void + evaluateDetRatios(const VirtualParticleSetT& VP, ValueVector& psi, + const ValueVector& psiinv, std::vector& ratios); + + /// Determinant ratios and parameter derivatives of the wavefunction for + /// virtual moves + virtual void + evaluateDerivRatios(const VirtualParticleSetT& VP, + const OptVariablesType& optvars, ValueVector& psi, + const ValueVector& psiinv, std::vector& ratios, Matrix& dratios, + int FirstIndex, int LastIndex); + + /** evaluate determinant ratios for virtual moves, e.g., sphere move for + * nonlocalPP, of multiple walkers + * @param spo_list the list of SPOSet pointers in a walker batch + * @param vp_list a list of virtual particle sets in a walker batch + * @param psi_list a list of values of the SPO, used as a scratch space if + * 
needed + * @param invRow_ptr_list a list of pointers to the rows of inverse slater + * matrix corresponding to the particles moved virtually + * @param ratios_list a list of returning determinant ratios + */ + virtual void + mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const; + + /** evaluate the values, gradients and laplacians of this single-particle + * orbital set + * @param P current ParticleSet + * @param iat active particle + * @param psi values of the SPO + * @param dpsi gradients of the SPO + * @param d2psi laplacians of the SPO + * @TODO make this purely virtual + */ + virtual void + evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, + GradVector& dpsi, ValueVector& d2psi){}; + + /** evaluate the values, gradients and laplacians and spin gradient of this + * single-particle orbital set + * @param P current ParticleSet + * @param iat active particle + * @param psi values of the SPO + * @param dpsi gradients of the SPO + * @param d2psi laplacians of the SPO + * @param dspin spin gradients of the SPO + */ + virtual void + evaluateVGL_spin(const ParticleSetT& P, int iat, ValueVector& psi, + GradVector& dpsi, ValueVector& d2psi, ValueVector& dspin); + + /** evaluate the values this single-particle orbital sets of multiple + * walkers + * @param spo_list the list of SPOSet pointers in a walker batch + * @param P_list the list of ParticleSet pointers in a walker batch + * @param iat active particle + * @param psi_v_list the list of value vector pointers in a walker batch + */ + virtual void + mw_evaluateValue(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const RefVector& psi_v_list) const; + + /** evaluate the values, gradients and laplacians of this single-particle + * orbital sets of multiple walkers + * @param spo_list the list of SPOSet pointers in a walker 
batch + * @param P_list the list of ParticleSet pointers in a walker batch + * @param iat active particle + * @param psi_v_list the list of value vector pointers in a walker batch + * @param dpsi_v_list the list of gradient vector pointers in a walker batch + * @param d2psi_v_list the list of laplacian vector pointers in a walker + * batch + */ + virtual void + mw_evaluateVGL(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const; + + /** evaluate the values, gradients and laplacians and spin gradient of this + * single-particle orbital sets of multiple walkers + * @param spo_list the list of SPOSet pointers in a walker batch + * @param P_list the list of ParticleSet pointers in a walker batch + * @param iat active particle + * @param psi_v_list the list of value vector pointers in a walker batch + * @param dpsi_v_list the list of gradient vector pointers in a walker batch + * @param d2psi_v_list the list of laplacian vector pointers in a walker + * batch + * @param mw_dspin is a dual matrix of spin gradients [nw][norb] + * Note that the device side of mw_dspin is up to date + */ + virtual void + mw_evaluateVGLWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list, + OffloadMatrix& mw_dspin) const; + + /** evaluate the values, gradients and laplacians of this single-particle + * orbital sets and determinant ratio and grads of multiple walkers. 
Device + * data of phi_vgl_v must be up-to-date upon return + * @param spo_list the list of SPOSet pointers in a walker batch + * @param P_list the list of ParticleSet pointers in a walker batch + * @param iat active particle + * @param phi_vgl_v orbital values, gradients and laplacians of all the + * walkers + * @param psi_ratio_grads_v determinant ratio and grads of all the walkers + */ + virtual void + mw_evaluateVGLandDetRatioGrads( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, + std::vector& grads) const; + + /** evaluate the values, gradients and laplacians of this single-particle + * orbital sets and determinant ratio and grads of multiple walkers. Device + * data of phi_vgl_v must be up-to-date upon return. Includes spin gradients + * @param spo_list the list of SPOSet pointers in a walker batch + * @param P_list the list of ParticleSet pointers in a walker batch + * @param iat active particle + * @param phi_vgl_v orbital values, gradients and laplacians of all the + * walkers + * @param ratios, ratios of all walkers + * @param grads, spatial gradients of all walkers + * @param spingrads, spin gradients of all walkers + */ + virtual void + mw_evaluateVGLandDetRatioGradsWithSpin( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, + std::vector& grads, std::vector& spingrads) const; + + /** evaluate the values, gradients and hessians of this single-particle + * orbital set + * @param P current ParticleSet + * @param iat active particle + * @param psi values of the SPO + * @param dpsi gradients of the SPO + * @param grad_grad_psi hessians of the SPO + */ + virtual void + evaluateVGH(const ParticleSetT& P, int iat, ValueVector& psi, + GradVector& dpsi, HessVector& grad_grad_psi); + + /** evaluate the 
values, gradients, hessians, and grad hessians of this + * single-particle orbital set + * @param P current ParticleSet + * @param iat active particle + * @param psi values of the SPO + * @param dpsi gradients of the SPO + * @param grad_grad_psi hessians of the SPO + * @param grad_grad_grad_psi grad hessians of the SPO + */ + virtual void + evaluateVGHGH(const ParticleSetT& P, int iat, ValueVector& psi, + GradVector& dpsi, HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi); + + /** evaluate the values of this single-particle orbital set + * @param P current ParticleSet + * @param iat active particle + * @param psi values of the SPO + */ + virtual void + evaluate_spin( + const ParticleSetT& P, int iat, ValueVector& psi, ValueVector& dpsi); + + /** evaluate the third derivatives of this single-particle orbital set + * @param P current ParticleSet + * @param first first particle + * @param last last particle + * @param grad_grad_grad_logdet third derivatives of the SPO + */ + virtual void + evaluateThirdDeriv(const ParticleSetT& P, int first, int last, + GGGMatrix& grad_grad_grad_logdet); + + /** evaluate the values, gradients and laplacians of this single-particle + * orbital for [first,last) particles + * @param[in] P current ParticleSet + * @param[in] first starting index of the particles + * @param[in] last ending index of the particles + * @param[out] logdet determinant matrix to be inverted + * @param[out] dlogdet gradients + * @param[out] d2logdet laplacians + * @TODO make this pure virtual + */ + virtual void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet){}; + + /** evaluate the values, gradients and laplacians of this single-particle + * orbital for [first,last) particles, including the spin gradient + * @param P current ParticleSet + * @param first starting index of the particles + * @param last ending index of the particles + * @param logdet determinant matrix to 
be inverted + * @param dlogdet gradients + * @param d2logdet laplacians + * @param dspinlogdet, spin gradients + * + * default implementation will abort for all SPOSets except SpinorSet + * + */ + virtual void + evaluate_notranspose_spin(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet, + ValueMatrix& dspinlogdet); + + virtual void + mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int first, int last, + const RefVector& logdet_list, + const RefVector& dlogdet_list, + const RefVector& d2logdet_list) const; + + /** evaluate the values, gradients and hessians of this single-particle + * orbital for [first,last) particles + * @param P current ParticleSet + * @param first starting index of the particles + * @param last ending index of the particles + * @param logdet determinant matrix to be inverted + * @param dlogdet gradients + * @param grad_grad_logdet hessians + * + */ + virtual void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet); + + /** evaluate the values, gradients, hessians and third derivatives of this + * single-particle orbital for [first,last) particles + * @param P current ParticleSet + * @param first starting index of the particles + * @param last ending index of the particles + * @param logdet determinant matrix to be inverted + * @param dlogdet gradients + * @param grad_grad_logdet hessians + * @param grad_grad_grad_logdet third derivatives + * + */ + virtual void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet, + GGGMatrix& grad_grad_grad_logdet); + + /** evaluate the gradients of this single-particle orbital + * for [first,last) target particles with respect to the given source + * particle + * @param P current ParticleSet + * @param first starting index of 
the particles + * @param last ending index of the particles + * @param iat_src source particle index + * @param gradphi gradients + * + */ + virtual void + evaluateGradSource(const ParticleSetT& P, int first, int last, + const ParticleSetT& source, int iat_src, GradMatrix& gradphi); + + /** evaluate the gradients of values, gradients, laplacians of this + * single-particle orbital for [first,last) target particles with respect to + * the given source particle + * @param P current ParticleSet + * @param first starting index of the particles + * @param last ending index of the particles + * @param iat_src source particle index + * @param gradphi gradients of values + * @param grad_grad_phi gradients of gradients + * @param grad_lapl_phi gradients of laplacians + * + */ + virtual void + evaluateGradSource(const ParticleSetT& P, int first, int last, + const ParticleSetT& source, int iat_src, GradMatrix& grad_phi, + HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi); + + /** @brief Returns a row of d/dR_iat phi_j(r) evaluated at position r. + * + * @param[in] P particle set. + * @param[in] iel The electron at which to evaluate phi(r_iel) + * @param[in] source ion particle set. + * @param[in] iat_src ion ID w.r.t. which to take derivative. + * @param[in,out] gradphi Vector of d/dR_iat phi_j(r). 
+ * @return Void + */ + virtual void + evaluateGradSourceRow(const ParticleSetT& P, int iel, + const ParticleSetT& source, int iat_src, GradVector& gradphi); + + /** access the k point related to the given orbital */ + virtual PosType + get_k(int orb) + { + return PosType(); + } + + /** initialize a shared resource and hand it to collection + */ + virtual void + createResource(ResourceCollection& collection) const + { + } + + /** acquire a shared resource from collection + */ + virtual void + acquireResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const + { + } + + /** return a shared resource to collection + */ + virtual void + releaseResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const + { + } + + /** make a clone of itself + * every derived class must implement this to have threading working + * correctly. + */ + [[noreturn]] virtual std::unique_ptr> + makeClone() const; + + /** Used only by cusp correction in AOS LCAO. + * Ye: the SoA LCAO moves all this responsibility to the builder. + * This interface should be removed with AoS. + */ + virtual bool + transformSPOSet() + { + return true; + } + + /** finalize the construction of SPOSet + * + * for example, classes serving accelerators may need to transfer data from + * host to device after the host side objects are built. 
+ */ + virtual void + finalizeConstruction() + { + } + + /// return object name + const std::string& + getName() const + { + return my_name_; + } + + /// @TODO make this purely virtual; returns the class name + virtual std::string + getClassName() const + { + return ""; + }; protected: - /// name of the object, unique identifier - const std::string my_name_; - ///number of Single-particle orbitals - IndexType OrbitalSetSize; - /// Optimizable variables - opt_variables_type myVars; - - friend opt_variables_type& testing::getMyVars(SPOSetT& spo); - friend opt_variables_type& testing::getMyVars(SPOSetT& spo); - friend opt_variables_type& testing::getMyVars(SPOSetT>& spo); - friend opt_variables_type& testing::getMyVars(SPOSetT>& spo); + /// name of the object, unique identifier + const std::string my_name_; + /// number of Single-particle orbitals + IndexType OrbitalSetSize; + /// Optimizable variables + OptVariablesType myVars; + + friend OptVariablesType& + testing::getMyVars(SPOSetT& spo); + friend OptVariablesType& + testing::getMyVars(SPOSetT& spo); + friend OptVariablesType>& + testing::getMyVars(SPOSetT>& spo); + friend OptVariablesType>& + testing::getMyVars(SPOSetT>& spo); }; -template +template using SPOSetTPtr = SPOSetT*; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/SpinorSetT.cpp b/src/QMCWaveFunctions/SpinorSetT.cpp index 64d7d3d6b1..1090397ad1 100644 --- a/src/QMCWaveFunctions/SpinorSetT.cpp +++ b/src/QMCWaveFunctions/SpinorSetT.cpp @@ -1,583 +1,621 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. 
// // Copyright (c) 2022 QMCPACK developers // -// File developed by: Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories -// Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories +// File developed by: Raymond Clay III, rclay@sandia.gov, Sandia National +// Laboratories +// Cody A. Melton, cmelton@sandia.gov, Sandia National +// Laboratories // -// File created by: Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories +// File created by: Raymond Clay III, rclay@sandia.gov, Sandia National +// Laboratories ////////////////////////////////////////////////////////////////////////////////////// #include "SpinorSetT.h" -#include "Utilities/ResourceCollection.h" + #include "Platforms/OMPTarget/OMPTargetMath.hpp" +#include "Utilities/ResourceCollection.h" namespace qmcplusplus { -template +template struct SpinorSetT::SpinorSetMultiWalkerResource : public Resource { - SpinorSetMultiWalkerResource() : Resource("SpinorSet") {} - SpinorSetMultiWalkerResource(const SpinorSetMultiWalkerResource&) : SpinorSetMultiWalkerResource() {} - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - OffloadMWVGLArray up_phi_vgl_v, dn_phi_vgl_v; - std::vector up_ratios, dn_ratios; - std::vector up_grads, dn_grads; - std::vector spins; + SpinorSetMultiWalkerResource() : Resource("SpinorSet") + { + } + SpinorSetMultiWalkerResource(const SpinorSetMultiWalkerResource&) : + SpinorSetMultiWalkerResource() + { + } + std::unique_ptr + makeClone() const override + { + return std::make_unique(*this); + } + OffloadMWVGLArray up_phi_vgl_v, dn_phi_vgl_v; + std::vector up_ratios, dn_ratios; + std::vector up_grads, dn_grads; + std::vector spins; }; -template -SpinorSetT::SpinorSetT(const std::string& my_name) : SPOSetT(my_name), spo_up(nullptr), spo_dn(nullptr) -{} +template +SpinorSetT::SpinorSetT(const std::string& my_name) : + SPOSetT(my_name), + spo_up(nullptr), + spo_dn(nullptr) +{ +} -template +template SpinorSetT::~SpinorSetT() = 
default; -template -void SpinorSetT::set_spos(std::unique_ptr>&& up, std::unique_ptr>&& dn) +template +void +SpinorSetT::set_spos( + std::unique_ptr>&& up, std::unique_ptr>&& dn) { - //Sanity check for input SPO's. They need to be the same size or - IndexType spo_size_up = up->getOrbitalSetSize(); - IndexType spo_size_down = dn->getOrbitalSetSize(); + // Sanity check for input SPOs: they must be the same size or we throw. + IndexType spo_size_up = up->getOrbitalSetSize(); + IndexType spo_size_down = dn->getOrbitalSetSize(); - if (spo_size_up != spo_size_down) - throw std::runtime_error("SpinorSet::set_spos(...): up and down SPO components have different sizes."); + if (spo_size_up != spo_size_down) + throw std::runtime_error("SpinorSet::set_spos(...): up and down SPO " + "components have different sizes."); - setOrbitalSetSize(spo_size_up); + setOrbitalSetSize(spo_size_up); - spo_up = std::move(up); - spo_dn = std::move(dn); + spo_up = std::move(up); + spo_dn = std::move(dn); - psi_work_up.resize(this->OrbitalSetSize); - psi_work_down.resize(this->OrbitalSetSize); + psi_work_up.resize(this->OrbitalSetSize); + psi_work_down.resize(this->OrbitalSetSize); - dpsi_work_up.resize(this->OrbitalSetSize); - dpsi_work_down.resize(this->OrbitalSetSize); + dpsi_work_up.resize(this->OrbitalSetSize); + dpsi_work_down.resize(this->OrbitalSetSize); - d2psi_work_up.resize(this->OrbitalSetSize); - d2psi_work_down.resize(this->OrbitalSetSize); + d2psi_work_up.resize(this->OrbitalSetSize); + d2psi_work_down.resize(this->OrbitalSetSize); } -template -void SpinorSetT::setOrbitalSetSize(int norbs) +template +void +SpinorSetT::setOrbitalSetSize(int norbs) { - this->OrbitalSetSize = norbs; + this->OrbitalSetSize = norbs; }; -template -void SpinorSetT::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) +template +void +SpinorSetT::evaluateValue( + const ParticleSetT& P, int iat, ValueVector& psi) { - psi_work_up = 0.0; - psi_work_down = 0.0; + psi_work_up = 0.0; + psi_work_down = 0.0; 
- spo_up->evaluateValue(P, iat, psi_work_up); - spo_dn->evaluateValue(P, iat, psi_work_down); + spo_up->evaluateValue(P, iat, psi_work_up); + spo_dn->evaluateValue(P, iat, psi_work_down); - ParticleSet::Scalar_t s = P.activeSpin(iat); + typename ParticleSetT::Scalar_t s = P.activeSpin(iat); - RealType coss(0.0), sins(0.0); + RealType coss(0.0), sins(0.0); - coss = std::cos(s); - sins = std::sin(s); + coss = std::cos(s); + sins = std::sin(s); - //This is only supported in the complex build, so T is some complex number depending on the precision. - T eis(coss, sins); - T emis(coss, -sins); + // This is only supported in the complex build, so T is some complex number + // depending on the precision. + T eis(coss, sins); + T emis(coss, -sins); - psi = eis * psi_work_up + emis * psi_work_down; + psi = eis * psi_work_up + emis * psi_work_down; } -template -void SpinorSetT::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +template +void +SpinorSetT::evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, + GradVector& dpsi, ValueVector& d2psi) { - psi_work_up = 0.0; - psi_work_down = 0.0; - dpsi_work_up = 0.0; - dpsi_work_down = 0.0; - d2psi_work_up = 0.0; - d2psi_work_down = 0.0; + psi_work_up = 0.0; + psi_work_down = 0.0; + dpsi_work_up = 0.0; + dpsi_work_down = 0.0; + d2psi_work_up = 0.0; + d2psi_work_down = 0.0; - spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up); - spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down); + spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up); + spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down); - ParticleSet::Scalar_t s = P.activeSpin(iat); + typename ParticleSetT::Scalar_t s = P.activeSpin(iat); - RealType coss(0.0), sins(0.0); + RealType coss(0.0), sins(0.0); - coss = std::cos(s); - sins = std::sin(s); + coss = std::cos(s); + sins = std::sin(s); - T eis(coss, sins); - T emis(coss, -sins); + T 
eis(coss, sins); + T emis(coss, -sins); - psi = eis * psi_work_up + emis * psi_work_down; - dpsi = eis * dpsi_work_up + emis * dpsi_work_down; - d2psi = eis * d2psi_work_up + emis * d2psi_work_down; + psi = eis * psi_work_up + emis * psi_work_down; + dpsi = eis * dpsi_work_up + emis * dpsi_work_down; + d2psi = eis * d2psi_work_up + emis * d2psi_work_down; } -template -void SpinorSetT::evaluateVGL_spin(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi, - ValueVector& dspin) +template +void +SpinorSetT::evaluateVGL_spin(const ParticleSetT& P, int iat, + ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, ValueVector& dspin) { - psi_work_up = 0.0; - psi_work_down = 0.0; - dpsi_work_up = 0.0; - dpsi_work_down = 0.0; - d2psi_work_up = 0.0; - d2psi_work_down = 0.0; - - spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up); - spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down); + psi_work_up = 0.0; + psi_work_down = 0.0; + dpsi_work_up = 0.0; + dpsi_work_down = 0.0; + d2psi_work_up = 0.0; + d2psi_work_down = 0.0; - ParticleSet::Scalar_t s = P.activeSpin(iat); + spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up); + spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down); - RealType coss(0.0), sins(0.0); + typename ParticleSetT::Scalar_t s = P.activeSpin(iat); - coss = std::cos(s); - sins = std::sin(s); - - T eis(coss, sins); - T emis(coss, -sins); - T eye(0, 1.0); - - psi = eis * psi_work_up + emis * psi_work_down; - dpsi = eis * dpsi_work_up + emis * dpsi_work_down; - d2psi = eis * d2psi_work_up + emis * d2psi_work_down; - dspin = eye * (eis * psi_work_up - emis * psi_work_down); -} + RealType coss(0.0), sins(0.0); -template -void SpinorSetT::mw_evaluateVGLWithSpin(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list, - 
OffloadMatrix& mw_dspin) const -{ - auto& spo_leader = spo_list.template getCastedLeader>(); - auto& P_leader = P_list.getLeader(); - assert(this == &spo_leader); - - IndexType nw = spo_list.size(); - auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); - auto& up_spo_leader = up_spo_list.getLeader(); - auto& dn_spo_leader = dn_spo_list.getLeader(); - - RefVector up_psi_v_list, dn_psi_v_list; - RefVector up_dpsi_v_list, dn_dpsi_v_list; - RefVector up_d2psi_v_list, dn_d2psi_v_list; - for (int iw = 0; iw < nw; iw++) - { - auto& spo = spo_list.template getCastedElement>(iw); - up_psi_v_list.push_back(spo.psi_work_up); - dn_psi_v_list.push_back(spo.psi_work_down); - up_dpsi_v_list.push_back(spo.dpsi_work_up); - dn_dpsi_v_list.push_back(spo.dpsi_work_down); - up_d2psi_v_list.push_back(spo.d2psi_work_up); - dn_d2psi_v_list.push_back(spo.d2psi_work_down); - } - - up_spo_leader.mw_evaluateVGL(up_spo_list, P_list, iat, up_psi_v_list, up_dpsi_v_list, up_d2psi_v_list); - dn_spo_leader.mw_evaluateVGL(dn_spo_list, P_list, iat, dn_psi_v_list, dn_dpsi_v_list, dn_d2psi_v_list); - - for (int iw = 0; iw < nw; iw++) - { - ParticleSet::Scalar_t s = P_list[iw].activeSpin(iat); - RealType coss = std::cos(s); - RealType sins = std::sin(s); + coss = std::cos(s); + sins = std::sin(s); T eis(coss, sins); T emis(coss, -sins); T eye(0, 1.0); - psi_v_list[iw].get() = eis * up_psi_v_list[iw].get() + emis * dn_psi_v_list[iw].get(); - dpsi_v_list[iw].get() = eis * up_dpsi_v_list[iw].get() + emis * dn_dpsi_v_list[iw].get(); - d2psi_v_list[iw].get() = eis * up_d2psi_v_list[iw].get() + emis * dn_d2psi_v_list[iw].get(); - for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++) - mw_dspin(iw, iorb) = eye * (eis * (up_psi_v_list[iw].get())[iorb] - emis * (dn_psi_v_list[iw].get())[iorb]); - } - //Data above is all on host, but since mw_dspin is DualMatrix we need to sync the host and device - mw_dspin.updateTo(); + psi = eis * psi_work_up + emis * psi_work_down; + dpsi = eis * 
dpsi_work_up + emis * dpsi_work_down; + d2psi = eis * d2psi_work_up + emis * d2psi_work_down; + dspin = eye * (eis * psi_work_up - emis * psi_work_down); } -template -void SpinorSetT::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads, - std::vector& spingrads) const +template +void +SpinorSetT::mw_evaluateVGLWithSpin( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list, + OffloadMatrix& mw_dspin) const { - auto& spo_leader = spo_list.template getCastedLeader>(); - auto& P_leader = P_list.getLeader(); - assert(this == &spo_leader); - assert(phi_vgl_v.size(0) == DIM_VGL); - assert(phi_vgl_v.size(1) == spo_list.size()); - const size_t nw = spo_list.size(); - const size_t norb_requested = phi_vgl_v.size(2); - - auto& mw_res = spo_leader.mw_res_handle_.getResource(); - auto& up_phi_vgl_v = mw_res.up_phi_vgl_v; - auto& dn_phi_vgl_v = mw_res.dn_phi_vgl_v; - auto& up_ratios = mw_res.up_ratios; - auto& dn_ratios = mw_res.dn_ratios; - auto& up_grads = mw_res.up_grads; - auto& dn_grads = mw_res.dn_grads; - auto& spins = mw_res.spins; - - up_phi_vgl_v.resize(QMCTraits::DIM_VGL, nw, norb_requested); - dn_phi_vgl_v.resize(QMCTraits::DIM_VGL, nw, norb_requested); - up_ratios.resize(nw); - dn_ratios.resize(nw); - up_grads.resize(nw); - dn_grads.resize(nw); - spins.resize(nw); - - auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); - auto& up_spo_leader = up_spo_list.getLeader(); - auto& dn_spo_leader = dn_spo_list.getLeader(); - - up_spo_leader.mw_evaluateVGLandDetRatioGrads(up_spo_list, P_list, iat, invRow_ptr_list, up_phi_vgl_v, up_ratios, - up_grads); - dn_spo_leader.mw_evaluateVGLandDetRatioGrads(dn_spo_list, P_list, iat, invRow_ptr_list, 
dn_phi_vgl_v, dn_ratios, - dn_grads); - for (int iw = 0; iw < nw; iw++) - { - ParticleSet::Scalar_t s = P_list[iw].activeSpin(iat); - spins[iw] = s; - RealType coss = std::cos(s); - RealType sins = std::sin(s); - - T eis(coss, sins); - T emis(coss, -sins); - T eye(0, 1.0); + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& P_leader = P_list.getLeader(); + assert(this == &spo_leader); + + IndexType nw = spo_list.size(); + auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); + auto& up_spo_leader = up_spo_list.getLeader(); + auto& dn_spo_leader = dn_spo_list.getLeader(); + + RefVector up_psi_v_list, dn_psi_v_list; + RefVector up_dpsi_v_list, dn_dpsi_v_list; + RefVector up_d2psi_v_list, dn_d2psi_v_list; + for (int iw = 0; iw < nw; iw++) { + auto& spo = spo_list.template getCastedElement>(iw); + up_psi_v_list.push_back(spo.psi_work_up); + dn_psi_v_list.push_back(spo.psi_work_down); + up_dpsi_v_list.push_back(spo.dpsi_work_up); + dn_dpsi_v_list.push_back(spo.dpsi_work_down); + up_d2psi_v_list.push_back(spo.d2psi_work_up); + dn_d2psi_v_list.push_back(spo.d2psi_work_down); + } - ratios[iw] = eis * up_ratios[iw] + emis * dn_ratios[iw]; - grads[iw] = (eis * up_grads[iw] * up_ratios[iw] + emis * dn_grads[iw] * dn_ratios[iw]) / ratios[iw]; - spingrads[iw] = eye * (eis * up_ratios[iw] - emis * dn_ratios[iw]) / ratios[iw]; - } - - auto* spins_ptr = spins.data(); - //This data lives on the device - auto* phi_vgl_ptr = phi_vgl_v.data(); - auto* up_phi_vgl_ptr = up_phi_vgl_v.data(); - auto* dn_phi_vgl_ptr = dn_phi_vgl_v.data(); - PRAGMA_OFFLOAD("omp target teams distribute map(to:spins_ptr[0:nw])") - for (int iw = 0; iw < nw; iw++) - { - RealType c, s; - omptarget::sincos(spins_ptr[iw], &s, &c); - T eis(c, s), emis(c, -s); - PRAGMA_OFFLOAD("omp parallel for collapse(2)") - for (int idim = 0; idim < QMCTraits::DIM_VGL; idim++) - for (int iorb = 0; iorb < norb_requested; iorb++) - { - auto offset = idim * nw * norb_requested + iw * norb_requested 
+ iorb; - phi_vgl_ptr[offset] = eis * up_phi_vgl_ptr[offset] + emis * dn_phi_vgl_ptr[offset]; - } - } + up_spo_leader.mw_evaluateVGL(up_spo_list, P_list, iat, up_psi_v_list, + up_dpsi_v_list, up_d2psi_v_list); + dn_spo_leader.mw_evaluateVGL(dn_spo_list, P_list, iat, dn_psi_v_list, + dn_dpsi_v_list, dn_d2psi_v_list); + + for (int iw = 0; iw < nw; iw++) { + typename ParticleSetT::Scalar_t s = P_list[iw].activeSpin(iat); + RealType coss = std::cos(s); + RealType sins = std::sin(s); + + T eis(coss, sins); + T emis(coss, -sins); + T eye(0, 1.0); + + psi_v_list[iw].get() = + eis * up_psi_v_list[iw].get() + emis * dn_psi_v_list[iw].get(); + dpsi_v_list[iw].get() = + eis * up_dpsi_v_list[iw].get() + emis * dn_dpsi_v_list[iw].get(); + d2psi_v_list[iw].get() = + eis * up_d2psi_v_list[iw].get() + emis * dn_d2psi_v_list[iw].get(); + for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++) + mw_dspin(iw, iorb) = eye * + (eis * (up_psi_v_list[iw].get())[iorb] - + emis * (dn_psi_v_list[iw].get())[iorb]); + } + // Data above is all on host, but since mw_dspin is DualMatrix we need to + // sync the host and device + mw_dspin.updateTo(); } -template -void SpinorSetT::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) +template +void +SpinorSetT::mw_evaluateVGLandDetRatioGradsWithSpin( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const std::vector& invRow_ptr_list, OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, std::vector& grads, + std::vector& spingrads) const { - IndexType nelec = P.getTotalNum(); + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& P_leader = P_list.getLeader(); + assert(this == &spo_leader); + assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL); + assert(phi_vgl_v.size(1) == spo_list.size()); + const size_t nw = spo_list.size(); + const size_t norb_requested = phi_vgl_v.size(2); + + auto& mw_res = 
spo_leader.mw_res_handle_.getResource(); + auto& up_phi_vgl_v = mw_res.up_phi_vgl_v; + auto& dn_phi_vgl_v = mw_res.dn_phi_vgl_v; + auto& up_ratios = mw_res.up_ratios; + auto& dn_ratios = mw_res.dn_ratios; + auto& up_grads = mw_res.up_grads; + auto& dn_grads = mw_res.dn_grads; + auto& spins = mw_res.spins; + + up_phi_vgl_v.resize(QMCTraits::DIM_VGL, nw, norb_requested); + dn_phi_vgl_v.resize(QMCTraits::DIM_VGL, nw, norb_requested); + up_ratios.resize(nw); + dn_ratios.resize(nw); + up_grads.resize(nw); + dn_grads.resize(nw); + spins.resize(nw); + + auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); + auto& up_spo_leader = up_spo_list.getLeader(); + auto& dn_spo_leader = dn_spo_list.getLeader(); + + up_spo_leader.mw_evaluateVGLandDetRatioGrads(up_spo_list, P_list, iat, + invRow_ptr_list, up_phi_vgl_v, up_ratios, up_grads); + dn_spo_leader.mw_evaluateVGLandDetRatioGrads(dn_spo_list, P_list, iat, + invRow_ptr_list, dn_phi_vgl_v, dn_ratios, dn_grads); + for (int iw = 0; iw < nw; iw++) { + typename ParticleSetT::Scalar_t s = P_list[iw].activeSpin(iat); + spins[iw] = s; + RealType coss = std::cos(s); + RealType sins = std::sin(s); + + T eis(coss, sins); + T emis(coss, -sins); + T eye(0, 1.0); + + ratios[iw] = eis * up_ratios[iw] + emis * dn_ratios[iw]; + grads[iw] = (eis * up_grads[iw] * up_ratios[iw] + + emis * dn_grads[iw] * dn_ratios[iw]) / + ratios[iw]; + spingrads[iw] = + eye * (eis * up_ratios[iw] - emis * dn_ratios[iw]) / ratios[iw]; + } - logpsi_work_up.resize(nelec, this->OrbitalSetSize); - logpsi_work_down.resize(nelec, this->OrbitalSetSize); + auto* spins_ptr = spins.data(); + // This data lives on the device + auto* phi_vgl_ptr = phi_vgl_v.data(); + auto* up_phi_vgl_ptr = up_phi_vgl_v.data(); + auto* dn_phi_vgl_ptr = dn_phi_vgl_v.data(); + PRAGMA_OFFLOAD("omp target teams distribute map(to:spins_ptr[0:nw])") + for (int iw = 0; iw < nw; iw++) { + RealType c, s; + omptarget::sincos(spins_ptr[iw], &s, &c); + T eis(c, s), emis(c, -s); + 
PRAGMA_OFFLOAD("omp parallel for collapse(2)") + for (int idim = 0; idim < QMCTraits::DIM_VGL; idim++) + for (int iorb = 0; iorb < norb_requested; iorb++) { + auto offset = + idim * nw * norb_requested + iw * norb_requested + iorb; + phi_vgl_ptr[offset] = eis * up_phi_vgl_ptr[offset] + + emis * dn_phi_vgl_ptr[offset]; + } + } +} - dlogpsi_work_up.resize(nelec, this->OrbitalSetSize); - dlogpsi_work_down.resize(nelec, this->OrbitalSetSize); +template +void +SpinorSetT::evaluate_notranspose(const ParticleSetT& P, int first, + int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) +{ + IndexType nelec = P.getTotalNum(); - d2logpsi_work_up.resize(nelec, this->OrbitalSetSize); - d2logpsi_work_down.resize(nelec, this->OrbitalSetSize); + logpsi_work_up.resize(nelec, this->OrbitalSetSize); + logpsi_work_down.resize(nelec, this->OrbitalSetSize); - spo_up->evaluate_notranspose(P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up); - spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down, dlogpsi_work_down, d2logpsi_work_down); + dlogpsi_work_up.resize(nelec, this->OrbitalSetSize); + dlogpsi_work_down.resize(nelec, this->OrbitalSetSize); + d2logpsi_work_up.resize(nelec, this->OrbitalSetSize); + d2logpsi_work_down.resize(nelec, this->OrbitalSetSize); - for (int iat = 0; iat < nelec; iat++) - { - ParticleSet::Scalar_t s = P.activeSpin(iat); + spo_up->evaluate_notranspose( + P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up); + spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down, + dlogpsi_work_down, d2logpsi_work_down); - RealType coss(0.0), sins(0.0); + for (int iat = 0; iat < nelec; iat++) { + typename ParticleSetT::Scalar_t s = P.activeSpin(iat); - coss = std::cos(s); - sins = std::sin(s); + RealType coss(0.0), sins(0.0); - T eis(coss, sins); - T emis(coss, -sins); + coss = std::cos(s); + sins = std::sin(s); - for (int no = 0; no < this->OrbitalSetSize; no++) - { - logdet(iat, no) = eis * logpsi_work_up(iat, 
no) + emis * logpsi_work_down(iat, no); - dlogdet(iat, no) = eis * dlogpsi_work_up(iat, no) + emis * dlogpsi_work_down(iat, no); - d2logdet(iat, no) = eis * d2logpsi_work_up(iat, no) + emis * d2logpsi_work_down(iat, no); + T eis(coss, sins); + T emis(coss, -sins); + + for (int no = 0; no < this->OrbitalSetSize; no++) { + logdet(iat, no) = eis * logpsi_work_up(iat, no) + + emis * logpsi_work_down(iat, no); + dlogdet(iat, no) = eis * dlogpsi_work_up(iat, no) + + emis * dlogpsi_work_down(iat, no); + d2logdet(iat, no) = eis * d2logpsi_work_up(iat, no) + + emis * d2logpsi_work_down(iat, no); + } } - } } -template -void SpinorSetT::mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int first, - int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const +template +void +SpinorSetT::mw_evaluate_notranspose( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int first, int last, + const RefVector& logdet_list, + const RefVector& dlogdet_list, + const RefVector& d2logdet_list) const { - auto& spo_leader = spo_list.template getCastedLeader>(); - auto& P_leader = P_list.getLeader(); - assert(this == &spo_leader); - - IndexType nw = spo_list.size(); - IndexType nelec = P_leader.getTotalNum(); - - auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); - auto& up_spo_leader = up_spo_list.getLeader(); - auto& dn_spo_leader = dn_spo_list.getLeader(); - - std::vector mw_up_logdet, mw_dn_logdet; - std::vector mw_up_dlogdet, mw_dn_dlogdet; - std::vector mw_up_d2logdet, mw_dn_d2logdet; - mw_up_logdet.reserve(nw); - mw_dn_logdet.reserve(nw); - mw_up_dlogdet.reserve(nw); - mw_dn_dlogdet.reserve(nw); - mw_up_d2logdet.reserve(nw); - mw_dn_d2logdet.reserve(nw); - - RefVector up_logdet_list, dn_logdet_list; - RefVector up_dlogdet_list, dn_dlogdet_list; - RefVector up_d2logdet_list, dn_d2logdet_list; - up_logdet_list.reserve(nw); - 
dn_logdet_list.reserve(nw); - up_dlogdet_list.reserve(nw); - dn_dlogdet_list.reserve(nw); - up_d2logdet_list.reserve(nw); - dn_d2logdet_list.reserve(nw); - - ValueMatrix tmp_val_mat(nelec, this->OrbitalSetSize); - GradMatrix tmp_grad_mat(nelec, this->OrbitalSetSize); - for (int iw = 0; iw < nw; iw++) - { - mw_up_logdet.emplace_back(tmp_val_mat); - up_logdet_list.emplace_back(mw_up_logdet.back()); - mw_dn_logdet.emplace_back(tmp_val_mat); - dn_logdet_list.emplace_back(mw_dn_logdet.back()); - - mw_up_dlogdet.emplace_back(tmp_grad_mat); - up_dlogdet_list.emplace_back(mw_up_dlogdet.back()); - mw_dn_dlogdet.emplace_back(tmp_grad_mat); - dn_dlogdet_list.emplace_back(mw_dn_dlogdet.back()); - - mw_up_d2logdet.emplace_back(tmp_val_mat); - up_d2logdet_list.emplace_back(mw_up_d2logdet.back()); - mw_dn_d2logdet.emplace_back(tmp_val_mat); - dn_d2logdet_list.emplace_back(mw_dn_d2logdet.back()); - } - - up_spo_leader.mw_evaluate_notranspose(up_spo_list, P_list, first, last, up_logdet_list, up_dlogdet_list, - up_d2logdet_list); - dn_spo_leader.mw_evaluate_notranspose(dn_spo_list, P_list, first, last, dn_logdet_list, dn_dlogdet_list, - dn_d2logdet_list); - - for (int iw = 0; iw < nw; iw++) - for (int iat = 0; iat < nelec; iat++) - { - ParticleSet::Scalar_t s = P_list[iw].activeSpin(iat); - RealType coss = std::cos(s); - RealType sins = std::sin(s); - T eis(coss, sins); - T emis(coss, -sins); - - for (int no = 0; no < this->OrbitalSetSize; no++) - { - logdet_list[iw].get()(iat, no) = - eis * up_logdet_list[iw].get()(iat, no) + emis * dn_logdet_list[iw].get()(iat, no); - dlogdet_list[iw].get()(iat, no) = - eis * up_dlogdet_list[iw].get()(iat, no) + emis * dn_dlogdet_list[iw].get()(iat, no); - d2logdet_list[iw].get()(iat, no) = - eis * up_d2logdet_list[iw].get()(iat, no) + emis * dn_d2logdet_list[iw].get()(iat, no); - } + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& P_leader = P_list.getLeader(); + assert(this == &spo_leader); + + IndexType nw = spo_list.size(); + 
IndexType nelec = P_leader.getTotalNum(); + + auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); + auto& up_spo_leader = up_spo_list.getLeader(); + auto& dn_spo_leader = dn_spo_list.getLeader(); + + std::vector mw_up_logdet, mw_dn_logdet; + std::vector mw_up_dlogdet, mw_dn_dlogdet; + std::vector mw_up_d2logdet, mw_dn_d2logdet; + mw_up_logdet.reserve(nw); + mw_dn_logdet.reserve(nw); + mw_up_dlogdet.reserve(nw); + mw_dn_dlogdet.reserve(nw); + mw_up_d2logdet.reserve(nw); + mw_dn_d2logdet.reserve(nw); + + RefVector up_logdet_list, dn_logdet_list; + RefVector up_dlogdet_list, dn_dlogdet_list; + RefVector up_d2logdet_list, dn_d2logdet_list; + up_logdet_list.reserve(nw); + dn_logdet_list.reserve(nw); + up_dlogdet_list.reserve(nw); + dn_dlogdet_list.reserve(nw); + up_d2logdet_list.reserve(nw); + dn_d2logdet_list.reserve(nw); + + ValueMatrix tmp_val_mat(nelec, this->OrbitalSetSize); + GradMatrix tmp_grad_mat(nelec, this->OrbitalSetSize); + for (int iw = 0; iw < nw; iw++) { + mw_up_logdet.emplace_back(tmp_val_mat); + up_logdet_list.emplace_back(mw_up_logdet.back()); + mw_dn_logdet.emplace_back(tmp_val_mat); + dn_logdet_list.emplace_back(mw_dn_logdet.back()); + + mw_up_dlogdet.emplace_back(tmp_grad_mat); + up_dlogdet_list.emplace_back(mw_up_dlogdet.back()); + mw_dn_dlogdet.emplace_back(tmp_grad_mat); + dn_dlogdet_list.emplace_back(mw_dn_dlogdet.back()); + + mw_up_d2logdet.emplace_back(tmp_val_mat); + up_d2logdet_list.emplace_back(mw_up_d2logdet.back()); + mw_dn_d2logdet.emplace_back(tmp_val_mat); + dn_d2logdet_list.emplace_back(mw_dn_d2logdet.back()); } + + up_spo_leader.mw_evaluate_notranspose(up_spo_list, P_list, first, last, + up_logdet_list, up_dlogdet_list, up_d2logdet_list); + dn_spo_leader.mw_evaluate_notranspose(dn_spo_list, P_list, first, last, + dn_logdet_list, dn_dlogdet_list, dn_d2logdet_list); + + for (int iw = 0; iw < nw; iw++) + for (int iat = 0; iat < nelec; iat++) { + typename ParticleSetT::Scalar_t s = P_list[iw].activeSpin(iat); + 
RealType coss = std::cos(s); + RealType sins = std::sin(s); + T eis(coss, sins); + T emis(coss, -sins); + + for (int no = 0; no < this->OrbitalSetSize; no++) { + logdet_list[iw].get()(iat, no) = + eis * up_logdet_list[iw].get()(iat, no) + + emis * dn_logdet_list[iw].get()(iat, no); + dlogdet_list[iw].get()(iat, no) = + eis * up_dlogdet_list[iw].get()(iat, no) + + emis * dn_dlogdet_list[iw].get()(iat, no); + d2logdet_list[iw].get()(iat, no) = + eis * up_d2logdet_list[iw].get()(iat, no) + + emis * dn_d2logdet_list[iw].get()(iat, no); + } + } } -template -void SpinorSetT::evaluate_notranspose_spin(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet, - ValueMatrix& dspinlogdet) +template +void +SpinorSetT::evaluate_notranspose_spin(const ParticleSetT& P, int first, + int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet, + ValueMatrix& dspinlogdet) { - IndexType nelec = P.getTotalNum(); - - logpsi_work_up.resize(nelec, this->OrbitalSetSize); - logpsi_work_down.resize(nelec, this->OrbitalSetSize); + IndexType nelec = P.getTotalNum(); - dlogpsi_work_up.resize(nelec, this->OrbitalSetSize); - dlogpsi_work_down.resize(nelec, this->OrbitalSetSize); + logpsi_work_up.resize(nelec, this->OrbitalSetSize); + logpsi_work_down.resize(nelec, this->OrbitalSetSize); - d2logpsi_work_up.resize(nelec, this->OrbitalSetSize); - d2logpsi_work_down.resize(nelec, this->OrbitalSetSize); + dlogpsi_work_up.resize(nelec, this->OrbitalSetSize); + dlogpsi_work_down.resize(nelec, this->OrbitalSetSize); - spo_up->evaluate_notranspose(P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up); - spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down, dlogpsi_work_down, d2logpsi_work_down); + d2logpsi_work_up.resize(nelec, this->OrbitalSetSize); + d2logpsi_work_down.resize(nelec, this->OrbitalSetSize); + spo_up->evaluate_notranspose( + P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up); + 
spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down, + dlogpsi_work_down, d2logpsi_work_down); - for (int iat = 0; iat < nelec; iat++) - { - ParticleSet::Scalar_t s = P.activeSpin(iat); + for (int iat = 0; iat < nelec; iat++) { + typename ParticleSetT::Scalar_t s = P.activeSpin(iat); - RealType coss(0.0), sins(0.0); + RealType coss(0.0), sins(0.0); - coss = std::cos(s); - sins = std::sin(s); + coss = std::cos(s); + sins = std::sin(s); - T eis(coss, sins); - T emis(coss, -sins); - T eye(0, 1.0); + T eis(coss, sins); + T emis(coss, -sins); + T eye(0, 1.0); - for (int no = 0; no < this->OrbitalSetSize; no++) - { - logdet(iat, no) = eis * logpsi_work_up(iat, no) + emis * logpsi_work_down(iat, no); - dlogdet(iat, no) = eis * dlogpsi_work_up(iat, no) + emis * dlogpsi_work_down(iat, no); - d2logdet(iat, no) = eis * d2logpsi_work_up(iat, no) + emis * d2logpsi_work_down(iat, no); - dspinlogdet(iat, no) = eye * (eis * logpsi_work_up(iat, no) - emis * logpsi_work_down(iat, no)); + for (int no = 0; no < this->OrbitalSetSize; no++) { + logdet(iat, no) = eis * logpsi_work_up(iat, no) + + emis * logpsi_work_down(iat, no); + dlogdet(iat, no) = eis * dlogpsi_work_up(iat, no) + + emis * dlogpsi_work_down(iat, no); + d2logdet(iat, no) = eis * d2logpsi_work_up(iat, no) + + emis * d2logpsi_work_down(iat, no); + dspinlogdet(iat, no) = eye * + (eis * logpsi_work_up(iat, no) - + emis * logpsi_work_down(iat, no)); + } } - } } -template -void SpinorSetT::evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi) +template +void +SpinorSetT::evaluate_spin( + const ParticleSetT& P, int iat, ValueVector& psi, ValueVector& dpsi) { - psi_work_up = 0.0; - psi_work_down = 0.0; + psi_work_up = 0.0; + psi_work_down = 0.0; - spo_up->evaluateValue(P, iat, psi_work_up); - spo_dn->evaluateValue(P, iat, psi_work_down); + spo_up->evaluateValue(P, iat, psi_work_up); + spo_dn->evaluateValue(P, iat, psi_work_down); - ParticleSet::Scalar_t s = P.activeSpin(iat); + typename 
ParticleSetT::Scalar_t s = P.activeSpin(iat); - RealType coss(0.0), sins(0.0); + RealType coss(0.0), sins(0.0); - coss = std::cos(s); - sins = std::sin(s); + coss = std::cos(s); + sins = std::sin(s); - T eis(coss, sins); - T emis(coss, -sins); - T eye(0, 1.0); + T eis(coss, sins); + T emis(coss, -sins); + T eye(0, 1.0); - psi = eis * psi_work_up + emis * psi_work_down; - dpsi = eye * (eis * psi_work_up - emis * psi_work_down); + psi = eis * psi_work_up + emis * psi_work_down; + dpsi = eye * (eis * psi_work_up - emis * psi_work_down); } -template -void SpinorSetT::evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi) +template +void +SpinorSetT::evaluateGradSource(const ParticleSetT& P, int first, int last, + const ParticleSetT& source, int iat_src, GradMatrix& gradphi) { - IndexType nelec = P.getTotalNum(); - - GradMatrix gradphi_up(nelec, this->OrbitalSetSize); - GradMatrix gradphi_dn(nelec, this->OrbitalSetSize); - spo_up->evaluateGradSource(P, first, last, source, iat_src, gradphi_up); - spo_dn->evaluateGradSource(P, first, last, source, iat_src, gradphi_dn); - - for (int iat = 0; iat < nelec; iat++) - { - ParticleSet::Scalar_t s = P.activeSpin(iat); - RealType coss = std::cos(s); - RealType sins = std::sin(s); - T eis(coss, sins); - T emis(coss, -sins); - for (int imo = 0; imo < this->OrbitalSetSize; imo++) - gradphi(iat, imo) = gradphi_up(iat, imo) * eis + gradphi_dn(iat, imo) * emis; - } + IndexType nelec = P.getTotalNum(); + + GradMatrix gradphi_up(nelec, this->OrbitalSetSize); + GradMatrix gradphi_dn(nelec, this->OrbitalSetSize); + spo_up->evaluateGradSource(P, first, last, source, iat_src, gradphi_up); + spo_dn->evaluateGradSource(P, first, last, source, iat_src, gradphi_dn); + + for (int iat = 0; iat < nelec; iat++) { + typename ParticleSetT::Scalar_t s = P.activeSpin(iat); + RealType coss = std::cos(s); + RealType sins = std::sin(s); + T eis(coss, sins); + T emis(coss, -sins); 
+ for (int imo = 0; imo < this->OrbitalSetSize; imo++) + gradphi(iat, imo) = + gradphi_up(iat, imo) * eis + gradphi_dn(iat, imo) * emis; + } } -template -std::unique_ptr> SpinorSetT::makeClone() const +template +std::unique_ptr> +SpinorSetT::makeClone() const { - auto myclone = std::make_unique>(this->my_name_); - std::unique_ptr> cloneup(spo_up->makeClone()); - std::unique_ptr> clonedn(spo_dn->makeClone()); - myclone->set_spos(std::move(cloneup), std::move(clonedn)); - return myclone; + auto myclone = std::make_unique>(this->my_name_); + std::unique_ptr> cloneup(spo_up->makeClone()); + std::unique_ptr> clonedn(spo_dn->makeClone()); + myclone->set_spos(std::move(cloneup), std::move(clonedn)); + return myclone; } -template -void SpinorSetT::createResource(ResourceCollection& collection) const +template +void +SpinorSetT::createResource(ResourceCollection& collection) const { - spo_up->createResource(collection); - spo_dn->createResource(collection); - auto index = collection.addResource(std::make_unique()); + spo_up->createResource(collection); + spo_dn->createResource(collection); + auto index = collection.addResource( + std::make_unique()); } -template -void SpinorSetT::acquireResource(ResourceCollection& collection, - const RefVectorWithLeader>& spo_list) const +template +void +SpinorSetT::acquireResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const { - auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); - auto& spo_leader = spo_list.template getCastedLeader>(); - auto& up_spo_leader = up_spo_list.getLeader(); - auto& dn_spo_leader = dn_spo_list.getLeader(); - up_spo_leader.acquireResource(collection, up_spo_list); - dn_spo_leader.acquireResource(collection, dn_spo_list); - spo_leader.mw_res_handle_ = collection.lendResource(); + auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& up_spo_leader = up_spo_list.getLeader(); + 
auto& dn_spo_leader = dn_spo_list.getLeader(); + up_spo_leader.acquireResource(collection, up_spo_list); + dn_spo_leader.acquireResource(collection, dn_spo_list); + spo_leader.mw_res_handle_ = + collection.lendResource(); } -template -void SpinorSetT::releaseResource(ResourceCollection& collection, - const RefVectorWithLeader>& spo_list) const +template +void +SpinorSetT::releaseResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const { - auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); - auto& spo_leader = spo_list.template getCastedLeader>(); - auto& up_spo_leader = up_spo_list.getLeader(); - auto& dn_spo_leader = dn_spo_list.getLeader(); - up_spo_leader.releaseResource(collection, up_spo_list); - dn_spo_leader.releaseResource(collection, dn_spo_list); - collection.takebackResource(spo_leader.mw_res_handle_); + auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& up_spo_leader = up_spo_list.getLeader(); + auto& dn_spo_leader = dn_spo_list.getLeader(); + up_spo_leader.releaseResource(collection, up_spo_list); + dn_spo_leader.releaseResource(collection, dn_spo_list); + collection.takebackResource(spo_leader.mw_res_handle_); } -template -std::pair>, RefVectorWithLeader>> SpinorSetT::extractSpinComponentRefList( +template +std::pair>, RefVectorWithLeader>> +SpinorSetT::extractSpinComponentRefList( const RefVectorWithLeader>& spo_list) const { - SpinorSetT& spo_leader = spo_list.template getCastedLeader>(); - IndexType nw = spo_list.size(); - SPOSetT& up_spo_leader = *(spo_leader.spo_up); - SPOSetT& dn_spo_leader = *(spo_leader.spo_dn); - RefVectorWithLeader> up_spo_list(up_spo_leader); - RefVectorWithLeader> dn_spo_list(dn_spo_leader); - up_spo_list.reserve(nw); - dn_spo_list.reserve(nw); - for (int iw = 0; iw < nw; iw++) - { - SpinorSetT& spinor = spo_list.template getCastedElement>(iw); - 
up_spo_list.emplace_back(*(spinor.spo_up)); - dn_spo_list.emplace_back(*(spinor.spo_dn)); - } - return std::make_pair(up_spo_list, dn_spo_list); + SpinorSetT& spo_leader = + spo_list.template getCastedLeader>(); + IndexType nw = spo_list.size(); + SPOSetT& up_spo_leader = *(spo_leader.spo_up); + SPOSetT& dn_spo_leader = *(spo_leader.spo_dn); + RefVectorWithLeader> up_spo_list(up_spo_leader); + RefVectorWithLeader> dn_spo_list(dn_spo_leader); + up_spo_list.reserve(nw); + dn_spo_list.reserve(nw); + for (int iw = 0; iw < nw; iw++) { + SpinorSetT& spinor = + spo_list.template getCastedElement>(iw); + up_spo_list.emplace_back(*(spinor.spo_up)); + dn_spo_list.emplace_back(*(spinor.spo_dn)); + } + return std::make_pair(up_spo_list, dn_spo_list); } template class SpinorSetT>; diff --git a/src/QMCWaveFunctions/SpinorSetT.h b/src/QMCWaveFunctions/SpinorSetT.h index fe50a256fe..08990e350b 100644 --- a/src/QMCWaveFunctions/SpinorSetT.h +++ b/src/QMCWaveFunctions/SpinorSetT.h @@ -1,13 +1,16 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2022 QMCPACK developers // -// File developed by: Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories -// Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories +// File developed by: Raymond Clay III, rclay@sandia.gov, Sandia National +// Laboratories +// Cody A. 
Melton, cmelton@sandia.gov, Sandia National +// Laboratories // -// File created by: Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories +// File created by: Raymond Clay III, rclay@sandia.gov, Sandia National +// Laboratories ////////////////////////////////////////////////////////////////////////////////////// #ifndef QMCPLUSPLUS_SPINORSETT_H @@ -21,208 +24,244 @@ namespace qmcplusplus /** Class for Melton & Mitas style Spinors. * */ -template +template class SpinorSetT : public SPOSetT { public: - using ValueMatrix = typename SPOSetT::ValueMatrix; - using ValueVector = typename SPOSetT::ValueVector; - using GradMatrix = typename SPOSetT::GradMatrix; - using GradType = typename SPOSetT::GradType; - using GradVector = typename SPOSetT::GradVector; - using OffloadMWVGLArray = Array>; // [VGL, walker, Orbs] - //using OffloadMWVGLArray = typename SPOSetT::template OffloadMWCGLArray; - template - using OffloadMatrix = typename SPOSetT::template OffloadMatrix
; - using RealType = typename SPOSetT::RealType; - using IndexType = OHMMS_INDEXTYPE; - - /** constructor */ - SpinorSetT(const std::string& my_name); - ~SpinorSetT() override; - - std::string getClassName() const override { return "SpinorSet"; } - bool isOptimizable() const override { return spo_up->isOptimizable() || spo_dn->isOptimizable(); } - bool isOMPoffload() const override { return spo_up->isOMPoffload() || spo_dn->isOMPoffload(); } - bool hasIonDerivs() const override { return spo_up->hasIonDerivs() || spo_dn->hasIonDerivs(); } - - //This class is initialized by separately building the up and down channels of the spinor set and - //then registering them. - void set_spos(std::unique_ptr>&& up, std::unique_ptr>&& dn); - - /** set the OrbitalSetSize - * @param norbs number of single-particle orbitals - */ - void setOrbitalSetSize(int norbs) override; - - /** evaluate the values of this spinor set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - */ - void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override; - - /** evaluate the values, gradients and laplacians of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - * @param dpsi gradients of the SPO - * @param d2psi laplacians of the SPO - */ - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; - - /** evaluate the values, gradients and laplacians of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - * @param dpsi gradients of the SPO - * @param d2psi laplacians of the SPO - * @param dspin spin gradient of the SPO - */ - void evaluateVGL_spin(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi, - ValueVector& dspin) override; - - /** evaluate the values, gradients and laplacians 
and spin gradient of this single-particle orbital sets of multiple walkers - * @param spo_list the list of SPOSet pointers in a walker batch - * @param P_list the list of ParticleSet pointers in a walker batch - * @param iat active particle - * @param psi_v_list the list of value vector pointers in a walker batch - * @param dpsi_v_list the list of gradient vector pointers in a walker batch - * @param d2psi_v_list the list of laplacian vector pointers in a walker batch - * @param mw_dspin dual matrix of spin gradients. nw x num_orbitals - */ - void mw_evaluateVGLWithSpin(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list, - OffloadMatrix& mw_dspin) const override; - - - /** evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio - * and grads of multiple walkers. Device data of phi_vgl_v must be up-to-date upon return. 
- * Includes spin gradients - * @param spo_list the list of SPOSet pointers in a walker batch - * @param P_list the list of ParticleSet pointers in a walker batch - * @param iat active particle - * @param phi_vgl_v orbital values, gradients and laplacians of all the walkers - * @param ratios, ratios of all walkers - * @param grads, spatial gradients of all walkers - * @param spingrads, spin gradients of all walkers - */ - void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads, - std::vector& spingrads) const override; - - /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles - * @param P current ParticleSet - * @param first starting index of the particles - * @param last ending index of the particles - * @param logdet determinant matrix to be inverted - * @param dlogdet gradients - * @param d2logdet laplacians - * - */ - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - - void mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader& P_list, - int first, - int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const override; - - void evaluate_notranspose_spin(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet, - ValueMatrix& dspinlogdet) override; - /** Evaluate the values, spin gradients, and spin laplacians of single particle spinors corresponding to electron iat. - * @param P current particle set. - * @param iat electron index. - * @param spinor values. - * @param spin gradient values. d/ds phi(r,s). 
- * - */ - void evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi) override; - - /** evaluate the gradients of this single-particle orbital - * for [first,last) target particles with respect to the given source particle - * @param P current ParticleSet - * @param first starting index of the particles - * @param last ending index of the particles - * @param iat_src source particle index - * @param gradphi gradients - * - */ - virtual void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi) override; - - std::unique_ptr> makeClone() const override; - - void createResource(ResourceCollection& collection) const override; - - void acquireResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const override; - - void releaseResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const override; - - /// check if the multi walker resource is owned. For testing only. - bool isResourceOwned() const { return bool(mw_res_handle_); } + using ValueMatrix = typename SPOSetT::ValueMatrix; + using ValueVector = typename SPOSetT::ValueVector; + using GradMatrix = typename SPOSetT::GradMatrix; + using GradType = typename SPOSetT::GradType; + using GradVector = typename SPOSetT::GradVector; + using OffloadMWVGLArray = + Array>; // [VGL, walker, Orbs] + // using OffloadMWVGLArray = typename SPOSetT::template + // OffloadMWCGLArray; + template + using OffloadMatrix = typename SPOSetT::template OffloadMatrix
; + using RealType = typename SPOSetT::RealType; + using IndexType = OHMMS_INDEXTYPE; + + /** constructor */ + SpinorSetT(const std::string& my_name); + ~SpinorSetT() override; + + std::string + getClassName() const override + { + return "SpinorSet"; + } + bool + isOptimizable() const override + { + return spo_up->isOptimizable() || spo_dn->isOptimizable(); + } + bool + isOMPoffload() const override + { + return spo_up->isOMPoffload() || spo_dn->isOMPoffload(); + } + bool + hasIonDerivs() const override + { + return spo_up->hasIonDerivs() || spo_dn->hasIonDerivs(); + } + + // This class is initialized by separately building the up and down channels + // of the spinor set and then registering them. + void + set_spos( + std::unique_ptr>&& up, std::unique_ptr>&& dn); + + /** set the OrbitalSetSize + * @param norbs number of single-particle orbitals + */ + void + setOrbitalSetSize(int norbs) override; + + /** evaluate the values of this spinor set + * @param P current ParticleSet + * @param iat active particle + * @param psi values of the SPO + */ + void + evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override; + + /** evaluate the values, gradients and laplacians of this single-particle + * orbital set + * @param P current ParticleSet + * @param iat active particle + * @param psi values of the SPO + * @param dpsi gradients of the SPO + * @param d2psi laplacians of the SPO + */ + void + evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, + GradVector& dpsi, ValueVector& d2psi) override; + + /** evaluate the values, gradients and laplacians of this single-particle + * orbital set + * @param P current ParticleSet + * @param iat active particle + * @param psi values of the SPO + * @param dpsi gradients of the SPO + * @param d2psi laplacians of the SPO + * @param dspin spin gradient of the SPO + */ + void + evaluateVGL_spin(const ParticleSetT& P, int iat, ValueVector& psi, + GradVector& dpsi, ValueVector& d2psi, ValueVector& dspin) override; + + 
/** evaluate the values, gradients and laplacians and spin gradient of this + * single-particle orbital sets of multiple walkers + * @param spo_list the list of SPOSet pointers in a walker batch + * @param P_list the list of ParticleSet pointers in a walker batch + * @param iat active particle + * @param psi_v_list the list of value vector pointers in a walker batch + * @param dpsi_v_list the list of gradient vector pointers in a walker batch + * @param d2psi_v_list the list of laplacian vector pointers in a walker + * batch + * @param mw_dspin dual matrix of spin gradients. nw x num_orbitals + */ + void + mw_evaluateVGLWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list, + OffloadMatrix& mw_dspin) const override; + + /** evaluate the values, gradients and laplacians of this single-particle + * orbital sets and determinant ratio and grads of multiple walkers. Device + * data of phi_vgl_v must be up-to-date upon return. 
Includes spin gradients + * @param spo_list the list of SPOSet pointers in a walker batch + * @param P_list the list of ParticleSet pointers in a walker batch + * @param iat active particle + * @param phi_vgl_v orbital values, gradients and laplacians of all the + * walkers + * @param ratios, ratios of all walkers + * @param grads, spatial gradients of all walkers + * @param spingrads, spin gradients of all walkers + */ + void + mw_evaluateVGLandDetRatioGradsWithSpin( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, + std::vector& grads, std::vector& spingrads) const override; + + /** evaluate the values, gradients and laplacians of this single-particle + * orbital for [first,last) particles + * @param P current ParticleSet + * @param first starting index of the particles + * @param last ending index of the particles + * @param logdet determinant matrix to be inverted + * @param dlogdet gradients + * @param d2logdet laplacians + * + */ + void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, + ValueMatrix& d2logdet) override; + + void + mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, int first, int last, + const RefVector& logdet_list, + const RefVector& dlogdet_list, + const RefVector& d2logdet_list) const override; + + void + evaluate_notranspose_spin(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet, + ValueMatrix& dspinlogdet) override; + /** Evaluate the values, spin gradients, and spin laplacians of single + * particle spinors corresponding to electron iat. + * @param P current particle set. + * @param iat electron index. + * @param spinor values. + * @param spin gradient values. d/ds phi(r,s). 
+ * + */ + void + evaluate_spin(const ParticleSetT& P, int iat, ValueVector& psi, + ValueVector& dpsi) override; + + /** evaluate the gradients of this single-particle orbital + * for [first,last) target particles with respect to the given source + * particle + * @param P current ParticleSet + * @param first starting index of the particles + * @param last ending index of the particles + * @param iat_src source particle index + * @param gradphi gradients + * + */ + virtual void + evaluateGradSource(const ParticleSetT& P, int first, int last, + const ParticleSetT& source, int iat_src, + GradMatrix& gradphi) override; + + std::unique_ptr> + makeClone() const override; + + void + createResource(ResourceCollection& collection) const override; + + void + acquireResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const override; + + void + releaseResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const override; + + /// check if the multi walker resource is owned. For testing only. + bool + isResourceOwned() const + { + return bool(mw_res_handle_); + } private: - struct SpinorSetMultiWalkerResource; - ResourceHandle mw_res_handle_; - - std::pair>, RefVectorWithLeader>> extractSpinComponentRefList( - const RefVectorWithLeader>& spo_list) const; - - //Sposet for the up and down channels of our spinors. - std::unique_ptr> spo_up; - std::unique_ptr> spo_dn; - - //temporary arrays for holding the values of the up and down channels respectively. - ValueVector psi_work_up; - ValueVector psi_work_down; - - //temporary arrays for holding the gradients of the up and down channels respectively. - GradVector dpsi_work_up; - GradVector dpsi_work_down; - - //temporary arrays for holding the laplacians of the up and down channels respectively. - ValueVector d2psi_work_up; - ValueVector d2psi_work_down; - - //Same as above, but these are the full matrices containing all spinor/particle combinations. 
- ValueMatrix logpsi_work_up; - ValueMatrix logpsi_work_down; - - GradMatrix dlogpsi_work_up; - GradMatrix dlogpsi_work_down; - - ValueMatrix d2logpsi_work_up; - ValueMatrix d2logpsi_work_down; + struct SpinorSetMultiWalkerResource; + ResourceHandle mw_res_handle_; + + std::pair>, RefVectorWithLeader>> + extractSpinComponentRefList( + const RefVectorWithLeader>& spo_list) const; + + // Sposet for the up and down channels of our spinors. + std::unique_ptr> spo_up; + std::unique_ptr> spo_dn; + + // temporary arrays for holding the values of the up and down channels + // respectively. + ValueVector psi_work_up; + ValueVector psi_work_down; + + // temporary arrays for holding the gradients of the up and down channels + // respectively. + GradVector dpsi_work_up; + GradVector dpsi_work_down; + + // temporary arrays for holding the laplacians of the up and down channels + // respectively. + ValueVector d2psi_work_up; + ValueVector d2psi_work_down; + + // Same as above, but these are the full matrices containing all + // spinor/particle combinations. + ValueMatrix logpsi_work_up; + ValueMatrix logpsi_work_down; + + GradMatrix dlogpsi_work_up; + GradMatrix dlogpsi_work_down; + + ValueMatrix d2logpsi_work_up; + ValueMatrix d2logpsi_work_down; }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/VariableSetT.cpp b/src/QMCWaveFunctions/VariableSetT.cpp new file mode 100644 index 0000000000..064ac26a13 --- /dev/null +++ b/src/QMCWaveFunctions/VariableSetT.cpp @@ -0,0 +1,346 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of +// Illinois at Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of +// Illinois at Urbana-Champaign Mark A. 
Berrill, +// berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois +// at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#include "VariableSetT.h" + +#include "Host/sysutil.h" +#include "io/hdf/hdf_archive.h" + +#include +#include +#include +#include +#include + +using std::setw; + +namespace optimize +{ +template +void +VariableSetT::clear() +{ + num_active_vars = 0; + Index.clear(); + NameAndValue.clear(); + Recompute.clear(); + ParameterType.clear(); +} + +template +void +VariableSetT::insertFrom(const VariableSetT& input) +{ + for (int i = 0; i < input.size(); ++i) { + iterator loc = find(input.name(i)); + if (loc == NameAndValue.end()) { + Index.push_back(input.Index[i]); + NameAndValue.push_back(input.NameAndValue[i]); + ParameterType.push_back(input.ParameterType[i]); + Recompute.push_back(input.Recompute[i]); + } + else + (*loc).second = input.NameAndValue[i].second; + } + num_active_vars = input.num_active_vars; +} + +template +void +VariableSetT::insertFromSum( + const VariableSetT& input_1, const VariableSetT& input_2) +{ + value_type sum_val; + std::string vname; + + // Check that objects to be summed together have the same number of active + // variables. + if (input_1.num_active_vars != input_2.num_active_vars) + throw std::runtime_error( + "Inconsistent number of parameters in two provided " + "variable sets."); + + for (int i = 0; i < input_1.size(); ++i) { + // Check that each of the equivalent variables in both VariableSet + // objects have the same name - otherwise we certainly shouldn't be + // adding them. 
+ if (input_1.NameAndValue[i].first != input_2.NameAndValue[i].first) + throw std::runtime_error( + "Inconsistent parameters exist in the two provided " + "variable sets."); + + sum_val = + input_1.NameAndValue[i].second + input_2.NameAndValue[i].second; + + iterator loc = find(input_1.name(i)); + if (loc == NameAndValue.end()) { + Index.push_back(input_1.Index[i]); + ParameterType.push_back(input_1.ParameterType[i]); + Recompute.push_back(input_1.Recompute[i]); + + // We can reuse the above values, which aren't summed between the + // objects, but the parameter values themselves need to use the + // summed values. + vname = input_1.NameAndValue[i].first; + NameAndValue.push_back(pair_type(vname, sum_val)); + } + else + (*loc).second = sum_val; + } + num_active_vars = input_1.num_active_vars; +} + +template +void +VariableSetT::insertFromDiff( + const VariableSetT& input_1, const VariableSetT& input_2) +{ + value_type diff_val; + std::string vname; + + // Check that objects to be subtracted have the same number of active + // variables. + if (input_1.num_active_vars != input_2.num_active_vars) + throw std::runtime_error( + "Inconsistent number of parameters in two provided " + "variable sets."); + + for (int i = 0; i < input_1.size(); ++i) { + // Check that each of the equivalent variables in both VariableSet + // objects have the same name - otherwise we certainly shouldn't be + // subtracting them. 
+ if (input_1.NameAndValue[i].first != input_2.NameAndValue[i].first) + throw std::runtime_error( + "Inconsistent parameters exist in the two provided " + "variable sets."); + + diff_val = + input_1.NameAndValue[i].second - input_2.NameAndValue[i].second; + + iterator loc = find(input_1.name(i)); + if (loc == NameAndValue.end()) { + Index.push_back(input_1.Index[i]); + ParameterType.push_back(input_1.ParameterType[i]); + Recompute.push_back(input_1.Recompute[i]); + + // We can reuse the above values, which aren't subtracted between + // the objects, but the parameter values themselves need to use the + // subtracted values. + vname = input_1.NameAndValue[i].first; + NameAndValue.push_back(pair_type(vname, diff_val)); + } + else + (*loc).second = diff_val; + } + num_active_vars = input_1.num_active_vars; +} + +template +void +VariableSetT::removeInactive() +{ + std::vector valid(Index); + std::vector acopy(NameAndValue); + std::vector bcopy(Recompute), ccopy(ParameterType); + num_active_vars = 0; + Index.clear(); + NameAndValue.clear(); + Recompute.clear(); + ParameterType.clear(); + for (int i = 0; i < valid.size(); ++i) { + if (valid[i] > -1) { + Index.push_back(num_active_vars++); + NameAndValue.push_back(acopy[i]); + Recompute.push_back(bcopy[i]); + ParameterType.push_back(ccopy[i]); + } + } +} + +template +void +VariableSetT::resetIndex() +{ + num_active_vars = 0; + for (int i = 0; i < Index.size(); ++i) { + Index[i] = (Index[i] < 0) ? 
-1 : num_active_vars++; + } +} + +template +void +VariableSetT::getIndex(const VariableSetT& selected) +{ + num_active_vars = 0; + for (int i = 0; i < NameAndValue.size(); ++i) { + Index[i] = selected.getIndex(NameAndValue[i].first); + if (Index[i] >= 0) + num_active_vars++; + } +} + +template +int +VariableSetT::getIndex(const std::string& vname) const +{ + int loc = 0; + while (loc != NameAndValue.size()) { + if (NameAndValue[loc].first == vname) + return Index[loc]; + ++loc; + } + return -1; +} + +template +void +VariableSetT::setIndexDefault() +{ + for (int i = 0; i < Index.size(); ++i) + Index[i] = i; +} + +template +void +VariableSetT::print( + std::ostream& os, int leftPadSpaces, bool printHeader) const +{ + std::string pad_str = std::string(leftPadSpaces, ' '); + int max_name_len = 0; + if (NameAndValue.size() > 0) + max_name_len = std::max_element(NameAndValue.begin(), + NameAndValue.end(), [](const pair_type& e1, const pair_type& e2) { + return e1.first.length() < e2.first.length(); + })->first.length(); + + int max_value_len = 28; // 6 for the precision and 7 for minus sign, leading + // value, period, and exponent. 
+ int max_type_len = 1; + int max_recompute_len = 1; + int max_use_len = 3; + int max_index_len = 1; + if (printHeader) { + max_name_len = std::max(max_name_len, 4); // size of "Name" header + max_type_len = 4; + max_recompute_len = 9; + max_index_len = 5; + os << pad_str << setw(max_name_len) << "Name" + << " " << setw(max_value_len) << "Value" + << " " << setw(max_type_len) << "Type" + << " " << setw(max_recompute_len) << "Recompute" + << " " << setw(max_use_len) << "Use" + << " " << setw(max_index_len) << "Index" << std::endl; + os << pad_str << std::setfill('-') << setw(max_name_len) << "" + << " " << setw(max_value_len) << "" + << " " << setw(max_type_len) << "" + << " " << setw(max_recompute_len) << "" + << " " << setw(max_use_len) << "" + << " " << setw(max_index_len) << "" << std::endl; + os << std::setfill(' '); + } + + for (int i = 0; i < NameAndValue.size(); ++i) { + os << pad_str << setw(max_name_len) << NameAndValue[i].first << " " + << std::setprecision(6) << std::scientific << setw(max_value_len) + << NameAndValue[i].second << " " << setw(max_type_len) + << ParameterType[i].second << " " << setw(max_recompute_len) + << Recompute[i].second << " "; + + os << std::defaultfloat; + + if (Index[i] < 0) + os << setw(max_use_len) << "OFF" << std::endl; + else + os << setw(max_use_len) << "ON" + << " " << setw(max_index_len) << Index[i] << std::endl; + } +} + +template +void +VariableSetT::writeToHDF( + const std::string& filename, qmcplusplus::hdf_archive& hout) const +{ + hout.create(filename); + + // File Versioning + // 1.0.0 Initial file version + // 1.1.0 Files could have object-specific data from + // OptimizableObject::read/writeVariationalParameters + std::vector vp_file_version{1, 1, 0}; + hout.write(vp_file_version, "version"); + + std::string timestamp(getDateAndTime("%Y-%m-%d %H:%M:%S %Z")); + hout.write(timestamp, "timestamp"); + + hout.push("name_value_lists"); + + std::vector param_values; + std::vector param_names; + for (auto& pair_it : 
NameAndValue) { + param_names.push_back(pair_it.first); + param_values.push_back(pair_it.second); + } + + hout.write(param_names, "parameter_names"); + hout.write(param_values, "parameter_values"); + hout.pop(); +} + +template +void +VariableSetT::readFromHDF( + const std::string& filename, qmcplusplus::hdf_archive& hin) +{ + if (!hin.open(filename, H5F_ACC_RDONLY)) { + std::ostringstream err_msg; + err_msg << "Unable to open VP file: " << filename; + throw std::runtime_error(err_msg.str()); + } + + try { + hin.push("name_value_lists", false); + } + catch (std::runtime_error&) { + std::ostringstream err_msg; + err_msg << "The group name_value_lists in not present in file: " + << filename; + throw std::runtime_error(err_msg.str()); + } + + std::vector param_values; + hin.read(param_values, "parameter_values"); + + std::vector param_names; + hin.read(param_names, "parameter_names"); + + for (int i = 0; i < param_names.size(); i++) { + std::string& vp_name = param_names[i]; + // Find and set values by name. + // Values that are not present do not get added. + if (find(vp_name) != end()) + (*this)[vp_name] = param_values[i]; + } + + hin.pop(); +} + +template struct VariableSetT; +template struct VariableSetT; +template struct VariableSetT>; +template struct VariableSetT>; + +} // namespace optimize diff --git a/src/QMCWaveFunctions/VariableSetT.h b/src/QMCWaveFunctions/VariableSetT.h new file mode 100644 index 0000000000..9a0675a184 --- /dev/null +++ b/src/QMCWaveFunctions/VariableSetT.h @@ -0,0 +1,336 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. 
+// +// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + + +#ifndef QMCPLUSPLUS_OPTIMIZE_VARIABLESETT_H +#define QMCPLUSPLUS_OPTIMIZE_VARIABLESETT_H +#include "config.h" +#include +#include +#include +#include +#include "VariableSet.h" +#include "OrbitalSetTraits.h" + +namespace qmcplusplus +{ +class hdf_archive; +} + +namespace optimize +{ +/** An enum useful for determining the type of parameter is being optimized. +* knowing this in the opt routine can reduce the computational load. +*/ +// enum +// { +// OTHER_P = 0, +// LOGLINEAR_P, //B-spline Jastrows +// LOGLINEAR_K, //K space Jastrows +// LINEAR_P, //Multi-determinant coefficients +// SPO_P, //SPO set Parameters +// BACKFLOW_P //Backflow parameters +// }; + +/** class to handle a set of variables that can be modified during optimizations + * + * A serialized container of named variables. 
+ */ +template +struct VariableSetT +{ + using value_type = typename qmcplusplus::OrbitalSetTraits::ValueType; + using real_type = typename qmcplusplus::OrbitalSetTraits::RealType; + + using pair_type = std::pair; + using index_pair_type = std::pair; + using iterator = typename std::vector::iterator; + using const_iterator = typename std::vector::const_iterator; + using size_type = typename std::vector::size_type; + + ///number of active variables + int num_active_vars; + /** store locator of the named variable + * + * if(Index[i] == -1), the named variable is not active + */ + std::vector Index; + std::vector NameAndValue; + std::vector ParameterType; + std::vector Recompute; + + ///default constructor + inline VariableSetT() : num_active_vars(0) {} + ///viturval destructor for safety + virtual ~VariableSetT() = default; + /** if any of Index value is not zero, return true + */ + inline bool is_optimizable() const { return num_active_vars > 0; } + ///return the number of active variables + inline int size_of_active() const { return num_active_vars; } + ///return the first const_iterator + inline const_iterator begin() const { return NameAndValue.begin(); } + ///return the last const_iterator + inline const_iterator end() const { return NameAndValue.end(); } + ///return the first iterator + inline iterator begin() { return NameAndValue.begin(); } + ///return the last iterator + inline iterator end() { return NameAndValue.end(); } + ///return the size + inline size_type size() const { return NameAndValue.size(); } + ///return the locator of the i-th Index + inline int where(int i) const { return Index[i]; } + /** return the iterator of a named parameter + * @param vname name of a parameter + * @return the locator of vname + * + * If vname is not found among the Names, return NameAndValue.end() + * so that ::end() member function can be used to validate the iterator. 
+ */ + inline iterator find(const std::string& vname) + { + return std::find_if(NameAndValue.begin(), NameAndValue.end(), + [&vname](const auto& value) { return value.first == vname; }); + } + + /** return the Index vaule for the named parameter + * @param vname name of the variable + * + * If vname is not found in this variables, return -1; + */ + int getIndex(const std::string& vname) const; + + /* return the NameAndValue index for the named parameter + * @ param vname name of the variable + * + * Differs from getIndex by not relying on the indices cached in Index + * myVars[i] will always return the value of the parameter if it is stored + * regardless of whether or not the Index array has been correctly reset + * + * if vname is not found, return -1 + * + */ + inline int getLoc(const std::string& vname) const + { + int loc = 0; + while (loc != NameAndValue.size()) + { + if (NameAndValue[loc].first == vname) + return loc; + ++loc; + } + return -1; + } + + inline void insert(const std::string& vname, value_type v, bool enable = true, int type = OTHER_P) + { + iterator loc = find(vname); + int ind_loc = loc - NameAndValue.begin(); + if (loc == NameAndValue.end()) // && enable==true) + { + Index.push_back(ind_loc); + NameAndValue.push_back(pair_type(vname, v)); + ParameterType.push_back(index_pair_type(vname, type)); + Recompute.push_back(index_pair_type(vname, 1)); + } + //disable it if enable == false + if (!enable) + Index[ind_loc] = -1; + } + + inline void setParameterType(int type) + { + std::vector::iterator PTit(ParameterType.begin()), PTend(ParameterType.end()); + while (PTit != PTend) + { + (*PTit).second = type; + PTit++; + } + } + + inline void getParameterTypeList(std::vector& types) const + { + auto ptit(ParameterType.begin()), ptend(ParameterType.end()); + types.resize(ptend - ptit); + auto tit(types.begin()); + while (ptit != ptend) + (*tit++) = (*ptit++).second; + } + + + /** equivalent to std::map[string] operator + */ + inline value_type& 
operator[](const std::string& vname) + { + iterator loc = find(vname); + if (loc == NameAndValue.end()) + { + Index.push_back(-1); + NameAndValue.push_back(pair_type(vname, 0)); + ParameterType.push_back(index_pair_type(vname, 0)); + Recompute.push_back(index_pair_type(vname, 1)); + return NameAndValue.back().second; + } + return (*loc).second; + } + + + /** return the name of i-th variable + * @param i index + */ + const std::string& name(int i) const { return NameAndValue[i].first; } + + /** return the i-th value + * @param i index + */ + inline value_type operator[](int i) const { return NameAndValue[i].second; } + + /** assign the i-th value + * @param i index + */ + inline value_type& operator[](int i) { return NameAndValue[i].second; } + + /** get the i-th parameter's type + * @param i index + */ + inline int getType(int i) const { return ParameterType[i].second; } + + inline bool recompute(int i) const { return (Recompute[i].second == 1); } + + inline int& recompute(int i) { return Recompute[i].second; } + + inline void setComputed() + { + for (int i = 0; i < Recompute.size(); i++) + { + if (ParameterType[i].second == LOGLINEAR_P) + Recompute[i].second = 0; + else if (ParameterType[i].second == LOGLINEAR_K) + Recompute[i].second = 0; + else + Recompute[i].second = 1; + } + } + + inline void setRecompute() + { + for (int i = 0; i < Recompute.size(); i++) + Recompute[i].second = 1; + } + + /** clear the variable set + * + * Remove all the data. + */ + void clear(); + + /** insert a VariableSet to the list + * @param input variables + */ + void insertFrom(const VariableSetT& input); + + /** sum together the values of the optimizable parameter values in + * two VariableSet objects, and set this object's values to equal them. 
+ * @param first set of input variables + * @param second set of input variables + */ + void insertFromSum(const VariableSetT& input_1, const VariableSetT& input_2); + + /** take the difference (input_1-input_2) of values of the optimizable + * parameter values in two VariableSet objects, and set this object's + * values to equal them. + * @param first set of input variables + * @param second set of input variables + */ + void insertFromDiff(const VariableSetT& input_1, const VariableSetT& input_2); + + /** activate variables for optimization + * @param first iterator of the first name + * @param last iterator of the last name + * @param reindex if true, Index is updated + * + * The status of a variable that is not included in the [first,last) + * remains the same. + */ + template + void activate(ForwardIterator first, ForwardIterator last, bool reindex) + { + while (first != last) + { + iterator loc = find(*first++); + if (loc != NameAndValue.end()) + { + int i = loc - NameAndValue.begin(); + if (Index[i] < 0) + Index[i] = num_active_vars++; + } + } + if (reindex) + { + removeInactive(); + resetIndex(); + } + } + + /** deactivate variables for optimization + * @param first iterator of the first name + * @param last iterator of the last name + * @param reindex if true, the variales are removed and Index is updated + */ + template + void disable(ForwardIterator first, ForwardIterator last, bool reindex) + { + while (first != last) + { + int loc = find(*first++) - NameAndValue.begin(); + if (loc < NameAndValue.size()) + Index[loc] = -1; + } + if (reindex) + { + removeInactive(); + resetIndex(); + } + } + + /** reset Index + */ + void resetIndex(); + /** remove inactive variables and trim the internal data + */ + void removeInactive(); + + /** set the index table of this VariableSet + * @param selected input variables + * + * This VariableSet is a subset of selected. 
+ */ + void getIndex(const VariableSetT& selected); + + /** set default Indices, namely all the variables are active + */ + void setIndexDefault(); + + void print(std::ostream& os, int leftPadSpaces = 0, bool printHeader = false) const; + + // Save variational parameters to an HDF file + void writeToHDF(const std::string& filename, qmcplusplus::hdf_archive& hout) const; + + /// Read variational parameters from an HDF file. + /// This assumes VariableSet is already set up. + void readFromHDF(const std::string& filename, qmcplusplus::hdf_archive& hin); +}; +} // namespace optimize + +#endif diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp b/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp index 49e5070241..ecdb5dd696 100644 --- a/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp +++ b/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp @@ -1,10 +1,11 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2023 Raymond Clay and QMCPACK developers. 
// -// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories +// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National +// Laboratories // // File created by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories ////////////////////////////////////////////////////////////////////////////////////// @@ -14,106 +15,110 @@ namespace qmcplusplus { -template -ConstantSPOSetT::ConstantSPOSetT(const std::string& my_name, const int nparticles, const int norbitals) - : SPOSetT(my_name), numparticles_(nparticles) +template +ConstantSPOSetT::ConstantSPOSetT( + const std::string& my_name, const int nparticles, const int norbitals) : + SPOSetT(my_name), + numparticles_(nparticles) { - this->OrbitalSetSize = norbitals; - ref_psi_.resize(numparticles_, this->OrbitalSetSize); - ref_egrad_.resize(numparticles_, this->OrbitalSetSize); - ref_elapl_.resize(numparticles_, this->OrbitalSetSize); + this->OrbitalSetSize = norbitals; + ref_psi_.resize(numparticles_, this->OrbitalSetSize); + ref_egrad_.resize(numparticles_, this->OrbitalSetSize); + ref_elapl_.resize(numparticles_, this->OrbitalSetSize); - ref_psi_ = 0.0; - ref_egrad_ = 0.0; - ref_elapl_ = 0.0; + ref_psi_ = 0.0; + ref_egrad_ = 0.0; + ref_elapl_ = 0.0; } -template -std::unique_ptr> ConstantSPOSetT::makeClone() const +template +std::unique_ptr> +ConstantSPOSetT::makeClone() const { - auto myclone = std::make_unique>(this->my_name_, numparticles_, this->OrbitalSetSize); - myclone->setRefVals(ref_psi_); - myclone->setRefEGrads(ref_egrad_); - myclone->setRefELapls(ref_elapl_); - return myclone; + auto myclone = std::make_unique>( + this->my_name_, numparticles_, this->OrbitalSetSize); + myclone->setRefVals(ref_psi_); + myclone->setRefEGrads(ref_egrad_); + myclone->setRefELapls(ref_elapl_); + return myclone; } -template -void ConstantSPOSetT::checkOutVariables(const opt_variables_type& active) +template +void +ConstantSPOSetT::checkOutVariables(const OptVariablesType& active) { - 
APP_ABORT("ConstantSPOSet should not call checkOutVariables"); + APP_ABORT("ConstantSPOSet should not call checkOutVariables"); }; -template -void ConstantSPOSetT::setOrbitalSetSize(int norbs) +template +void +ConstantSPOSetT::setOrbitalSetSize(int norbs) { - APP_ABORT("ConstantSPOSet should not call setOrbitalSetSize()"); + APP_ABORT("ConstantSPOSet should not call setOrbitalSetSize()"); } -template -void ConstantSPOSetT::setRefVals(const ValueMatrix& vals) +template +void +ConstantSPOSetT::setRefVals(const ValueMatrix& vals) { - assert(vals.cols() == this->OrbitalSetSize); - assert(vals.rows() == numparticles_); - ref_psi_ = vals; + assert(vals.cols() == this->OrbitalSetSize); + assert(vals.rows() == numparticles_); + ref_psi_ = vals; } -template -void ConstantSPOSetT::setRefEGrads(const GradMatrix& grads) +template +void +ConstantSPOSetT::setRefEGrads(const GradMatrix& grads) { - assert(grads.cols() == this->OrbitalSetSize); - assert(grads.rows() == numparticles_); - ref_egrad_ = grads; + assert(grads.cols() == this->OrbitalSetSize); + assert(grads.rows() == numparticles_); + ref_egrad_ = grads; } -template -void ConstantSPOSetT::setRefELapls(const ValueMatrix& lapls) +template +void +ConstantSPOSetT::setRefELapls(const ValueMatrix& lapls) { - assert(lapls.cols() == this->OrbitalSetSize); - assert(lapls.rows() == numparticles_); - ref_elapl_ = lapls; + assert(lapls.cols() == this->OrbitalSetSize); + assert(lapls.rows() == numparticles_); + ref_elapl_ = lapls; } -template -void ConstantSPOSetT::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) +template +void +ConstantSPOSetT::evaluateValue( + const ParticleSetT& P, int iat, ValueVector& psi) { - const auto* vp = dynamic_cast(&P); - int ptcl = vp ? vp->refPtcl : iat; - assert(psi.size() == this->OrbitalSetSize); - for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++) - psi[iorb] = ref_psi_(ptcl, iorb); + const auto* vp = dynamic_cast*>(&P); + int ptcl = vp ? 
vp->refPtcl : iat; + assert(psi.size() == this->OrbitalSetSize); + for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++) + psi[iorb] = ref_psi_(ptcl, iorb); } -template -void ConstantSPOSetT::evaluateVGL(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) +template +void +ConstantSPOSetT::evaluateVGL(const ParticleSetT& P, int iat, + ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { - for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++) - { - psi[iorb] = ref_psi_(iat, iorb); - dpsi[iorb] = ref_egrad_(iat, iorb); - d2psi[iorb] = ref_elapl_(iat, iorb); - } + for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++) { + psi[iorb] = ref_psi_(iat, iorb); + dpsi[iorb] = ref_egrad_(iat, iorb); + d2psi[iorb] = ref_elapl_(iat, iorb); + } } -template -void ConstantSPOSetT::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) +template +void +ConstantSPOSetT::evaluate_notranspose(const ParticleSetT& P, int first, + int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) { - for (int iat = first, i = 0; iat < last; ++iat, ++i) - { - ValueVector v(logdet[i], logdet.cols()); - GradVector g(dlogdet[i], dlogdet.cols()); - ValueVector l(d2logdet[i], d2logdet.cols()); - evaluateVGL(P, iat, v, g, l); - } + for (int iat = first, i = 0; iat < last; ++iat, ++i) { + ValueVector v(logdet[i], logdet.cols()); + GradVector g(dlogdet[i], dlogdet.cols()); + ValueVector l(d2logdet[i], d2logdet.cols()); + evaluateVGL(P, iat, v, g, l); + } } template class ConstantSPOSetT; @@ -121,4 +126,4 @@ template class ConstantSPOSetT; template class ConstantSPOSetT>; template class ConstantSPOSetT>; -} //namespace qmcplusplus +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSetT.h b/src/QMCWaveFunctions/tests/ConstantSPOSetT.h index 483136360a..d1ee5b24f7 100644 --- a/src/QMCWaveFunctions/tests/ConstantSPOSetT.h +++ 
b/src/QMCWaveFunctions/tests/ConstantSPOSetT.h @@ -1,15 +1,15 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2023 Raymond Clay and QMCPACK developers. // -// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories +// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National +// Laboratories // // File created by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories ////////////////////////////////////////////////////////////////////////////////////// - #ifndef QMCPLUSPLUS_CONSTANTSPOSETT_H #define QMCPLUSPLUS_CONSTANTSPOSETT_H @@ -17,77 +17,92 @@ namespace qmcplusplus { -/** Constant SPOSet for testing purposes. Fixed N_elec x N_orb matrices storing value, gradients, and laplacians, etc., - * These values are accessed through standard SPOSet calls like evaluateValue, evaluateVGL, etc. - * Exists to provide deterministic and known output to objects requiring SPOSet evaluations. - * - */ -template +/** Constant SPOSet for testing purposes. Fixed N_elec x N_orb matrices storing + * value, gradients, and laplacians, etc., These values are accessed through + * standard SPOSet calls like evaluateValue, evaluateVGL, etc. Exists to provide + * deterministic and known output to objects requiring SPOSet evaluations. 
+ * + */ +template class ConstantSPOSetT : public SPOSetT { public: - using ValueMatrix = typename SPOSetT::ValueMatrix; - using GradMatrix = typename SPOSetT::GradMatrix; - using ValueVector = typename SPOSetT::ValueVector; - using GradVector = typename SPOSetT::GradVector; - - ConstantSPOSetT(const std::string& my_name) = delete; - - //Constructor needs number of particles and number of orbitals. This is the minimum - //amount of information needed to sanely construct all data members and perform size - //checks later. - ConstantSPOSetT(const std::string& my_name, const int nparticles, const int norbitals); - - std::unique_ptr> makeClone() const final; - - std::string getClassName() const final { return "ConstantSPOSet"; }; - - void checkOutVariables(const opt_variables_type& active) final; - - void setOrbitalSetSize(int norbs) final; - - /** - * @brief Setter method to set \phi_j(r_i). Stores input matrix in ref_psi_. - * @param Nelec x Nion ValueType matrix of \phi_j(r_i) - * @return void - */ - void setRefVals(const ValueMatrix& vals); - /** - * @brief Setter method to set \nabla_i \phi_j(r_i). Stores input matrix in ref_egrad_. - * @param Nelec x Nion GradType matrix of \grad_i \phi_j(r_i) - * @return void - */ - void setRefEGrads(const GradMatrix& grads); - /** - * @brief Setter method to set \nabla^2_i \phi_j(r_i). Stores input matrix in ref_elapl_. 
- * @param Nelec x Nion GradType matrix of \grad^2_i \phi_j(r_i) - * @return void - */ - void setRefELapls(const ValueMatrix& lapls); - - void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) final; - - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final; - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) final; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + using ValueVector = typename SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; + + ConstantSPOSetT(const std::string& my_name) = delete; + + // Constructor needs number of particles and number of orbitals. This is + // the minimum amount of information needed to sanely construct all data + // members and perform size checks later. + ConstantSPOSetT( + const std::string& my_name, const int nparticles, const int norbitals); + + std::unique_ptr> + makeClone() const final; + + std::string + getClassName() const final + { + return "ConstantSPOSet"; + }; + + void + checkOutVariables(const OptVariablesType& active) final; + + void + setOrbitalSetSize(int norbs) final; + + /** + * @brief Setter method to set \phi_j(r_i). Stores input matrix in ref_psi_. + * @param Nelec x Nion ValueType matrix of \phi_j(r_i) + * @return void + */ + void + setRefVals(const ValueMatrix& vals); + /** + * @brief Setter method to set \nabla_i \phi_j(r_i). Stores input matrix in + * ref_egrad_. + * @param Nelec x Nion GradType matrix of \grad_i \phi_j(r_i) + * @return void + */ + void + setRefEGrads(const GradMatrix& grads); + /** + * @brief Setter method to set \nabla^2_i \phi_j(r_i). Stores input matrix + * in ref_elapl_. 
+ * @param Nelec x Nion GradType matrix of \grad^2_i \phi_j(r_i) + * @return void + */ + void + setRefELapls(const ValueMatrix& lapls); + + void + evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) final; + + void + evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, + GradVector& dpsi, ValueVector& d2psi) final; + + void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) final; private: - const int numparticles_; /// evaluate_notranspose arrays are nparticle x norb matrices. - /// To ensure consistent array sizing and enforcement, - /// we agree at construction how large these matrices will be. - /// norb is stored in SPOSet::OrbitalSetSize. - - //Value, electron gradient, and electron laplacian at "reference configuration". - //i.e. before any attempted moves. - - ValueMatrix ref_psi_; - GradMatrix ref_egrad_; - ValueMatrix ref_elapl_; + const int numparticles_; /// evaluate_notranspose arrays are nparticle x + /// norb matrices. To ensure consistent array + /// sizing and enforcement, we agree at + /// construction how large these matrices will be. + /// norb is stored in SPOSet::OrbitalSetSize. + + // Value, electron gradient, and electron laplacian at "reference + // configuration". i.e. before any attempted moves. + + ValueMatrix ref_psi_; + GradMatrix ref_egrad_; + ValueMatrix ref_elapl_; }; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/tests/FakeSPOT.cpp b/src/QMCWaveFunctions/tests/FakeSPOT.cpp index fcf1637682..85678ce5f3 100644 --- a/src/QMCWaveFunctions/tests/FakeSPOT.cpp +++ b/src/QMCWaveFunctions/tests/FakeSPOT.cpp @@ -1,6 +1,6 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. 
+// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2020 QMCPACK developers. // @@ -13,142 +13,135 @@ namespace qmcplusplus { -template +template FakeSPOT::FakeSPOT() : SPOSetT("one_FakeSPO") { - a.resize(3, 3); + a.resize(3, 3); - a(0, 0) = 2.3; - a(0, 1) = 4.5; - a(0, 2) = 2.6; - a(1, 0) = 0.5; - a(1, 1) = 8.5; - a(1, 2) = 3.3; - a(2, 0) = 1.8; - a(2, 1) = 4.4; - a(2, 2) = 4.9; + a(0, 0) = 2.3; + a(0, 1) = 4.5; + a(0, 2) = 2.6; + a(1, 0) = 0.5; + a(1, 1) = 8.5; + a(1, 2) = 3.3; + a(2, 0) = 1.8; + a(2, 1) = 4.4; + a(2, 2) = 4.9; - v.resize(3); - v[0] = 1.9; - v[1] = 2.0; - v[2] = 3.1; + v.resize(3); + v[0] = 1.9; + v[1] = 2.0; + v[2] = 3.1; + a2.resize(4, 4); + a2(0, 0) = 2.3; + a2(0, 1) = 4.5; + a2(0, 2) = 2.6; + a2(0, 3) = 1.2; + a2(1, 0) = 0.5; + a2(1, 1) = 8.5; + a2(1, 2) = 3.3; + a2(1, 3) = 0.3; + a2(2, 0) = 1.8; + a2(2, 1) = 4.4; + a2(2, 2) = 4.9; + a2(2, 3) = 2.8; + a2(3, 0) = 0.8; + a2(3, 1) = 4.1; + a2(3, 2) = 3.2; + a2(3, 3) = 1.1; - a2.resize(4, 4); - a2(0, 0) = 2.3; - a2(0, 1) = 4.5; - a2(0, 2) = 2.6; - a2(0, 3) = 1.2; - a2(1, 0) = 0.5; - a2(1, 1) = 8.5; - a2(1, 2) = 3.3; - a2(1, 3) = 0.3; - a2(2, 0) = 1.8; - a2(2, 1) = 4.4; - a2(2, 2) = 4.9; - a2(2, 3) = 2.8; - a2(3, 0) = 0.8; - a2(3, 1) = 4.1; - a2(3, 2) = 3.2; - a2(3, 3) = 1.1; + v2.resize(4, 4); - v2.resize(4, 4); + v2(0, 0) = 3.2; + v2(0, 1) = 0.5; + v2(0, 2) = 5.9; + v2(0, 3) = 3.7; + v2(1, 0) = 0.3; + v2(1, 1) = 1.4; + v2(1, 2) = 3.9; + v2(1, 3) = 8.2; + v2(2, 0) = 3.3; + v2(2, 1) = 5.4; + v2(2, 2) = 4.9; + v2(2, 3) = 2.2; + v2(3, 1) = 5.4; + v2(3, 2) = 4.9; + v2(3, 3) = 2.2; - v2(0, 0) = 3.2; - v2(0, 1) = 0.5; - v2(0, 2) = 5.9; - v2(0, 3) = 3.7; - v2(1, 0) = 0.3; - v2(1, 1) = 1.4; - v2(1, 2) = 3.9; - v2(1, 3) = 8.2; - v2(2, 0) = 3.3; - v2(2, 1) = 5.4; - v2(2, 2) = 4.9; - v2(2, 3) = 2.2; - v2(3, 1) = 5.4; - v2(3, 2) = 4.9; - v2(3, 3) = 2.2; - - gv.resize(4); - gv[0] = GradType(1.0, 0.0, 
0.1); - gv[1] = GradType(1.0, 2.0, 0.1); - gv[2] = GradType(2.0, 1.0, 0.1); - gv[3] = GradType(0.4, 0.3, 0.1); + gv.resize(4); + gv[0] = GradType(1.0, 0.0, 0.1); + gv[1] = GradType(1.0, 2.0, 0.1); + gv[2] = GradType(2.0, 1.0, 0.1); + gv[3] = GradType(0.4, 0.3, 0.1); } -template -std::unique_ptr> FakeSPOT::makeClone() const +template +std::unique_ptr> +FakeSPOT::makeClone() const { - return std::make_unique>(*this); + return std::make_unique>(*this); } -template -void FakeSPOT::setOrbitalSetSize(int norbs) +template +void +FakeSPOT::setOrbitalSetSize(int norbs) { - this->OrbitalSetSize = norbs; + this->OrbitalSetSize = norbs; } -template -void FakeSPOT::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) +template +void +FakeSPOT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) { - if (iat < 0) - for (int i = 0; i < psi.size(); i++) - psi[i] = 1.2 * i - i * i; - else if (this->OrbitalSetSize == 3) - for (int i = 0; i < 3; i++) - psi[i] = a(iat, i); - else if (this->OrbitalSetSize == 4) - for (int i = 0; i < 4; i++) - psi[i] = a2(iat, i); + if (iat < 0) + for (int i = 0; i < psi.size(); i++) + psi[i] = 1.2 * i - i * i; + else if (this->OrbitalSetSize == 3) + for (int i = 0; i < 3; i++) + psi[i] = a(iat, i); + else if (this->OrbitalSetSize == 4) + for (int i = 0; i < 4; i++) + psi[i] = a2(iat, i); } -template -void FakeSPOT::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +template +void +FakeSPOT::evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, + GradVector& dpsi, ValueVector& d2psi) { - if (this->OrbitalSetSize == 3) - { - for (int i = 0; i < 3; i++) - { - psi[i] = v[i]; - dpsi[i] = gv[i]; + if (this->OrbitalSetSize == 3) { + for (int i = 0; i < 3; i++) { + psi[i] = v[i]; + dpsi[i] = gv[i]; + } } - } - else if (this->OrbitalSetSize == 4) - { - for (int i = 0; i < 4; i++) - { - psi[i] = v2(iat, i); - dpsi[i] = gv[i]; + else if (this->OrbitalSetSize == 4) { + for (int i = 0; i 
< 4; i++) { + psi[i] = v2(iat, i); + dpsi[i] = gv[i]; + } } - } } -template -void FakeSPOT::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) +template +void +FakeSPOT::evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) { - if (this->OrbitalSetSize == 3) - { - for (int i = 0; i < 3; i++) - for (int j = 0; j < 3; j++) - { - logdet(j, i) = a(i, j); - dlogdet[i][j] = gv[j] + GradType(i); - } - } - else if (this->OrbitalSetSize == 4) - { - for (int i = 0; i < 4; i++) - for (int j = 0; j < 4; j++) - { - logdet(j, i) = a2(i, j); - dlogdet[i][j] = gv[j] + GradType(i); - } - } + if (this->OrbitalSetSize == 3) { + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) { + logdet(j, i) = a(i, j); + dlogdet[i][j] = gv[j] + GradType(i); + } + } + else if (this->OrbitalSetSize == 4) { + for (int i = 0; i < 4; i++) + for (int j = 0; j < 4; j++) { + logdet(j, i) = a2(i, j); + dlogdet[i][j] = gv[j] + GradType(i); + } + } } // Class concrete types from ValueType diff --git a/src/QMCWaveFunctions/tests/FakeSPOT.h b/src/QMCWaveFunctions/tests/FakeSPOT.h index dfa6689bd6..f0a6f1ef80 100644 --- a/src/QMCWaveFunctions/tests/FakeSPOT.h +++ b/src/QMCWaveFunctions/tests/FakeSPOT.h @@ -1,6 +1,6 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2020 QMCPACK developers. 
// @@ -9,7 +9,6 @@ // File created by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory ////////////////////////////////////////////////////////////////////////////////////// - #ifndef QMCPLUSPLUS_FAKESPOTT_H #define QMCPLUSPLUS_FAKESPOTT_H @@ -17,45 +16,55 @@ namespace qmcplusplus { -template +template class FakeSPOT : public SPOSetT { public: - Matrix a; - Matrix a2; - Vector v; - Matrix v2; + Matrix a; + Matrix a2; + Vector v; + Matrix v2; - using ValueVector = typename SPOSetT::ValueVector; - using ValueMatrix = typename SPOSetT::ValueMatrix; - using GradVector = typename SPOSetT::GradVector; - using GradMatrix = typename SPOSetT::GradMatrix; - using GradType = typename SPOSetT::GradType; + using ValueVector = typename SPOSetT::ValueVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradVector = typename SPOSetT::GradVector; + using GradMatrix = typename SPOSetT::GradMatrix; + using GradType = typename SPOSetT::GradType; - typename SPOSetT::GradVector gv; + typename SPOSetT::GradVector gv; - FakeSPOT(); + FakeSPOT(); - ~FakeSPOT() override = default; + ~FakeSPOT() override = default; - std::string getClassName() const override { return "FakeSPO"; } + std::string + getClassName() const override + { + return "FakeSPO"; + } - std::unique_ptr> makeClone() const override; + std::unique_ptr> + makeClone() const override; - virtual void report() {} + virtual void + report() + { + } - void setOrbitalSetSize(int norbs) override; + void + setOrbitalSetSize(int norbs) override; - void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override; + void + evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override; - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; + void + evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, + GradVector& dpsi, ValueVector& d2psi) override; - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - 
ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; + void + evaluate_notranspose(const ParticleSetT& P, int first, int last, + ValueMatrix& logdet, GradMatrix& dlogdet, + ValueMatrix& d2logdet) override; }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/tests/test_ConstantSPOSetT.cpp b/src/QMCWaveFunctions/tests/test_ConstantSPOSetT.cpp index 87425bbb91..56d5b22e8a 100644 --- a/src/QMCWaveFunctions/tests/test_ConstantSPOSetT.cpp +++ b/src/QMCWaveFunctions/tests/test_ConstantSPOSetT.cpp @@ -63,8 +63,8 @@ TEST_CASE("ConstantSPOSetT", "[wavefunction]") } - const SimulationCell simulation_cell; - ParticleSet elec(simulation_cell); + const SimulationCellT simulation_cell; + ParticleSetT elec(simulation_cell); elec.create({nelec}); diff --git a/src/type_traits/complex_help.hpp b/src/type_traits/complex_help.hpp index 79e0e920a4..83aecc96d4 100644 --- a/src/type_traits/complex_help.hpp +++ b/src/type_traits/complex_help.hpp @@ -12,6 +12,9 @@ #ifndef QMCPLUSPLUS_COMPLEX_HELP_HPP #define QMCPLUSPLUS_COMPLEX_HELP_HPP +#include +#include + namespace qmcplusplus { template