diff --git a/src/Particle/DistanceTableT.h b/src/Particle/DistanceTableT.h index 5eaba1bd44c..54cf206e386 100644 --- a/src/Particle/DistanceTableT.h +++ b/src/Particle/DistanceTableT.h @@ -41,128 +41,95 @@ class ResourceCollection; * update/compute invoked by ParticleSet. Derived AA/AB classes handle the * actual storage and data access. */ -template +template class DistanceTableT { public: - static constexpr unsigned DIM = OHMMS_DIM; + static constexpr unsigned DIM = OHMMS_DIM; - using IndexType = typename ParticleSetTraits::IndexType; - using RealType = typename ParticleSetTraits::RealType; - using PosType = typename ParticleSetTraits::PosType; - using DistRow = Vector>; - using DisplRow = VectorSoaContainer; + using IndexType = typename ParticleSetTraits::IndexType; + using RealType = typename ParticleSetTraits::RealType; + using PosType = typename ParticleSetTraits::PosType; + using DistRow = Vector>; + using DisplRow = VectorSoaContainer; protected: - // FIXME. once DT takes only DynamicCoordinates, change this type as well. - const ParticleSetT& origin_; + // FIXME. once DT takes only DynamicCoordinates, change this type as well. 
+ const ParticleSetT& origin_; - const size_t num_sources_; - const size_t num_targets_; + const size_t num_sources_; + const size_t num_targets_; - /// name of the table - const std::string name_; + /// name of the table + const std::string name_; - /// operation modes defined by DTModes - DTModes modes_; + /// operation modes defined by DTModes + DTModes modes_; public: - /// constructor using source and target ParticleSet - DistanceTableT(const ParticleSetT& source, const ParticleSetT& target, - DTModes modes) : - origin_(source), + /// constructor using source and target ParticleSet + DistanceTableT(const ParticleSetT& source, const ParticleSetT& target, DTModes modes) + : origin_(source), num_sources_(source.getTotalNum()), num_targets_(target.getTotalNum()), name_(source.getName() + "_" + target.getName()), modes_(modes) - { - } - - /// copy constructor. deleted - DistanceTableT(const DistanceTableT&) = delete; - - /// virutal destructor - virtual ~DistanceTableT() = default; - - /// get modes - inline DTModes - getModes() const - { - return modes_; - } - - /// set modes - inline void - setModes(DTModes modes) - { - modes_ = modes; - } - - /// return the name of table - inline const std::string& - getName() const - { - return name_; - } - - /// returns the reference the origin particleset - const ParticleSetT& - get_origin() const - { - return origin_; - } - - /// returns the number of centers - inline size_t - centers() const - { - return origin_.getTotalNum(); - } - - /// returns the number of centers - inline size_t - targets() const - { - return num_targets_; - } - - /// returns the number of source particles - inline size_t - sources() const - { - return num_sources_; - } - - /** evaluate the full Distance Table + {} + + /// copy constructor. 
deleted + DistanceTableT(const DistanceTableT&) = delete; + + /// virutal destructor + virtual ~DistanceTableT() = default; + + /// get modes + inline DTModes getModes() const { return modes_; } + + /// set modes + inline void setModes(DTModes modes) { modes_ = modes; } + + /// return the name of table + inline const std::string& getName() const { return name_; } + + /// returns the reference the origin particleset + const ParticleSetT& get_origin() const { return origin_; } + + /// returns the number of centers + inline size_t centers() const { return origin_.getTotalNum(); } + + /// returns the number of centers + inline size_t targets() const { return num_targets_; } + + /// returns the number of source particles + inline size_t sources() const { return num_sources_; } + + /** evaluate the full Distance Table * @param P the target particle set */ - virtual void - evaluate(ParticleSetT& P) = 0; - virtual void - mw_evaluate(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader>& p_list) const - { - for (int iw = 0; iw < dt_list.size(); iw++) - dt_list[iw].evaluate(p_list[iw]); - } - - /** recompute multi walker internal data, recompute + virtual void evaluate(ParticleSetT& P) = 0; + virtual void mw_evaluate(const RefVectorWithLeader& dt_list, + const RefVectorWithLeader>& p_list) const + { + for (int iw = 0; iw < dt_list.size(); iw++) + dt_list[iw].evaluate(p_list[iw]); + } + + /** recompute multi walker internal data, recompute * @param dt_list the distance table batch * @param p_list the target particle set batch * @param recompute if true, must recompute. Otherwise, implementation * dependent. 
*/ - virtual void - mw_recompute(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader>& p_list, - const std::vector& recompute) const - { - for (int iw = 0; iw < dt_list.size(); iw++) - if (recompute[iw]) - dt_list[iw].evaluate(p_list[iw]); - } - - /** evaluate the temporary pair relations when a move is proposed + virtual void mw_recompute(const RefVectorWithLeader& dt_list, + const RefVectorWithLeader>& p_list, + const std::vector& recompute) const + { + for (int iw = 0; iw < dt_list.size(); iw++) + if (recompute[iw]) + dt_list[iw].evaluate(p_list[iw]); + } + + /** evaluate the temporary pair relations when a move is proposed * @param P the target particle set * @param rnew proposed new position * @param iat the particle to be moved @@ -175,81 +142,73 @@ class DistanceTableT * move. Drivers/Hamiltonians know whether moves will be accepted or not and * manage this flag when calling ParticleSet::makeMoveXXX functions. */ - virtual void - move(const ParticleSetT& P, const PosType& rnew, const IndexType iat, - bool prepare_old = true) = 0; + virtual void move(const ParticleSetT& P, const PosType& rnew, const IndexType iat, bool prepare_old = true) = 0; - /** walker batched version of move. this function may be implemented + /** walker batched version of move. this function may be implemented * asynchronously. Additional synchroniziation for collecting results should * be handled by the caller. If DTModes::NEED_TEMP_DATA_ON_HOST, host data * will be updated. If no consumer requests data on the host, the transfer * is skipped. 
*/ - virtual void - mw_move(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader>& p_list, - const std::vector& rnew_list, const IndexType iat, - bool prepare_old = true) const - { - for (int iw = 0; iw < dt_list.size(); iw++) - dt_list[iw].move(p_list[iw], rnew_list[iw], iat, prepare_old); - } - - /** update the distance table by the pair relations from the temporal + virtual void mw_move(const RefVectorWithLeader& dt_list, + const RefVectorWithLeader>& p_list, + const std::vector& rnew_list, + const IndexType iat, + bool prepare_old = true) const + { + for (int iw = 0; iw < dt_list.size(); iw++) + dt_list[iw].move(p_list[iw], rnew_list[iw], iat, prepare_old); + } + + /** update the distance table by the pair relations from the temporal * position. Used when a move is accepted in regular mode * @param iat the particle with an accepted move */ - virtual void - update(IndexType jat) = 0; + virtual void update(IndexType jat) = 0; - /** fill partially the distance table by the pair relations from the + /** fill partially the distance table by the pair relations from the * temporary or old particle position. Used in forward mode when a move is * reject * @param iat the particle with an accepted move * @param from_temp if true, copy from temp. if false, copy from old */ - virtual void - updatePartial(IndexType jat, bool from_temp) - { - if (from_temp) - update(jat); - } - - /** walker batched version of updatePartial. + virtual void updatePartial(IndexType jat, bool from_temp) + { + if (from_temp) + update(jat); + } + + /** walker batched version of updatePartial. * If not DTModes::NEED_TEMP_DATA_ON_HOST, host data is not up-to-date and * host distance table will not be updated. 
*/ - virtual void - mw_updatePartial(const RefVectorWithLeader& dt_list, - IndexType jat, const std::vector& from_temp) - { - for (int iw = 0; iw < dt_list.size(); iw++) - dt_list[iw].updatePartial(jat, from_temp[iw]); - } - - /** finalize distance table calculation after particle-by-particle moves + virtual void mw_updatePartial(const RefVectorWithLeader& dt_list, + IndexType jat, + const std::vector& from_temp) + { + for (int iw = 0; iw < dt_list.size(); iw++) + dt_list[iw].updatePartial(jat, from_temp[iw]); + } + + /** finalize distance table calculation after particle-by-particle moves * if update() doesn't make the table up-to-date during p-by-p moves * finalizePbyP takes action to bring the table up-to-date */ - virtual void - finalizePbyP(const ParticleSetT& P) - { - } + virtual void finalizePbyP(const ParticleSetT& P) {} - /** walker batched version of finalizePbyP + /** walker batched version of finalizePbyP * If not DTModes::NEED_TEMP_DATA_ON_HOST, host distance table data is not * updated at all during p-by-p Thus, a recompute is necessary to update the * whole host distance table for consumers like the Coulomb potential. 
*/ - virtual void - mw_finalizePbyP(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader>& p_list) const - { - for (int iw = 0; iw < dt_list.size(); iw++) - dt_list[iw].finalizePbyP(p_list[iw]); - } - - /** find the first nearest neighbor + virtual void mw_finalizePbyP(const RefVectorWithLeader& dt_list, + const RefVectorWithLeader>& p_list) const + { + for (int iw = 0; iw < dt_list.size(); iw++) + dt_list[iw].finalizePbyP(p_list[iw]); + } + + /** find the first nearest neighbor * @param iat source particle id * @param r distance * @param dr displacement @@ -258,48 +217,36 @@ class DistanceTableT * displacements_[iat] * @return the id of the nearest particle, -1 not found */ - virtual int - get_first_neighbor( - IndexType iat, RealType& r, PosType& dr, bool newpos) const = 0; - - [[noreturn]] inline void - print(std::ostream& os) - { - throw std::runtime_error("DistanceTable::print is not supported"); - } - - /// initialize a shared resource and hand it to a collection - virtual void - createResource(ResourceCollection& collection) const - { - } - - /// acquire a shared resource from a collection - virtual void - acquireResource(ResourceCollection& collection, - const RefVectorWithLeader& dt_list) const - { - } - - /// return a shared resource to a collection - virtual void - releaseResource(ResourceCollection& collection, - const RefVectorWithLeader& dt_list) const - { - } + virtual int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const = 0; + + [[noreturn]] inline void print(std::ostream& os) + { + throw std::runtime_error("DistanceTable::print is not supported"); + } + + /// initialize a shared resource and hand it to a collection + virtual void createResource(ResourceCollection& collection) const {} + + /// acquire a shared resource from a collection + virtual void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const + {} + + /// return a shared resource to a collection + virtual void 
releaseResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const + {} }; /** AA type of DistanceTable containing storage */ -template +template class DistanceTableAAT : public DistanceTableT { public: - using DistRow = typename DistanceTableT::DistRow; - using DisplRow = typename DistanceTableT::DisplRow; - using RealType = typename DistanceTableT::RealType; + using DistRow = typename DistanceTableT::DistRow; + using DisplRow = typename DistanceTableT::DisplRow; + using RealType = typename DistanceTableT::RealType; protected: - /** distances_[num_targets_][num_sources_], [i][3][j] = |r_A2[j] - r_A1[i]| + /** distances_[num_targets_][num_sources_], [i][3][j] = |r_A2[j] - r_A1[i]| * Note: Derived classes decide if it is a memory view or the actual * storage For only the lower triangle (j=i terms as the nature of @@ -307,223 +254,152 @@ class DistanceTableAAT : public DistanceTableT * segment, out-of-bound access is still within the segment and thus doesn't * trigger an alarm by the address sanitizer. 
*/ - std::vector distances_; + std::vector distances_; - /** displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - + /** displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - * r_A1[i] Note: Derived classes decide if it is a memory view or the actual * storage only the lower triangle (j displacements_; + std::vector displacements_; - /// temp_r - DistRow temp_r_; + /// temp_r + DistRow temp_r_; - /// temp_dr - DisplRow temp_dr_; + /// temp_dr + DisplRow temp_dr_; - /// old distances - DistRow old_r_; + /// old distances + DistRow old_r_; - /// old displacements - DisplRow old_dr_; + /// old displacements + DisplRow old_dr_; public: - /// constructor using source and target ParticleSet - DistanceTableAAT(const ParticleSetT& target, DTModes modes) : - DistanceTableT(target, target, modes) - { - } + /// constructor using source and target ParticleSet + DistanceTableAAT(const ParticleSetT& target, DTModes modes) : DistanceTableT(target, target, modes) {} - /** return full table distances + /** return full table distances */ - const std::vector& - getDistances() const - { - return distances_; - } + const std::vector& getDistances() const { return distances_; } - /** return full table displacements + /** return full table displacements */ - const std::vector& - getDisplacements() const - { - return displacements_; - } + const std::vector& getDisplacements() const { return displacements_; } - /** return a row of distances for a given target particle + /** return a row of distances for a given target particle */ - const DistRow& - getDistRow(int iel) const - { - return distances_[iel]; - } + const DistRow& getDistRow(int iel) const { return distances_[iel]; } - /** return a row of displacements for a given target particle + /** return a row of displacements for a given target particle */ - const DisplRow& - getDisplRow(int iel) const - { - return displacements_[iel]; - } + const DisplRow& getDisplRow(int iel) const { return 
displacements_[iel]; } - /** return the temporary distances when a move is proposed + /** return the temporary distances when a move is proposed */ - const DistRow& - getTempDists() const - { - return temp_r_; - } + const DistRow& getTempDists() const { return temp_r_; } - /** return the temporary displacements when a move is proposed + /** return the temporary displacements when a move is proposed */ - const DisplRow& - getTempDispls() const - { - return temp_dr_; - } + const DisplRow& getTempDispls() const { return temp_dr_; } - /** return old distances set up by move() for optimized distance table + /** return old distances set up by move() for optimized distance table * consumers */ - const DistRow& - getOldDists() const - { - return old_r_; - } + const DistRow& getOldDists() const { return old_r_; } - /** return old displacements set up by move() for optimized distance table + /** return old displacements set up by move() for optimized distance table * consumers */ - const DisplRow& - getOldDispls() const - { - return old_dr_; - } - - virtual size_t - get_num_particls_stored() const - { - return 0; - } - - /// return multi walker temporary pair distance table data pointer - [[noreturn]] virtual const RealType* - getMultiWalkerTempDataPtr() const - { - throw std::runtime_error( - this->name_ + " multi walker data pointer for temp not supported"); - } - - virtual const RealType* - mw_evalDistsInRange(const RefVectorWithLeader>& dt_list, - const RefVectorWithLeader>& p_list, size_t range_begin, - size_t range_end) const - { - return nullptr; - } + const DisplRow& getOldDispls() const { return old_dr_; } + + virtual size_t get_num_particls_stored() const { return 0; } + + /// return multi walker temporary pair distance table data pointer + [[noreturn]] virtual const RealType* getMultiWalkerTempDataPtr() const + { + throw std::runtime_error(this->name_ + " multi walker data pointer for temp not supported"); + } + + virtual const RealType* mw_evalDistsInRange(const 
RefVectorWithLeader>& dt_list, + const RefVectorWithLeader>& p_list, + size_t range_begin, + size_t range_end) const + { + return nullptr; + } }; /** AB type of DistanceTable containing storage */ -template +template class DistanceTableABT : public DistanceTableT { public: - using DistRow = typename DistanceTableT::DistRow; - using DisplRow = typename DistanceTableT::DisplRow; - using RealType = typename DistanceTableT::RealType; + using DistRow = typename DistanceTableT::DistRow; + using DisplRow = typename DistanceTableT::DisplRow; + using RealType = typename DistanceTableT::RealType; protected: - /** distances_[num_targets_][num_sources_], [i][3][j] = |r_A2[j] - r_A1[i]| + /** distances_[num_targets_][num_sources_], [i][3][j] = |r_A2[j] - r_A1[i]| * Note: Derived classes decide if it is a memory view or the actual * storage */ - std::vector distances_; + std::vector distances_; - /** displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - + /** displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - * r_A1[i] Note: Derived classes decide if it is a memory view or the actual * storage */ - std::vector displacements_; + std::vector displacements_; - /// temp_r - DistRow temp_r_; + /// temp_r + DistRow temp_r_; - /// temp_dr - DisplRow temp_dr_; + /// temp_dr + DisplRow temp_dr_; public: - /// constructor using source and target ParticleSet - DistanceTableABT(const ParticleSetT& source, - const ParticleSetT& target, DTModes modes) : - DistanceTableT(source, target, modes) - { - } - - /** return full table distances + /// constructor using source and target ParticleSet + DistanceTableABT(const ParticleSetT& source, const ParticleSetT& target, DTModes modes) + : DistanceTableT(source, target, modes) + {} + + /** return full table distances */ - const std::vector& - getDistances() const - { - return distances_; - } + const std::vector& getDistances() const { return distances_; } - /** return full table displacements + /** return full table 
displacements */ - const std::vector& - getDisplacements() const - { - return displacements_; - } + const std::vector& getDisplacements() const { return displacements_; } - /** return a row of distances for a given target particle + /** return a row of distances for a given target particle */ - const DistRow& - getDistRow(int iel) const - { - return distances_[iel]; - } + const DistRow& getDistRow(int iel) const { return distances_[iel]; } - /** return a row of displacements for a given target particle + /** return a row of displacements for a given target particle */ - const DisplRow& - getDisplRow(int iel) const - { - return displacements_[iel]; - } + const DisplRow& getDisplRow(int iel) const { return displacements_[iel]; } - /** return the temporary distances when a move is proposed + /** return the temporary distances when a move is proposed */ - const DistRow& - getTempDists() const - { - return temp_r_; - } + const DistRow& getTempDists() const { return temp_r_; } - /** return the temporary displacements when a move is proposed + /** return the temporary displacements when a move is proposed */ - const DisplRow& - getTempDispls() const - { - return temp_dr_; - } - - /// return multi-walker full (all pairs) distance table data pointer - [[noreturn]] virtual const RealType* - getMultiWalkerDataPtr() const - { - throw std::runtime_error( - this->name_ + " multi walker data pointer not supported"); - } - - /// return stride of per target pctl data. 
full table data = stride * num of - /// target particles - [[noreturn]] virtual size_t - getPerTargetPctlStrideSize() const - { - throw std::runtime_error( - this->name_ + " getPerTargetPctlStrideSize not supported"); - } + const DisplRow& getTempDispls() const { return temp_dr_; } + + /// return multi-walker full (all pairs) distance table data pointer + [[noreturn]] virtual const RealType* getMultiWalkerDataPtr() const + { + throw std::runtime_error(this->name_ + " multi walker data pointer not supported"); + } + + /// return stride of per target pctl data. full table data = stride * num of + /// target particles + [[noreturn]] virtual size_t getPerTargetPctlStrideSize() const + { + throw std::runtime_error(this->name_ + " getPerTargetPctlStrideSize not supported"); + } }; } // namespace qmcplusplus #endif diff --git a/src/Particle/DynamicCoordinatesT.cpp b/src/Particle/DynamicCoordinatesT.cpp index b563d264c18..a16ddcdfe48 100644 --- a/src/Particle/DynamicCoordinatesT.cpp +++ b/src/Particle/DynamicCoordinatesT.cpp @@ -18,26 +18,22 @@ namespace qmcplusplus { /** create DynamicCoordinates based on kind */ -template -std::unique_ptr> -createDynamicCoordinatesT(const DynamicCoordinateKind kind) +template +std::unique_ptr> createDynamicCoordinatesT(const DynamicCoordinateKind kind) { - if (kind == DynamicCoordinateKind::DC_POS) - return std::make_unique>(); - else if (kind == DynamicCoordinateKind::DC_POS_OFFLOAD) - return std::make_unique>(); - // dummy return - return std::unique_ptr>(); + if (kind == DynamicCoordinateKind::DC_POS) + return std::make_unique>(); + else if (kind == DynamicCoordinateKind::DC_POS_OFFLOAD) + return std::make_unique>(); + // dummy return + return std::unique_ptr>(); } -template std::unique_ptr> -createDynamicCoordinatesT(const DynamicCoordinateKind kind); -template std::unique_ptr> -createDynamicCoordinatesT(const DynamicCoordinateKind kind); -template std::unique_ptr>> -createDynamicCoordinatesT>( +template std::unique_ptr> 
createDynamicCoordinatesT( const DynamicCoordinateKind kind); -template std::unique_ptr>> -createDynamicCoordinatesT>( +template std::unique_ptr> createDynamicCoordinatesT(const DynamicCoordinateKind kind); +template std::unique_ptr>> createDynamicCoordinatesT>( + const DynamicCoordinateKind kind); +template std::unique_ptr>> createDynamicCoordinatesT>( const DynamicCoordinateKind kind); } // namespace qmcplusplus diff --git a/src/Particle/DynamicCoordinatesT.h b/src/Particle/DynamicCoordinatesT.h index 36d94868d66..18d616bb9ae 100644 --- a/src/Particle/DynamicCoordinatesT.h +++ b/src/Particle/DynamicCoordinatesT.h @@ -9,8 +9,6 @@ // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory ////////////////////////////////////////////////////////////////////////////////////// -/** @file DynamicCoordinatesT.h - */ #ifndef QMCPLUSPLUS_DYNAMICCOORDINATEST_H #define QMCPLUSPLUS_DYNAMICCOORDINATEST_H @@ -34,119 +32,91 @@ enum class DynamicCoordinateKind /** quantum variables of all the particles */ -template +template class DynamicCoordinatesT { public: - using RealType = typename ParticleSetTraits::RealType; - using PosType = typename ParticleSetTraits::PosType; - using ParticlePos = typename LatticeParticleTraits::ParticlePos; - using PosVectorSoa = - VectorSoaContainer::DIM>; + using RealType = typename ParticleSetTraits::RealType; + using PosType = typename ParticleSetTraits::PosType; + using ParticlePos = typename LatticeParticleTraits::ParticlePos; + using PosVectorSoa = VectorSoaContainer::DIM>; - DynamicCoordinatesT(const DynamicCoordinateKind kind_in) : - variable_kind_(kind_in) - { - } + DynamicCoordinatesT(const DynamicCoordinateKind kind_in) : variable_kind_(kind_in) {} - DynamicCoordinatesT(const DynamicCoordinatesT&) = default; - DynamicCoordinatesT& - operator=(const DynamicCoordinatesT&) = delete; + DynamicCoordinatesT(const DynamicCoordinatesT&) = default; + DynamicCoordinatesT& operator=(const DynamicCoordinatesT&) = delete; - 
DynamicCoordinateKind - getKind() const - { - return variable_kind_; - } + DynamicCoordinateKind getKind() const { return variable_kind_; } - virtual ~DynamicCoordinatesT() = default; + virtual ~DynamicCoordinatesT() = default; - virtual std::unique_ptr - makeClone() = 0; + virtual std::unique_ptr makeClone() = 0; - /** resize internal storages based on the number of particles + /** resize internal storages based on the number of particles * @param n the number of particles */ - virtual void - resize(size_t n) = 0; - /// return the number of particles - virtual size_t - size() const = 0; - - /// overwrite the positions of all the particles. - virtual void - setAllParticlePos(const ParticlePos& R) = 0; - /// overwrite the position of one the particle. - virtual void - setOneParticlePos(const PosType& pos, size_t iat) = 0; - /** copy the active positions of particles with a uniform id in all the + virtual void resize(size_t n) = 0; + /// return the number of particles + virtual size_t size() const = 0; + + /// overwrite the positions of all the particles. + virtual void setAllParticlePos(const ParticlePos& R) = 0; + /// overwrite the position of one the particle. + virtual void setOneParticlePos(const PosType& pos, size_t iat) = 0; + /** copy the active positions of particles with a uniform id in all the * walkers to a single internal buffer. 
* @param coords_list a batch of DynamicCoordinates * @param iat paricle id, uniform across coords_list * @param new_positions proposed positions */ - virtual void - mw_copyActivePos( - const RefVectorWithLeader& coords_list, size_t iat, - const std::vector& new_positions) const - { - assert(this == &coords_list.getLeader()); - } - - /** overwrite the positions of particles with a uniform id in all the + virtual void mw_copyActivePos(const RefVectorWithLeader& coords_list, + size_t iat, + const std::vector& new_positions) const + { + assert(this == &coords_list.getLeader()); + } + + /** overwrite the positions of particles with a uniform id in all the * walkers upon acceptance. * @param coords_list a batch of DynamicCoordinates * @param iat paricle id, uniform across coords_list * @param new_positions proposed positions * @param isAccepted accept/reject info */ - virtual void - mw_acceptParticlePos( - const RefVectorWithLeader& coords_list, size_t iat, - const std::vector& new_positions, - const std::vector& isAccepted) const = 0; - - /// all particle position accessor - virtual const PosVectorSoa& - getAllParticlePos() const = 0; - /// one particle position accessor - virtual PosType - getOneParticlePos(size_t iat) const = 0; - - /// secure internal data consistency after p-by-p moves - virtual void - donePbyP() - { - } - - /// initialize a shared resource and hand it to a collection - virtual void - createResource(ResourceCollection& collection) const - { - } - - /// acquire a shared resource from a collection - virtual void - acquireResource(ResourceCollection& collection, - const RefVectorWithLeader& coords_list) const - { - } - - /// return a shared resource to a collection - virtual void - releaseResource(ResourceCollection& collection, - const RefVectorWithLeader& coords_list) const - { - } + virtual void mw_acceptParticlePos(const RefVectorWithLeader& coords_list, + size_t iat, + const std::vector& new_positions, + const std::vector& isAccepted) const = 0; + 
+ /// all particle position accessor + virtual const PosVectorSoa& getAllParticlePos() const = 0; + /// one particle position accessor + virtual PosType getOneParticlePos(size_t iat) const = 0; + + /// secure internal data consistency after p-by-p moves + virtual void donePbyP() {} + + /// initialize a shared resource and hand it to a collection + virtual void createResource(ResourceCollection& collection) const {} + + /// acquire a shared resource from a collection + virtual void acquireResource(ResourceCollection& collection, + const RefVectorWithLeader& coords_list) const + {} + + /// return a shared resource to a collection + virtual void releaseResource(ResourceCollection& collection, + const RefVectorWithLeader& coords_list) const + {} protected: - /// type of dynamic coordinates - const DynamicCoordinateKind variable_kind_; + /// type of dynamic coordinates + const DynamicCoordinateKind variable_kind_; }; /** create DynamicCoordinates based on kind */ -template +template std::unique_ptr> createDynamicCoordinatesT( const DynamicCoordinateKind kind = DynamicCoordinateKind::DC_POS); } // namespace qmcplusplus diff --git a/src/Particle/InitMolecularSystemT.cpp b/src/Particle/InitMolecularSystemT.cpp index 896a6c1d22f..9fb4b13373e 100644 --- a/src/Particle/InitMolecularSystemT.cpp +++ b/src/Particle/InitMolecularSystemT.cpp @@ -29,282 +29,270 @@ namespace qmcplusplus { -template -InitMolecularSystemT::InitMolecularSystemT( - ParticleSetPoolT& pset, const char* aname) : - OhmmsElementBase(aname), - ptclPool(pset) -{ -} +template +InitMolecularSystemT::InitMolecularSystemT(ParticleSetPoolT& pset, const char* aname) + : OhmmsElementBase(aname), ptclPool(pset) +{} -template -bool -InitMolecularSystemT::put(xmlNodePtr cur) +template +bool InitMolecularSystemT::put(xmlNodePtr cur) { - std::string target("e"), source("i"), volume("no"); - OhmmsAttributeSet hAttrib; - hAttrib.add(target, "target"); - hAttrib.add(source, "source"); - hAttrib.add(volume, "use_volume"); - 
hAttrib.put(cur); - ParticleSetT* els = ptclPool.getParticleSet(target); - if (els == 0) { - ERRORMSG("No target particle " << target << " exists.") - return false; - } - ParticleSetT* ions = ptclPool.getParticleSet(source); - if (ions == 0) { - ERRORMSG("No source particle " << source << " exists.") - return false; - } - - app_log() << "" << std::endl; - - if (volume == "yes") - initWithVolume(ions, els); - else - initMolecule(ions, els); - - makeUniformRandom(els->spins); - els->spins *= 2 * M_PI; - - app_log() << "" << std::endl; - app_log().flush(); - - return true; + std::string target("e"), source("i"), volume("no"); + OhmmsAttributeSet hAttrib; + hAttrib.add(target, "target"); + hAttrib.add(source, "source"); + hAttrib.add(volume, "use_volume"); + hAttrib.put(cur); + ParticleSetT* els = ptclPool.getParticleSet(target); + if (els == 0) + { + ERRORMSG("No target particle " << target << " exists.") + return false; + } + ParticleSetT* ions = ptclPool.getParticleSet(source); + if (ions == 0) + { + ERRORMSG("No source particle " << source << " exists.") + return false; + } + + app_log() << "" << std::endl; + + if (volume == "yes") + initWithVolume(ions, els); + else + initMolecule(ions, els); + + makeUniformRandom(els->spins); + els->spins *= 2 * M_PI; + + app_log() << "" << std::endl; + app_log().flush(); + + return true; } -template -void -InitMolecularSystemT::initAtom(ParticleSetT* ions, ParticleSetT* els) +template +void InitMolecularSystemT::initAtom(ParticleSetT* ions, ParticleSetT* els) { - // 3N-dimensional Gaussian - typename ParticleSetT::ParticlePos chi(els->getTotalNum()); - makeGaussRandom(chi); - RealType q = std::sqrt(static_cast(els->getTotalNum())) * 0.5; - int nel(els->getTotalNum()), items(0); - while (nel) { - els->R[items] = ions->R[0] + q * chi[items]; - --nel; - ++items; - } + // 3N-dimensional Gaussian + typename ParticleSetT::ParticlePos chi(els->getTotalNum()); + makeGaussRandom(chi); + RealType q = 
std::sqrt(static_cast(els->getTotalNum())) * 0.5; + int nel(els->getTotalNum()), items(0); + while (nel) + { + els->R[items] = ions->R[0] + q * chi[items]; + --nel; + ++items; + } } -template +template struct LoneElectronT { - using RealType = TReal; - int ID; - RealType BondLength; - inline LoneElectronT(int id, RealType bl) : ID(id), BondLength(bl) - { - } + using RealType = TReal; + int ID; + RealType BondLength; + inline LoneElectronT(int id, RealType bl) : ID(id), BondLength(bl) {} }; -template -void -InitMolecularSystemT::initMolecule( - ParticleSetT* ions, ParticleSetT* els) +template +void InitMolecularSystemT::initMolecule(ParticleSetT* ions, ParticleSetT* els) { - if (ions->getTotalNum() == 1) - return initAtom(ions, els); - - const int d_ii_ID = ions->addTable(*ions); - ions->update(); - const typename ParticleSetT::ParticleIndex& grID(ions->GroupID); - SpeciesSet& Species(ions->getSpeciesSet()); - int Centers = ions->getTotalNum(); - std::vector Qtot(Centers), Qcore(Centers), Qval(Centers, 0); - // use charge as the core electrons first - int icharge = Species.addAttribute("charge"); - // Assign default core charge - for (int iat = 0; iat < Centers; iat++) - Qtot[iat] = static_cast(Species(icharge, grID[iat])); - // cutoff radius (Bohr) this a random choice - RealType cutoff = 4.0; - typename ParticleSetT::ParticlePos chi(els->getTotalNum()); - // makeGaussRandom(chi); - makeSphereRandom(chi); - // the upper limit of the electron index with spin up - const int numUp = els->last(0); - // the upper limit of the electron index with spin down. Pay attention to - // the no spin down electron case. - const int numDown = els->last(els->groups() > 1 ? 1 : 0) - els->first(0); - // consumer counter of random numbers chi - int random_number_counter = 0; - int nup_tot = 0, ndown_tot = numUp; - std::vector> loneQ; - RealType rmin = cutoff; - typename ParticleSetT::SingleParticlePos cm; - - const auto& dist = ions->getDistTableAA(d_ii_ID).getDistances(); - // Step 1. 
Distribute even Q[iat] of atomic center iat. If Q[iat] is odd, - // put Q[iat]-1 and save the lone electron. - for (size_t iat = 0; iat < Centers; iat++) { - cm += ions->R[iat]; - for (size_t jat = iat + 1; jat < Centers; ++jat) { - rmin = std::min(rmin, dist[jat][iat]); - } - // use 40% of the minimum bond - RealType sep = rmin * 0.4; - int v2 = Qtot[iat] / 2; - if (Qtot[iat] > v2 * 2) { - loneQ.push_back(LoneElectronT(iat, sep)); - } - for (int k = 0; k < v2; k++) { - // initialize electron positions in pairs - if (nup_tot < numUp) - els->R[nup_tot++] = - ions->R[iat] + sep * chi[random_number_counter++]; - if (ndown_tot < numDown) - els->R[ndown_tot++] = - ions->R[iat] + sep * chi[random_number_counter++]; - } + if (ions->getTotalNum() == 1) + return initAtom(ions, els); + + const int d_ii_ID = ions->addTable(*ions); + ions->update(); + const typename ParticleSetT::ParticleIndex& grID(ions->GroupID); + SpeciesSet& Species(ions->getSpeciesSet()); + int Centers = ions->getTotalNum(); + std::vector Qtot(Centers), Qcore(Centers), Qval(Centers, 0); + // use charge as the core electrons first + int icharge = Species.addAttribute("charge"); + // Assign default core charge + for (int iat = 0; iat < Centers; iat++) + Qtot[iat] = static_cast(Species(icharge, grID[iat])); + // cutoff radius (Bohr) this a random choice + RealType cutoff = 4.0; + typename ParticleSetT::ParticlePos chi(els->getTotalNum()); + // makeGaussRandom(chi); + makeSphereRandom(chi); + // the upper limit of the electron index with spin up + const int numUp = els->last(0); + // the upper limit of the electron index with spin down. Pay attention to + // the no spin down electron case. + const int numDown = els->last(els->groups() > 1 ? 
1 : 0) - els->first(0); + // consumer counter of random numbers chi + int random_number_counter = 0; + int nup_tot = 0, ndown_tot = numUp; + std::vector> loneQ; + RealType rmin = cutoff; + typename ParticleSetT::SingleParticlePos cm; + + const auto& dist = ions->getDistTableAA(d_ii_ID).getDistances(); + // Step 1. Distribute even Q[iat] of atomic center iat. If Q[iat] is odd, + // put Q[iat]-1 and save the lone electron. + for (size_t iat = 0; iat < Centers; iat++) + { + cm += ions->R[iat]; + for (size_t jat = iat + 1; jat < Centers; ++jat) + { + rmin = std::min(rmin, dist[jat][iat]); } - - // Step 2. Distribute the electrons left alone - // mmorales: changed order of spin assignment to help with spin - // imbalances in molecules at large distances. - // Not guaranteed to work, but should help in most cases - // as long as atoms in molecules are defined sequencially - typename std::vector>::iterator it(loneQ.begin()); - typename std::vector>::iterator it_end(loneQ.end()); - while (it != it_end && nup_tot != numUp && ndown_tot != numDown) { - if (nup_tot < numUp) { - els->R[nup_tot++] = ions->R[(*it).ID] + - (*it).BondLength * chi[random_number_counter++]; - ++it; - } - if (ndown_tot < numDown && it != it_end) { - els->R[ndown_tot++] = ions->R[(*it).ID] + - (*it).BondLength * chi[random_number_counter++]; - ++it; - } + // use 40% of the minimum bond + RealType sep = rmin * 0.4; + int v2 = Qtot[iat] / 2; + if (Qtot[iat] > v2 * 2) + { + loneQ.push_back(LoneElectronT(iat, sep)); } - - // Step 3. Handle more than neutral electrons - // extra electrons around the geometric center - RealType cnorm = 1.0 / static_cast(Centers); - RealType sep = rmin * 2; - cm = cnorm * cm; + for (int k = 0; k < v2; k++) + { + // initialize electron positions in pairs + if (nup_tot < numUp) + els->R[nup_tot++] = ions->R[iat] + sep * chi[random_number_counter++]; + if (ndown_tot < numDown) + els->R[ndown_tot++] = ions->R[iat] + sep * chi[random_number_counter++]; + } + } + + // Step 2. 
Distribute the electrons left alone + // mmorales: changed order of spin assignment to help with spin + // imbalances in molecules at large distances. + // Not guaranteed to work, but should help in most cases + // as long as atoms in molecules are defined sequencially + typename std::vector>::iterator it(loneQ.begin()); + typename std::vector>::iterator it_end(loneQ.end()); + while (it != it_end && nup_tot != numUp && ndown_tot != numDown) + { if (nup_tot < numUp) - while (nup_tot < numUp) - els->R[nup_tot++] = cm + sep * chi[random_number_counter++]; - if (ndown_tot < numDown) - while (ndown_tot < numDown) - els->R[ndown_tot++] = cm + sep * chi[random_number_counter++]; - - // safety check. all the random numbers should have been consumed once and - // only once. - if (random_number_counter != chi.size()) - throw std::runtime_error("initMolecule unexpected random number " - "consumption. Please report a bug!"); - - // put all the electrons in a unit box - if (els->getLattice().SuperCellEnum != SUPERCELL_OPEN) { - els->R.setUnit(PosUnit::Cartesian); - els->applyBC(els->R); - els->update(false); + { + els->R[nup_tot++] = ions->R[(*it).ID] + (*it).BondLength * chi[random_number_counter++]; + ++it; } + if (ndown_tot < numDown && it != it_end) + { + els->R[ndown_tot++] = ions->R[(*it).ID] + (*it).BondLength * chi[random_number_counter++]; + ++it; + } + } + + // Step 3. Handle more than neutral electrons + // extra electrons around the geometric center + RealType cnorm = 1.0 / static_cast(Centers); + RealType sep = rmin * 2; + cm = cnorm * cm; + if (nup_tot < numUp) + while (nup_tot < numUp) + els->R[nup_tot++] = cm + sep * chi[random_number_counter++]; + if (ndown_tot < numDown) + while (ndown_tot < numDown) + els->R[ndown_tot++] = cm + sep * chi[random_number_counter++]; + + // safety check. all the random numbers should have been consumed once and + // only once. 
+ if (random_number_counter != chi.size()) + throw std::runtime_error("initMolecule unexpected random number " + "consumption. Please report a bug!"); + + // put all the electrons in a unit box + if (els->getLattice().SuperCellEnum != SUPERCELL_OPEN) + { + els->R.setUnit(PosUnit::Cartesian); + els->applyBC(els->R); + els->update(false); + } } /// helper function to determine the lower bound of a domain (need to move up) -template -inline TinyVector -lower_bound(const TinyVector& a, const TinyVector& b) +template +inline TinyVector lower_bound(const TinyVector& a, const TinyVector& b) { - return TinyVector( - std::min(a[0], b[0]), std::min(a[1], b[1]), std::min(a[2], b[2])); + return TinyVector(std::min(a[0], b[0]), std::min(a[1], b[1]), std::min(a[2], b[2])); } /// helper function to determine the upper bound of a domain (need to move up) -template -inline TinyVector -upper_bound(const TinyVector& a, const TinyVector& b) +template +inline TinyVector upper_bound(const TinyVector& a, const TinyVector& b) { - return TinyVector( - std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2])); + return TinyVector(std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2])); } -template -void -InitMolecularSystemT::initWithVolume( - ParticleSetT* ions, ParticleSetT* els) +template +void InitMolecularSystemT::initWithVolume(ParticleSetT* ions, ParticleSetT* els) { - TinyVector start(1.0); - TinyVector end(0.0); - - typename ParticleSetT::ParticlePos Ru(ions->getTotalNum()); - Ru.setUnit(PosUnit::Lattice); - ions->applyBC(ions->R, Ru); - - for (int iat = 0; iat < Ru.size(); iat++) { - start = lower_bound(Ru[iat], start); - end = upper_bound(Ru[iat], end); - } - - TinyVector shift; - Tensor newbox(ions->getLattice().R); - - RealType buffer = 2.0; // buffer 2 bohr - for (int idim = 0; idim < OHMMS_DIM; ++idim) { - // if(ions->getLattice().BoxBConds[idim]) - //{ - // start[idim]=0.0; - // end[idim]=1.0; - // shift[idim]=0.0; - // } - // else - { - RealType buffer_r = 
buffer * ions->getLattice().OneOverLength[idim]; - start[idim] = std::max((RealType)0.0, (start[idim] - buffer_r)); - end[idim] = std::min((RealType)1.0, (end[idim] + buffer_r)); - shift[idim] = start[idim] * ions->getLattice().Length[idim]; - if (std::abs(end[idim] = start[idim]) < - buffer) { // handle singular case - start[idim] = std::max(0.0, start[idim] - buffer_r / 2.0); - end[idim] = std::min(1.0, end[idim] + buffer_r / 2.0); - } - - newbox(idim, idim) = - (end[idim] - start[idim]) * ions->getLattice().Length[idim]; - } + TinyVector start(1.0); + TinyVector end(0.0); + + typename ParticleSetT::ParticlePos Ru(ions->getTotalNum()); + Ru.setUnit(PosUnit::Lattice); + ions->applyBC(ions->R, Ru); + + for (int iat = 0; iat < Ru.size(); iat++) + { + start = lower_bound(Ru[iat], start); + end = upper_bound(Ru[iat], end); + } + + TinyVector shift; + Tensor newbox(ions->getLattice().R); + + RealType buffer = 2.0; // buffer 2 bohr + for (int idim = 0; idim < OHMMS_DIM; ++idim) + { + // if(ions->getLattice().BoxBConds[idim]) + //{ + // start[idim]=0.0; + // end[idim]=1.0; + // shift[idim]=0.0; + // } + // else + { + RealType buffer_r = buffer * ions->getLattice().OneOverLength[idim]; + start[idim] = std::max((RealType)0.0, (start[idim] - buffer_r)); + end[idim] = std::min((RealType)1.0, (end[idim] + buffer_r)); + shift[idim] = start[idim] * ions->getLattice().Length[idim]; + if (std::abs(end[idim] = start[idim]) < buffer) + { // handle singular case + start[idim] = std::max(0.0, start[idim] - buffer_r / 2.0); + end[idim] = std::min(1.0, end[idim] + buffer_r / 2.0); + } + + newbox(idim, idim) = (end[idim] - start[idim]) * ions->getLattice().Length[idim]; } + } - typename ParticleSetT::ParticleLayout slattice(ions->getLattice()); - slattice.set(newbox); + typename ParticleSetT::ParticleLayout slattice(ions->getLattice()); + slattice.set(newbox); - app_log() << " InitMolecularSystem::initWithVolume " << std::endl; - app_log() << " Effective Lattice shifted by " << shift << 
std::endl; - app_log() << newbox << std::endl; + app_log() << " InitMolecularSystem::initWithVolume " << std::endl; + app_log() << " Effective Lattice shifted by " << shift << std::endl; + app_log() << newbox << std::endl; - Ru.resize(els->getTotalNum()); - makeUniformRandom(Ru); - for (int iat = 0; iat < Ru.size(); ++iat) - els->R[iat] = slattice.toCart(Ru[iat]) + shift; - els->R.setUnit(PosUnit::Cartesian); + Ru.resize(els->getTotalNum()); + makeUniformRandom(Ru); + for (int iat = 0; iat < Ru.size(); ++iat) + els->R[iat] = slattice.toCart(Ru[iat]) + shift; + els->R.setUnit(PosUnit::Cartesian); } -template -bool -InitMolecularSystemT::put(std::istream& is) +template +bool InitMolecularSystemT::put(std::istream& is) { - return true; + return true; } -template -bool -InitMolecularSystemT::get(std::ostream& os) const +template +bool InitMolecularSystemT::get(std::ostream& os) const { - return true; + return true; } -template -void -InitMolecularSystemT::reset() -{ -} +template +void InitMolecularSystemT::reset() +{} #ifndef QMC_COMPLEX #ifndef MIXED_PRECISION diff --git a/src/Particle/InitMolecularSystemT.h b/src/Particle/InitMolecularSystemT.h index 3bfe148db5e..78894043fbc 100644 --- a/src/Particle/InitMolecularSystemT.h +++ b/src/Particle/InitMolecularSystemT.h @@ -23,57 +23,50 @@ namespace qmcplusplus { -template +template class ParticleSetT; -template +template class ParticleSetPoolT; /* Engine to initialize the initial electronic structure for a molecular system */ -template +template class InitMolecularSystemT : public OhmmsElementBase { public: - using RealType = typename ParticleSetTraits::RealType; + using RealType = typename ParticleSetTraits::RealType; - InitMolecularSystemT(ParticleSetPoolT& pset, const char* aname = "mosystem"); + InitMolecularSystemT(ParticleSetPoolT& pset, const char* aname = "mosystem"); - bool - get(std::ostream& os) const override; - bool - put(std::istream& is) override; - bool - put(xmlNodePtr cur) override; - void - reset() 
override; + bool get(std::ostream& os) const override; + bool put(std::istream& is) override; + bool put(xmlNodePtr cur) override; + void reset() override; - /** initialize els for an atom + /** initialize els for an atom */ - void - initAtom(ParticleSetT* ions, ParticleSetT* els); - /** initialize els position for a molecule + void initAtom(ParticleSetT* ions, ParticleSetT* els); + /** initialize els position for a molecule * * Use the valence of each ionic species on a sphere */ - void - initMolecule(ParticleSetT* ions, ParticleSetT* els); - /** initialize els for the systems with a mixed boundary + void initMolecule(ParticleSetT* ions, ParticleSetT* els); + /** initialize els for the systems with a mixed boundary * * Use the bound of the ionic systems and uniform random positions within a * reduced box */ - void - initWithVolume(ParticleSetT* ions, ParticleSetT* els); + void initWithVolume(ParticleSetT* ions, ParticleSetT* els); private: - /** pointer to ParticleSetPool + /** pointer to ParticleSetPool * * QMCHamiltonian needs to know which ParticleSet object * is used as an input object for the evaluations. * Any number of ParticleSet can be used to describe * a QMCHamiltonian. */ - ParticleSetPoolT& ptclPool; + ParticleSetPoolT& ptclPool; }; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BasisSetBaseT.h b/src/QMCWaveFunctions/BasisSetBaseT.h index 569abf9173f..243e51cab04 100644 --- a/src/QMCWaveFunctions/BasisSetBaseT.h +++ b/src/QMCWaveFunctions/BasisSetBaseT.h @@ -32,109 +32,92 @@ namespace qmcplusplus * Define a common storage for the derived classes and * provides a minimal set of interfaces to get/set BasisSetSize. 
*/ -template +template struct BasisSetBaseT : public OrbitalSetTraits { - enum + enum + { + MAXINDEX = 2 + OHMMS_DIM + }; + using RealType = typename OrbitalSetTraits::RealType; + using ValueType = typename OrbitalSetTraits::ValueType; + using IndexType = typename OrbitalSetTraits::IndexType; + using HessType = typename OrbitalSetTraits::HessType; + using IndexVector = typename OrbitalSetTraits::IndexVector; + using ValueVector = typename OrbitalSetTraits::ValueVector; + using ValueMatrix = typename OrbitalSetTraits::ValueMatrix; + using GradVector = typename OrbitalSetTraits::GradVector; + using GradMatrix = typename OrbitalSetTraits::GradMatrix; + using HessVector = typename OrbitalSetTraits::HessVector; + using HessMatrix = typename OrbitalSetTraits::HessMatrix; + using GGGType = TinyVector; + using GGGVector = Vector; + using GGGMatrix = Matrix; + + /// size of the basis set + IndexType BasisSetSize; + /// index of the particle + IndexType ActivePtcl; + /// counter to keep track + unsigned long Counter; + /// phi[i] the value of the i-th basis set + ValueVector Phi; + /// dphi[i] the gradient of the i-th basis set + GradVector dPhi; + /// d2phi[i] the laplacian of the i-th basis set + ValueVector d2Phi; + /// grad_grad_Phi[i] the full hessian of the i-th basis set + HessVector grad_grad_Phi; + /// grad_grad_grad_Phi the full hessian of the i-th basis set + GGGVector grad_grad_grad_Phi; + /// container to store value, laplacian and gradient + ValueMatrix Temp; + + ValueMatrix Y; + GradMatrix dY; + ValueMatrix d2Y; + + /// default constructor + BasisSetBaseT() : BasisSetSize(0), ActivePtcl(-1), Counter(0) {} + /// virtual destructor + virtual ~BasisSetBaseT() {} + /** resize the container */ + void resize(int ntargets) + { + if (BasisSetSize) { - MAXINDEX = 2 + OHMMS_DIM - }; - using RealType = typename OrbitalSetTraits::RealType; - using ValueType = typename OrbitalSetTraits::ValueType; - using IndexType = typename OrbitalSetTraits::IndexType; - using HessType = 
typename OrbitalSetTraits::HessType; - using IndexVector = typename OrbitalSetTraits::IndexVector; - using ValueVector = typename OrbitalSetTraits::ValueVector; - using ValueMatrix = typename OrbitalSetTraits::ValueMatrix; - using GradVector = typename OrbitalSetTraits::GradVector; - using GradMatrix = typename OrbitalSetTraits::GradMatrix; - using HessVector = typename OrbitalSetTraits::HessVector; - using HessMatrix = typename OrbitalSetTraits::HessMatrix; - using GGGType = TinyVector; - using GGGVector = Vector; - using GGGMatrix = Matrix; - - /// size of the basis set - IndexType BasisSetSize; - /// index of the particle - IndexType ActivePtcl; - /// counter to keep track - unsigned long Counter; - /// phi[i] the value of the i-th basis set - ValueVector Phi; - /// dphi[i] the gradient of the i-th basis set - GradVector dPhi; - /// d2phi[i] the laplacian of the i-th basis set - ValueVector d2Phi; - /// grad_grad_Phi[i] the full hessian of the i-th basis set - HessVector grad_grad_Phi; - /// grad_grad_grad_Phi the full hessian of the i-th basis set - GGGVector grad_grad_grad_Phi; - /// container to store value, laplacian and gradient - ValueMatrix Temp; - - ValueMatrix Y; - GradMatrix dY; - ValueMatrix d2Y; - - /// default constructor - BasisSetBaseT() : BasisSetSize(0), ActivePtcl(-1), Counter(0) - { - } - /// virtual destructor - virtual ~BasisSetBaseT() - { - } - /** resize the container */ - void - resize(int ntargets) - { - if (BasisSetSize) { - Phi.resize(BasisSetSize); - dPhi.resize(BasisSetSize); - d2Phi.resize(BasisSetSize); - grad_grad_Phi.resize(BasisSetSize); - grad_grad_grad_Phi.resize(BasisSetSize); - Temp.resize(BasisSetSize, MAXINDEX); - Y.resize(ntargets, BasisSetSize); - dY.resize(ntargets, BasisSetSize); - d2Y.resize(ntargets, BasisSetSize); - } - else { - app_error() << " BasisSetBase::BasisSetSize == 0" << std::endl; - } + Phi.resize(BasisSetSize); + dPhi.resize(BasisSetSize); + d2Phi.resize(BasisSetSize); + 
grad_grad_Phi.resize(BasisSetSize); + grad_grad_grad_Phi.resize(BasisSetSize); + Temp.resize(BasisSetSize, MAXINDEX); + Y.resize(ntargets, BasisSetSize); + dY.resize(ntargets, BasisSetSize); + d2Y.resize(ntargets, BasisSetSize); } - - /// clone the basis set - virtual BasisSetBaseT* - makeClone() const = 0; - /** return the basis set size */ - inline IndexType - getBasisSetSize() const + else { - return BasisSetSize; + app_error() << " BasisSetBase::BasisSetSize == 0" << std::endl; } - - /// resize the basis set - virtual void - setBasisSetSize(int nbs) = 0; - - virtual void - evaluateWithHessian(const ParticleSetT& P, int iat) = 0; - virtual void - evaluateWithThirdDeriv(const ParticleSetT& P, int iat) = 0; - virtual void - evaluateThirdDerivOnly(const ParticleSetT& P, int iat) = 0; - virtual void - evaluateForWalkerMove(const ParticleSetT& P) = 0; - virtual void - evaluateForWalkerMove(const ParticleSetT& P, int iat) = 0; - virtual void - evaluateForPtclMove(const ParticleSetT& P, int iat) = 0; - virtual void - evaluateAllForPtclMove(const ParticleSetT& P, int iat) = 0; - virtual void - evaluateForPtclMoveWithHessian(const ParticleSetT& P, int iat) = 0; + } + + /// clone the basis set + virtual BasisSetBaseT* makeClone() const = 0; + /** return the basis set size */ + inline IndexType getBasisSetSize() const { return BasisSetSize; } + + /// resize the basis set + virtual void setBasisSetSize(int nbs) = 0; + + virtual void evaluateWithHessian(const ParticleSetT& P, int iat) = 0; + virtual void evaluateWithThirdDeriv(const ParticleSetT& P, int iat) = 0; + virtual void evaluateThirdDerivOnly(const ParticleSetT& P, int iat) = 0; + virtual void evaluateForWalkerMove(const ParticleSetT& P) = 0; + virtual void evaluateForWalkerMove(const ParticleSetT& P, int iat) = 0; + virtual void evaluateForPtclMove(const ParticleSetT& P, int iat) = 0; + virtual void evaluateAllForPtclMove(const ParticleSetT& P, int iat) = 0; + virtual void evaluateForPtclMoveWithHessian(const 
ParticleSetT& P, int iat) = 0; }; /** Base for real basis set @@ -142,96 +125,76 @@ struct BasisSetBaseT : public OrbitalSetTraits * Equivalent to BasisSetBase with minimum requirements * Used by LCAO */ -template +template struct SoaBasisSetBaseT { - using value_type = T; - using vgl_type = VectorSoaContainer; - using vgh_type = VectorSoaContainer; - using vghgh_type = VectorSoaContainer; - using OffloadMWVGLArray = - Array>; // [VGL, walker, Orbs] - using OffloadMWVArray = - Array>; // [walker, Orbs] - - /// size of the basis set - int BasisSetSize; - - virtual ~SoaBasisSetBaseT() = default; - inline int - getBasisSetSize() - { - return BasisSetSize; - } - - virtual SoaBasisSetBaseT* - makeClone() const = 0; - virtual void - setBasisSetSize(int nbs) = 0; - - // Evaluates value, gradient, and laplacian for electron "iat". Parks them - // into a temporary data structure "vgl". - virtual void - evaluateVGL(const ParticleSetT& P, int iat, vgl_type& vgl) = 0; - // Evaluates value, gradient, and laplacian for electron "iat". places them - // in a offload array for batched code. - virtual void - mw_evaluateVGL(const RefVectorWithLeader>& P_list, int iat, - OffloadMWVGLArray& vgl) = 0; - // Evaluates value for electron "iat". places it in a offload array for - // batched code. - virtual void - mw_evaluateValue(const RefVectorWithLeader>& P_list, - int iat, OffloadMWVArray& v) = 0; - // Evaluates value, gradient, and Hessian for electron "iat". Parks them - // into a temporary data structure "vgh". - virtual void - evaluateVGH(const ParticleSetT& P, int iat, vgh_type& vgh) = 0; - // Evaluates value, gradient, and Hessian, and Gradient Hessian for electron - // "iat". Parks them into a temporary data structure "vghgh". - virtual void - evaluateVGHGH(const ParticleSetT& P, int iat, vghgh_type& vghgh) = 0; - // Evaluates the x,y, and z components of ionic gradient associated with - // "jion" of value. Parks the raw data into "vgl" container. 
- virtual void - evaluateGradSourceV(const ParticleSetT& P, int iat, - const ParticleSetT& ions, int jion, vgl_type& vgl) = 0; - // Evaluates the x,y, and z components of ionic gradient associated with - // "jion" value, gradient, and laplacian. - // Parks the raw data into "vghgh" container. - virtual void - evaluateGradSourceVGL(const ParticleSetT& P, int iat, - const ParticleSetT& ions, int jion, vghgh_type& vghgh) = 0; - virtual void - evaluateV(const ParticleSetT& P, int iat, value_type* restrict vals) = 0; - virtual bool - is_S_orbital(int mo_idx, int ao_idx) - { - return false; - } - - /// Determine which orbitals are S-type. Used for cusp correction. - virtual void - queryOrbitalsForSType(const std::vector& corrCenter, - std::vector& is_s_orbital) const - { - } - - /** initialize a shared resource and hand it to collection + using value_type = T; + using vgl_type = VectorSoaContainer; + using vgh_type = VectorSoaContainer; + using vghgh_type = VectorSoaContainer; + using OffloadMWVGLArray = Array>; // [VGL, walker, Orbs] + using OffloadMWVArray = Array>; // [walker, Orbs] + + /// size of the basis set + int BasisSetSize; + + virtual ~SoaBasisSetBaseT() = default; + inline int getBasisSetSize() { return BasisSetSize; } + + virtual SoaBasisSetBaseT* makeClone() const = 0; + virtual void setBasisSetSize(int nbs) = 0; + + // Evaluates value, gradient, and laplacian for electron "iat". Parks them + // into a temporary data structure "vgl". + virtual void evaluateVGL(const ParticleSetT& P, int iat, vgl_type& vgl) = 0; + // Evaluates value, gradient, and laplacian for electron "iat". places them + // in a offload array for batched code. + virtual void mw_evaluateVGL(const RefVectorWithLeader>& P_list, int iat, OffloadMWVGLArray& vgl) = 0; + // Evaluates value for electron "iat". places it in a offload array for + // batched code. 
+ virtual void mw_evaluateValue(const RefVectorWithLeader>& P_list, int iat, OffloadMWVArray& v) = 0; + // Evaluates value, gradient, and Hessian for electron "iat". Parks them + // into a temporary data structure "vgh". + virtual void evaluateVGH(const ParticleSetT& P, int iat, vgh_type& vgh) = 0; + // Evaluates value, gradient, and Hessian, and Gradient Hessian for electron + // "iat". Parks them into a temporary data structure "vghgh". + virtual void evaluateVGHGH(const ParticleSetT& P, int iat, vghgh_type& vghgh) = 0; + // Evaluates the x,y, and z components of ionic gradient associated with + // "jion" of value. Parks the raw data into "vgl" container. + virtual void evaluateGradSourceV(const ParticleSetT& P, + int iat, + const ParticleSetT& ions, + int jion, + vgl_type& vgl) = 0; + // Evaluates the x,y, and z components of ionic gradient associated with + // "jion" value, gradient, and laplacian. + // Parks the raw data into "vghgh" container. + virtual void evaluateGradSourceVGL(const ParticleSetT& P, + int iat, + const ParticleSetT& ions, + int jion, + vghgh_type& vghgh) = 0; + virtual void evaluateV(const ParticleSetT& P, int iat, value_type* restrict vals) = 0; + virtual bool is_S_orbital(int mo_idx, int ao_idx) { return false; } + + /// Determine which orbitals are S-type. Used for cusp correction. 
+ virtual void queryOrbitalsForSType(const std::vector& corrCenter, std::vector& is_s_orbital) const {} + + /** initialize a shared resource and hand it to collection */ - virtual void createResource(ResourceCollection& collection) const {} + virtual void createResource(ResourceCollection& collection) const {} - /** acquire a shared resource from collection + /** acquire a shared resource from collection */ - virtual void acquireResource(ResourceCollection& collection, - const RefVectorWithLeader& bset_list) const - {} + virtual void acquireResource(ResourceCollection& collection, + const RefVectorWithLeader& bset_list) const + {} - /** return a shared resource to collection + /** return a shared resource to collection */ - virtual void releaseResource(ResourceCollection& collection, - const RefVectorWithLeader& bset_list) const - {} + virtual void releaseResource(ResourceCollection& collection, + const RefVectorWithLeader& bset_list) const + {} }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.cpp b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.cpp index 83cdfbd190e..c342fc195ed 100644 --- a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.cpp @@ -15,10 +15,6 @@ // at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// -/** @file BsplineReaderBaseT.cpp - * - * Implement super function - */ #include "BsplineReaderBaseT.h" #include "Message/CommOperators.h" @@ -30,162 +26,155 @@ namespace qmcplusplus { -template -BsplineReaderBaseT::BsplineReaderBaseT(EinsplineSetBuilderT* e) : - mybuilder(e), - MeshSize(0), - checkNorm(true), - saveSplineCoefs(false), - rotate(true) +template +BsplineReaderBaseT::BsplineReaderBaseT(EinsplineSetBuilderT* e) + : mybuilder(e), MeshSize(0), checkNorm(true), saveSplineCoefs(false), rotate(true) { - myComm = mybuilder->getCommunicator(); + myComm = 
mybuilder->getCommunicator(); } -template -void -BsplineReaderBaseT::get_psi_g( - int ti, int spin, int ib, Vector>& cG) +template +void BsplineReaderBaseT::get_psi_g(int ti, int spin, int ib, Vector>& cG) { - int ncg = 0; - if (myComm->rank() == 0) { - std::string path = psi_g_path(ti, spin, ib); - mybuilder->H5File.read(cG, path); - ncg = cG.size(); - } - myComm->bcast(ncg); - if (ncg != mybuilder->MaxNumGvecs) { - APP_ABORT("Failed : ncg != MaxNumGvecs"); - } - myComm->bcast(cG); + int ncg = 0; + if (myComm->rank() == 0) + { + std::string path = psi_g_path(ti, spin, ib); + mybuilder->H5File.read(cG, path); + ncg = cG.size(); + } + myComm->bcast(ncg); + if (ncg != mybuilder->MaxNumGvecs) + { + APP_ABORT("Failed : ncg != MaxNumGvecs"); + } + myComm->bcast(cG); } -template +template BsplineReaderBaseT::~BsplineReaderBaseT() -{ -} +{} -inline std::string -make_bandinfo_filename(const std::string& root, int spin, int twist, - const Tensor& tilematrix, int gid) +inline std::string make_bandinfo_filename(const std::string& root, + int spin, + int twist, + const Tensor& tilematrix, + int gid) { - std::ostringstream oo; - oo << root << ".tile_" << tilematrix(0, 0) << tilematrix(0, 1) - << tilematrix(0, 2) << tilematrix(1, 0) << tilematrix(1, 1) - << tilematrix(1, 2) << tilematrix(2, 0) << tilematrix(2, 1) - << tilematrix(2, 2) << ".spin_" << spin << ".tw_" << twist; - if (gid >= 0) - oo << ".g" << gid; - return oo.str(); + std::ostringstream oo; + oo << root << ".tile_" << tilematrix(0, 0) << tilematrix(0, 1) << tilematrix(0, 2) << tilematrix(1, 0) + << tilematrix(1, 1) << tilematrix(1, 2) << tilematrix(2, 0) << tilematrix(2, 1) << tilematrix(2, 2) << ".spin_" + << spin << ".tw_" << twist; + if (gid >= 0) + oo << ".g" << gid; + return oo.str(); } -inline std::string -make_bandgroup_name(const std::string& root, int spin, int twist, - const Tensor& tilematrix, int first, int last) +inline std::string make_bandgroup_name(const std::string& root, + int spin, + int twist, + 
const Tensor& tilematrix, + int first, + int last) { - std::ostringstream oo; - oo << root << ".tile_" << tilematrix(0, 0) << tilematrix(0, 1) - << tilematrix(0, 2) << tilematrix(1, 0) << tilematrix(1, 1) - << tilematrix(1, 2) << tilematrix(2, 0) << tilematrix(2, 1) - << tilematrix(2, 2) << ".spin_" << spin << ".tw_" << twist << ".l" - << first << "u" << last; - return oo.str(); + std::ostringstream oo; + oo << root << ".tile_" << tilematrix(0, 0) << tilematrix(0, 1) << tilematrix(0, 2) << tilematrix(1, 0) + << tilematrix(1, 1) << tilematrix(1, 2) << tilematrix(2, 0) << tilematrix(2, 1) << tilematrix(2, 2) << ".spin_" + << spin << ".tw_" << twist << ".l" << first << "u" << last; + return oo.str(); } -template -void -BsplineReaderBaseT::setCommon(xmlNodePtr cur) +template +void BsplineReaderBaseT::setCommon(xmlNodePtr cur) { - // check orbital normalization by default - std::string checkOrbNorm("yes"); - std::string saveCoefs("no"); - OhmmsAttributeSet a; - a.add(checkOrbNorm, "check_orb_norm"); - a.add(saveCoefs, "save_coefs"); - a.put(cur); - - // allow user to turn off norm check with a warning - if (checkOrbNorm == "no") { - app_log() << "WARNING: disable orbital normalization check!" - << std::endl; - checkNorm = false; - } - saveSplineCoefs = saveCoefs == "yes"; + // check orbital normalization by default + std::string checkOrbNorm("yes"); + std::string saveCoefs("no"); + OhmmsAttributeSet a; + a.add(checkOrbNorm, "check_orb_norm"); + a.add(saveCoefs, "save_coefs"); + a.put(cur); + + // allow user to turn off norm check with a warning + if (checkOrbNorm == "no") + { + app_log() << "WARNING: disable orbital normalization check!" 
<< std::endl; + checkNorm = false; + } + saveSplineCoefs = saveCoefs == "yes"; } -template -std::unique_ptr> -BsplineReaderBaseT::create_spline_set(int spin, xmlNodePtr cur) +template +std::unique_ptr> BsplineReaderBaseT::create_spline_set(int spin, xmlNodePtr cur) { - int ns(0); - std::string spo_object_name; - OhmmsAttributeSet a; - a.add(ns, "size"); - a.add(spo_object_name, "name"); - a.add(spo_object_name, "id"); - a.put(cur); - - if (ns == 0) - APP_ABORT_TRACE(__FILE__, __LINE__, "parameter/@size missing"); - - if (spo2band.empty()) - spo2band.resize(mybuilder->states.size()); - - std::vector& fullband = (*(mybuilder->FullBands[spin])); - - if (spo2band[spin].empty()) { - spo2band[spin].reserve(fullband.size()); - if (!mybuilder->states[spin]) - mybuilder->states[spin] = std::make_unique(); - mybuilder->clear_states(spin); - initialize_spo2band( - spin, fullband, *mybuilder->states[spin], spo2band[spin]); - } - - BandInfoGroup vals; - vals.TwistIndex = fullband[0].TwistIndex; - vals.GroupID = 0; - vals.myName = make_bandgroup_name(mybuilder->getName(), spin, - mybuilder->twist_num_, mybuilder->TileMatrix, 0, ns); - vals.selectBands(fullband, 0, ns, false); - - return create_spline_set(spo_object_name, spin, vals); + int ns(0); + std::string spo_object_name; + OhmmsAttributeSet a; + a.add(ns, "size"); + a.add(spo_object_name, "name"); + a.add(spo_object_name, "id"); + a.put(cur); + + if (ns == 0) + APP_ABORT_TRACE(__FILE__, __LINE__, "parameter/@size missing"); + + if (spo2band.empty()) + spo2band.resize(mybuilder->states.size()); + + std::vector& fullband = (*(mybuilder->FullBands[spin])); + + if (spo2band[spin].empty()) + { + spo2band[spin].reserve(fullband.size()); + if (!mybuilder->states[spin]) + mybuilder->states[spin] = std::make_unique(); + mybuilder->clear_states(spin); + initialize_spo2band(spin, fullband, *mybuilder->states[spin], spo2band[spin]); + } + + BandInfoGroup vals; + vals.TwistIndex = fullband[0].TwistIndex; + vals.GroupID = 0; + 
vals.myName = make_bandgroup_name(mybuilder->getName(), spin, mybuilder->twist_num_, mybuilder->TileMatrix, 0, ns); + vals.selectBands(fullband, 0, ns, false); + + return create_spline_set(spo_object_name, spin, vals); } -template -std::unique_ptr> -BsplineReaderBaseT::create_spline_set( - int spin, xmlNodePtr cur, SPOSetInputInfo& input_info) +template +std::unique_ptr> BsplineReaderBaseT::create_spline_set(int spin, + xmlNodePtr cur, + SPOSetInputInfo& input_info) { - std::string spo_object_name; - OhmmsAttributeSet a; - a.add(spo_object_name, "name"); - a.add(spo_object_name, "id"); - a.put(cur); - - if (spo2band.empty()) - spo2band.resize(mybuilder->states.size()); - - std::vector& fullband = (*(mybuilder->FullBands[spin])); - - if (spo2band[spin].empty()) { - spo2band[spin].reserve(fullband.size()); - if (!mybuilder->states[spin]) - mybuilder->states[spin] = std::make_unique(); - mybuilder->clear_states(spin); - initialize_spo2band( - spin, fullband, *mybuilder->states[spin], spo2band[spin]); - } - - BandInfoGroup vals; - vals.TwistIndex = fullband[0].TwistIndex; - vals.GroupID = 0; - vals.myName = make_bandgroup_name(mybuilder->getName(), spin, - mybuilder->twist_num_, mybuilder->TileMatrix, input_info.min_index(), - input_info.max_index()); - vals.selectBands(fullband, spo2band[spin][input_info.min_index()], - input_info.max_index() - input_info.min_index(), false); - - return create_spline_set(spo_object_name, spin, vals); + std::string spo_object_name; + OhmmsAttributeSet a; + a.add(spo_object_name, "name"); + a.add(spo_object_name, "id"); + a.put(cur); + + if (spo2band.empty()) + spo2band.resize(mybuilder->states.size()); + + std::vector& fullband = (*(mybuilder->FullBands[spin])); + + if (spo2band[spin].empty()) + { + spo2band[spin].reserve(fullband.size()); + if (!mybuilder->states[spin]) + mybuilder->states[spin] = std::make_unique(); + mybuilder->clear_states(spin); + initialize_spo2band(spin, fullband, *mybuilder->states[spin], spo2band[spin]); + } + 
+ BandInfoGroup vals; + vals.TwistIndex = fullband[0].TwistIndex; + vals.GroupID = 0; + vals.myName = make_bandgroup_name(mybuilder->getName(), spin, mybuilder->twist_num_, mybuilder->TileMatrix, + input_info.min_index(), input_info.max_index()); + vals.selectBands(fullband, spo2band[spin][input_info.min_index()], input_info.max_index() - input_info.min_index(), + false); + + return create_spline_set(spo_object_name, spin, vals); } /** build index tables to map a state to band with k-point folidng @@ -195,60 +184,60 @@ BsplineReaderBaseT::create_spline_set( * * At gamma or arbitrary kpoints with complex wavefunctions, spo2band[i]==i */ -template -void -BsplineReaderBaseT::initialize_spo2band(int spin, - const std::vector& bigspace, SPOSetInfo& sposet, - std::vector& spo2band) +template +void BsplineReaderBaseT::initialize_spo2band(int spin, + const std::vector& bigspace, + SPOSetInfo& sposet, + std::vector& spo2band) { - spo2band.reserve(bigspace.size()); - int ns = 0; - for (int i = 0; i < bigspace.size(); ++i) { - spo2band.push_back(i); - SPOInfo a(ns, bigspace[i].Energy); - sposet.add(a); - ns++; - if (bigspace[i].MakeTwoCopies) { - spo2band.push_back(i); - SPOInfo b(ns, bigspace[i].Energy); - sposet.add(b); - ns++; - } - } - - // write to a file - const Communicate* comm = myComm; - if (comm->rank()) - return; - - std::filesystem::path aname = make_bandinfo_filename(mybuilder->getName(), - spin, mybuilder->twist_num_, mybuilder->TileMatrix, comm->getGroupID()); - aname += ".bandinfo.dat"; - - std::ofstream o(aname.c_str()); - std::array s; - ns = 0; - using PosType = QMCTraits::PosType; - o << "# Band State TwistIndex BandIndex Energy Kx Ky " - "Kz K1 K2 K3 KmK " - << std::endl; - for (int i = 0; i < bigspace.size(); ++i) { - int ti = bigspace[i].TwistIndex; - int bi = bigspace[i].BandIndex; - double e = bigspace[i].Energy; - int nd = (bigspace[i].MakeTwoCopies) ? 
2 : 1; - PosType k = mybuilder->PrimCell.k_cart(mybuilder->primcell_kpoints[ti]); - int s_size = std::snprintf(s.data(), s.size(), - "%8d %8d %8d %8d %12.6f %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f %6d\n", - i, ns, ti, bi, e, k[0], k[1], k[2], - mybuilder->primcell_kpoints[ti][0], - mybuilder->primcell_kpoints[ti][1], - mybuilder->primcell_kpoints[ti][2], nd); - if (s_size < 0) - throw std::runtime_error("Error generating bandinfo"); - o << s.data(); - ns += nd; + spo2band.reserve(bigspace.size()); + int ns = 0; + for (int i = 0; i < bigspace.size(); ++i) + { + spo2band.push_back(i); + SPOInfo a(ns, bigspace[i].Energy); + sposet.add(a); + ns++; + if (bigspace[i].MakeTwoCopies) + { + spo2band.push_back(i); + SPOInfo b(ns, bigspace[i].Energy); + sposet.add(b); + ns++; } + } + + // write to a file + const Communicate* comm = myComm; + if (comm->rank()) + return; + + std::filesystem::path aname = make_bandinfo_filename(mybuilder->getName(), spin, mybuilder->twist_num_, + mybuilder->TileMatrix, comm->getGroupID()); + aname += ".bandinfo.dat"; + + std::ofstream o(aname.c_str()); + std::array s; + ns = 0; + using PosType = QMCTraits::PosType; + o << "# Band State TwistIndex BandIndex Energy Kx Ky " + "Kz K1 K2 K3 KmK " + << std::endl; + for (int i = 0; i < bigspace.size(); ++i) + { + int ti = bigspace[i].TwistIndex; + int bi = bigspace[i].BandIndex; + double e = bigspace[i].Energy; + int nd = (bigspace[i].MakeTwoCopies) ? 
2 : 1; + PosType k = mybuilder->PrimCell.k_cart(mybuilder->primcell_kpoints[ti]); + int s_size = std::snprintf(s.data(), s.size(), "%8d %8d %8d %8d %12.6f %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f %6d\n", + i, ns, ti, bi, e, k[0], k[1], k[2], mybuilder->primcell_kpoints[ti][0], + mybuilder->primcell_kpoints[ti][1], mybuilder->primcell_kpoints[ti][2], nd); + if (s_size < 0) + throw std::runtime_error("Error generating bandinfo"); + o << s.data(); + ns += nd; + } } template class BsplineReaderBaseT; diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h index 5eab41dea55..156e6237cbb 100644 --- a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h +++ b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h @@ -28,7 +28,7 @@ namespace qmcplusplus { struct SPOSetInputInfo; -template +template class EinsplineSetBuilderT; /** @@ -39,189 +39,167 @@ class EinsplineSetBuilderT; * - set_grid : create the basic grid and boundary conditions for einspline * Note that template is abused but it works. 
*/ -template +template class BsplineReaderBaseT { public: - /// pointer to the EinsplineSetBuilder - EinsplineSetBuilderT* mybuilder; - /// communicator - Communicate* myComm; - /// mesh size - TinyVector MeshSize; - /// check the norm of orbitals - bool checkNorm; - /// save spline coefficients to storage - bool saveSplineCoefs; - /// apply orbital rotations - bool rotate; - /// map from spo index to band index - std::vector> spo2band; - - BsplineReaderBaseT(EinsplineSetBuilderT* e); - - virtual ~BsplineReaderBaseT(); - - /** read gvectors and set the mesh, and prepare for einspline + /// pointer to the EinsplineSetBuilder + EinsplineSetBuilderT* mybuilder; + /// communicator + Communicate* myComm; + /// mesh size + TinyVector MeshSize; + /// check the norm of orbitals + bool checkNorm; + /// save spline coefficients to storage + bool saveSplineCoefs; + /// apply orbital rotations + bool rotate; + /// map from spo index to band index + std::vector> spo2band; + + BsplineReaderBaseT(EinsplineSetBuilderT* e); + + virtual ~BsplineReaderBaseT(); + + /** read gvectors and set the mesh, and prepare for einspline */ - template - inline bool - set_grid(const TinyVector& halfg, GT* xyz_grid, BCT* xyz_bc) - { - // This sets MeshSize from the input file - bool havePsig = mybuilder->ReadGvectors_ESHDF(); - - // If this MeshSize is not initialized, use the meshsize set by the - // input based on FFT grid and meshfactor - if (MeshSize[0] == 0) - MeshSize = mybuilder->MeshSize; - - app_log() << " Using meshsize=" << MeshSize - << "\n vs input meshsize=" << mybuilder->MeshSize - << std::endl; - - for (int j = 0; j < 3; ++j) { - xyz_grid[j].start = 0.0; - xyz_grid[j].end = 1.0; - xyz_grid[j].num = MeshSize[j]; - - if (halfg[j]) { - xyz_bc[j].lCode = ANTIPERIODIC; - xyz_bc[j].rCode = ANTIPERIODIC; - } - else { - xyz_bc[j].lCode = PERIODIC; - xyz_bc[j].rCode = PERIODIC; - } - - xyz_bc[j].lVal = 0.0; - xyz_bc[j].rVal = 0.0; - } - return havePsig; - } + template + inline bool 
set_grid(const TinyVector& halfg, GT* xyz_grid, BCT* xyz_bc) + { + // This sets MeshSize from the input file + bool havePsig = mybuilder->ReadGvectors_ESHDF(); - /** initialize twist-related data for N orbitals - */ - template - inline void - check_twists(SPE* bspline, const BandInfoGroup& bandgroup) + // If this MeshSize is not initialized, use the meshsize set by the + // input based on FFT grid and meshfactor + if (MeshSize[0] == 0) + MeshSize = mybuilder->MeshSize; + + app_log() << " Using meshsize=" << MeshSize << "\n vs input meshsize=" << mybuilder->MeshSize << std::endl; + + for (int j = 0; j < 3; ++j) { - // init(orbitalSet,bspline); - bspline->PrimLattice = mybuilder->PrimCell; - bspline->GGt = - dot(transpose(bspline->PrimLattice.G), bspline->PrimLattice.G); - - int N = bandgroup.getNumDistinctOrbitals(); - int numOrbs = bandgroup.getNumSPOs(); - - bspline->setOrbitalSetSize(numOrbs); - bspline->resizeStorage(N, N); - - bspline->first_spo = bandgroup.getFirstSPO(); - bspline->last_spo = bandgroup.getLastSPO(); - - int num = 0; - const std::vector& cur_bands = bandgroup.myBands; - for (int iorb = 0; iorb < N; iorb++) { - int ti = cur_bands[iorb].TwistIndex; - bspline->kPoints[iorb] = - mybuilder->PrimCell.k_cart(-mybuilder->primcell_kpoints[ti]); - bspline->MakeTwoCopies[iorb] = - (num < (numOrbs - 1)) && cur_bands[iorb].MakeTwoCopies; - num += bspline->MakeTwoCopies[iorb] ? 2 : 1; - } - - app_log() << "NumDistinctOrbitals " << N << " numOrbs = " << numOrbs - << std::endl; - - bspline->HalfG = 0; - TinyVector bconds = - mybuilder->TargetPtcl.getLattice().BoxBConds; - if (!bspline->isComplex()) { - // no k-point folding, single special k point (G, L ...) 
- TinyVector twist0 = - mybuilder->primcell_kpoints[bandgroup.TwistIndex]; - for (int i = 0; i < 3; i++) - if (bconds[i] && - ((std::abs(std::abs(twist0[i]) - 0.5) < 1.0e-8))) - bspline->HalfG[i] = 1; - else - bspline->HalfG[i] = 0; - app_log() << " TwistIndex = " << cur_bands[0].TwistIndex - << " TwistAngle " << twist0 << std::endl; - app_log() << " HalfG = " << bspline->HalfG << std::endl; - } - app_log().flush(); + xyz_grid[j].start = 0.0; + xyz_grid[j].end = 1.0; + xyz_grid[j].num = MeshSize[j]; + + if (halfg[j]) + { + xyz_bc[j].lCode = ANTIPERIODIC; + xyz_bc[j].rCode = ANTIPERIODIC; + } + else + { + xyz_bc[j].lCode = PERIODIC; + xyz_bc[j].rCode = PERIODIC; + } + + xyz_bc[j].lVal = 0.0; + xyz_bc[j].rVal = 0.0; } + return havePsig; + } - /** return the path name in hdf5 + /** initialize twist-related data for N orbitals */ - inline std::string - psi_g_path(int ti, int spin, int ib) + template + inline void check_twists(SPE* bspline, const BandInfoGroup& bandgroup) + { + // init(orbitalSet,bspline); + bspline->PrimLattice = mybuilder->PrimCell; + bspline->GGt = dot(transpose(bspline->PrimLattice.G), bspline->PrimLattice.G); + + int N = bandgroup.getNumDistinctOrbitals(); + int numOrbs = bandgroup.getNumSPOs(); + + bspline->setOrbitalSetSize(numOrbs); + bspline->resizeStorage(N, N); + + bspline->first_spo = bandgroup.getFirstSPO(); + bspline->last_spo = bandgroup.getLastSPO(); + + int num = 0; + const std::vector& cur_bands = bandgroup.myBands; + for (int iorb = 0; iorb < N; iorb++) { - std::ostringstream path; - path << "/electrons/kpoint_" << ti << "/spin_" << spin << "/state_" - << ib << "/psi_g"; - return path.str(); + int ti = cur_bands[iorb].TwistIndex; + bspline->kPoints[iorb] = mybuilder->PrimCell.k_cart(-mybuilder->primcell_kpoints[ti]); + bspline->MakeTwoCopies[iorb] = (num < (numOrbs - 1)) && cur_bands[iorb].MakeTwoCopies; + num += bspline->MakeTwoCopies[iorb] ? 
2 : 1; } - /** return the path name in hdf5 - */ - inline std::string - psi_r_path(int ti, int spin, int ib) + app_log() << "NumDistinctOrbitals " << N << " numOrbs = " << numOrbs << std::endl; + + bspline->HalfG = 0; + TinyVector bconds = mybuilder->TargetPtcl.getLattice().BoxBConds; + if (!bspline->isComplex()) { - std::ostringstream path; - path << "/electrons/kpoint_" << ti << "/spin_" << spin << "/state_" - << ib << "/psi_r"; - return path.str(); + // no k-point folding, single special k point (G, L ...) + TinyVector twist0 = mybuilder->primcell_kpoints[bandgroup.TwistIndex]; + for (int i = 0; i < 3; i++) + if (bconds[i] && ((std::abs(std::abs(twist0[i]) - 0.5) < 1.0e-8))) + bspline->HalfG[i] = 1; + else + bspline->HalfG[i] = 0; + app_log() << " TwistIndex = " << cur_bands[0].TwistIndex << " TwistAngle " << twist0 << std::endl; + app_log() << " HalfG = " << bspline->HalfG << std::endl; } + app_log().flush(); + } - /** read/bcast psi_g + /** return the path name in hdf5 + */ + inline std::string psi_g_path(int ti, int spin, int ib) + { + std::ostringstream path; + path << "/electrons/kpoint_" << ti << "/spin_" << spin << "/state_" << ib << "/psi_g"; + return path.str(); + } + + /** return the path name in hdf5 + */ + inline std::string psi_r_path(int ti, int spin, int ib) + { + std::ostringstream path; + path << "/electrons/kpoint_" << ti << "/spin_" << spin << "/state_" << ib << "/psi_r"; + return path.str(); + } + + /** read/bcast psi_g * @param ti twist index * @param spin spin index * @param ib band index * @param cG psi_g as stored in hdf5 */ - void - get_psi_g(int ti, int spin, int ib, Vector>& cG); + void get_psi_g(int ti, int spin, int ib, Vector>& cG); - /** create the actual spline sets + /** create the actual spline sets */ - virtual std::unique_ptr> - create_spline_set(const std::string& my_name, int spin, - const BandInfoGroup& bandgroup) = 0; + virtual std::unique_ptr> create_spline_set(const std::string& my_name, + int spin, + const 
BandInfoGroup& bandgroup) = 0; - /** setting common parameters + /** setting common parameters */ - void - setCommon(xmlNodePtr cur); + void setCommon(xmlNodePtr cur); - /** create the spline after one of the kind is created */ - std::unique_ptr> - create_spline_set(int spin, xmlNodePtr cur, SPOSetInputInfo& input_info); + /** create the spline after one of the kind is created */ + std::unique_ptr> create_spline_set(int spin, xmlNodePtr cur, SPOSetInputInfo& input_info); - /** create the spline set */ - std::unique_ptr> - create_spline_set(int spin, xmlNodePtr cur); + /** create the spline set */ + std::unique_ptr> create_spline_set(int spin, xmlNodePtr cur); - /** Set the checkNorm variable */ - inline void - setCheckNorm(bool new_checknorm) - { - checkNorm = new_checknorm; - }; + /** Set the checkNorm variable */ + inline void setCheckNorm(bool new_checknorm) { checkNorm = new_checknorm; }; - /** Set the orbital rotation flag. Rotations are applied to balance the + /** Set the orbital rotation flag. Rotations are applied to balance the * real/imaginary components. */ - inline void - setRotate(bool new_rotate) - { - rotate = new_rotate; - }; + inline void setRotate(bool new_rotate) { rotate = new_rotate; }; - void - initialize_spo2band(int spin, const std::vector& bigspace, - SPOSetInfo& sposet, std::vector& band2spo); + void initialize_spo2band(int spin, + const std::vector& bigspace, + SPOSetInfo& sposet, + std::vector& band2spo); }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h b/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h index 9286624c92d..41e6f9177f7 100644 --- a/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h +++ b/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h @@ -35,225 +35,224 @@ namespace qmcplusplus * precision. BsplineSet also implements a few fallback routines in case * optimized implementation is not necessary in the derived class. 
*/ -template +template class BsplineSetT : public SPOSetT { public: - using PosType = typename SPOSetT::PosType; - using ValueVector = typename SPOSetT::ValueVector; - using GradVector = typename SPOSetT::GradVector; - using HessVector = typename SPOSetT::HessVector; - using GGGVector = typename SPOSetT::GGGVector; - using ValueMatrix = typename SPOSetT::ValueMatrix; - using GradMatrix = typename SPOSetT::GradMatrix; - using HessMatrix = typename SPOSetT::HessMatrix; - using GGGMatrix = typename SPOSetT::GGGMatrix; + using PosType = typename SPOSetT::PosType; + using ValueVector = typename SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; + using HessVector = typename SPOSetT::HessVector; + using GGGVector = typename SPOSetT::GGGVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + using HessMatrix = typename SPOSetT::HessMatrix; + using GGGMatrix = typename SPOSetT::GGGMatrix; - using value_type = typename SPOSetT::ValueMatrix::value_type; - using grad_type = typename SPOSetT::GradMatrix::value_type; + using value_type = typename SPOSetT::ValueMatrix::value_type; + using grad_type = typename SPOSetT::GradMatrix::value_type; - // used in derived classes - using RealType = typename SPOSetT::RealType; - using ValueType = typename SPOSetT::ValueType; + // used in derived classes + using RealType = typename SPOSetT::RealType; + using ValueType = typename SPOSetT::ValueType; - BsplineSetT(const std::string& my_name) : - SPOSetT(my_name), - MyIndex(0), - first_spo(0), - last_spo(0) - { - } + BsplineSetT(const std::string& my_name) : SPOSetT(my_name), MyIndex(0), first_spo(0), last_spo(0) {} - virtual bool - isComplex() const = 0; - virtual std::string - getKeyword() const = 0; + virtual bool isComplex() const = 0; + virtual std::string getKeyword() const = 0; - auto& - getHalfG() const - { - return HalfG; - } + auto& getHalfG() const { return HalfG; } - inline void - init_base(int n) + 
inline void init_base(int n) + { + kPoints.resize(n); + MakeTwoCopies.resize(n); + BandIndexMap.resize(n); + for (int i = 0; i < n; i++) + BandIndexMap[i] = i; + } + + /// remap kpoints to group general kpoints & special kpoints + int remap_kpoints() + { + std::vector k_copy(kPoints); + const int nk = kPoints.size(); + int nCB = 0; + // two pass + for (int i = 0; i < nk; ++i) { - kPoints.resize(n); - MakeTwoCopies.resize(n); - BandIndexMap.resize(n); - for (int i = 0; i < n; i++) - BandIndexMap[i] = i; + if (MakeTwoCopies[i]) + { + kPoints[nCB] = k_copy[i]; + BandIndexMap[nCB++] = i; + } } - - /// remap kpoints to group general kpoints & special kpoints - int - remap_kpoints() + int nRealBands = nCB; + for (int i = 0; i < nk; ++i) { - std::vector k_copy(kPoints); - const int nk = kPoints.size(); - int nCB = 0; - // two pass - for (int i = 0; i < nk; ++i) { - if (MakeTwoCopies[i]) { - kPoints[nCB] = k_copy[i]; - BandIndexMap[nCB++] = i; - } - } - int nRealBands = nCB; - for (int i = 0; i < nk; ++i) { - if (!MakeTwoCopies[i]) { - kPoints[nRealBands] = k_copy[i]; - BandIndexMap[nRealBands++] = i; - } - } - return nCB; // return the number of complex bands + if (!MakeTwoCopies[i]) + { + kPoints[nRealBands] = k_copy[i]; + BandIndexMap[nRealBands++] = i; + } } + return nCB; // return the number of complex bands + } - std::unique_ptr> - makeClone() const override = 0; + std::unique_ptr> makeClone() const override = 0; - void - setOrbitalSetSize(int norbs) override - { - this->OrbitalSetSize = norbs; - } + void setOrbitalSetSize(int norbs) override { this->OrbitalSetSize = norbs; } - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, - ValueMatrix& d2logdet) override + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) override + { + for (int iat = first, i = 0; iat < last; ++iat, ++i) { - for (int iat = first, i = 
0; iat < last; ++iat, ++i) { - ValueVector v(logdet[i], logdet.cols()); - GradVector g(dlogdet[i], dlogdet.cols()); - ValueVector l(d2logdet[i], d2logdet.cols()); - this->evaluateVGL(P, iat, v, g, l); - } + ValueVector v(logdet[i], logdet.cols()); + GradVector g(dlogdet[i], dlogdet.cols()); + ValueVector l(d2logdet[i], d2logdet.cols()); + this->evaluateVGL(P, iat, v, g, l); } + } - void - mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int first, int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const override - { - assert(this == &spo_list.getLeader()); - const size_t nw = spo_list.size(); - std::vector mw_psi_v; - std::vector mw_dpsi_v; - std::vector mw_d2psi_v; - RefVector psi_v_list; - RefVector dpsi_v_list; - RefVector d2psi_v_list; - mw_psi_v.reserve(nw); - mw_dpsi_v.reserve(nw); - mw_d2psi_v.reserve(nw); - psi_v_list.reserve(nw); - dpsi_v_list.reserve(nw); - d2psi_v_list.reserve(nw); + void mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int first, + int last, + const RefVector& logdet_list, + const RefVector& dlogdet_list, + const RefVector& d2logdet_list) const override + { + assert(this == &spo_list.getLeader()); + const size_t nw = spo_list.size(); + std::vector mw_psi_v; + std::vector mw_dpsi_v; + std::vector mw_d2psi_v; + RefVector psi_v_list; + RefVector dpsi_v_list; + RefVector d2psi_v_list; + mw_psi_v.reserve(nw); + mw_dpsi_v.reserve(nw); + mw_d2psi_v.reserve(nw); + psi_v_list.reserve(nw); + dpsi_v_list.reserve(nw); + d2psi_v_list.reserve(nw); - for (int iat = first, i = 0; iat < last; ++iat, ++i) { - mw_psi_v.clear(); - mw_dpsi_v.clear(); - mw_d2psi_v.clear(); - psi_v_list.clear(); - dpsi_v_list.clear(); - d2psi_v_list.clear(); + for (int iat = first, i = 0; iat < last; ++iat, ++i) + { + mw_psi_v.clear(); + mw_dpsi_v.clear(); + mw_d2psi_v.clear(); + psi_v_list.clear(); + 
dpsi_v_list.clear(); + d2psi_v_list.clear(); - for (int iw = 0; iw < nw; iw++) { - mw_psi_v.emplace_back( - logdet_list[iw].get()[i], logdet_list[iw].get().cols()); - mw_dpsi_v.emplace_back( - dlogdet_list[iw].get()[i], dlogdet_list[iw].get().cols()); - mw_d2psi_v.emplace_back( - d2logdet_list[iw].get()[i], d2logdet_list[iw].get().cols()); - psi_v_list.push_back(mw_psi_v.back()); - dpsi_v_list.push_back(mw_dpsi_v.back()); - d2psi_v_list.push_back(mw_d2psi_v.back()); - } + for (int iw = 0; iw < nw; iw++) + { + mw_psi_v.emplace_back(logdet_list[iw].get()[i], logdet_list[iw].get().cols()); + mw_dpsi_v.emplace_back(dlogdet_list[iw].get()[i], dlogdet_list[iw].get().cols()); + mw_d2psi_v.emplace_back(d2logdet_list[iw].get()[i], d2logdet_list[iw].get().cols()); + psi_v_list.push_back(mw_psi_v.back()); + dpsi_v_list.push_back(mw_dpsi_v.back()); + d2psi_v_list.push_back(mw_d2psi_v.back()); + } - this->mw_evaluateVGL( - spo_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); - } + this->mw_evaluateVGL(spo_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); } + } - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) override + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet) override + { + for (int iat = first, i = 0; iat < last; ++iat, ++i) { - for (int iat = first, i = 0; iat < last; ++iat, ++i) { - ValueVector v(logdet[i], logdet.cols()); - GradVector g(dlogdet[i], dlogdet.cols()); - HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols()); - this->evaluateVGH(P, iat, v, g, h); - } + ValueVector v(logdet[i], logdet.cols()); + GradVector g(dlogdet[i], dlogdet.cols()); + HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols()); + this->evaluateVGH(P, iat, v, g, h); } + } - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& 
logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) override + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet, + GGGMatrix& grad_grad_grad_logdet) override + { + for (int iat = first, i = 0; iat < last; ++iat, ++i) { - for (int iat = first, i = 0; iat < last; ++iat, ++i) { - ValueVector v(logdet[i], logdet.cols()); - GradVector g(dlogdet[i], dlogdet.cols()); - HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols()); - GGGVector gh( - grad_grad_grad_logdet[i], grad_grad_grad_logdet.cols()); - this->evaluateVGHGH(P, iat, v, g, h, gh); - } + ValueVector v(logdet[i], logdet.cols()); + GradVector g(dlogdet[i], dlogdet.cols()); + HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols()); + GGGVector gh(grad_grad_grad_logdet[i], grad_grad_grad_logdet.cols()); + this->evaluateVGHGH(P, iat, v, g, h, gh); } + } - void - evaluateGradSource(const ParticleSetT& P, int first, int last, - const ParticleSetT& source, int iat_src, - GradMatrix& gradphi) override - { - // Do nothing, since Einsplines don't explicitly depend on ion - // positions. - } + void evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& gradphi) override + { + // Do nothing, since Einsplines don't explicitly depend on ion + // positions. + } - void - evaluateGradSource(const ParticleSetT& P, int first, int last, - const ParticleSetT& source, int iat_src, GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi) override - { - // Do nothing, since Einsplines don't explicitly depend on ion - // positions. 
- } + void evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& grad_phi, + HessMatrix& grad_grad_phi, + GradMatrix& grad_lapl_phi) override + { + // Do nothing, since Einsplines don't explicitly depend on ion + // positions. + } - template - friend class SplineSetReaderT; - template - friend class BsplineReaderBaseT; - template - friend class HybridRepSetReaderT; + template + friend class SplineSetReaderT; + template + friend class BsplineReaderBaseT; + template + friend class HybridRepSetReaderT; protected: - static const int D = QMCTraits::DIM; - /// Index of this adoptor, when multiple adoptors are used for NUMA or - /// distributed cases - size_t MyIndex; - /// first index of the SPOs this Spline handles - size_t first_spo; - /// last index of the SPOs this Spline handles - size_t last_spo; - /// sign bits at the G/2 boundaries - TinyVector HalfG; - /// flags to unpack sin/cos - std::vector MakeTwoCopies; - /** kpoints for each unique orbitals. + static const int D = QMCTraits::DIM; + /// Index of this adoptor, when multiple adoptors are used for NUMA or + /// distributed cases + size_t MyIndex; + /// first index of the SPOs this Spline handles + size_t first_spo; + /// last index of the SPOs this Spline handles + size_t last_spo; + /// sign bits at the G/2 boundaries + TinyVector HalfG; + /// flags to unpack sin/cos + std::vector MakeTwoCopies; + /** kpoints for each unique orbitals. * Note: for historic reason, this sign is opposite to what was used in DFT * when orbitals were generated. Changing the sign requires updating all the * evaluation code. 
*/ - std::vector kPoints; - /// remap splines to orbitals - aligned_vector BandIndexMap; - /// band offsets used for communication - std::vector offset; + std::vector kPoints; + /// remap splines to orbitals + aligned_vector BandIndexMap; + /// band offsets used for communication + std::vector offset; }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h index 85bf667736a..af6db8671a2 100644 --- a/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h @@ -22,628 +22,565 @@ namespace qmcplusplus { -template +template class HybridRepSetReaderT; -template +template class AtomicOrbitalsT { public: - static const int D = 3; - using AtomicSplineType = typename bspline_traits::SplineType; - using AtomicBCType = typename bspline_traits::BCType; - using AtomicSingleSplineType = UBspline_1d_d; - using PointType = TinyVector; - using value_type = T; + static const int D = 3; + using AtomicSplineType = typename bspline_traits::SplineType; + using AtomicBCType = typename bspline_traits::BCType; + using AtomicSingleSplineType = UBspline_1d_d; + using PointType = TinyVector; + using value_type = T; - using vContainer_type = aligned_vector; + using vContainer_type = aligned_vector; private: - // near core cutoff - T rmin; - // far from core cutoff, rmin_sqrt>=rmin - T rmin_sqrt; - T cutoff, cutoff_buffer, spline_radius, non_overlapping_radius; - int spline_npoints, BaseN; - int NumBands, Npad; - PointType center_pos; - const int lmax, lm_tot; - SoaSphericalTensor Ylm; - vContainer_type l_vals; - vContainer_type r_power_minus_l; - /// 1D spline of radial functions of all the orbitals - std::shared_ptr> SplineInst; - - vContainer_type localV, localG, localL; + // near core cutoff + T rmin; + // far from core cutoff, rmin_sqrt>=rmin + T rmin_sqrt; + T cutoff, cutoff_buffer, spline_radius, 
non_overlapping_radius; + int spline_npoints, BaseN; + int NumBands, Npad; + PointType center_pos; + const int lmax, lm_tot; + SoaSphericalTensor Ylm; + vContainer_type l_vals; + vContainer_type r_power_minus_l; + /// 1D spline of radial functions of all the orbitals + std::shared_ptr> SplineInst; + + vContainer_type localV, localG, localL; public: - AtomicOrbitalsT(int Lmax) : - lmax(Lmax), - lm_tot((Lmax + 1) * (Lmax + 1)), - Ylm(Lmax) + AtomicOrbitalsT(int Lmax) : lmax(Lmax), lm_tot((Lmax + 1) * (Lmax + 1)), Ylm(Lmax) + { + r_power_minus_l.resize(lm_tot); + l_vals.resize(lm_tot); + for (int l = 0; l <= lmax; l++) + for (int m = -l; m <= l; m++) + l_vals[l * (l + 1) + m] = l; + rmin = std::exp(std::log(std::numeric_limits::min()) / std::max(Lmax, 1)); + rmin = std::max(rmin, std::numeric_limits::epsilon()); + rmin_sqrt = std::max(rmin, std::sqrt(std::numeric_limits::epsilon())); + } + + // accessing functions, const only + T getCutoff() const { return cutoff; } + T getCutoffBuffer() const { return cutoff_buffer; } + T getSplineRadius() const { return spline_radius; } + T getNonOverlappingRadius() const { return non_overlapping_radius; } + int getSplineNpoints() const { return spline_npoints; } + int getLmax() const { return lmax; } + const PointType& getCenterPos() const { return center_pos; } + + inline void resizeStorage(size_t Nb) + { + NumBands = Nb; + Npad = getAlignedSize(Nb); + localV.resize(Npad * lm_tot); + localG.resize(Npad * lm_tot); + localL.resize(Npad * lm_tot); + create_spline(); + } + + void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } + + void gather_tables(Communicate* comm, std::vector& offset) + { + gatherv(comm, SplineInst->getSplinePtr(), Npad, offset); + } + + template + inline void set_info(const PT& R, + const VT& cutoff_in, + const VT& cutoff_buffer_in, + const VT& spline_radius_in, + const VT& non_overlapping_radius_in, + const int spline_npoints_in) + { + center_pos[0] = R[0]; + center_pos[1] = 
R[1]; + center_pos[2] = R[2]; + cutoff = cutoff_in; + cutoff_buffer = cutoff_buffer_in; + spline_radius = spline_radius_in; + spline_npoints = spline_npoints_in; + non_overlapping_radius = non_overlapping_radius_in; + BaseN = spline_npoints + 2; + } + + inline void create_spline() + { + AtomicBCType bc; + bc.lCode = FLAT; + bc.rCode = NATURAL; + Ugrid grid; + grid.start = 0.0; + grid.end = spline_radius; + grid.num = spline_npoints; + SplineInst = std::make_shared>(); + SplineInst->create(grid, bc, lm_tot * Npad); + } + + inline size_t getSplineSizeInBytes() const { return SplineInst->sizeInByte(); } + + inline void flush_zero() { SplineInst->flush_zero(); } + + inline void set_spline(AtomicSingleSplineType* spline, int lm, int ispline) + { + SplineInst->copy_spline(spline, lm * Npad + ispline, 0, BaseN); + } + + bool read_splines(hdf_archive& h5f) + { + einspline_engine bigtable(SplineInst->getSplinePtr()); + int lmax_in = 0, spline_npoints_in = 0; + T spline_radius_in; + if (!h5f.readEntry(lmax_in, "l_max") || lmax_in != lmax) + return false; + if (!h5f.readEntry(spline_radius_in, "spline_radius") || spline_radius_in != spline_radius) + return false; + if (!h5f.readEntry(spline_npoints_in, "spline_npoints") || spline_npoints_in != spline_npoints) + return false; + return h5f.readEntry(bigtable, "radial_spline"); + } + + bool write_splines(hdf_archive& h5f) + { + bool success = true; + success = success && h5f.writeEntry(spline_radius, "spline_radius"); + success = success && h5f.writeEntry(spline_npoints, "spline_npoints"); + success = success && h5f.writeEntry(lmax, "l_max"); + success = success && h5f.writeEntry(center_pos, "position"); + einspline_engine bigtable(SplineInst->getSplinePtr()); + success = success && h5f.writeEntry(bigtable, "radial_spline"); + return success; + } + + // evaluate only V + template + inline void evaluate_v(const T& r, const PointType& dr, VV& myV) + { + if (r > std::numeric_limits::epsilon()) + Ylm.evaluateV(dr[0] / r, dr[1] / r, 
dr[2] / r); + else + Ylm.evaluateV(0, 0, 1); + const T* restrict Ylm_v = Ylm[0]; + + constexpr T czero(0); + T* restrict val = myV.data(); + T* restrict local_val = localV.data(); + std::fill(myV.begin(), myV.end(), czero); + + SplineInst->evaluate(r, localV); + + for (size_t lm = 0; lm < lm_tot; lm++) { - r_power_minus_l.resize(lm_tot); - l_vals.resize(lm_tot); - for (int l = 0; l <= lmax; l++) - for (int m = -l; m <= l; m++) - l_vals[l * (l + 1) + m] = l; - rmin = std::exp( - std::log(std::numeric_limits::min()) / std::max(Lmax, 1)); - rmin = std::max(rmin, std::numeric_limits::epsilon()); - rmin_sqrt = - std::max(rmin, std::sqrt(std::numeric_limits::epsilon())); - } - - // accessing functions, const only - T - getCutoff() const - { - return cutoff; - } - T - getCutoffBuffer() const - { - return cutoff_buffer; - } - T - getSplineRadius() const - { - return spline_radius; - } - T - getNonOverlappingRadius() const - { - return non_overlapping_radius; - } - int - getSplineNpoints() const - { - return spline_npoints; - } - int - getLmax() const - { - return lmax; - } - const PointType& - getCenterPos() const - { - return center_pos; - } - - inline void - resizeStorage(size_t Nb) - { - NumBands = Nb; - Npad = getAlignedSize(Nb); - localV.resize(Npad * lm_tot); - localG.resize(Npad * lm_tot); - localL.resize(Npad * lm_tot); - create_spline(); - } - - void - bcast_tables(Communicate* comm) - { - chunked_bcast(comm, SplineInst->getSplinePtr()); - } - - void - gather_tables(Communicate* comm, std::vector& offset) - { - gatherv(comm, SplineInst->getSplinePtr(), Npad, offset); - } - - template - inline void - set_info(const PT& R, const VT& cutoff_in, const VT& cutoff_buffer_in, - const VT& spline_radius_in, const VT& non_overlapping_radius_in, - const int spline_npoints_in) - { - center_pos[0] = R[0]; - center_pos[1] = R[1]; - center_pos[2] = R[2]; - cutoff = cutoff_in; - cutoff_buffer = cutoff_buffer_in; - spline_radius = spline_radius_in; - spline_npoints = 
spline_npoints_in; - non_overlapping_radius = non_overlapping_radius_in; - BaseN = spline_npoints + 2; - } - - inline void - create_spline() - { - AtomicBCType bc; - bc.lCode = FLAT; - bc.rCode = NATURAL; - Ugrid grid; - grid.start = 0.0; - grid.end = spline_radius; - grid.num = spline_npoints; - SplineInst = std::make_shared>(); - SplineInst->create(grid, bc, lm_tot * Npad); - } - - inline size_t - getSplineSizeInBytes() const - { - return SplineInst->sizeInByte(); - } - - inline void - flush_zero() - { - SplineInst->flush_zero(); - } - - inline void - set_spline(AtomicSingleSplineType* spline, int lm, int ispline) - { - SplineInst->copy_spline(spline, lm * Npad + ispline, 0, BaseN); - } - - bool - read_splines(hdf_archive& h5f) - { - einspline_engine bigtable(SplineInst->getSplinePtr()); - int lmax_in = 0, spline_npoints_in = 0; - T spline_radius_in; - if (!h5f.readEntry(lmax_in, "l_max") || lmax_in != lmax) - return false; - if (!h5f.readEntry(spline_radius_in, "spline_radius") || - spline_radius_in != spline_radius) - return false; - if (!h5f.readEntry(spline_npoints_in, "spline_npoints") || - spline_npoints_in != spline_npoints) - return false; - return h5f.readEntry(bigtable, "radial_spline"); - } - - bool - write_splines(hdf_archive& h5f) - { - bool success = true; - success = success && h5f.writeEntry(spline_radius, "spline_radius"); - success = success && h5f.writeEntry(spline_npoints, "spline_npoints"); - success = success && h5f.writeEntry(lmax, "l_max"); - success = success && h5f.writeEntry(center_pos, "position"); - einspline_engine bigtable(SplineInst->getSplinePtr()); - success = success && h5f.writeEntry(bigtable, "radial_spline"); - return success; - } - - // evaluate only V - template - inline void - evaluate_v(const T& r, const PointType& dr, VV& myV) - { - if (r > std::numeric_limits::epsilon()) - Ylm.evaluateV(dr[0] / r, dr[1] / r, dr[2] / r); - else - Ylm.evaluateV(0, 0, 1); - const T* restrict Ylm_v = Ylm[0]; - - constexpr T czero(0); - T* 
restrict val = myV.data(); - T* restrict local_val = localV.data(); - std::fill(myV.begin(), myV.end(), czero); - - SplineInst->evaluate(r, localV); - - for (size_t lm = 0; lm < lm_tot; lm++) { #pragma omp simd aligned(val, local_val : QMC_SIMD_ALIGNMENT) - for (size_t ib = 0; ib < myV.size(); ib++) - val[ib] += Ylm_v[lm] * local_val[ib]; - local_val += Npad; - } - } - - template - inline void - evaluateValues(const DISPL& Displacements, const int center_idx, const T& r, - VM& multi_myV) - { - if (r <= std::numeric_limits::epsilon()) - Ylm.evaluateV(0, 0, 1); - const T* restrict Ylm_v = Ylm[0]; - - const size_t m = multi_myV.cols(); - constexpr T czero(0); - std::fill(multi_myV.begin(), multi_myV.end(), czero); - SplineInst->evaluate(r, localV); - - for (int ivp = 0; ivp < Displacements.size(); ivp++) { - PointType dr = Displacements[ivp][center_idx]; - if (r > std::numeric_limits::epsilon()) - Ylm.evaluateV(-dr[0] / r, -dr[1] / r, -dr[2] / r); - - T* restrict val = multi_myV[ivp]; - T* restrict local_val = localV.data(); - for (size_t lm = 0; lm < lm_tot; lm++) { + for (size_t ib = 0; ib < myV.size(); ib++) + val[ib] += Ylm_v[lm] * local_val[ib]; + local_val += Npad; + } + } + + template + inline void evaluateValues(const DISPL& Displacements, const int center_idx, const T& r, VM& multi_myV) + { + if (r <= std::numeric_limits::epsilon()) + Ylm.evaluateV(0, 0, 1); + const T* restrict Ylm_v = Ylm[0]; + + const size_t m = multi_myV.cols(); + constexpr T czero(0); + std::fill(multi_myV.begin(), multi_myV.end(), czero); + SplineInst->evaluate(r, localV); + + for (int ivp = 0; ivp < Displacements.size(); ivp++) + { + PointType dr = Displacements[ivp][center_idx]; + if (r > std::numeric_limits::epsilon()) + Ylm.evaluateV(-dr[0] / r, -dr[1] / r, -dr[2] / r); + + T* restrict val = multi_myV[ivp]; + T* restrict local_val = localV.data(); + for (size_t lm = 0; lm < lm_tot; lm++) + { #pragma omp simd aligned(val, local_val : QMC_SIMD_ALIGNMENT) - for (size_t ib = 0; ib < m; 
ib++) - val[ib] += Ylm_v[lm] * local_val[ib]; - local_val += Npad; - } + for (size_t ib = 0; ib < m; ib++) + val[ib] += Ylm_v[lm] * local_val[ib]; + local_val += Npad; + } + } + } + + // evaluate VGL + template + inline void evaluate_vgl(const T& r, const PointType& dr, VV& myV, GV& myG, VV& myL) + { + T drx, dry, drz, rhatx, rhaty, rhatz, rinv; + if (r > rmin) + { + rinv = 1.0 / r; + } + else + { + rinv = 0; + } + drx = dr[0]; + dry = dr[1]; + drz = dr[2]; + rhatx = drx * rinv; + rhaty = dry * rinv; + rhatz = drz * rinv; + + Ylm.evaluateVGL(drx, dry, drz); + const T* restrict Ylm_v = Ylm[0]; + const T* restrict Ylm_gx = Ylm[1]; + const T* restrict Ylm_gy = Ylm[2]; + const T* restrict Ylm_gz = Ylm[3]; + + T* restrict g0 = myG.data(0); + T* restrict g1 = myG.data(1); + T* restrict g2 = myG.data(2); + constexpr T czero(0), cone(1), chalf(0.5); + std::fill(myV.begin(), myV.end(), czero); + std::fill(g0, g0 + Npad, czero); + std::fill(g1, g1 + Npad, czero); + std::fill(g2, g2 + Npad, czero); + std::fill(myL.begin(), myL.end(), czero); + T* restrict val = myV.data(); + T* restrict lapl = myL.data(); + T* restrict local_val = localV.data(); + T* restrict local_grad = localG.data(); + T* restrict local_lapl = localL.data(); + + SplineInst->evaluate_vgl(r, localV, localG, localL); + + if (r > rmin_sqrt) + { + // far from core + r_power_minus_l[0] = cone; + T r_power_temp = cone; + for (int l = 1; l <= lmax; l++) + { + r_power_temp *= rinv; + for (int m = -l, lm = l * l; m <= l; m++, lm++) + r_power_minus_l[lm] = r_power_temp; + } + + for (size_t lm = 0; lm < lm_tot; lm++) + { + const T& l_val = l_vals[lm]; + const T& r_power = r_power_minus_l[lm]; + const T Ylm_rescale = Ylm_v[lm] * r_power; + const T rhat_dot_G = (rhatx * Ylm_gx[lm] + rhaty * Ylm_gy[lm] + rhatz * Ylm_gz[lm]) * r_power; +#pragma omp simd aligned(val, g0, g1, g2, lapl, local_val, local_grad, local_lapl : QMC_SIMD_ALIGNMENT) + for (size_t ib = 0; ib < myV.size(); ib++) + { + const T local_v = local_val[ib]; 
+ const T local_g = local_grad[ib]; + const T local_l = local_lapl[ib]; + // value + const T Vpart = l_val * rinv * local_v; + val[ib] += Ylm_rescale * local_v; + + // grad + const T factor1 = local_g * Ylm_rescale; + const T factor2 = local_v * r_power; + const T factor3 = -Vpart * Ylm_rescale; + g0[ib] += factor1 * rhatx + factor2 * Ylm_gx[lm] + factor3 * rhatx; + g1[ib] += factor1 * rhaty + factor2 * Ylm_gy[lm] + factor3 * rhaty; + g2[ib] += factor1 * rhatz + factor2 * Ylm_gz[lm] + factor3 * rhatz; + + // laplacian + lapl[ib] += (local_l + (local_g * (2 - l_val) - Vpart) * rinv) * Ylm_rescale + (local_g - Vpart) * rhat_dot_G; } + local_val += Npad; + local_grad += Npad; + local_lapl += Npad; + } + } + else if (r > rmin) + { + // the possibility of reaching here is very very low + std::cout << "Warning: an electron is very close to an ion, distance=" << r << " be careful!" << std::endl; + // near core, kill divergence in the laplacian + r_power_minus_l[0] = cone; + T r_power_temp = cone; + for (int l = 1; l <= lmax; l++) + { + r_power_temp *= rinv; + for (int m = -l, lm = l * l; m <= l; m++, lm++) + r_power_minus_l[lm] = r_power_temp; + } + + for (size_t lm = 0; lm < lm_tot; lm++) + { + const T& l_val = l_vals[lm]; + const T& r_power = r_power_minus_l[lm]; + const T Ylm_rescale = Ylm_v[lm] * r_power; + const T rhat_dot_G = (Ylm_gx[lm] * rhatx + Ylm_gy[lm] * rhaty + Ylm_gz[lm] * rhatz) * r_power * r; +#pragma omp simd aligned(val, g0, g1, g2, lapl, local_val, local_grad, local_lapl : QMC_SIMD_ALIGNMENT) + for (size_t ib = 0; ib < myV.size(); ib++) + { + const T local_v = local_val[ib]; + const T local_g = local_grad[ib]; + const T local_l = local_lapl[ib]; + // value + const T Vpart = Ylm_rescale * local_v; + val[ib] += Vpart; + + // grad + const T factor1 = local_g * Ylm_rescale; + const T factor2 = local_v * r_power; + const T factor3 = -l_val * Vpart * rinv; + g0[ib] += factor1 * rhatx + factor2 * Ylm_gx[lm] + factor3 * rhatx; + g1[ib] += factor1 * rhaty + 
factor2 * Ylm_gy[lm] + factor3 * rhaty; + g2[ib] += factor1 * rhatz + factor2 * Ylm_gz[lm] + factor3 * rhatz; + + // laplacian + lapl[ib] += local_l * (cone - chalf * l_val) * (3 * Ylm_rescale + rhat_dot_G); + } + local_val += Npad; + local_grad += Npad; + local_lapl += Npad; + } } - - // evaluate VGL - template - inline void - evaluate_vgl(const T& r, const PointType& dr, VV& myV, GV& myG, VV& myL) + else { - T drx, dry, drz, rhatx, rhaty, rhatz, rinv; - if (r > rmin) { - rinv = 1.0 / r; - } - else { - rinv = 0; - } - drx = dr[0]; - dry = dr[1]; - drz = dr[2]; - rhatx = drx * rinv; - rhaty = dry * rinv; - rhatz = drz * rinv; - - Ylm.evaluateVGL(drx, dry, drz); - const T* restrict Ylm_v = Ylm[0]; - const T* restrict Ylm_gx = Ylm[1]; - const T* restrict Ylm_gy = Ylm[2]; - const T* restrict Ylm_gz = Ylm[3]; - - T* restrict g0 = myG.data(0); - T* restrict g1 = myG.data(1); - T* restrict g2 = myG.data(2); - constexpr T czero(0), cone(1), chalf(0.5); - std::fill(myV.begin(), myV.end(), czero); - std::fill(g0, g0 + Npad, czero); - std::fill(g1, g1 + Npad, czero); - std::fill(g2, g2 + Npad, czero); - std::fill(myL.begin(), myL.end(), czero); - T* restrict val = myV.data(); - T* restrict lapl = myL.data(); - T* restrict local_val = localV.data(); - T* restrict local_grad = localG.data(); - T* restrict local_lapl = localL.data(); - - SplineInst->evaluate_vgl(r, localV, localG, localL); - - if (r > rmin_sqrt) { - // far from core - r_power_minus_l[0] = cone; - T r_power_temp = cone; - for (int l = 1; l <= lmax; l++) { - r_power_temp *= rinv; - for (int m = -l, lm = l * l; m <= l; m++, lm++) - r_power_minus_l[lm] = r_power_temp; - } - - for (size_t lm = 0; lm < lm_tot; lm++) { - const T& l_val = l_vals[lm]; - const T& r_power = r_power_minus_l[lm]; - const T Ylm_rescale = Ylm_v[lm] * r_power; - const T rhat_dot_G = (rhatx * Ylm_gx[lm] + rhaty * Ylm_gy[lm] + - rhatz * Ylm_gz[lm]) * - r_power; -#pragma omp simd aligned( \ - val, g0, g1, g2, lapl, local_val, local_grad, 
local_lapl \ - : QMC_SIMD_ALIGNMENT) - for (size_t ib = 0; ib < myV.size(); ib++) { - const T local_v = local_val[ib]; - const T local_g = local_grad[ib]; - const T local_l = local_lapl[ib]; - // value - const T Vpart = l_val * rinv * local_v; - val[ib] += Ylm_rescale * local_v; - - // grad - const T factor1 = local_g * Ylm_rescale; - const T factor2 = local_v * r_power; - const T factor3 = -Vpart * Ylm_rescale; - g0[ib] += factor1 * rhatx + factor2 * Ylm_gx[lm] + - factor3 * rhatx; - g1[ib] += factor1 * rhaty + factor2 * Ylm_gy[lm] + - factor3 * rhaty; - g2[ib] += factor1 * rhatz + factor2 * Ylm_gz[lm] + - factor3 * rhatz; - - // laplacian - lapl[ib] += - (local_l + (local_g * (2 - l_val) - Vpart) * rinv) * - Ylm_rescale + - (local_g - Vpart) * rhat_dot_G; - } - local_val += Npad; - local_grad += Npad; - local_lapl += Npad; - } - } - else if (r > rmin) { - // the possibility of reaching here is very very low - std::cout - << "Warning: an electron is very close to an ion, distance=" - << r << " be careful!" 
<< std::endl; - // near core, kill divergence in the laplacian - r_power_minus_l[0] = cone; - T r_power_temp = cone; - for (int l = 1; l <= lmax; l++) { - r_power_temp *= rinv; - for (int m = -l, lm = l * l; m <= l; m++, lm++) - r_power_minus_l[lm] = r_power_temp; - } - - for (size_t lm = 0; lm < lm_tot; lm++) { - const T& l_val = l_vals[lm]; - const T& r_power = r_power_minus_l[lm]; - const T Ylm_rescale = Ylm_v[lm] * r_power; - const T rhat_dot_G = (Ylm_gx[lm] * rhatx + Ylm_gy[lm] * rhaty + - Ylm_gz[lm] * rhatz) * - r_power * r; -#pragma omp simd aligned( \ - val, g0, g1, g2, lapl, local_val, local_grad, local_lapl \ - : QMC_SIMD_ALIGNMENT) - for (size_t ib = 0; ib < myV.size(); ib++) { - const T local_v = local_val[ib]; - const T local_g = local_grad[ib]; - const T local_l = local_lapl[ib]; - // value - const T Vpart = Ylm_rescale * local_v; - val[ib] += Vpart; - - // grad - const T factor1 = local_g * Ylm_rescale; - const T factor2 = local_v * r_power; - const T factor3 = -l_val * Vpart * rinv; - g0[ib] += factor1 * rhatx + factor2 * Ylm_gx[lm] + - factor3 * rhatx; - g1[ib] += factor1 * rhaty + factor2 * Ylm_gy[lm] + - factor3 * rhaty; - g2[ib] += factor1 * rhatz + factor2 * Ylm_gz[lm] + - factor3 * rhatz; - - // laplacian - lapl[ib] += local_l * (cone - chalf * l_val) * - (3 * Ylm_rescale + rhat_dot_G); - } - local_val += Npad; - local_grad += Npad; - local_lapl += Npad; - } - } - else { - std::cout << "Warning: an electron is on top of an ion!" - << std::endl; - // strictly zero + std::cout << "Warning: an electron is on top of an ion!" 
<< std::endl; + // strictly zero #pragma omp simd aligned(val, lapl, local_val, local_lapl : QMC_SIMD_ALIGNMENT) - for (size_t ib = 0; ib < myV.size(); ib++) { - // value - val[ib] = Ylm_v[0] * local_val[ib]; - - // laplacian - lapl[ib] = local_lapl[ib] * static_cast(3) * Ylm_v[0]; - } - local_val += Npad; - local_grad += Npad; - local_lapl += Npad; - if (lm_tot > 0) { - // std::cout << std::endl; - for (size_t lm = 1; lm < 4; lm++) { + for (size_t ib = 0; ib < myV.size(); ib++) + { + // value + val[ib] = Ylm_v[0] * local_val[ib]; + + // laplacian + lapl[ib] = local_lapl[ib] * static_cast(3) * Ylm_v[0]; + } + local_val += Npad; + local_grad += Npad; + local_lapl += Npad; + if (lm_tot > 0) + { + // std::cout << std::endl; + for (size_t lm = 1; lm < 4; lm++) + { #pragma omp simd aligned(g0, g1, g2, local_grad : QMC_SIMD_ALIGNMENT) - for (size_t ib = 0; ib < myV.size(); ib++) { - const T local_g = local_grad[ib]; - // grad - g0[ib] += local_g * Ylm_gx[lm]; - g1[ib] += local_g * Ylm_gy[lm]; - g2[ib] += local_g * Ylm_gz[lm]; - } - local_grad += Npad; - } - } + for (size_t ib = 0; ib < myV.size(); ib++) + { + const T local_g = local_grad[ib]; + // grad + g0[ib] += local_g * Ylm_gx[lm]; + g1[ib] += local_g * Ylm_gy[lm]; + g2[ib] += local_g * Ylm_gz[lm]; + } + local_grad += Npad; } + } } + } - template - void - evaluate_vgh(const T& r, const PointType& dr, VV& myV, GV& myG, HT& myH) - { - // Needed to do tensor product here - APP_ABORT("AtomicOrbitals::evaluate_vgh"); - } + template + void evaluate_vgh(const T& r, const PointType& dr, VV& myV, GV& myG, HT& myH) + { + // Needed to do tensor product here + APP_ABORT("AtomicOrbitals::evaluate_vgh"); + } }; -template +template class HybridRepCenterOrbitalsT { public: - static const int D = 3; - using PointType = typename AtomicOrbitalsT::PointType; - using RealType = typename DistanceTableT::RealType; - using PosType = typename DistanceTableT::PosType; + static const int D = 3; + using PointType = typename 
AtomicOrbitalsT::PointType; + using RealType = typename DistanceTableT::RealType; + using PosType = typename DistanceTableT::PosType; private: - /// atomic centers - std::vector> AtomicCenters; - /// table index - int myTableID; - /// mapping supercell to primitive cell - std::vector Super2Prim; - /// r from distance table - RealType dist_r; - /// dr from distance table - PosType dist_dr; - /// for APBC - PointType r_image; - /// smooth function value - RealType f; - /// smooth function first derivative - RealType df_dr; - /// smooth function second derivative - RealType d2f_dr2; - /// smoothing schemes - enum class smoothing_schemes - { - CONSISTENT = 0, - SMOOTHALL, - SMOOTHPARTIAL - } smooth_scheme; - /// smoothing function - smoothing_functions smooth_func_id; + /// atomic centers + std::vector> AtomicCenters; + /// table index + int myTableID; + /// mapping supercell to primitive cell + std::vector Super2Prim; + /// r from distance table + RealType dist_r; + /// dr from distance table + PosType dist_dr; + /// for APBC + PointType r_image; + /// smooth function value + RealType f; + /// smooth function first derivative + RealType df_dr; + /// smooth function second derivative + RealType d2f_dr2; + /// smoothing schemes + enum class smoothing_schemes + { + CONSISTENT = 0, + SMOOTHALL, + SMOOTHPARTIAL + } smooth_scheme; + /// smoothing function + smoothing_functions smooth_func_id; public: - HybridRepCenterOrbitalsT() - { - } - - void - set_info(const ParticleSetT& ions, ParticleSetT& els, - const std::vector& mapping) - { - myTableID = els.addTable(ions, DTModes::NEED_VP_FULL_TABLE_ON_HOST); - Super2Prim = mapping; - } - - inline void - resizeStorage(size_t Nb) - { - size_t SplineCoefsBytes = 0; - - for (int ic = 0; ic < AtomicCenters.size(); ic++) { - AtomicCenters[ic].resizeStorage(Nb); - SplineCoefsBytes += AtomicCenters[ic].getSplineSizeInBytes(); - } - - app_log() - << "MEMORY " << SplineCoefsBytes / (1 << 20) << " MB allocated " - << "for the atomic radial 
splines in hybrid orbital representation" - << std::endl; - } - - void - bcast_tables(Communicate* comm) - { - for (int ic = 0; ic < AtomicCenters.size(); ic++) - AtomicCenters[ic].bcast_tables(comm); - } - - void - gather_atomic_tables(Communicate* comm, std::vector& offset) - { - if (comm->size() == 1) - return; - for (int ic = 0; ic < AtomicCenters.size(); ic++) - AtomicCenters[ic].gather_tables(comm, offset); - } - - inline void - flush_zero() - { - for (int ic = 0; ic < AtomicCenters.size(); ic++) - AtomicCenters[ic].flush_zero(); - } - - bool - read_splines(hdf_archive& h5f) - { - bool success = true; - size_t ncenter; - - try { - h5f.push("atomic_centers", false); - } - catch (...) { - success = false; - } - success = success && h5f.readEntry(ncenter, "number_of_centers"); - if (!success) - return success; - if (ncenter != AtomicCenters.size()) - success = false; - // read splines of each center - for (int ic = 0; ic < AtomicCenters.size(); ic++) { - std::ostringstream gname; - gname << "center_" << ic; - try { - h5f.push(gname.str().c_str(), false); - } - catch (...) { - success = false; - } - success = success && AtomicCenters[ic].read_splines(h5f); - h5f.pop(); - } - h5f.pop(); - return success; - } - - bool - write_splines(hdf_archive& h5f) - { - bool success = true; - int ncenter = AtomicCenters.size(); - try { - h5f.push("atomic_centers", true); - } - catch (...) { - success = false; - } - success = success && h5f.writeEntry(ncenter, "number_of_centers"); - // write splines of each center - for (int ic = 0; ic < AtomicCenters.size(); ic++) { - std::ostringstream gname; - gname << "center_" << ic; - try { - h5f.push(gname.str().c_str(), true); - } - catch (...) 
{ - success = false; - } - success = success && AtomicCenters[ic].write_splines(h5f); - h5f.pop(); - } - h5f.pop(); - return success; - } - - template - inline int - get_bc_sign( - const PointType& r, const Cell& PrimLattice, TinyVector& HalfG) - { - int bc_sign = 0; - PointType shift_unit = PrimLattice.toUnit(r - r_image); - for (int i = 0; i < D; i++) { - ST img = round(shift_unit[i]); - bc_sign += HalfG[i] * (int)img; - } - return bc_sign; - } - - // evaluate only V - template - inline RealType - evaluate_v(const ParticleSetT& P, const int iat, VV& myV) - { - const auto& ei_dist = P.getDistTableAB(myTableID); - const int center_idx = ei_dist.get_first_neighbor( - iat, dist_r, dist_dr, P.getActivePtcl() == iat); - if (center_idx < 0) - abort(); - auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; - if (dist_r < myCenter.getCutoff()) { - PointType dr(-dist_dr[0], -dist_dr[1], -dist_dr[2]); - r_image = myCenter.getCenterPos() + dr; - myCenter.evaluate_v(dist_r, dr, myV); - return smooth_function( - myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r); - } - return RealType(-1); - } - - /* check if the batched algorithm is safe to operate + HybridRepCenterOrbitalsT() {} + + void set_info(const ParticleSetT& ions, ParticleSetT& els, const std::vector& mapping) + { + myTableID = els.addTable(ions, DTModes::NEED_VP_FULL_TABLE_ON_HOST); + Super2Prim = mapping; + } + + inline void resizeStorage(size_t Nb) + { + size_t SplineCoefsBytes = 0; + + for (int ic = 0; ic < AtomicCenters.size(); ic++) + { + AtomicCenters[ic].resizeStorage(Nb); + SplineCoefsBytes += AtomicCenters[ic].getSplineSizeInBytes(); + } + + app_log() << "MEMORY " << SplineCoefsBytes / (1 << 20) << " MB allocated " + << "for the atomic radial splines in hybrid orbital representation" << std::endl; + } + + void bcast_tables(Communicate* comm) + { + for (int ic = 0; ic < AtomicCenters.size(); ic++) + AtomicCenters[ic].bcast_tables(comm); + } + + void gather_atomic_tables(Communicate* comm, 
std::vector& offset) + { + if (comm->size() == 1) + return; + for (int ic = 0; ic < AtomicCenters.size(); ic++) + AtomicCenters[ic].gather_tables(comm, offset); + } + + inline void flush_zero() + { + for (int ic = 0; ic < AtomicCenters.size(); ic++) + AtomicCenters[ic].flush_zero(); + } + + bool read_splines(hdf_archive& h5f) + { + bool success = true; + size_t ncenter; + + try + { + h5f.push("atomic_centers", false); + } + catch (...) + { + success = false; + } + success = success && h5f.readEntry(ncenter, "number_of_centers"); + if (!success) + return success; + if (ncenter != AtomicCenters.size()) + success = false; + // read splines of each center + for (int ic = 0; ic < AtomicCenters.size(); ic++) + { + std::ostringstream gname; + gname << "center_" << ic; + try + { + h5f.push(gname.str().c_str(), false); + } + catch (...) + { + success = false; + } + success = success && AtomicCenters[ic].read_splines(h5f); + h5f.pop(); + } + h5f.pop(); + return success; + } + + bool write_splines(hdf_archive& h5f) + { + bool success = true; + int ncenter = AtomicCenters.size(); + try + { + h5f.push("atomic_centers", true); + } + catch (...) + { + success = false; + } + success = success && h5f.writeEntry(ncenter, "number_of_centers"); + // write splines of each center + for (int ic = 0; ic < AtomicCenters.size(); ic++) + { + std::ostringstream gname; + gname << "center_" << ic; + try + { + h5f.push(gname.str().c_str(), true); + } + catch (...) 
+ { + success = false; + } + success = success && AtomicCenters[ic].write_splines(h5f); + h5f.pop(); + } + h5f.pop(); + return success; + } + + template + inline int get_bc_sign(const PointType& r, const Cell& PrimLattice, TinyVector& HalfG) + { + int bc_sign = 0; + PointType shift_unit = PrimLattice.toUnit(r - r_image); + for (int i = 0; i < D; i++) + { + ST img = round(shift_unit[i]); + bc_sign += HalfG[i] * (int)img; + } + return bc_sign; + } + + // evaluate only V + template + inline RealType evaluate_v(const ParticleSetT& P, const int iat, VV& myV) + { + const auto& ei_dist = P.getDistTableAB(myTableID); + const int center_idx = ei_dist.get_first_neighbor(iat, dist_r, dist_dr, P.getActivePtcl() == iat); + if (center_idx < 0) + abort(); + auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; + if (dist_r < myCenter.getCutoff()) + { + PointType dr(-dist_dr[0], -dist_dr[1], -dist_dr[2]); + r_image = myCenter.getCenterPos() + dr; + myCenter.evaluate_v(dist_r, dr, myV); + return smooth_function(myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r); + } + return RealType(-1); + } + + /* check if the batched algorithm is safe to operate * @param VP virtual particle set * @return true if it is safe * @@ -654,165 +591,154 @@ class HybridRepCenterOrbitalsT * reference center and introduce some error. In this case, the non-batched * algorithm should be used. 
*/ - bool - is_batched_safe(const VirtualParticleSetT& VP) - { - const int center_idx = VP.refSourcePtcl; - auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; - return VP.getRefPS().getDistTableAB(myTableID).getDistRow( - VP.refPtcl)[center_idx] < myCenter.getNonOverlappingRadius(); - } - - // C2C, C2R cases - template - inline RealType - evaluateValuesC2X(const VirtualParticleSetT& VP, VM& multi_myV) - { - const int center_idx = VP.refSourcePtcl; - dist_r = VP.getRefPS().getDistTableAB(myTableID).getDistRow( - VP.refPtcl)[center_idx]; - auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; - if (dist_r < myCenter.getCutoff()) { - myCenter.evaluateValues( - VP.getDistTableAB(myTableID).getDisplacements(), center_idx, - dist_r, multi_myV); - return smooth_function( - myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r); - } - return RealType(-1); - } - - // R2R case - template - inline RealType - evaluateValuesR2R(const VirtualParticleSetT& VP, - const Cell& PrimLattice, TinyVector& HalfG, VM& multi_myV, - SV& bc_signs) - { - const int center_idx = VP.refSourcePtcl; - dist_r = VP.getRefPS().getDistTableAB(myTableID).getDistRow( - VP.refPtcl)[center_idx]; - auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; - if (dist_r < myCenter.getCutoff()) { - const auto& displ = VP.getDistTableAB(myTableID).getDisplacements(); - for (int ivp = 0; ivp < VP.getTotalNum(); ivp++) { - r_image = myCenter.getCenterPos() - displ[ivp][center_idx]; - bc_signs[ivp] = get_bc_sign(VP.R[ivp], PrimLattice, HalfG); - ; - } - myCenter.evaluateValues(displ, center_idx, dist_r, multi_myV); - return smooth_function( - myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r); - } - return RealType(-1); - } - - // evaluate only VGL - template - inline RealType - evaluate_vgl( - const ParticleSetT& P, const int iat, VV& myV, GV& myG, VV& myL) - { - const auto& ei_dist = P.getDistTableAB(myTableID); - const int center_idx = ei_dist.get_first_neighbor( - iat, dist_r, dist_dr, 
P.getActivePtcl() == iat); - if (center_idx < 0) - abort(); - auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; - if (dist_r < myCenter.getCutoff()) { - PointType dr(-dist_dr[0], -dist_dr[1], -dist_dr[2]); - r_image = myCenter.getCenterPos() + dr; - myCenter.evaluate_vgl(dist_r, dr, myV, myG, myL); - return smooth_function( - myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r); - } - return RealType(-1); - } - - // evaluate only VGH - template - inline RealType - evaluate_vgh( - const ParticleSetT& P, const int iat, VV& myV, GV& myG, HT& myH) - { - const auto& ei_dist = P.getDistTableAB(myTableID); - const int center_idx = ei_dist.get_first_neighbor( - iat, dist_r, dist_dr, P.getActivePtcl() == iat); - if (center_idx < 0) - abort(); - auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; - if (dist_r < myCenter.getCutoff()) { - PointType dr(-dist_dr[0], -dist_dr[1], -dist_dr[2]); - r_image = myCenter.getCenterPos() + dr; - myCenter.evaluate_vgh(dist_r, dr, myV, myG, myH); - return smooth_function( - myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r); - } - return RealType(-1); - } - - // interpolate buffer region, value only - template - inline void - interpolate_buffer_v(VV& psi, const VV& psi_AO) const - { - const RealType cone(1); - for (size_t i = 0; i < psi.size(); i++) - psi[i] = psi_AO[i] * f + psi[i] * (cone - f); - } - - // interpolate buffer region, value, gradients and laplacian - template - inline void - interpolate_buffer_vgl(VV& psi, GV& dpsi, VV& d2psi, const VV& psi_AO, - const GV& dpsi_AO, const VV& d2psi_AO) const - { - const RealType cone(1), ctwo(2); - const RealType rinv(1.0 / dist_r); - if (smooth_scheme == smoothing_schemes::CONSISTENT) - for (size_t i = 0; i < psi.size(); - i++) { // psi, dpsi, d2psi are all consistent - d2psi[i] = d2psi_AO[i] * f + d2psi[i] * (cone - f) + - df_dr * rinv * ctwo * dot(dpsi[i] - dpsi_AO[i], dist_dr) + - (psi_AO[i] - psi[i]) * (d2f_dr2 + ctwo * rinv * df_dr); - dpsi[i] = dpsi_AO[i] * f + 
dpsi[i] * (cone - f) + - df_dr * rinv * dist_dr * (psi[i] - psi_AO[i]); - psi[i] = psi_AO[i] * f + psi[i] * (cone - f); - } - else if (smooth_scheme == smoothing_schemes::SMOOTHALL) - for (size_t i = 0; i < psi.size(); i++) { - d2psi[i] = d2psi_AO[i] * f + d2psi[i] * (cone - f); - dpsi[i] = dpsi_AO[i] * f + dpsi[i] * (cone - f); - psi[i] = psi_AO[i] * f + psi[i] * (cone - f); - } - else if (smooth_scheme == smoothing_schemes::SMOOTHPARTIAL) - for (size_t i = 0; i < psi.size(); - i++) { // dpsi, d2psi are consistent but psi is not. - d2psi[i] = d2psi_AO[i] * f + d2psi[i] * (cone - f) + - df_dr * rinv * ctwo * dot(dpsi[i] - dpsi_AO[i], dist_dr); - dpsi[i] = dpsi_AO[i] * f + dpsi[i] * (cone - f); - psi[i] = psi_AO[i] * f + psi[i] * (cone - f); - } - else - throw std::runtime_error("Unknown smooth scheme!"); - } - - inline RealType - smooth_function(const ST& cutoff_buffer, const ST& cutoff, const RealType r) - { - const RealType cone(1); - if (r < cutoff_buffer) - return cone; - const RealType scale = cone / (cutoff - cutoff_buffer); - const RealType x = (r - cutoff_buffer) * scale; - f = smoothing(smooth_func_id, x, df_dr, d2f_dr2); - df_dr *= scale; - d2f_dr2 *= scale * scale; - return f; - } - - template - friend class HybridRepSetReaderT; + bool is_batched_safe(const VirtualParticleSetT& VP) + { + const int center_idx = VP.refSourcePtcl; + auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; + return VP.getRefPS().getDistTableAB(myTableID).getDistRow(VP.refPtcl)[center_idx] < + myCenter.getNonOverlappingRadius(); + } + + // C2C, C2R cases + template + inline RealType evaluateValuesC2X(const VirtualParticleSetT& VP, VM& multi_myV) + { + const int center_idx = VP.refSourcePtcl; + dist_r = VP.getRefPS().getDistTableAB(myTableID).getDistRow(VP.refPtcl)[center_idx]; + auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; + if (dist_r < myCenter.getCutoff()) + { + myCenter.evaluateValues(VP.getDistTableAB(myTableID).getDisplacements(), center_idx, dist_r, 
multi_myV); + return smooth_function(myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r); + } + return RealType(-1); + } + + // R2R case + template + inline RealType evaluateValuesR2R(const VirtualParticleSetT& VP, + const Cell& PrimLattice, + TinyVector& HalfG, + VM& multi_myV, + SV& bc_signs) + { + const int center_idx = VP.refSourcePtcl; + dist_r = VP.getRefPS().getDistTableAB(myTableID).getDistRow(VP.refPtcl)[center_idx]; + auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; + if (dist_r < myCenter.getCutoff()) + { + const auto& displ = VP.getDistTableAB(myTableID).getDisplacements(); + for (int ivp = 0; ivp < VP.getTotalNum(); ivp++) + { + r_image = myCenter.getCenterPos() - displ[ivp][center_idx]; + bc_signs[ivp] = get_bc_sign(VP.R[ivp], PrimLattice, HalfG); + ; + } + myCenter.evaluateValues(displ, center_idx, dist_r, multi_myV); + return smooth_function(myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r); + } + return RealType(-1); + } + + // evaluate only VGL + template + inline RealType evaluate_vgl(const ParticleSetT& P, const int iat, VV& myV, GV& myG, VV& myL) + { + const auto& ei_dist = P.getDistTableAB(myTableID); + const int center_idx = ei_dist.get_first_neighbor(iat, dist_r, dist_dr, P.getActivePtcl() == iat); + if (center_idx < 0) + abort(); + auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; + if (dist_r < myCenter.getCutoff()) + { + PointType dr(-dist_dr[0], -dist_dr[1], -dist_dr[2]); + r_image = myCenter.getCenterPos() + dr; + myCenter.evaluate_vgl(dist_r, dr, myV, myG, myL); + return smooth_function(myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r); + } + return RealType(-1); + } + + // evaluate only VGH + template + inline RealType evaluate_vgh(const ParticleSetT& P, const int iat, VV& myV, GV& myG, HT& myH) + { + const auto& ei_dist = P.getDistTableAB(myTableID); + const int center_idx = ei_dist.get_first_neighbor(iat, dist_r, dist_dr, P.getActivePtcl() == iat); + if (center_idx < 0) + abort(); + auto& myCenter 
= AtomicCenters[Super2Prim[center_idx]]; + if (dist_r < myCenter.getCutoff()) + { + PointType dr(-dist_dr[0], -dist_dr[1], -dist_dr[2]); + r_image = myCenter.getCenterPos() + dr; + myCenter.evaluate_vgh(dist_r, dr, myV, myG, myH); + return smooth_function(myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r); + } + return RealType(-1); + } + + // interpolate buffer region, value only + template + inline void interpolate_buffer_v(VV& psi, const VV& psi_AO) const + { + const RealType cone(1); + for (size_t i = 0; i < psi.size(); i++) + psi[i] = psi_AO[i] * f + psi[i] * (cone - f); + } + + // interpolate buffer region, value, gradients and laplacian + template + inline void interpolate_buffer_vgl(VV& psi, + GV& dpsi, + VV& d2psi, + const VV& psi_AO, + const GV& dpsi_AO, + const VV& d2psi_AO) const + { + const RealType cone(1), ctwo(2); + const RealType rinv(1.0 / dist_r); + if (smooth_scheme == smoothing_schemes::CONSISTENT) + for (size_t i = 0; i < psi.size(); i++) + { // psi, dpsi, d2psi are all consistent + d2psi[i] = d2psi_AO[i] * f + d2psi[i] * (cone - f) + df_dr * rinv * ctwo * dot(dpsi[i] - dpsi_AO[i], dist_dr) + + (psi_AO[i] - psi[i]) * (d2f_dr2 + ctwo * rinv * df_dr); + dpsi[i] = dpsi_AO[i] * f + dpsi[i] * (cone - f) + df_dr * rinv * dist_dr * (psi[i] - psi_AO[i]); + psi[i] = psi_AO[i] * f + psi[i] * (cone - f); + } + else if (smooth_scheme == smoothing_schemes::SMOOTHALL) + for (size_t i = 0; i < psi.size(); i++) + { + d2psi[i] = d2psi_AO[i] * f + d2psi[i] * (cone - f); + dpsi[i] = dpsi_AO[i] * f + dpsi[i] * (cone - f); + psi[i] = psi_AO[i] * f + psi[i] * (cone - f); + } + else if (smooth_scheme == smoothing_schemes::SMOOTHPARTIAL) + for (size_t i = 0; i < psi.size(); i++) + { // dpsi, d2psi are consistent but psi is not. 
+ d2psi[i] = d2psi_AO[i] * f + d2psi[i] * (cone - f) + df_dr * rinv * ctwo * dot(dpsi[i] - dpsi_AO[i], dist_dr); + dpsi[i] = dpsi_AO[i] * f + dpsi[i] * (cone - f); + psi[i] = psi_AO[i] * f + psi[i] * (cone - f); + } + else + throw std::runtime_error("Unknown smooth scheme!"); + } + + inline RealType smooth_function(const ST& cutoff_buffer, const ST& cutoff, const RealType r) + { + const RealType cone(1); + if (r < cutoff_buffer) + return cone; + const RealType scale = cone / (cutoff - cutoff_buffer); + const RealType x = (r - cutoff_buffer) * scale; + f = smoothing(smooth_func_id, x, df_dr, d2f_dr2); + df_dr *= scale; + d2f_dr2 *= scale * scale; + return f; + } + + template + friend class HybridRepSetReaderT; }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h index 6783bc4b6bd..84bda28daa1 100644 --- a/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h @@ -129,467 +129,450 @@ struct Gvectors /** General HybridRepSetReader to handle any unitcell */ -template +template class HybridRepSetReaderT : public SplineSetReaderT { public: - using BaseReader = SplineSetReaderT; + using BaseReader = SplineSetReaderT; - using BaseReader::bspline; - using BaseReader::mybuilder; - using BaseReader::rotate_phase_i; - using BaseReader::rotate_phase_r; - using typename BaseReader::DataType; - using typename BaseReader::ValueType; + using BaseReader::bspline; + using BaseReader::mybuilder; + using BaseReader::rotate_phase_i; + using BaseReader::rotate_phase_r; + using typename BaseReader::DataType; + using typename BaseReader::ValueType; - HybridRepSetReaderT(EinsplineSetBuilderT* e) : BaseReader(e) - { - } + HybridRepSetReaderT(EinsplineSetBuilderT* e) : BaseReader(e) {} - /** initialize basic parameters of atomic orbitals */ - void - initialize_hybridrep_atomic_centers() override - { - OhmmsAttributeSet 
a; - std::string scheme_name("Consistent"); - std::string s_function_name("LEKS2018"); - a.add(scheme_name, "smoothing_scheme"); - a.add(s_function_name, "smoothing_function"); - a.put(mybuilder->XMLRoot); - // assign smooth_scheme - if (scheme_name == "Consistent") - this->bspline->smooth_scheme = SA::smoothing_schemes::CONSISTENT; - else if (scheme_name == "SmoothAll") - bspline->smooth_scheme = SA::smoothing_schemes::SMOOTHALL; - else if (scheme_name == "SmoothPartial") - bspline->smooth_scheme = SA::smoothing_schemes::SMOOTHPARTIAL; - else - APP_ABORT( - "initialize_hybridrep_atomic_centers wrong smoothing_scheme " + /** initialize basic parameters of atomic orbitals */ + void initialize_hybridrep_atomic_centers() override + { + OhmmsAttributeSet a; + std::string scheme_name("Consistent"); + std::string s_function_name("LEKS2018"); + a.add(scheme_name, "smoothing_scheme"); + a.add(s_function_name, "smoothing_function"); + a.put(mybuilder->XMLRoot); + // assign smooth_scheme + if (scheme_name == "Consistent") + this->bspline->smooth_scheme = SA::smoothing_schemes::CONSISTENT; + else if (scheme_name == "SmoothAll") + bspline->smooth_scheme = SA::smoothing_schemes::SMOOTHALL; + else if (scheme_name == "SmoothPartial") + bspline->smooth_scheme = SA::smoothing_schemes::SMOOTHPARTIAL; + else + APP_ABORT("initialize_hybridrep_atomic_centers wrong smoothing_scheme " "name! 
Only allows Consistent, SmoothAll or " "SmoothPartial."); - // assign smooth_function - if (s_function_name == "LEKS2018") - bspline->smooth_func_id = smoothing_functions::LEKS2018; - else if (s_function_name == "coscos") - bspline->smooth_func_id = smoothing_functions::COSCOS; - else if (s_function_name == "linear") - bspline->smooth_func_id = smoothing_functions::LINEAR; - else - APP_ABORT( - "initialize_hybridrep_atomic_centers wrong smoothing_function " + // assign smooth_function + if (s_function_name == "LEKS2018") + bspline->smooth_func_id = smoothing_functions::LEKS2018; + else if (s_function_name == "coscos") + bspline->smooth_func_id = smoothing_functions::COSCOS; + else if (s_function_name == "linear") + bspline->smooth_func_id = smoothing_functions::LINEAR; + else + APP_ABORT("initialize_hybridrep_atomic_centers wrong smoothing_function " "name! Only allows LEKS2018, coscos or linear."); - app_log() << "Hybrid orbital representation uses " << scheme_name - << " smoothing scheme and " << s_function_name - << " smoothing function." << std::endl; - - bspline->set_info(*(mybuilder->SourcePtcl), mybuilder->TargetPtcl, - mybuilder->Super2Prim); - auto& centers = bspline->AtomicCenters; - auto& ACInfo = mybuilder->AtomicCentersInfo; - // load atomic center info only when it is not initialized - if (centers.size() == 0) { - bool success = true; - app_log() << "Reading atomic center info for hybrid representation" - << std::endl; - for (int center_idx = 0; center_idx < ACInfo.Ncenters; - center_idx++) { - const int my_GroupID = ACInfo.GroupID[center_idx]; - if (ACInfo.cutoff[center_idx] < 0) { - app_error() << "Hybrid orbital representation needs " - "parameter 'cutoff_radius' for atom " - << center_idx << std::endl; - success = false; - } + app_log() << "Hybrid orbital representation uses " << scheme_name << " smoothing scheme and " << s_function_name + << " smoothing function." 
<< std::endl; + + bspline->set_info(*(mybuilder->SourcePtcl), mybuilder->TargetPtcl, mybuilder->Super2Prim); + auto& centers = bspline->AtomicCenters; + auto& ACInfo = mybuilder->AtomicCentersInfo; + // load atomic center info only when it is not initialized + if (centers.size() == 0) + { + bool success = true; + app_log() << "Reading atomic center info for hybrid representation" << std::endl; + for (int center_idx = 0; center_idx < ACInfo.Ncenters; center_idx++) + { + const int my_GroupID = ACInfo.GroupID[center_idx]; + if (ACInfo.cutoff[center_idx] < 0) + { + app_error() << "Hybrid orbital representation needs " + "parameter 'cutoff_radius' for atom " + << center_idx << std::endl; + success = false; + } - if (ACInfo.inner_cutoff[center_idx] < 0) { - const double inner_cutoff = - std::max(ACInfo.cutoff[center_idx] - 0.3, 0.0); - app_log() << "Hybrid orbital representation setting " - "'inner_cutoff' to " - << inner_cutoff << " for group " << my_GroupID - << " as atom " << center_idx << std::endl; - // overwrite the inner_cutoff of all the atoms of the same - // species - for (int id = 0; id < ACInfo.Ncenters; id++) - if (my_GroupID == ACInfo.GroupID[id]) - ACInfo.inner_cutoff[id] = inner_cutoff; - } - else if (ACInfo.inner_cutoff[center_idx] > - ACInfo.cutoff[center_idx]) { - app_error() - << "Hybrid orbital representation 'inner_cutoff' must " - "be smaller than 'spline_radius' for atom " - << center_idx << std::endl; - success = false; - } + if (ACInfo.inner_cutoff[center_idx] < 0) + { + const double inner_cutoff = std::max(ACInfo.cutoff[center_idx] - 0.3, 0.0); + app_log() << "Hybrid orbital representation setting " + "'inner_cutoff' to " + << inner_cutoff << " for group " << my_GroupID << " as atom " << center_idx << std::endl; + // overwrite the inner_cutoff of all the atoms of the same + // species + for (int id = 0; id < ACInfo.Ncenters; id++) + if (my_GroupID == ACInfo.GroupID[id]) + ACInfo.inner_cutoff[id] = inner_cutoff; + } + else if 
(ACInfo.inner_cutoff[center_idx] > ACInfo.cutoff[center_idx]) + { + app_error() << "Hybrid orbital representation 'inner_cutoff' must " + "be smaller than 'spline_radius' for atom " + << center_idx << std::endl; + success = false; + } - if (ACInfo.cutoff[center_idx] > 0) { - if (ACInfo.lmax[center_idx] < 0) { - app_error() << "Hybrid orbital representation needs " - "parameter 'lmax' for atom " - << center_idx << std::endl; - success = false; - } - - if (ACInfo.spline_radius[center_idx] < 0 && - ACInfo.spline_npoints[center_idx] < 0) { - app_log() << "Parameters 'spline_radius' and " - "'spline_npoints' for group " - << my_GroupID << " as atom " << center_idx - << " are not specified." << std::endl; - const double delta = - std::min(0.02, ACInfo.cutoff[center_idx] / 4.0); - const int n_grid_point = - std::ceil( - (ACInfo.cutoff[center_idx] + 1e-4) / delta) + - 3; - for (int id = 0; id < ACInfo.Ncenters; id++) - if (my_GroupID == ACInfo.GroupID[id]) { - ACInfo.spline_npoints[id] = n_grid_point; - ACInfo.spline_radius[id] = - (n_grid_point - 1) * delta; - } - app_log() << " Based on default grid point distance " - << delta << std::endl; - app_log() - << " Setting 'spline_npoints' to " - << ACInfo.spline_npoints[center_idx] << std::endl; - app_log() - << " Setting 'spline_radius' to " - << ACInfo.spline_radius[center_idx] << std::endl; - } - else { - if (ACInfo.spline_radius[center_idx] < 0) { - app_error() - << "Hybrid orbital representation needs " - "parameter 'spline_radius' for atom " - << center_idx << std::endl; - success = false; - } - - if (ACInfo.spline_npoints[center_idx] < 0) { - app_error() - << "Hybrid orbital representation needs " - "parameter 'spline_npoints' for atom " - << center_idx << std::endl; - success = false; - } - } - - // check maximally allowed cutoff_radius - double max_allowed_cutoff = - ACInfo.spline_radius[center_idx] - - 2.0 * ACInfo.spline_radius[center_idx] / - (ACInfo.spline_npoints[center_idx] - 1); - if (success && - 
ACInfo.cutoff[center_idx] > max_allowed_cutoff) { - app_error() << "Hybrid orbital representation requires " - "cutoff_radius<=" - << max_allowed_cutoff - << " calculated by " - "spline_radius-2*spline_radius/" - "(spline_npoints-1) for atom " - << center_idx << std::endl; - success = false; - } - } - else { - // no atomic regions for this atom type - ACInfo.spline_radius[center_idx] = 0.0; - ACInfo.spline_npoints[center_idx] = 0; - ACInfo.lmax[center_idx] = 0; - } + if (ACInfo.cutoff[center_idx] > 0) + { + if (ACInfo.lmax[center_idx] < 0) + { + app_error() << "Hybrid orbital representation needs " + "parameter 'lmax' for atom " + << center_idx << std::endl; + success = false; + } + + if (ACInfo.spline_radius[center_idx] < 0 && ACInfo.spline_npoints[center_idx] < 0) + { + app_log() << "Parameters 'spline_radius' and " + "'spline_npoints' for group " + << my_GroupID << " as atom " << center_idx << " are not specified." << std::endl; + const double delta = std::min(0.02, ACInfo.cutoff[center_idx] / 4.0); + const int n_grid_point = std::ceil((ACInfo.cutoff[center_idx] + 1e-4) / delta) + 3; + for (int id = 0; id < ACInfo.Ncenters; id++) + if (my_GroupID == ACInfo.GroupID[id]) + { + ACInfo.spline_npoints[id] = n_grid_point; + ACInfo.spline_radius[id] = (n_grid_point - 1) * delta; + } + app_log() << " Based on default grid point distance " << delta << std::endl; + app_log() << " Setting 'spline_npoints' to " << ACInfo.spline_npoints[center_idx] << std::endl; + app_log() << " Setting 'spline_radius' to " << ACInfo.spline_radius[center_idx] << std::endl; + } + else + { + if (ACInfo.spline_radius[center_idx] < 0) + { + app_error() << "Hybrid orbital representation needs " + "parameter 'spline_radius' for atom " + << center_idx << std::endl; + success = false; } - if (!success) - BaseReader::myComm->barrier_and_abort( - "initialize_hybridrep_atomic_centers Failed to initialize " - "atomic centers " - "in hybrid orbital representation!"); - - for (int center_idx = 0; 
center_idx < ACInfo.Ncenters; - center_idx++) { - AtomicOrbitalsT oneCenter(ACInfo.lmax[center_idx]); - oneCenter.set_info(ACInfo.ion_pos[center_idx], - ACInfo.cutoff[center_idx], ACInfo.inner_cutoff[center_idx], - ACInfo.spline_radius[center_idx], - ACInfo.non_overlapping_radius[center_idx], - ACInfo.spline_npoints[center_idx]); - centers.push_back(oneCenter); + + if (ACInfo.spline_npoints[center_idx] < 0) + { + app_error() << "Hybrid orbital representation needs " + "parameter 'spline_npoints' for atom " + << center_idx << std::endl; + success = false; } + } + + // check maximally allowed cutoff_radius + double max_allowed_cutoff = ACInfo.spline_radius[center_idx] - + 2.0 * ACInfo.spline_radius[center_idx] / (ACInfo.spline_npoints[center_idx] - 1); + if (success && ACInfo.cutoff[center_idx] > max_allowed_cutoff) + { + app_error() << "Hybrid orbital representation requires " + "cutoff_radius<=" + << max_allowed_cutoff + << " calculated by " + "spline_radius-2*spline_radius/" + "(spline_npoints-1) for atom " + << center_idx << std::endl; + success = false; + } + } + else + { + // no atomic regions for this atom type + ACInfo.spline_radius[center_idx] = 0.0; + ACInfo.spline_npoints[center_idx] = 0; + ACInfo.lmax[center_idx] = 0; } + } + if (!success) + BaseReader::myComm->barrier_and_abort("initialize_hybridrep_atomic_centers Failed to initialize " + "atomic centers " + "in hybrid orbital representation!"); + + for (int center_idx = 0; center_idx < ACInfo.Ncenters; center_idx++) + { + AtomicOrbitalsT oneCenter(ACInfo.lmax[center_idx]); + oneCenter.set_info(ACInfo.ion_pos[center_idx], ACInfo.cutoff[center_idx], ACInfo.inner_cutoff[center_idx], + ACInfo.spline_radius[center_idx], ACInfo.non_overlapping_radius[center_idx], + ACInfo.spline_npoints[center_idx]); + centers.push_back(oneCenter); + } } + } - /** initialize construct atomic orbital radial functions from plane waves */ - inline void - create_atomic_centers_Gspace(Vector>& cG, - Communicate& band_group_comm, 
int iorb) override + /** initialize construct atomic orbital radial functions from plane waves */ + inline void create_atomic_centers_Gspace(Vector>& cG, + Communicate& band_group_comm, + int iorb) override + { + band_group_comm.bcast(rotate_phase_r); + band_group_comm.bcast(rotate_phase_i); + band_group_comm.bcast(cG); + // distribute G-vectors over processor groups + const int Ngvecs = mybuilder->Gvecs[0].size(); + const int Nprocs = band_group_comm.size(); + const int Ngvecgroups = std::min(Ngvecs, Nprocs); + Communicate gvec_group_comm(band_group_comm, Ngvecgroups); + std::vector gvec_groups(Ngvecgroups + 1, 0); + FairDivideLow(Ngvecs, Ngvecgroups, gvec_groups); + const int gvec_first = gvec_groups[gvec_group_comm.getGroupID()]; + const int gvec_last = gvec_groups[gvec_group_comm.getGroupID() + 1]; + + // prepare Gvecs Ylm(G) + using UnitCellType = typename EinsplineSetBuilderT::UnitCellType; + Gvectors Gvecs(mybuilder->Gvecs[0], mybuilder->PrimCell, bspline->HalfG, gvec_first, + gvec_last); + // if(band_group_comm.isGroupLeader()) std::cout << "print band=" << + // iorb << " KE=" << Gvecs.evaluate_KE(cG) << std::endl; + + std::vector>& centers = bspline->AtomicCenters; + app_log() << "Transforming band " << iorb << " on Rank 0" << std::endl; + // collect atomic centers by group + std::vector uniq_species; + for (int center_idx = 0; center_idx < centers.size(); center_idx++) { - band_group_comm.bcast(rotate_phase_r); - band_group_comm.bcast(rotate_phase_i); - band_group_comm.bcast(cG); - // distribute G-vectors over processor groups - const int Ngvecs = mybuilder->Gvecs[0].size(); - const int Nprocs = band_group_comm.size(); - const int Ngvecgroups = std::min(Ngvecs, Nprocs); - Communicate gvec_group_comm(band_group_comm, Ngvecgroups); - std::vector gvec_groups(Ngvecgroups + 1, 0); - FairDivideLow(Ngvecs, Ngvecgroups, gvec_groups); - const int gvec_first = gvec_groups[gvec_group_comm.getGroupID()]; - const int gvec_last = 
gvec_groups[gvec_group_comm.getGroupID() + 1]; - - // prepare Gvecs Ylm(G) - using UnitCellType = - typename EinsplineSetBuilderT::UnitCellType; - Gvectors Gvecs(mybuilder->Gvecs[0], - mybuilder->PrimCell, bspline->HalfG, gvec_first, gvec_last); - // if(band_group_comm.isGroupLeader()) std::cout << "print band=" << - // iorb << " KE=" << Gvecs.evaluate_KE(cG) << std::endl; - - std::vector>& centers = bspline->AtomicCenters; - app_log() << "Transforming band " << iorb << " on Rank 0" << std::endl; - // collect atomic centers by group - std::vector uniq_species; - for (int center_idx = 0; center_idx < centers.size(); center_idx++) { - auto& ACInfo = mybuilder->AtomicCentersInfo; - const int my_GroupID = ACInfo.GroupID[center_idx]; - int found_idx = -1; - for (size_t idx = 0; idx < uniq_species.size(); idx++) - if (my_GroupID == uniq_species[idx]) { - found_idx = idx; - break; - } - if (found_idx < 0) - uniq_species.push_back(my_GroupID); + auto& ACInfo = mybuilder->AtomicCentersInfo; + const int my_GroupID = ACInfo.GroupID[center_idx]; + int found_idx = -1; + for (size_t idx = 0; idx < uniq_species.size(); idx++) + if (my_GroupID == uniq_species[idx]) + { + found_idx = idx; + break; } - // construct group list - std::vector> group_list(uniq_species.size()); - for (int center_idx = 0; center_idx < centers.size(); center_idx++) { - auto& ACInfo = mybuilder->AtomicCentersInfo; - const int my_GroupID = ACInfo.GroupID[center_idx]; - for (size_t idx = 0; idx < uniq_species.size(); idx++) - if (my_GroupID == uniq_species[idx]) { - group_list[idx].push_back(center_idx); - break; - } + if (found_idx < 0) + uniq_species.push_back(my_GroupID); + } + // construct group list + std::vector> group_list(uniq_species.size()); + for (int center_idx = 0; center_idx < centers.size(); center_idx++) + { + auto& ACInfo = mybuilder->AtomicCentersInfo; + const int my_GroupID = ACInfo.GroupID[center_idx]; + for (size_t idx = 0; idx < uniq_species.size(); idx++) + if (my_GroupID == 
uniq_species[idx]) + { + group_list[idx].push_back(center_idx); + break; } + } - for (int group_idx = 0; group_idx < group_list.size(); group_idx++) { - const auto& mygroup = group_list[group_idx]; - const double spline_radius = centers[mygroup[0]].getSplineRadius(); - const int spline_npoints = centers[mygroup[0]].getSplineNpoints(); - const int lmax = centers[mygroup[0]].getLmax(); - const double delta = - spline_radius / static_cast(spline_npoints - 1); - const int lm_tot = (lmax + 1) * (lmax + 1); - const size_t natoms = mygroup.size(); - const int policy = lm_tot > natoms ? 0 : 1; - - std::vector> i_power(lm_tot); - // rotate phase is introduced here. - std::complex i_temp(rotate_phase_r, rotate_phase_i); - for (size_t l = 0; l <= lmax; l++) { - for (size_t lm = l * l; lm < (l + 1) * (l + 1); lm++) - i_power[lm] = i_temp; - i_temp *= std::complex(0.0, 1.0); - } - - std::vector> all_vals(natoms); - std::vector>> vals_local( - spline_npoints * omp_get_max_threads()); - VectorSoaContainer myRSoA(natoms); - for (size_t idx = 0; idx < natoms; idx++) { - all_vals[idx].resize(spline_npoints, lm_tot * 2); - all_vals[idx] = 0.0; - myRSoA(idx) = centers[mygroup[idx]].getCenterPos(); - } + for (int group_idx = 0; group_idx < group_list.size(); group_idx++) + { + const auto& mygroup = group_list[group_idx]; + const double spline_radius = centers[mygroup[0]].getSplineRadius(); + const int spline_npoints = centers[mygroup[0]].getSplineNpoints(); + const int lmax = centers[mygroup[0]].getLmax(); + const double delta = spline_radius / static_cast(spline_npoints - 1); + const int lm_tot = (lmax + 1) * (lmax + 1); + const size_t natoms = mygroup.size(); + const int policy = lm_tot > natoms ? 0 : 1; + + std::vector> i_power(lm_tot); + // rotate phase is introduced here. 
+ std::complex i_temp(rotate_phase_r, rotate_phase_i); + for (size_t l = 0; l <= lmax; l++) + { + for (size_t lm = l * l; lm < (l + 1) * (l + 1); lm++) + i_power[lm] = i_temp; + i_temp *= std::complex(0.0, 1.0); + } + + std::vector> all_vals(natoms); + std::vector>> vals_local(spline_npoints * omp_get_max_threads()); + VectorSoaContainer myRSoA(natoms); + for (size_t idx = 0; idx < natoms; idx++) + { + all_vals[idx].resize(spline_npoints, lm_tot * 2); + all_vals[idx] = 0.0; + myRSoA(idx) = centers[mygroup[idx]].getCenterPos(); + } #pragma omp parallel + { + const size_t tid = omp_get_thread_num(); + const size_t nt = omp_get_num_threads(); + + for (int ip = 0; ip < spline_npoints; ip++) + { + const size_t ip_idx = tid * spline_npoints + ip; + if (policy == 1) + { + vals_local[ip_idx].resize(lm_tot * 2); + for (size_t lm = 0; lm < lm_tot * 2; lm++) { - const size_t tid = omp_get_thread_num(); - const size_t nt = omp_get_num_threads(); - - for (int ip = 0; ip < spline_npoints; ip++) { - const size_t ip_idx = tid * spline_npoints + ip; - if (policy == 1) { - vals_local[ip_idx].resize(lm_tot * 2); - for (size_t lm = 0; lm < lm_tot * 2; lm++) { - auto& vals = vals_local[ip_idx][lm]; - vals.resize(natoms); - std::fill(vals.begin(), vals.end(), 0.0); - } - } - else { - vals_local[ip_idx].resize(natoms * 2); - for (size_t iat = 0; iat < natoms * 2; iat++) { - auto& vals = vals_local[ip_idx][iat]; - vals.resize(lm_tot); - std::fill(vals.begin(), vals.end(), 0.0); - } - } - } + auto& vals = vals_local[ip_idx][lm]; + vals.resize(natoms); + std::fill(vals.begin(), vals.end(), 0.0); + } + } + else + { + vals_local[ip_idx].resize(natoms * 2); + for (size_t iat = 0; iat < natoms * 2; iat++) + { + auto& vals = vals_local[ip_idx][iat]; + vals.resize(lm_tot); + std::fill(vals.begin(), vals.end(), 0.0); + } + } + } - const size_t size_pw_tile = 32; - const size_t num_pw_tiles = - (Gvecs.NumGvecs + size_pw_tile - 1) / size_pw_tile; - aligned_vector j_lm_G(lm_tot, 0.0); - std::vector> 
phase_shift_r(size_pw_tile); - std::vector> phase_shift_i(size_pw_tile); - std::vector> YlmG(size_pw_tile); - for (size_t ig = 0; ig < size_pw_tile; ig++) { - phase_shift_r[ig].resize(natoms); - phase_shift_i[ig].resize(natoms); - YlmG[ig].resize(lm_tot); - } - SoaSphericalTensor Ylm(lmax); + const size_t size_pw_tile = 32; + const size_t num_pw_tiles = (Gvecs.NumGvecs + size_pw_tile - 1) / size_pw_tile; + aligned_vector j_lm_G(lm_tot, 0.0); + std::vector> phase_shift_r(size_pw_tile); + std::vector> phase_shift_i(size_pw_tile); + std::vector> YlmG(size_pw_tile); + for (size_t ig = 0; ig < size_pw_tile; ig++) + { + phase_shift_r[ig].resize(natoms); + phase_shift_i[ig].resize(natoms); + YlmG[ig].resize(lm_tot); + } + SoaSphericalTensor Ylm(lmax); #pragma omp for - for (size_t tile_id = 0; tile_id < num_pw_tiles; tile_id++) { - const size_t ig_first = tile_id * size_pw_tile; - const size_t ig_last = - std::min((tile_id + 1) * size_pw_tile, Gvecs.NumGvecs); - for (size_t ig = ig_first; ig < ig_last; ig++) { - const size_t ig_local = ig - ig_first; - // calculate phase shift for all the centers of this - // group - Gvecs.calc_phase_shift(myRSoA, ig, - phase_shift_r[ig_local], phase_shift_i[ig_local]); - Gvecs.calc_Ylm_G(ig, Ylm, YlmG[ig_local]); - } - - for (int ip = 0; ip < spline_npoints; ip++) { - double r = delta * static_cast(ip); - const size_t ip_idx = tid * spline_npoints + ip; - - for (size_t ig = ig_first; ig < ig_last; ig++) { - const size_t ig_local = ig - ig_first; - // calculate spherical bessel function - Gvecs.calc_jlm_G(lmax, r, ig, j_lm_G); - for (size_t lm = 0; lm < lm_tot; lm++) - j_lm_G[lm] *= YlmG[ig_local][lm]; - - const double cG_r = cG[ig + gvec_first].real(); - const double cG_i = cG[ig + gvec_first].imag(); - if (policy == 1) { - for (size_t lm = 0; lm < lm_tot; lm++) { - double* restrict vals_r = - vals_local[ip_idx][lm * 2].data(); - double* restrict vals_i = - vals_local[ip_idx][lm * 2 + 1].data(); - const double* restrict ps_r_ptr = - 
phase_shift_r[ig_local].data(); - const double* restrict ps_i_ptr = - phase_shift_i[ig_local].data(); - double cG_j_r = cG_r * j_lm_G[lm]; - double cG_j_i = cG_i * j_lm_G[lm]; -#pragma omp simd aligned(vals_r, vals_i, ps_r_ptr, ps_i_ptr \ - : QMC_SIMD_ALIGNMENT) - for (size_t idx = 0; idx < natoms; idx++) { - const double ps_r = ps_r_ptr[idx]; - const double ps_i = ps_i_ptr[idx]; - vals_r[idx] += - cG_j_r * ps_r - cG_j_i * ps_i; - vals_i[idx] += - cG_j_i * ps_r + cG_j_r * ps_i; - } - } - } - else { - for (size_t idx = 0; idx < natoms; idx++) { - double* restrict vals_r = - vals_local[ip_idx][idx * 2].data(); - double* restrict vals_i = - vals_local[ip_idx][idx * 2 + 1].data(); - const double* restrict j_lm_G_ptr = - j_lm_G.data(); - double cG_ps_r = - cG_r * phase_shift_r[ig_local][idx] - - cG_i * phase_shift_i[ig_local][idx]; - double cG_ps_i = - cG_i * phase_shift_r[ig_local][idx] + - cG_r * phase_shift_i[ig_local][idx]; + for (size_t tile_id = 0; tile_id < num_pw_tiles; tile_id++) + { + const size_t ig_first = tile_id * size_pw_tile; + const size_t ig_last = std::min((tile_id + 1) * size_pw_tile, Gvecs.NumGvecs); + for (size_t ig = ig_first; ig < ig_last; ig++) + { + const size_t ig_local = ig - ig_first; + // calculate phase shift for all the centers of this + // group + Gvecs.calc_phase_shift(myRSoA, ig, phase_shift_r[ig_local], phase_shift_i[ig_local]); + Gvecs.calc_Ylm_G(ig, Ylm, YlmG[ig_local]); + } + + for (int ip = 0; ip < spline_npoints; ip++) + { + double r = delta * static_cast(ip); + const size_t ip_idx = tid * spline_npoints + ip; + + for (size_t ig = ig_first; ig < ig_last; ig++) + { + const size_t ig_local = ig - ig_first; + // calculate spherical bessel function + Gvecs.calc_jlm_G(lmax, r, ig, j_lm_G); + for (size_t lm = 0; lm < lm_tot; lm++) + j_lm_G[lm] *= YlmG[ig_local][lm]; + + const double cG_r = cG[ig + gvec_first].real(); + const double cG_i = cG[ig + gvec_first].imag(); + if (policy == 1) + { + for (size_t lm = 0; lm < lm_tot; lm++) + { + 
double* restrict vals_r = vals_local[ip_idx][lm * 2].data(); + double* restrict vals_i = vals_local[ip_idx][lm * 2 + 1].data(); + const double* restrict ps_r_ptr = phase_shift_r[ig_local].data(); + const double* restrict ps_i_ptr = phase_shift_i[ig_local].data(); + double cG_j_r = cG_r * j_lm_G[lm]; + double cG_j_i = cG_i * j_lm_G[lm]; +#pragma omp simd aligned(vals_r, vals_i, ps_r_ptr, ps_i_ptr : QMC_SIMD_ALIGNMENT) + for (size_t idx = 0; idx < natoms; idx++) + { + const double ps_r = ps_r_ptr[idx]; + const double ps_i = ps_i_ptr[idx]; + vals_r[idx] += cG_j_r * ps_r - cG_j_i * ps_i; + vals_i[idx] += cG_j_i * ps_r + cG_j_r * ps_i; + } + } + } + else + { + for (size_t idx = 0; idx < natoms; idx++) + { + double* restrict vals_r = vals_local[ip_idx][idx * 2].data(); + double* restrict vals_i = vals_local[ip_idx][idx * 2 + 1].data(); + const double* restrict j_lm_G_ptr = j_lm_G.data(); + double cG_ps_r = cG_r * phase_shift_r[ig_local][idx] - cG_i * phase_shift_i[ig_local][idx]; + double cG_ps_i = cG_i * phase_shift_r[ig_local][idx] + cG_r * phase_shift_i[ig_local][idx]; #pragma omp simd aligned(vals_r, vals_i, j_lm_G_ptr : QMC_SIMD_ALIGNMENT) - for (size_t lm = 0; lm < lm_tot; lm++) { - const double jlm = j_lm_G_ptr[lm]; - vals_r[lm] += cG_ps_r * jlm; - vals_i[lm] += cG_ps_i * jlm; - } - } - } - } - } + for (size_t lm = 0; lm < lm_tot; lm++) + { + const double jlm = j_lm_G_ptr[lm]; + vals_r[lm] += cG_ps_r * jlm; + vals_i[lm] += cG_ps_i * jlm; + } } + } + } + } + } #pragma omp for collapse(2) - for (int ip = 0; ip < spline_npoints; ip++) - for (size_t idx = 0; idx < natoms; idx++) { - double* vals = all_vals[idx][ip]; - for (size_t tid = 0; tid < nt; tid++) - for (size_t lm = 0; lm < lm_tot; lm++) { - double vals_th_r, vals_th_i; - const size_t ip_idx = tid * spline_npoints + ip; - if (policy == 1) { - vals_th_r = vals_local[ip_idx][lm * 2][idx]; - vals_th_i = - vals_local[ip_idx][lm * 2 + 1][idx]; - } - else { - vals_th_r = vals_local[ip_idx][idx * 2][lm]; - vals_th_i 
= - vals_local[ip_idx][idx * 2 + 1][lm]; - } - const double real_tmp = - 4.0 * M_PI * i_power[lm].real(); - const double imag_tmp = - 4.0 * M_PI * i_power[lm].imag(); - vals[lm] += - vals_th_r * real_tmp - vals_th_i * imag_tmp; - vals[lm + lm_tot] += - vals_th_i * real_tmp + vals_th_r * imag_tmp; - } - } - } - // app_log() << "Building band " << iorb << " at center " << - // center_idx << std::endl; - - for (size_t idx = 0; idx < natoms; idx++) { - // reduce all_vals - band_group_comm.reduce_in_place( - all_vals[idx].data(), all_vals[idx].size()); - if (!band_group_comm.isGroupLeader()) - continue; -#pragma omp parallel for - for (int lm = 0; lm < lm_tot; lm++) { - auto& mycenter = centers[mygroup[idx]]; - aligned_vector splineData_r(spline_npoints); - UBspline_1d_d* atomic_spline_r = nullptr; - for (size_t ip = 0; ip < spline_npoints; ip++) - splineData_r[ip] = all_vals[idx][ip][lm]; - atomic_spline_r = einspline::create(atomic_spline_r, 0.0, - spline_radius, spline_npoints, splineData_r.data(), - ((lm == 0) || (lm > 3))); - if (!bspline->isComplex()) { - mycenter.set_spline(atomic_spline_r, lm, iorb); - einspline::destroy(atomic_spline_r); - } - else { - aligned_vector splineData_i(spline_npoints); - UBspline_1d_d* atomic_spline_i = nullptr; - for (size_t ip = 0; ip < spline_npoints; ip++) - splineData_i[ip] = all_vals[idx][ip][lm + lm_tot]; - atomic_spline_i = einspline::create(atomic_spline_i, - 0.0, spline_radius, spline_npoints, - splineData_i.data(), ((lm == 0) || (lm > 3))); - mycenter.set_spline(atomic_spline_r, lm, iorb * 2); - mycenter.set_spline(atomic_spline_i, lm, iorb * 2 + 1); - einspline::destroy(atomic_spline_r); - einspline::destroy(atomic_spline_i); - } + for (int ip = 0; ip < spline_npoints; ip++) + for (size_t idx = 0; idx < natoms; idx++) + { + double* vals = all_vals[idx][ip]; + for (size_t tid = 0; tid < nt; tid++) + for (size_t lm = 0; lm < lm_tot; lm++) + { + double vals_th_r, vals_th_i; + const size_t ip_idx = tid * spline_npoints + ip; 
+ if (policy == 1) + { + vals_th_r = vals_local[ip_idx][lm * 2][idx]; + vals_th_i = vals_local[ip_idx][lm * 2 + 1][idx]; } - } + else + { + vals_th_r = vals_local[ip_idx][idx * 2][lm]; + vals_th_i = vals_local[ip_idx][idx * 2 + 1][lm]; + } + const double real_tmp = 4.0 * M_PI * i_power[lm].real(); + const double imag_tmp = 4.0 * M_PI * i_power[lm].imag(); + vals[lm] += vals_th_r * real_tmp - vals_th_i * imag_tmp; + vals[lm + lm_tot] += vals_th_i * real_tmp + vals_th_r * imag_tmp; + } + } + } + // app_log() << "Building band " << iorb << " at center " << + // center_idx << std::endl; + + for (size_t idx = 0; idx < natoms; idx++) + { + // reduce all_vals + band_group_comm.reduce_in_place(all_vals[idx].data(), all_vals[idx].size()); + if (!band_group_comm.isGroupLeader()) + continue; +#pragma omp parallel for + for (int lm = 0; lm < lm_tot; lm++) + { + auto& mycenter = centers[mygroup[idx]]; + aligned_vector splineData_r(spline_npoints); + UBspline_1d_d* atomic_spline_r = nullptr; + for (size_t ip = 0; ip < spline_npoints; ip++) + splineData_r[ip] = all_vals[idx][ip][lm]; + atomic_spline_r = einspline::create(atomic_spline_r, 0.0, spline_radius, spline_npoints, splineData_r.data(), + ((lm == 0) || (lm > 3))); + if (!bspline->isComplex()) + { + mycenter.set_spline(atomic_spline_r, lm, iorb); + einspline::destroy(atomic_spline_r); + } + else + { + aligned_vector splineData_i(spline_npoints); + UBspline_1d_d* atomic_spline_i = nullptr; + for (size_t ip = 0; ip < spline_npoints; ip++) + splineData_i[ip] = all_vals[idx][ip][lm + lm_tot]; + atomic_spline_i = einspline::create(atomic_spline_i, 0.0, spline_radius, spline_npoints, + splineData_i.data(), ((lm == 0) || (lm > 3))); + mycenter.set_spline(atomic_spline_r, lm, iorb * 2); + mycenter.set_spline(atomic_spline_i, lm, iorb * 2 + 1); + einspline::destroy(atomic_spline_r); + einspline::destroy(atomic_spline_i); + } } + } } + } }; } // namespace qmcplusplus #endif diff --git 
a/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.cpp index 9c02ad06d2d..a23b5ec2898 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.cpp @@ -20,485 +20,452 @@ namespace qmcplusplus { -template -SplineC2COMPTargetT::SplineC2COMPTargetT( - const SplineC2COMPTargetT& in) = default; - -template -inline void -SplineC2COMPTargetT::set_spline(SingleSplineType* spline_r, - SingleSplineType* spline_i, int twist, int ispline, int level) +template +SplineC2COMPTargetT::SplineC2COMPTargetT(const SplineC2COMPTargetT& in) = default; + +template +inline void SplineC2COMPTargetT::set_spline(SingleSplineType* spline_r, + SingleSplineType* spline_i, + int twist, + int ispline, + int level) { - SplineInst->copy_spline(spline_r, 2 * ispline); - SplineInst->copy_spline(spline_i, 2 * ispline + 1); + SplineInst->copy_spline(spline_r, 2 * ispline); + SplineInst->copy_spline(spline_i, 2 * ispline + 1); } -template -bool -SplineC2COMPTargetT::read_splines(hdf_archive& h5f) +template +bool SplineC2COMPTargetT::read_splines(hdf_archive& h5f) { - std::ostringstream o; - o << "spline_" << this->MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); + std::ostringstream o; + o << "spline_" << this->MyIndex; + einspline_engine bigtable(SplineInst->getSplinePtr()); + return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); } -template -bool -SplineC2COMPTargetT::write_splines(hdf_archive& h5f) +template +bool SplineC2COMPTargetT::write_splines(hdf_archive& h5f) { - std::ostringstream o; - o << "spline_" << this->MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); + std::ostringstream o; + o << "spline_" << this->MyIndex; + einspline_engine bigtable(SplineInst->getSplinePtr()); + 
return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); } -template -inline void -SplineC2COMPTargetT::assign_v(const PointType& r, - const vContainer_type& myV, ValueVector& psi, int first, int last) const +template +inline void SplineC2COMPTargetT::assign_v(const PointType& r, + const vContainer_type& myV, + ValueVector& psi, + int first, + int last) const { - // protect last - last = last > this->kPoints.size() ? this->kPoints.size() : last; + // protect last + last = last > this->kPoints.size() ? this->kPoints.size() : last; - const ST x = r[0], y = r[1], z = r[2]; - const ST* restrict kx = myKcart->data(0); - const ST* restrict ky = myKcart->data(1); - const ST* restrict kz = myKcart->data(2); + const ST x = r[0], y = r[1], z = r[2]; + const ST* restrict kx = myKcart->data(0); + const ST* restrict ky = myKcart->data(1); + const ST* restrict kz = myKcart->data(2); #pragma omp simd - for (size_t j = first; j < last; ++j) { - ST s, c; - const ST val_r = myV[2 * j]; - const ST val_i = myV[2 * j + 1]; - omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); - psi[j + this->first_spo] = - ComplexT(val_r * c - val_i * s, val_i * c + val_r * s); - } + for (size_t j = first; j < last; ++j) + { + ST s, c; + const ST val_r = myV[2 * j]; + const ST val_i = myV[2 * j + 1]; + omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); + psi[j + this->first_spo] = ComplexT(val_r * c - val_i * s, val_i * c + val_r * s); + } } -template -void -SplineC2COMPTargetT::evaluateValue( - const ParticleSetT& P, const int iat, ValueVector& psi) +template +void SplineC2COMPTargetT::evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel - { - int first, last; - // Factor of 2 because psi is complex and the spline storage and - // evaluation uses a real type - 
FairDivideAligned(2 * psi.size(), getAlignment(), - omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(r, myV, psi, first / 2, last / 2); - } + { + int first, last; + // Factor of 2 because psi is complex and the spline storage and + // evaluation uses a real type + FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); + + spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); + assign_v(r, myV, psi, first / 2, last / 2); + } } -template -void -SplineC2COMPTargetT::evaluateDetRatios( - const VirtualParticleSetT& VP, ValueVector& psi, - const ValueVector& psiinv, std::vector& ratios) +template +void SplineC2COMPTargetT::evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) { - const int nVP = VP.getTotalNum(); - psiinv_pos_copy.resize(psiinv.size() + nVP * 3); - - // stage psiinv to psiinv_pos_copy - std::copy_n(psiinv.data(), psiinv.size(), psiinv_pos_copy.data()); - - // pack particle positions - auto* restrict pos_scratch = - reinterpret_cast(psiinv_pos_copy.data() + psiinv.size()); - for (int iat = 0; iat < nVP; ++iat) { - const PointType& r = VP.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - pos_scratch[iat * 6] = r[0]; - pos_scratch[iat * 6 + 1] = r[1]; - pos_scratch[iat * 6 + 2] = r[2]; - pos_scratch[iat * 6 + 3] = ru[0]; - pos_scratch[iat * 6 + 4] = ru[1]; - pos_scratch[iat * 6 + 5] = ru[2]; - } + const int nVP = VP.getTotalNum(); + psiinv_pos_copy.resize(psiinv.size() + nVP * 3); - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - ratios_private.resize(nVP, NumTeams); - const auto padded_size = myV.size(); - offload_scratch.resize(padded_size * nVP); - const auto orb_size = psiinv.size(); - results_scratch.resize(padded_size * nVP); - - // Ye: need 
to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* offload_scratch_ptr = offload_scratch.data(); - auto* results_scratch_ptr = results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* myKcart_ptr = myKcart->data(); - auto* psiinv_ptr = psiinv_pos_copy.data(); - auto* ratios_private_ptr = ratios_private.data(); - const size_t first_spo_local = this->first_spo; + // stage psiinv to psiinv_pos_copy + std::copy_n(psiinv.data(), psiinv.size(), psiinv_pos_copy.data()); - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD( - "omp target teams distribute collapse(2) num_teams(NumTeams*nVP) \ + // pack particle positions + auto* restrict pos_scratch = reinterpret_cast(psiinv_pos_copy.data() + psiinv.size()); + for (int iat = 0; iat < nVP; ++iat) + { + const PointType& r = VP.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); + pos_scratch[iat * 6] = r[0]; + pos_scratch[iat * 6 + 1] = r[1]; + pos_scratch[iat * 6 + 2] = r[2]; + pos_scratch[iat * 6 + 3] = ru[0]; + pos_scratch[iat * 6 + 4] = ru[1]; + pos_scratch[iat * 6 + 5] = ru[2]; + } + + const size_t ChunkSizePerTeam = 512; + const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + ratios_private.resize(nVP, NumTeams); + const auto padded_size = myV.size(); + offload_scratch.resize(padded_size * nVP); + const auto orb_size = psiinv.size(); + results_scratch.resize(padded_size * nVP); + + // Ye: need to extract sizes and pointers before entering target region + const auto* spline_ptr = SplineInst->getSplinePtr(); + auto* offload_scratch_ptr = offload_scratch.data(); + auto* results_scratch_ptr = results_scratch.data(); + const auto myKcart_padded_size = myKcart->capacity(); + auto* myKcart_ptr = myKcart->data(); + auto* psiinv_ptr = psiinv_pos_copy.data(); + auto* ratios_private_ptr = ratios_private.data(); + const size_t first_spo_local = this->first_spo; + + { + ScopedTimer 
offload(offload_timer_); + PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*nVP) \ map(always, to: psiinv_ptr[0:psiinv_pos_copy.size()]) \ map(always, from: ratios_private_ptr[0:NumTeams*nVP])") - for (int iat = 0; iat < nVP; iat++) - for (int team_id = 0; team_id < NumTeams; team_id++) { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = - omptarget::min(first + ChunkSizePerTeam, padded_size); - - auto* restrict offload_scratch_iat_ptr = - offload_scratch_ptr + padded_size * iat; - auto* restrict psi_iat_ptr = - results_scratch_ptr + padded_size * iat; - auto* restrict pos_scratch = - reinterpret_cast(psiinv_ptr + orb_size); - - int ix, iy, iz; - ST a[4], b[4], c[4]; - spline2::computeLocationAndFractional(spline_ptr, - ST(pos_scratch[iat * 6 + 3]), ST(pos_scratch[iat * 6 + 4]), - ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c); - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, - first + index, a, b, c, - offload_scratch_iat_ptr + first + index); - const size_t first_cplx = first / 2; - const size_t last_cplx = omptarget::min(last / 2, orb_size); - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2C::assign_v(ST(pos_scratch[iat * 6]), - ST(pos_scratch[iat * 6 + 1]), - ST(pos_scratch[iat * 6 + 2]), psi_iat_ptr, - offload_scratch_iat_ptr, myKcart_ptr, - myKcart_padded_size, first_spo_local, index); - - ComplexT sum(0); - PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)") - for (int i = first_cplx; i < last_cplx; i++) - sum += psi_iat_ptr[i] * psiinv_ptr[i]; - ratios_private_ptr[iat * NumTeams + team_id] = sum; - } - } - - // do the reduction manually - for (int iat = 0; iat < nVP; ++iat) { - ratios[iat] = ComplexT(0); - for (int tid = 0; tid < NumTeams; tid++) - ratios[iat] += ratios_private[iat][tid]; - } + for (int iat = 0; iat < nVP; iat++) + for (int team_id 
= 0; team_id < NumTeams; team_id++) + { + const size_t first = ChunkSizePerTeam * team_id; + const size_t last = omptarget::min(first + ChunkSizePerTeam, padded_size); + + auto* restrict offload_scratch_iat_ptr = offload_scratch_ptr + padded_size * iat; + auto* restrict psi_iat_ptr = results_scratch_ptr + padded_size * iat; + auto* restrict pos_scratch = reinterpret_cast(psiinv_ptr + orb_size); + + int ix, iy, iz; + ST a[4], b[4], c[4]; + spline2::computeLocationAndFractional(spline_ptr, ST(pos_scratch[iat * 6 + 3]), ST(pos_scratch[iat * 6 + 4]), + ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c); + + PRAGMA_OFFLOAD("omp parallel for") + for (int index = 0; index < last - first; index++) + spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, + offload_scratch_iat_ptr + first + index); + const size_t first_cplx = first / 2; + const size_t last_cplx = omptarget::min(last / 2, orb_size); + PRAGMA_OFFLOAD("omp parallel for") + for (int index = first_cplx; index < last_cplx; index++) + C2C::assign_v(ST(pos_scratch[iat * 6]), ST(pos_scratch[iat * 6 + 1]), ST(pos_scratch[iat * 6 + 2]), + psi_iat_ptr, offload_scratch_iat_ptr, myKcart_ptr, myKcart_padded_size, first_spo_local, index); + + ComplexT sum(0); + PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)") + for (int i = first_cplx; i < last_cplx; i++) + sum += psi_iat_ptr[i] * psiinv_ptr[i]; + ratios_private_ptr[iat * NumTeams + team_id] = sum; + } + } + + // do the reduction manually + for (int iat = 0; iat < nVP; ++iat) + { + ratios[iat] = ComplexT(0); + for (int tid = 0; tid < NumTeams; tid++) + ratios[iat] += ratios_private[iat][tid]; + } } -template -void -SplineC2COMPTargetT::mw_evaluateDetRatios( +template +void SplineC2COMPTargetT::mw_evaluateDetRatios( const RefVectorWithLeader>& spo_list, const RefVectorWithLeader>& vp_list, const RefVector& psi_list, const std::vector& invRow_ptr_list, std::vector>& ratios_list) const { - assert(this == &spo_list.getLeader()); - auto& 
phi_leader = spo_list.template getCastedLeader(); - auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); - auto& det_ratios_buffer_H2D = mw_mem.det_ratios_buffer_H2D; - auto& mw_ratios_private = mw_mem.mw_ratios_private; - auto& mw_offload_scratch = mw_mem.mw_offload_scratch; - auto& mw_results_scratch = mw_mem.mw_results_scratch; - const size_t nw = spo_list.size(); - const size_t orb_size = phi_leader.size(); - - size_t mw_nVP = 0; - for (const VirtualParticleSetT& VP : vp_list) - mw_nVP += VP.getTotalNum(); - - const size_t packed_size = - nw * sizeof(ValueType*) + mw_nVP * (6 * sizeof(ST) + sizeof(int)); - det_ratios_buffer_H2D.resize(packed_size); - - // pack invRow_ptr_list to det_ratios_buffer_H2D - Vector ptr_buffer( - reinterpret_cast(det_ratios_buffer_H2D.data()), nw); - for (size_t iw = 0; iw < nw; iw++) - ptr_buffer[iw] = invRow_ptr_list[iw]; - - // pack particle positions - auto* pos_ptr = reinterpret_cast( - det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*)); - auto* ref_id_ptr = reinterpret_cast(det_ratios_buffer_H2D.data() + - nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(ST)); - size_t iVP = 0; - for (size_t iw = 0; iw < nw; iw++) { - const VirtualParticleSetT& VP = vp_list[iw]; - assert(ratios_list[iw].size() == VP.getTotalNum()); - for (size_t iat = 0; iat < VP.getTotalNum(); ++iat, ++iVP) { - ref_id_ptr[iVP] = iw; - const PointType& r = VP.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - pos_ptr[0] = r[0]; - pos_ptr[1] = r[1]; - pos_ptr[2] = r[2]; - pos_ptr[3] = ru[0]; - pos_ptr[4] = ru[1]; - pos_ptr[5] = ru[2]; - pos_ptr += 6; - } - } - - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - mw_ratios_private.resize(mw_nVP, NumTeams); - const auto padded_size = myV.size(); - mw_offload_scratch.resize(padded_size * mw_nVP); - mw_results_scratch.resize(padded_size * mw_nVP); - - // Ye: need to extract sizes and pointers before entering target region - const auto* 
spline_ptr = SplineInst->getSplinePtr(); - auto* offload_scratch_ptr = mw_offload_scratch.data(); - auto* results_scratch_ptr = mw_results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* myKcart_ptr = myKcart->data(); - auto* buffer_H2D_ptr = det_ratios_buffer_H2D.data(); - auto* ratios_private_ptr = mw_ratios_private.data(); - const size_t first_spo_local = this->first_spo; - + assert(this == &spo_list.getLeader()); + auto& phi_leader = spo_list.template getCastedLeader(); + auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); + auto& det_ratios_buffer_H2D = mw_mem.det_ratios_buffer_H2D; + auto& mw_ratios_private = mw_mem.mw_ratios_private; + auto& mw_offload_scratch = mw_mem.mw_offload_scratch; + auto& mw_results_scratch = mw_mem.mw_results_scratch; + const size_t nw = spo_list.size(); + const size_t orb_size = phi_leader.size(); + + size_t mw_nVP = 0; + for (const VirtualParticleSetT& VP : vp_list) + mw_nVP += VP.getTotalNum(); + + const size_t packed_size = nw * sizeof(ValueType*) + mw_nVP * (6 * sizeof(ST) + sizeof(int)); + det_ratios_buffer_H2D.resize(packed_size); + + // pack invRow_ptr_list to det_ratios_buffer_H2D + Vector ptr_buffer(reinterpret_cast(det_ratios_buffer_H2D.data()), nw); + for (size_t iw = 0; iw < nw; iw++) + ptr_buffer[iw] = invRow_ptr_list[iw]; + + // pack particle positions + auto* pos_ptr = reinterpret_cast(det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*)); + auto* ref_id_ptr = + reinterpret_cast(det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(ST)); + size_t iVP = 0; + for (size_t iw = 0; iw < nw; iw++) + { + const VirtualParticleSetT& VP = vp_list[iw]; + assert(ratios_list[iw].size() == VP.getTotalNum()); + for (size_t iat = 0; iat < VP.getTotalNum(); ++iat, ++iVP) { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD( - "omp target teams distribute collapse(2) num_teams(NumTeams*mw_nVP) \ + ref_id_ptr[iVP] = iw; + const PointType& r = VP.activeR(iat); + PointType 
ru(PrimLattice.toUnit_floor(r)); + pos_ptr[0] = r[0]; + pos_ptr[1] = r[1]; + pos_ptr[2] = r[2]; + pos_ptr[3] = ru[0]; + pos_ptr[4] = ru[1]; + pos_ptr[5] = ru[2]; + pos_ptr += 6; + } + } + + const size_t ChunkSizePerTeam = 512; + const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + mw_ratios_private.resize(mw_nVP, NumTeams); + const auto padded_size = myV.size(); + mw_offload_scratch.resize(padded_size * mw_nVP); + mw_results_scratch.resize(padded_size * mw_nVP); + + // Ye: need to extract sizes and pointers before entering target region + const auto* spline_ptr = SplineInst->getSplinePtr(); + auto* offload_scratch_ptr = mw_offload_scratch.data(); + auto* results_scratch_ptr = mw_results_scratch.data(); + const auto myKcart_padded_size = myKcart->capacity(); + auto* myKcart_ptr = myKcart->data(); + auto* buffer_H2D_ptr = det_ratios_buffer_H2D.data(); + auto* ratios_private_ptr = mw_ratios_private.data(); + const size_t first_spo_local = this->first_spo; + + { + ScopedTimer offload(offload_timer_); + PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*mw_nVP) \ map(always, to: buffer_H2D_ptr[0:det_ratios_buffer_H2D.size()]) \ map(always, from: ratios_private_ptr[0:NumTeams*mw_nVP])") - for (int iat = 0; iat < mw_nVP; iat++) - for (int team_id = 0; team_id < NumTeams; team_id++) { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = - omptarget::min(first + ChunkSizePerTeam, padded_size); - - auto* restrict offload_scratch_iat_ptr = - offload_scratch_ptr + padded_size * iat; - auto* restrict psi_iat_ptr = - results_scratch_ptr + padded_size * iat; - auto* ref_id_ptr = reinterpret_cast(buffer_H2D_ptr + - nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(ST)); - auto* restrict psiinv_ptr = reinterpret_cast( - buffer_H2D_ptr)[ref_id_ptr[iat]]; - auto* restrict pos_scratch = reinterpret_cast( - buffer_H2D_ptr + nw * sizeof(ValueType*)); - - int ix, iy, iz; - ST a[4], b[4], c[4]; - 
spline2::computeLocationAndFractional(spline_ptr, - pos_scratch[iat * 6 + 3], pos_scratch[iat * 6 + 4], - pos_scratch[iat * 6 + 5], ix, iy, iz, a, b, c); - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, - first + index, a, b, c, - offload_scratch_iat_ptr + first + index); - const size_t first_cplx = first / 2; - const size_t last_cplx = omptarget::min(last / 2, orb_size); - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2C::assign_v(pos_scratch[iat * 6], - pos_scratch[iat * 6 + 1], pos_scratch[iat * 6 + 2], - psi_iat_ptr, offload_scratch_iat_ptr, myKcart_ptr, - myKcart_padded_size, first_spo_local, index); - - ComplexT sum(0); - PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)") - for (int i = first_cplx; i < last_cplx; i++) - sum += psi_iat_ptr[i] * psiinv_ptr[i]; - ratios_private_ptr[iat * NumTeams + team_id] = sum; - } - } - - // do the reduction manually - iVP = 0; - for (size_t iw = 0; iw < nw; iw++) { - auto& ratios = ratios_list[iw]; - for (size_t iat = 0; iat < ratios.size(); iat++, iVP++) { - ratios[iat] = ComplexT(0); - for (int tid = 0; tid < NumTeams; ++tid) - ratios[iat] += mw_ratios_private[iVP][tid]; - } + for (int iat = 0; iat < mw_nVP; iat++) + for (int team_id = 0; team_id < NumTeams; team_id++) + { + const size_t first = ChunkSizePerTeam * team_id; + const size_t last = omptarget::min(first + ChunkSizePerTeam, padded_size); + + auto* restrict offload_scratch_iat_ptr = offload_scratch_ptr + padded_size * iat; + auto* restrict psi_iat_ptr = results_scratch_ptr + padded_size * iat; + auto* ref_id_ptr = reinterpret_cast(buffer_H2D_ptr + nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(ST)); + auto* restrict psiinv_ptr = reinterpret_cast(buffer_H2D_ptr)[ref_id_ptr[iat]]; + auto* restrict pos_scratch = reinterpret_cast(buffer_H2D_ptr + nw * sizeof(ValueType*)); + + int ix, iy, iz; + ST a[4], b[4], 
c[4]; + spline2::computeLocationAndFractional(spline_ptr, pos_scratch[iat * 6 + 3], pos_scratch[iat * 6 + 4], + pos_scratch[iat * 6 + 5], ix, iy, iz, a, b, c); + + PRAGMA_OFFLOAD("omp parallel for") + for (int index = 0; index < last - first; index++) + spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, + offload_scratch_iat_ptr + first + index); + const size_t first_cplx = first / 2; + const size_t last_cplx = omptarget::min(last / 2, orb_size); + PRAGMA_OFFLOAD("omp parallel for") + for (int index = first_cplx; index < last_cplx; index++) + C2C::assign_v(pos_scratch[iat * 6], pos_scratch[iat * 6 + 1], pos_scratch[iat * 6 + 2], psi_iat_ptr, + offload_scratch_iat_ptr, myKcart_ptr, myKcart_padded_size, first_spo_local, index); + + ComplexT sum(0); + PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)") + for (int i = first_cplx; i < last_cplx; i++) + sum += psi_iat_ptr[i] * psiinv_ptr[i]; + ratios_private_ptr[iat * NumTeams + team_id] = sum; + } + } + + // do the reduction manually + iVP = 0; + for (size_t iw = 0; iw < nw; iw++) + { + auto& ratios = ratios_list[iw]; + for (size_t iat = 0; iat < ratios.size(); iat++, iVP++) + { + ratios[iat] = ComplexT(0); + for (int tid = 0; tid < NumTeams; ++tid) + ratios[iat] += mw_ratios_private[iVP][tid]; } + } } /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in * cartesian */ -template -inline void -SplineC2COMPTargetT::assign_vgl_from_l( - const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +template +inline void SplineC2COMPTargetT::assign_vgl_from_l(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) { - constexpr ST two(2); - const ST x = r[0], y = r[1], z = r[2]; + constexpr ST two(2); + const ST x = r[0], y = r[1], z = r[2]; - const ST* restrict k0 = myKcart->data(0); - const ST* restrict k1 = myKcart->data(1); - const ST* restrict k2 = myKcart->data(2); + const ST* restrict k0 = myKcart->data(0); + const 
ST* restrict k1 = myKcart->data(1); + const ST* restrict k2 = myKcart->data(2); - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); + const ST* restrict g0 = myG.data(0); + const ST* restrict g1 = myG.data(1); + const ST* restrict g2 = myG.data(2); - const size_t N = this->last_spo - this->first_spo; + const size_t N = this->last_spo - this->first_spo; #pragma omp simd - for (size_t j = 0; j < N; ++j) { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - // phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - // dot(PrimLattice.G,myG[j]) - const ST dX_r = g0[jr]; - const ST dY_r = g1[jr]; - const ST dZ_r = g2[jr]; - - const ST dX_i = g0[ji]; - const ST dY_i = g1[ji]; - const ST dZ_i = g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const ST lap_r = myL[jr] + (*mKK)[j] * val_r + - two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = myL[ji] + (*mKK)[j] * val_i - - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - - const size_t psiIndex = j + this->first_spo; - psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); - dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); - dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); - dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - d2psi[psiIndex] = - ComplexT(c * lap_r - s * lap_i, c * lap_i + s * lap_r); - } + for (size_t j = 0; j < N; ++j) + { + const size_t jr = j << 1; + const size_t ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = 
myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g0[jr]; + const ST dY_r = g1[jr]; + const ST dZ_r = g2[jr]; + + const ST dX_i = g0[ji]; + const ST dY_i = g1[ji]; + const ST dZ_i = g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + + const ST lap_r = myL[jr] + (*mKK)[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); + const ST lap_i = myL[ji] + (*mKK)[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); + + const size_t psiIndex = j + this->first_spo; + psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); + dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); + dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); + dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); + d2psi[psiIndex] = ComplexT(c * lap_r - s * lap_i, c * lap_i + s * lap_r); + } } -template -void -SplineC2COMPTargetT::evaluateVGL(const ParticleSetT& P, - const int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +template +void SplineC2COMPTargetT::evaluateVGL(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - - const auto padded_size = myV.size(); - offload_scratch.resize(padded_size * SoAFields3D::NUM_FIELDS); - const auto orb_size = psi.size(); - // for V(1)G(3)L(1) final result - results_scratch.resize(padded_size * 5); - - // Ye: need to extract sizes and pointers before entering target region - const 
auto* spline_ptr = SplineInst->getSplinePtr(); - auto* offload_scratch_ptr = offload_scratch.data(); - auto* results_scratch_ptr = results_scratch.data(); - const auto x = r[0], y = r[1], z = r[2]; - const auto rux = ru[0], ruy = ru[1], ruz = ru[2]; - const auto myKcart_padded_size = myKcart->capacity(); - auto* mKK_ptr = mKK->data(); - auto* GGt_ptr = GGt_offload->data(); - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - auto* myKcart_ptr = myKcart->data(); - const size_t first_spo_local = this->first_spo; - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \ + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); + + const size_t ChunkSizePerTeam = 512; + const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + + const auto padded_size = myV.size(); + offload_scratch.resize(padded_size * SoAFields3D::NUM_FIELDS); + const auto orb_size = psi.size(); + // for V(1)G(3)L(1) final result + results_scratch.resize(padded_size * 5); + + // Ye: need to extract sizes and pointers before entering target region + const auto* spline_ptr = SplineInst->getSplinePtr(); + auto* offload_scratch_ptr = offload_scratch.data(); + auto* results_scratch_ptr = results_scratch.data(); + const auto x = r[0], y = r[1], z = r[2]; + const auto rux = ru[0], ruy = ru[1], ruz = ru[2]; + const auto myKcart_padded_size = myKcart->capacity(); + auto* mKK_ptr = mKK->data(); + auto* GGt_ptr = GGt_offload->data(); + auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); + auto* myKcart_ptr = myKcart->data(); + const size_t first_spo_local = this->first_spo; + + { + ScopedTimer offload(offload_timer_); + PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \ map(always, from: results_scratch_ptr[0:padded_size*5])") - for (int team_id = 0; team_id < NumTeams; team_id++) { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = - omptarget::min(first + 
ChunkSizePerTeam, padded_size); - - int ix, iy, iz; - ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; - spline2::computeLocationAndFractional(spline_ptr, rux, ruy, ruz, ix, - iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c); - - const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], - PrimLattice_G_ptr[2], PrimLattice_G_ptr[3], - PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], - PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], - PrimLattice_G_ptr[8]}; - const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], - GGt_ptr[2] + GGt_ptr[6], GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], - GGt_ptr[8]}; - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) { - spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, - first + index, a, b, c, da, db, dc, d2a, d2b, d2c, - offload_scratch_ptr + first + index, padded_size); - const int output_index = first + index; - offload_scratch_ptr[padded_size * SoAFields3D::LAPL + - output_index] = - SymTrace( - offload_scratch_ptr[padded_size * SoAFields3D::HESS00 + - output_index], - offload_scratch_ptr[padded_size * SoAFields3D::HESS01 + - output_index], - offload_scratch_ptr[padded_size * SoAFields3D::HESS02 + - output_index], - offload_scratch_ptr[padded_size * SoAFields3D::HESS11 + - output_index], - offload_scratch_ptr[padded_size * SoAFields3D::HESS12 + - output_index], - offload_scratch_ptr[padded_size * SoAFields3D::HESS22 + - output_index], - symGGt); - } - - const size_t first_cplx = first / 2; - const size_t last_cplx = omptarget::min(last / 2, orb_size); - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2C::assign_vgl(x, y, z, results_scratch_ptr, padded_size, - mKK_ptr, offload_scratch_ptr, padded_size, G, myKcart_ptr, - myKcart_padded_size, first_spo_local, index); - } - } - - for (size_t i = 0; i < orb_size; i++) { - psi[i] = results_scratch[i]; - dpsi[i][0] = results_scratch[i + padded_size]; - dpsi[i][1] = results_scratch[i + 
padded_size * 2]; - dpsi[i][2] = results_scratch[i + padded_size * 3]; - d2psi[i] = results_scratch[i + padded_size * 4]; + for (int team_id = 0; team_id < NumTeams; team_id++) + { + const size_t first = ChunkSizePerTeam * team_id; + const size_t last = omptarget::min(first + ChunkSizePerTeam, padded_size); + + int ix, iy, iz; + ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; + spline2::computeLocationAndFractional(spline_ptr, rux, ruy, ruz, ix, iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c); + + const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2], + PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], + PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]}; + const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6], + GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; + + PRAGMA_OFFLOAD("omp parallel for") + for (int index = 0; index < last - first; index++) + { + spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b, d2c, + offload_scratch_ptr + first + index, padded_size); + const int output_index = first + index; + offload_scratch_ptr[padded_size * SoAFields3D::LAPL + output_index] = + SymTrace(offload_scratch_ptr[padded_size * SoAFields3D::HESS00 + output_index], + offload_scratch_ptr[padded_size * SoAFields3D::HESS01 + output_index], + offload_scratch_ptr[padded_size * SoAFields3D::HESS02 + output_index], + offload_scratch_ptr[padded_size * SoAFields3D::HESS11 + output_index], + offload_scratch_ptr[padded_size * SoAFields3D::HESS12 + output_index], + offload_scratch_ptr[padded_size * SoAFields3D::HESS22 + output_index], symGGt); + } + + const size_t first_cplx = first / 2; + const size_t last_cplx = omptarget::min(last / 2, orb_size); + PRAGMA_OFFLOAD("omp parallel for") + for (int index = first_cplx; index < last_cplx; index++) + C2C::assign_vgl(x, y, z, results_scratch_ptr, padded_size, mKK_ptr, 
offload_scratch_ptr, padded_size, G, + myKcart_ptr, myKcart_padded_size, first_spo_local, index); } + } + + for (size_t i = 0; i < orb_size; i++) + { + psi[i] = results_scratch[i]; + dpsi[i][0] = results_scratch[i + padded_size]; + dpsi[i][1] = results_scratch[i + padded_size * 2]; + dpsi[i][2] = results_scratch[i + padded_size * 3]; + d2psi[i] = results_scratch[i + padded_size * 4]; + } } -template -void -SplineC2COMPTargetT::evaluateVGLMultiPos( +template +void SplineC2COMPTargetT::evaluateVGLMultiPos( const Vector>& multi_pos, Vector>& offload_scratch, Vector>& results_scratch, @@ -506,907 +473,758 @@ SplineC2COMPTargetT::evaluateVGLMultiPos( const RefVector& dpsi_v_list, const RefVector& d2psi_v_list) const { - const size_t num_pos = psi_v_list.size(); - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - const auto padded_size = myV.size(); - offload_scratch.resize(padded_size * num_pos * SoAFields3D::NUM_FIELDS); - const auto orb_size = psi_v_list[0].get().size(); - // for V(1)G(3)L(1) final result - results_scratch.resize(padded_size * num_pos * 5); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* pos_copy_ptr = multi_pos.data(); - auto* offload_scratch_ptr = offload_scratch.data(); - auto* results_scratch_ptr = results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* mKK_ptr = mKK->data(); - auto* GGt_ptr = GGt_offload->data(); - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - auto* myKcart_ptr = myKcart->data(); - const size_t first_spo_local = this->first_spo; - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD( - "omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \ + const size_t num_pos = psi_v_list.size(); + const size_t ChunkSizePerTeam = 512; + const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + const 
auto padded_size = myV.size(); + offload_scratch.resize(padded_size * num_pos * SoAFields3D::NUM_FIELDS); + const auto orb_size = psi_v_list[0].get().size(); + // for V(1)G(3)L(1) final result + results_scratch.resize(padded_size * num_pos * 5); + + // Ye: need to extract sizes and pointers before entering target region + const auto* spline_ptr = SplineInst->getSplinePtr(); + auto* pos_copy_ptr = multi_pos.data(); + auto* offload_scratch_ptr = offload_scratch.data(); + auto* results_scratch_ptr = results_scratch.data(); + const auto myKcart_padded_size = myKcart->capacity(); + auto* mKK_ptr = mKK->data(); + auto* GGt_ptr = GGt_offload->data(); + auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); + auto* myKcart_ptr = myKcart->data(); + const size_t first_spo_local = this->first_spo; + + { + ScopedTimer offload(offload_timer_); + PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \ map(always, to: pos_copy_ptr[0:num_pos*6]) \ map(always, from: results_scratch_ptr[0:padded_size*num_pos*5])") - for (int iw = 0; iw < num_pos; iw++) - for (int team_id = 0; team_id < NumTeams; team_id++) { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = - omptarget::min(first + ChunkSizePerTeam, padded_size); - - auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + - padded_size * iw * SoAFields3D::NUM_FIELDS; - auto* restrict psi_iw_ptr = - results_scratch_ptr + padded_size * iw * 5; - - int ix, iy, iz; - ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], - d2c[4]; - spline2::computeLocationAndFractional(spline_ptr, - pos_copy_ptr[iw * 6 + 3], pos_copy_ptr[iw * 6 + 4], - pos_copy_ptr[iw * 6 + 5], ix, iy, iz, a, b, c, da, db, dc, - d2a, d2b, d2c); - - const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], - PrimLattice_G_ptr[2], PrimLattice_G_ptr[3], - PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], - PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], - PrimLattice_G_ptr[8]}; - const ST symGGt[6] = {GGt_ptr[0], 
GGt_ptr[1] + GGt_ptr[3], - GGt_ptr[2] + GGt_ptr[6], GGt_ptr[4], - GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) { - spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, - first + index, a, b, c, da, db, dc, d2a, d2b, d2c, - offload_scratch_iw_ptr + first + index, padded_size); - const int output_index = first + index; - offload_scratch_iw_ptr[padded_size * SoAFields3D::LAPL + - output_index] = - SymTrace(offload_scratch_iw_ptr[padded_size * - SoAFields3D::HESS00 + - output_index], - offload_scratch_iw_ptr[padded_size * - SoAFields3D::HESS01 + - output_index], - offload_scratch_iw_ptr[padded_size * - SoAFields3D::HESS02 + - output_index], - offload_scratch_iw_ptr[padded_size * - SoAFields3D::HESS11 + - output_index], - offload_scratch_iw_ptr[padded_size * - SoAFields3D::HESS12 + - output_index], - offload_scratch_iw_ptr[padded_size * - SoAFields3D::HESS22 + - output_index], - symGGt); - } - - const size_t first_cplx = first / 2; - const size_t last_cplx = omptarget::min(last / 2, orb_size); - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2C::assign_vgl(pos_copy_ptr[iw * 6], - pos_copy_ptr[iw * 6 + 1], pos_copy_ptr[iw * 6 + 2], - psi_iw_ptr, padded_size, mKK_ptr, - offload_scratch_iw_ptr, padded_size, G, myKcart_ptr, - myKcart_padded_size, first_spo_local, index); - } - } - - for (int iw = 0; iw < num_pos; ++iw) { - auto* restrict results_iw_ptr = - results_scratch_ptr + padded_size * iw * 5; - ValueVector& psi_v(psi_v_list[iw]); - GradVector& dpsi_v(dpsi_v_list[iw]); - ValueVector& d2psi_v(d2psi_v_list[iw]); - for (size_t i = 0; i < orb_size; i++) { - psi_v[i] = results_iw_ptr[i]; - dpsi_v[i][0] = results_iw_ptr[i + padded_size]; - dpsi_v[i][1] = results_iw_ptr[i + padded_size * 2]; - dpsi_v[i][2] = results_iw_ptr[i + padded_size * 3]; - d2psi_v[i] = results_iw_ptr[i + padded_size * 4]; + for (int iw = 0; iw < num_pos; iw++) 
+ for (int team_id = 0; team_id < NumTeams; team_id++) + { + const size_t first = ChunkSizePerTeam * team_id; + const size_t last = omptarget::min(first + ChunkSizePerTeam, padded_size); + + auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + padded_size * iw * SoAFields3D::NUM_FIELDS; + auto* restrict psi_iw_ptr = results_scratch_ptr + padded_size * iw * 5; + + int ix, iy, iz; + ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; + spline2::computeLocationAndFractional(spline_ptr, pos_copy_ptr[iw * 6 + 3], pos_copy_ptr[iw * 6 + 4], + pos_copy_ptr[iw * 6 + 5], ix, iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c); + + const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2], + PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], + PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]}; + const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6], + GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; + + PRAGMA_OFFLOAD("omp parallel for") + for (int index = 0; index < last - first; index++) + { + spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b, + d2c, offload_scratch_iw_ptr + first + index, padded_size); + const int output_index = first + index; + offload_scratch_iw_ptr[padded_size * SoAFields3D::LAPL + output_index] = + SymTrace(offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS00 + output_index], + offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS01 + output_index], + offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS02 + output_index], + offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS11 + output_index], + offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS12 + output_index], + offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS22 + output_index], symGGt); } + + const size_t first_cplx = first / 2; + const size_t last_cplx = omptarget::min(last / 2, orb_size); + PRAGMA_OFFLOAD("omp parallel 
for") + for (int index = first_cplx; index < last_cplx; index++) + C2C::assign_vgl(pos_copy_ptr[iw * 6], pos_copy_ptr[iw * 6 + 1], pos_copy_ptr[iw * 6 + 2], psi_iw_ptr, + padded_size, mKK_ptr, offload_scratch_iw_ptr, padded_size, G, myKcart_ptr, + myKcart_padded_size, first_spo_local, index); + } + } + + for (int iw = 0; iw < num_pos; ++iw) + { + auto* restrict results_iw_ptr = results_scratch_ptr + padded_size * iw * 5; + ValueVector& psi_v(psi_v_list[iw]); + GradVector& dpsi_v(dpsi_v_list[iw]); + ValueVector& d2psi_v(d2psi_v_list[iw]); + for (size_t i = 0; i < orb_size; i++) + { + psi_v[i] = results_iw_ptr[i]; + dpsi_v[i][0] = results_iw_ptr[i + padded_size]; + dpsi_v[i][1] = results_iw_ptr[i + padded_size * 2]; + dpsi_v[i][2] = results_iw_ptr[i + padded_size * 3]; + d2psi_v[i] = results_iw_ptr[i + padded_size * 4]; } + } } -template -void -SplineC2COMPTargetT::mw_evaluateVGL( - const RefVectorWithLeader>& sa_list, - const RefVectorWithLeader>& P_list, int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const +template +void SplineC2COMPTargetT::mw_evaluateVGL(const RefVectorWithLeader>& sa_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const { - assert(this == &sa_list.getLeader()); - auto& phi_leader = sa_list.template getCastedLeader(); - auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); - auto& mw_pos_copy = mw_mem.mw_pos_copy; - auto& mw_offload_scratch = mw_mem.mw_offload_scratch; - auto& mw_results_scratch = mw_mem.mw_results_scratch; - const int nwalkers = sa_list.size(); - mw_pos_copy.resize(nwalkers * 6); - - // pack particle positions - for (int iw = 0; iw < nwalkers; ++iw) { - const PointType& r = P_list[iw].activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - mw_pos_copy[iw * 6] = r[0]; - mw_pos_copy[iw * 6 + 1] = r[1]; - mw_pos_copy[iw * 6 + 2] = r[2]; - mw_pos_copy[iw * 6 + 3] = 
ru[0]; - mw_pos_copy[iw * 6 + 4] = ru[1]; - mw_pos_copy[iw * 6 + 5] = ru[2]; - } - - phi_leader.evaluateVGLMultiPos(mw_pos_copy, mw_offload_scratch, - mw_results_scratch, psi_v_list, dpsi_v_list, d2psi_v_list); + assert(this == &sa_list.getLeader()); + auto& phi_leader = sa_list.template getCastedLeader(); + auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); + auto& mw_pos_copy = mw_mem.mw_pos_copy; + auto& mw_offload_scratch = mw_mem.mw_offload_scratch; + auto& mw_results_scratch = mw_mem.mw_results_scratch; + const int nwalkers = sa_list.size(); + mw_pos_copy.resize(nwalkers * 6); + + // pack particle positions + for (int iw = 0; iw < nwalkers; ++iw) + { + const PointType& r = P_list[iw].activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); + mw_pos_copy[iw * 6] = r[0]; + mw_pos_copy[iw * 6 + 1] = r[1]; + mw_pos_copy[iw * 6 + 2] = r[2]; + mw_pos_copy[iw * 6 + 3] = ru[0]; + mw_pos_copy[iw * 6 + 4] = ru[1]; + mw_pos_copy[iw * 6 + 5] = ru[2]; + } + + phi_leader.evaluateVGLMultiPos(mw_pos_copy, mw_offload_scratch, mw_results_scratch, psi_v_list, dpsi_v_list, + d2psi_v_list); } -template -void -SplineC2COMPTargetT::mw_evaluateVGLandDetRatioGrads( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, - std::vector& grads) const +template +void SplineC2COMPTargetT::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const { - assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.template getCastedLeader(); - auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); - auto& buffer_H2D = mw_mem.buffer_H2D; - auto& rg_private = mw_mem.rg_private; - auto& mw_offload_scratch = mw_mem.mw_offload_scratch; - auto& mw_results_scratch = 
mw_mem.mw_results_scratch; - const int nwalkers = spo_list.size(); - buffer_H2D.resize(nwalkers, sizeof(ST) * 6 + sizeof(ValueType*)); - - // pack particle positions and invRow pointers. - for (int iw = 0; iw < nwalkers; ++iw) { - const PointType& r = P_list[iw].activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - Vector pos_copy(reinterpret_cast(buffer_H2D[iw]), 6); - - pos_copy[0] = r[0]; - pos_copy[1] = r[1]; - pos_copy[2] = r[2]; - pos_copy[3] = ru[0]; - pos_copy[4] = ru[1]; - pos_copy[5] = ru[2]; - - auto& invRow_ptr = *reinterpret_cast( - buffer_H2D[iw] + sizeof(ST) * 6); - invRow_ptr = invRow_ptr_list[iw]; - } - - const size_t num_pos = nwalkers; - const auto orb_size = phi_vgl_v.size(2); - const auto padded_size = myV.size(); - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - mw_offload_scratch.resize(padded_size * num_pos * SoAFields3D::NUM_FIELDS); - // for V(1)G(3)L(1) final result - mw_results_scratch.resize(padded_size * num_pos * 5); - // per team ratio and grads - rg_private.resize(num_pos, NumTeams * 4); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* buffer_H2D_ptr = buffer_H2D.data(); - auto* offload_scratch_ptr = mw_offload_scratch.data(); - auto* results_scratch_ptr = mw_results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* mKK_ptr = mKK->data(); - auto* GGt_ptr = GGt_offload->data(); - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - auto* myKcart_ptr = myKcart->data(); - auto* phi_vgl_ptr = phi_vgl_v.data(); - auto* rg_private_ptr = rg_private.data(); - const size_t buffer_H2D_stride = buffer_H2D.cols(); - const size_t first_spo_local = this->first_spo; - const size_t phi_vgl_stride = num_pos * orb_size; - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD( - "omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) 
\ + assert(this == &spo_list.getLeader()); + auto& phi_leader = spo_list.template getCastedLeader(); + auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); + auto& buffer_H2D = mw_mem.buffer_H2D; + auto& rg_private = mw_mem.rg_private; + auto& mw_offload_scratch = mw_mem.mw_offload_scratch; + auto& mw_results_scratch = mw_mem.mw_results_scratch; + const int nwalkers = spo_list.size(); + buffer_H2D.resize(nwalkers, sizeof(ST) * 6 + sizeof(ValueType*)); + + // pack particle positions and invRow pointers. + for (int iw = 0; iw < nwalkers; ++iw) + { + const PointType& r = P_list[iw].activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); + Vector pos_copy(reinterpret_cast(buffer_H2D[iw]), 6); + + pos_copy[0] = r[0]; + pos_copy[1] = r[1]; + pos_copy[2] = r[2]; + pos_copy[3] = ru[0]; + pos_copy[4] = ru[1]; + pos_copy[5] = ru[2]; + + auto& invRow_ptr = *reinterpret_cast(buffer_H2D[iw] + sizeof(ST) * 6); + invRow_ptr = invRow_ptr_list[iw]; + } + + const size_t num_pos = nwalkers; + const auto orb_size = phi_vgl_v.size(2); + const auto padded_size = myV.size(); + const size_t ChunkSizePerTeam = 512; + const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + mw_offload_scratch.resize(padded_size * num_pos * SoAFields3D::NUM_FIELDS); + // for V(1)G(3)L(1) final result + mw_results_scratch.resize(padded_size * num_pos * 5); + // per team ratio and grads + rg_private.resize(num_pos, NumTeams * 4); + + // Ye: need to extract sizes and pointers before entering target region + const auto* spline_ptr = SplineInst->getSplinePtr(); + auto* buffer_H2D_ptr = buffer_H2D.data(); + auto* offload_scratch_ptr = mw_offload_scratch.data(); + auto* results_scratch_ptr = mw_results_scratch.data(); + const auto myKcart_padded_size = myKcart->capacity(); + auto* mKK_ptr = mKK->data(); + auto* GGt_ptr = GGt_offload->data(); + auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); + auto* myKcart_ptr = myKcart->data(); + auto* phi_vgl_ptr = phi_vgl_v.data(); + auto* 
rg_private_ptr = rg_private.data(); + const size_t buffer_H2D_stride = buffer_H2D.cols(); + const size_t first_spo_local = this->first_spo; + const size_t phi_vgl_stride = num_pos * orb_size; + + { + ScopedTimer offload(offload_timer_); + PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \ map(always, to: buffer_H2D_ptr[:buffer_H2D.size()]) \ map(always, from: rg_private_ptr[0:rg_private.size()])") - for (int iw = 0; iw < num_pos; iw++) - for (int team_id = 0; team_id < NumTeams; team_id++) { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = - omptarget::min(first + ChunkSizePerTeam, padded_size); - - auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + - padded_size * iw * SoAFields3D::NUM_FIELDS; - auto* restrict psi_iw_ptr = - results_scratch_ptr + padded_size * iw * 5; - const auto* restrict pos_iw_ptr = reinterpret_cast( - buffer_H2D_ptr + buffer_H2D_stride * iw); - const auto* restrict invRow_iw_ptr = - *reinterpret_cast(buffer_H2D_ptr + - buffer_H2D_stride * iw + sizeof(ST) * 6); - - int ix, iy, iz; - ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], - d2c[4]; - spline2::computeLocationAndFractional(spline_ptr, pos_iw_ptr[3], - pos_iw_ptr[4], pos_iw_ptr[5], ix, iy, iz, a, b, c, da, db, - dc, d2a, d2b, d2c); - - const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], - PrimLattice_G_ptr[2], PrimLattice_G_ptr[3], - PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], - PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], - PrimLattice_G_ptr[8]}; - const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], - GGt_ptr[2] + GGt_ptr[6], GGt_ptr[4], - GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) { - spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, - first + index, a, b, c, da, db, dc, d2a, d2b, d2c, - offload_scratch_iw_ptr + first + index, padded_size); - const int output_index = first + index; - 
offload_scratch_iw_ptr[padded_size * SoAFields3D::LAPL + - output_index] = - SymTrace(offload_scratch_iw_ptr[padded_size * - SoAFields3D::HESS00 + - output_index], - offload_scratch_iw_ptr[padded_size * - SoAFields3D::HESS01 + - output_index], - offload_scratch_iw_ptr[padded_size * - SoAFields3D::HESS02 + - output_index], - offload_scratch_iw_ptr[padded_size * - SoAFields3D::HESS11 + - output_index], - offload_scratch_iw_ptr[padded_size * - SoAFields3D::HESS12 + - output_index], - offload_scratch_iw_ptr[padded_size * - SoAFields3D::HESS22 + - output_index], - symGGt); - } - - const size_t first_cplx = first / 2; - const size_t last_cplx = omptarget::min(last / 2, orb_size); - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2C::assign_vgl(pos_iw_ptr[0], pos_iw_ptr[1], pos_iw_ptr[2], - psi_iw_ptr, padded_size, mKK_ptr, - offload_scratch_iw_ptr, padded_size, G, myKcart_ptr, - myKcart_padded_size, first_spo_local, index); - - ValueType* restrict psi = psi_iw_ptr; - ValueType* restrict dpsi_x = psi_iw_ptr + padded_size; - ValueType* restrict dpsi_y = psi_iw_ptr + padded_size * 2; - ValueType* restrict dpsi_z = psi_iw_ptr + padded_size * 3; - ValueType* restrict d2psi = psi_iw_ptr + padded_size * 4; - - ValueType* restrict out_phi = phi_vgl_ptr + iw * orb_size; - ValueType* restrict out_dphi_x = out_phi + phi_vgl_stride; - ValueType* restrict out_dphi_y = out_dphi_x + phi_vgl_stride; - ValueType* restrict out_dphi_z = out_dphi_y + phi_vgl_stride; - ValueType* restrict out_d2phi = out_dphi_z + phi_vgl_stride; - - ValueType ratio(0), grad_x(0), grad_y(0), grad_z(0); - PRAGMA_OFFLOAD("omp parallel for \ - reduction(+: ratio, grad_x, grad_y, grad_z)") - for (size_t j = first_cplx; j < last_cplx; j++) { - const size_t psiIndex = first_spo_local + j; - - out_phi[psiIndex] = psi[psiIndex]; - out_dphi_x[psiIndex] = dpsi_x[psiIndex]; - out_dphi_y[psiIndex] = dpsi_y[psiIndex]; - out_dphi_z[psiIndex] = dpsi_z[psiIndex]; - 
out_d2phi[psiIndex] = d2psi[psiIndex]; - - ratio += psi[psiIndex] * invRow_iw_ptr[psiIndex]; - grad_x += dpsi_x[psiIndex] * invRow_iw_ptr[psiIndex]; - grad_y += dpsi_y[psiIndex] * invRow_iw_ptr[psiIndex]; - grad_z += dpsi_z[psiIndex] * invRow_iw_ptr[psiIndex]; - } - - rg_private_ptr[(iw * NumTeams + team_id) * 4] = ratio; - rg_private_ptr[(iw * NumTeams + team_id) * 4 + 1] = grad_x; - rg_private_ptr[(iw * NumTeams + team_id) * 4 + 2] = grad_y; - rg_private_ptr[(iw * NumTeams + team_id) * 4 + 3] = grad_z; - } - } + for (int iw = 0; iw < num_pos; iw++) + for (int team_id = 0; team_id < NumTeams; team_id++) + { + const size_t first = ChunkSizePerTeam * team_id; + const size_t last = omptarget::min(first + ChunkSizePerTeam, padded_size); + + auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + padded_size * iw * SoAFields3D::NUM_FIELDS; + auto* restrict psi_iw_ptr = results_scratch_ptr + padded_size * iw * 5; + const auto* restrict pos_iw_ptr = reinterpret_cast(buffer_H2D_ptr + buffer_H2D_stride * iw); + const auto* restrict invRow_iw_ptr = + *reinterpret_cast(buffer_H2D_ptr + buffer_H2D_stride * iw + sizeof(ST) * 6); + + int ix, iy, iz; + ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; + spline2::computeLocationAndFractional(spline_ptr, pos_iw_ptr[3], pos_iw_ptr[4], pos_iw_ptr[5], ix, iy, iz, a, b, + c, da, db, dc, d2a, d2b, d2c); + + const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2], + PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], + PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]}; + const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6], + GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; + + PRAGMA_OFFLOAD("omp parallel for") + for (int index = 0; index < last - first; index++) + { + spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b, + d2c, offload_scratch_iw_ptr + first + index, 
padded_size); + const int output_index = first + index; + offload_scratch_iw_ptr[padded_size * SoAFields3D::LAPL + output_index] = + SymTrace(offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS00 + output_index], + offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS01 + output_index], + offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS02 + output_index], + offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS11 + output_index], + offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS12 + output_index], + offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS22 + output_index], symGGt); + } - for (int iw = 0; iw < num_pos; iw++) { - ValueType ratio(0); - for (int team_id = 0; team_id < NumTeams; team_id++) - ratio += rg_private[iw][team_id * 4]; - ratios[iw] = ratio; - - ValueType grad_x(0), grad_y(0), grad_z(0); - for (int team_id = 0; team_id < NumTeams; team_id++) { - grad_x += rg_private[iw][team_id * 4 + 1]; - grad_y += rg_private[iw][team_id * 4 + 2]; - grad_z += rg_private[iw][team_id * 4 + 3]; + const size_t first_cplx = first / 2; + const size_t last_cplx = omptarget::min(last / 2, orb_size); + PRAGMA_OFFLOAD("omp parallel for") + for (int index = first_cplx; index < last_cplx; index++) + C2C::assign_vgl(pos_iw_ptr[0], pos_iw_ptr[1], pos_iw_ptr[2], psi_iw_ptr, padded_size, mKK_ptr, + offload_scratch_iw_ptr, padded_size, G, myKcart_ptr, myKcart_padded_size, first_spo_local, + index); + + ValueType* restrict psi = psi_iw_ptr; + ValueType* restrict dpsi_x = psi_iw_ptr + padded_size; + ValueType* restrict dpsi_y = psi_iw_ptr + padded_size * 2; + ValueType* restrict dpsi_z = psi_iw_ptr + padded_size * 3; + ValueType* restrict d2psi = psi_iw_ptr + padded_size * 4; + + ValueType* restrict out_phi = phi_vgl_ptr + iw * orb_size; + ValueType* restrict out_dphi_x = out_phi + phi_vgl_stride; + ValueType* restrict out_dphi_y = out_dphi_x + phi_vgl_stride; + ValueType* restrict out_dphi_z = out_dphi_y + phi_vgl_stride; + ValueType* restrict out_d2phi = 
out_dphi_z + phi_vgl_stride; + + ValueType ratio(0), grad_x(0), grad_y(0), grad_z(0); + PRAGMA_OFFLOAD("omp parallel for \ + reduction(+: ratio, grad_x, grad_y, grad_z)") + for (size_t j = first_cplx; j < last_cplx; j++) + { + const size_t psiIndex = first_spo_local + j; + + out_phi[psiIndex] = psi[psiIndex]; + out_dphi_x[psiIndex] = dpsi_x[psiIndex]; + out_dphi_y[psiIndex] = dpsi_y[psiIndex]; + out_dphi_z[psiIndex] = dpsi_z[psiIndex]; + out_d2phi[psiIndex] = d2psi[psiIndex]; + + ratio += psi[psiIndex] * invRow_iw_ptr[psiIndex]; + grad_x += dpsi_x[psiIndex] * invRow_iw_ptr[psiIndex]; + grad_y += dpsi_y[psiIndex] * invRow_iw_ptr[psiIndex]; + grad_z += dpsi_z[psiIndex] * invRow_iw_ptr[psiIndex]; } - grads[iw] = GradType{grad_x / ratio, grad_y / ratio, grad_z / ratio}; + + rg_private_ptr[(iw * NumTeams + team_id) * 4] = ratio; + rg_private_ptr[(iw * NumTeams + team_id) * 4 + 1] = grad_x; + rg_private_ptr[(iw * NumTeams + team_id) * 4 + 2] = grad_y; + rg_private_ptr[(iw * NumTeams + team_id) * 4 + 3] = grad_z; + } + } + + for (int iw = 0; iw < num_pos; iw++) + { + ValueType ratio(0); + for (int team_id = 0; team_id < NumTeams; team_id++) + ratio += rg_private[iw][team_id * 4]; + ratios[iw] = ratio; + + ValueType grad_x(0), grad_y(0), grad_z(0); + for (int team_id = 0; team_id < NumTeams; team_id++) + { + grad_x += rg_private[iw][team_id * 4 + 1]; + grad_y += rg_private[iw][team_id * 4 + 2]; + grad_z += rg_private[iw][team_id * 4 + 3]; } + grads[iw] = GradType{grad_x / ratio, grad_y / ratio, grad_z / ratio}; + } } -template -void -SplineC2COMPTargetT::assign_vgh(const PointType& r, ValueVector& psi, - GradVector& dpsi, HessVector& grad_grad_psi, int first, int last) const +template +void SplineC2COMPTargetT::assign_vgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + int first, + int last) const { - // protect last - last = last > this->kPoints.size() ? 
this->kPoints.size() : last; - - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), - g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), - g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart->data(0); - const ST* restrict k1 = myKcart->data(1); - const ST* restrict k2 = myKcart->data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); + // protect last + last = last > this->kPoints.size() ? this->kPoints.size() : last; + + const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), + g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), + g22 = PrimLattice.G(8); + const ST x = r[0], y = r[1], z = r[2]; + + const ST* restrict k0 = myKcart->data(0); + const ST* restrict k1 = myKcart->data(1); + const ST* restrict k2 = myKcart->data(2); + + const ST* restrict g0 = myG.data(0); + const ST* restrict g1 = myG.data(1); + const ST* restrict g2 = myG.data(2); + const ST* restrict h00 = myH.data(0); + const ST* restrict h01 = myH.data(1); + const ST* restrict h02 = myH.data(2); + const ST* restrict h11 = myH.data(3); + const ST* restrict h12 = myH.data(4); + const ST* restrict h22 = myH.data(5); #pragma omp simd - for (size_t j = first; j < last; ++j) { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - // phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - // dot(PrimLattice.G,myG[j]) - const 
ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = j + this->first_spo; - psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); - dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); - dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); - dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - - const ST h_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g00, g01, g02, g00, g01, g02) + - kX * (gX_i + dX_i); - const ST h_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g00, g01, g02, g10, g11, g12) + - kX * (gY_i + dY_i); - const ST h_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g00, g01, g02, g20, g21, g22) + - kX * (gZ_i + dZ_i); - const ST h_yx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g10, g11, g12, g00, g01, g02) + - kY * (gX_i + dX_i); - const ST h_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g10, g11, g12, g10, g11, g12) + - kY * (gY_i + dY_i); - const ST h_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g10, g11, g12, g20, g21, g22) + - kY * (gZ_i + dZ_i); - const ST h_zx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g20, g21, g22, g00, g01, g02) + - kZ * (gX_i + dX_i); - const ST h_zy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], 
h12[jr], - h22[jr], g20, g21, g22, g10, g11, g12) + - kZ * (gY_i + dY_i); - const ST h_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g20, g21, g22, g20, g21, g22) + - kZ * (gZ_i + dZ_i); - - const ST h_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g00, g01, g02, g00, g01, g02) - - kX * (gX_r + dX_r); - const ST h_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g00, g01, g02, g10, g11, g12) - - kX * (gY_r + dY_r); - const ST h_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g00, g01, g02, g20, g21, g22) - - kX * (gZ_r + dZ_r); - const ST h_yx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g10, g11, g12, g00, g01, g02) - - kY * (gX_r + dX_r); - const ST h_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g10, g11, g12, g10, g11, g12) - - kY * (gY_r + dY_r); - const ST h_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g10, g11, g12, g20, g21, g22) - - kY * (gZ_r + dZ_r); - const ST h_zx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g20, g21, g22, g00, g01, g02) - - kZ * (gX_r + dX_r); - const ST h_zy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g20, g21, g22, g10, g11, g12) - - kZ * (gY_r + dY_r); - const ST h_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g20, g21, g22, g20, g21, g22) - - kZ * (gZ_r + dZ_r); - - grad_grad_psi[psiIndex][0] = - ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r); - grad_grad_psi[psiIndex][1] = - ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); - grad_grad_psi[psiIndex][2] = - ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); - grad_grad_psi[psiIndex][3] = - ComplexT(c * h_yx_r - s * h_yx_i, c * h_yx_i + s * h_yx_r); - grad_grad_psi[psiIndex][4] = - ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r); - grad_grad_psi[psiIndex][5] = - ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); - 
grad_grad_psi[psiIndex][6] = - ComplexT(c * h_zx_r - s * h_zx_i, c * h_zx_i + s * h_zx_r); - grad_grad_psi[psiIndex][7] = - ComplexT(c * h_zy_r - s * h_zy_i, c * h_zy_i + s * h_zy_r); - grad_grad_psi[psiIndex][8] = - ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r); - } + for (size_t j = first; j < last; ++j) + { + int jr = j << 1; + int ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; + const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; + const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; + + const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; + const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; + const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + + const size_t psiIndex = j + this->first_spo; + psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); + dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); + dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); + dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); + + const ST h_xx_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i); + const ST h_xy_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i); + const ST h_xz_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i); + 
const ST h_yx_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i); + const ST h_yy_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i); + const ST h_yz_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i); + const ST h_zx_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i); + const ST h_zy_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i); + const ST h_zz_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i); + + const ST h_xx_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r); + const ST h_xy_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r); + const ST h_xz_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r); + const ST h_yx_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r); + const ST h_yy_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r); + const ST h_yz_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r); + const ST h_zx_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r); + const ST h_zy_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r); + const ST h_zz_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * 
(gZ_r + dZ_r); + + grad_grad_psi[psiIndex][0] = ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r); + grad_grad_psi[psiIndex][1] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); + grad_grad_psi[psiIndex][2] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); + grad_grad_psi[psiIndex][3] = ComplexT(c * h_yx_r - s * h_yx_i, c * h_yx_i + s * h_yx_r); + grad_grad_psi[psiIndex][4] = ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r); + grad_grad_psi[psiIndex][5] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); + grad_grad_psi[psiIndex][6] = ComplexT(c * h_zx_r - s * h_zx_i, c * h_zx_i + s * h_zx_r); + grad_grad_psi[psiIndex][7] = ComplexT(c * h_zy_r - s * h_zy_i, c * h_zy_i + s * h_zy_r); + grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r); + } } -template -void -SplineC2COMPTargetT::evaluateVGH(const ParticleSetT& P, - const int iat, ValueVector& psi, GradVector& dpsi, - HessVector& grad_grad_psi) +template +void SplineC2COMPTargetT::evaluateVGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel - { - int first, last; - // Factor of 2 because psi is complex and the spline storage and - // evaluation uses a real type - FairDivideAligned(2 * psi.size(), getAlignment(), - omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vgh( - SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2); - } + { + int first, last; + // Factor of 2 because psi is complex and the spline storage and + // evaluation uses a real type + FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); + + 
spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); + assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2); + } } -template -void -SplineC2COMPTargetT::assign_vghgh(const PointType& r, ValueVector& psi, - GradVector& dpsi, HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, - int first, int last) const +template +void SplineC2COMPTargetT::assign_vghgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi, + int first, + int last) const { - // protect last - last = last < 0 ? - this->kPoints.size() : - (last > this->kPoints.size() ? this->kPoints.size() : last); - - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), - g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), - g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart->data(0); - const ST* restrict k1 = myKcart->data(1); - const ST* restrict k2 = myKcart->data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - - const ST* restrict gh000 = mygH.data(0); - const ST* restrict gh001 = mygH.data(1); - const ST* restrict gh002 = mygH.data(2); - const ST* restrict gh011 = mygH.data(3); - const ST* restrict gh012 = mygH.data(4); - const ST* restrict gh022 = mygH.data(5); - const ST* restrict gh111 = mygH.data(6); - const ST* restrict gh112 = mygH.data(7); - const ST* restrict gh122 = mygH.data(8); - const ST* restrict gh222 = mygH.data(9); + // protect last + last = last < 0 ? this->kPoints.size() : (last > this->kPoints.size() ? 
this->kPoints.size() : last); + + const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), + g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), + g22 = PrimLattice.G(8); + const ST x = r[0], y = r[1], z = r[2]; + + const ST* restrict k0 = myKcart->data(0); + const ST* restrict k1 = myKcart->data(1); + const ST* restrict k2 = myKcart->data(2); + + const ST* restrict g0 = myG.data(0); + const ST* restrict g1 = myG.data(1); + const ST* restrict g2 = myG.data(2); + const ST* restrict h00 = myH.data(0); + const ST* restrict h01 = myH.data(1); + const ST* restrict h02 = myH.data(2); + const ST* restrict h11 = myH.data(3); + const ST* restrict h12 = myH.data(4); + const ST* restrict h22 = myH.data(5); + + const ST* restrict gh000 = mygH.data(0); + const ST* restrict gh001 = mygH.data(1); + const ST* restrict gh002 = mygH.data(2); + const ST* restrict gh011 = mygH.data(3); + const ST* restrict gh012 = mygH.data(4); + const ST* restrict gh022 = mygH.data(5); + const ST* restrict gh111 = mygH.data(6); + const ST* restrict gh112 = mygH.data(7); + const ST* restrict gh122 = mygH.data(8); + const ST* restrict gh222 = mygH.data(9); // SIMD doesn't work quite right yet. Comment out until further debugging. 
#pragma omp simd - for (size_t j = first; j < last; ++j) { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - // phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - // dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = j + this->first_spo; - psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); - dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); - dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); - dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - - // intermediates for computation of hessian. \partial_i \partial_j phi - // in cartesian coordinates. 
- const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g00, g01, g02, g00, g01, g02); - const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g00, g01, g02, g10, g11, g12); - const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g00, g01, g02, g20, g21, g22); - const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g10, g11, g12, g10, g11, g12); - const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g10, g11, g12, g20, g21, g22); - const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g20, g21, g22, g20, g21, g22); - - const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g00, g01, g02, g00, g01, g02); - const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g00, g01, g02, g10, g11, g12); - const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g00, g01, g02, g20, g21, g22); - const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g10, g11, g12, g10, g11, g12); - const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g10, g11, g12, g20, g21, g22); - const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g20, g21, g22, g20, g21, g22); - - const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; - const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; - const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; - const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; - const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; - const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; - - const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; - const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; - const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; - const ST h_yy_i = f_yy_i - 
2 * kY * dY_r - kY * kY * val_i; - const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; - const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; - - grad_grad_psi[psiIndex][0] = - ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r); - grad_grad_psi[psiIndex][1] = - ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); - grad_grad_psi[psiIndex][2] = - ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); - grad_grad_psi[psiIndex][3] = - ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); - grad_grad_psi[psiIndex][4] = - ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r); - grad_grad_psi[psiIndex][5] = - ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); - grad_grad_psi[psiIndex][6] = - ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); - grad_grad_psi[psiIndex][7] = - ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); - grad_grad_psi[psiIndex][8] = - ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r); - - // These are the real and imaginary components of the third SPO - // derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, - // and z, and so on. 
- - const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], 
gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - // Here is where we build up the components of the physical hessian - // gradient, namely, 
d^3/dx^3(e^{-ik*r}\phi(r) - const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - - kX * kX * kX * val_i; - const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + - kX * kX * kX * val_r; - const ST gh_xxy_r = f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; - const ST gh_xxy_i = f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; - const ST gh_xxz_r = f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; - const ST gh_xxz_i = f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; - const ST gh_xyy_r = f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; - const ST gh_xyy_i = f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; - const ST gh_xyz_r = f3_xyz_r + - (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - - (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - - kX * kY * kZ * val_i; - const ST gh_xyz_i = f3_xyz_i - - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - - (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + - kX * kY * kZ * val_r; - const ST gh_xzz_r = f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; - const ST gh_xzz_i = f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; - const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - - kY * kY * kY * val_i; - const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + - kY * kY * kY * val_r; - const ST gh_yyz_r = f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; - const ST gh_yyz_i = f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - - (kY * kY * dZ_i + 2 * kY 
* kZ * dY_i) + kY * kY * kZ * val_r; - const ST gh_yzz_r = f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; - const ST gh_yzz_i = f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; - const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - - kZ * kZ * kZ * val_i; - const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + - kZ * kZ * kZ * val_r; - - grad_grad_grad_psi[psiIndex][0][0] = - ComplexT(c * gh_xxx_r - s * gh_xxx_i, c * gh_xxx_i + s * gh_xxx_r); - grad_grad_grad_psi[psiIndex][0][1] = - ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); - grad_grad_grad_psi[psiIndex][0][2] = - ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); - grad_grad_grad_psi[psiIndex][0][3] = - ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); - grad_grad_grad_psi[psiIndex][0][4] = - ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); - grad_grad_grad_psi[psiIndex][0][5] = - ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][0][6] = - ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); - grad_grad_grad_psi[psiIndex][0][7] = - ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][0][8] = - ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); - - grad_grad_grad_psi[psiIndex][1][0] = - ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); - grad_grad_grad_psi[psiIndex][1][1] = - ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); - grad_grad_grad_psi[psiIndex][1][2] = - ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][1][3] = - ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); - grad_grad_grad_psi[psiIndex][1][4] = - ComplexT(c * gh_yyy_r - s * gh_yyy_i, c * 
gh_yyy_i + s * gh_yyy_r); - grad_grad_grad_psi[psiIndex][1][5] = - ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); - grad_grad_grad_psi[psiIndex][1][6] = - ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][1][7] = - ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); - grad_grad_grad_psi[psiIndex][1][8] = - ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); - - grad_grad_grad_psi[psiIndex][2][0] = - ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); - grad_grad_grad_psi[psiIndex][2][1] = - ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][2][2] = - ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); - grad_grad_grad_psi[psiIndex][2][3] = - ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][2][4] = - ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); - grad_grad_grad_psi[psiIndex][2][5] = - ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); - grad_grad_grad_psi[psiIndex][2][6] = - ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); - grad_grad_grad_psi[psiIndex][2][7] = - ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); - grad_grad_grad_psi[psiIndex][2][8] = - ComplexT(c * gh_zzz_r - s * gh_zzz_i, c * gh_zzz_i + s * gh_zzz_r); - } + for (size_t j = first; j < last; ++j) + { + int jr = j << 1; + int ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; + const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; + const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; + + const ST dX_i = g00 * g0[ji] + g01 * 
g1[ji] + g02 * g2[ji]; + const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; + const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + + const size_t psiIndex = j + this->first_spo; + psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); + dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); + dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); + dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); + + // intermediates for computation of hessian. \partial_i \partial_j phi + // in cartesian coordinates. + const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); + const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); + const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); + const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12); + const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22); + const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22); + + const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02); + const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12); + const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22); + const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12); + const ST f_yz_i = 
v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22); + const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22); + + const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; + const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; + const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; + const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; + const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; + const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; + + const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; + const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; + const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; + const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i; + const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; + const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; + + grad_grad_psi[psiIndex][0] = ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r); + grad_grad_psi[psiIndex][1] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); + grad_grad_psi[psiIndex][2] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); + grad_grad_psi[psiIndex][3] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); + grad_grad_psi[psiIndex][4] = ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r); + grad_grad_psi[psiIndex][5] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); + grad_grad_psi[psiIndex][6] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); + grad_grad_psi[psiIndex][7] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); + grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r); + + // These are the real and imaginary components of the third SPO + // derivative. _xxx denotes + // third derivative w.r.t. 
x, _xyz, a derivative with resepect to x,y, + // and z, and so on. + + const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); + const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); + const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); + const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); + const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); + const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); + const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); + const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); + const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); + const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); + + const 
ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); + const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); + const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); + const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); + const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); + const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); + const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); + const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); + const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); + const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); + + // Here is where we build up the components of the physical hessian + // gradient, 
namely, d^3/dx^3(e^{-ik*r}\phi(r) + const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; + const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; + const ST gh_xxy_r = + f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; + const ST gh_xxy_i = + f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; + const ST gh_xxz_r = + f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; + const ST gh_xxz_i = + f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; + const ST gh_xyy_r = + f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; + const ST gh_xyy_i = + f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; + const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - + (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i; + const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - + (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r; + const ST gh_xzz_r = + f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; + const ST gh_xzz_i = + f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; + const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i; + const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r; + const ST gh_yyz_r = + f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; + const ST gh_yyz_i = + f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * 
dY_i) + kY * kY * kZ * val_r; + const ST gh_yzz_r = + f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; + const ST gh_yzz_i = + f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; + const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i; + const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r; + + grad_grad_grad_psi[psiIndex][0][0] = ComplexT(c * gh_xxx_r - s * gh_xxx_i, c * gh_xxx_i + s * gh_xxx_r); + grad_grad_grad_psi[psiIndex][0][1] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); + grad_grad_grad_psi[psiIndex][0][2] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); + grad_grad_grad_psi[psiIndex][0][3] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); + grad_grad_grad_psi[psiIndex][0][4] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); + grad_grad_grad_psi[psiIndex][0][5] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); + grad_grad_grad_psi[psiIndex][0][6] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); + grad_grad_grad_psi[psiIndex][0][7] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); + grad_grad_grad_psi[psiIndex][0][8] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); + + grad_grad_grad_psi[psiIndex][1][0] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); + grad_grad_grad_psi[psiIndex][1][1] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); + grad_grad_grad_psi[psiIndex][1][2] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); + grad_grad_grad_psi[psiIndex][1][3] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); + grad_grad_grad_psi[psiIndex][1][4] = ComplexT(c * gh_yyy_r - s * gh_yyy_i, c * gh_yyy_i + s * gh_yyy_r); + 
grad_grad_grad_psi[psiIndex][1][5] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); + grad_grad_grad_psi[psiIndex][1][6] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); + grad_grad_grad_psi[psiIndex][1][7] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); + grad_grad_grad_psi[psiIndex][1][8] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); + + grad_grad_grad_psi[psiIndex][2][0] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); + grad_grad_grad_psi[psiIndex][2][1] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); + grad_grad_grad_psi[psiIndex][2][2] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); + grad_grad_grad_psi[psiIndex][2][3] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); + grad_grad_grad_psi[psiIndex][2][4] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); + grad_grad_grad_psi[psiIndex][2][5] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); + grad_grad_grad_psi[psiIndex][2][6] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); + grad_grad_grad_psi[psiIndex][2][7] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); + grad_grad_grad_psi[psiIndex][2][8] = ComplexT(c * gh_zzz_r - s * gh_zzz_i, c * gh_zzz_i + s * gh_zzz_r); + } } -template -void -SplineC2COMPTargetT::evaluateVGHGH(const ParticleSetT& P, - const int iat, ValueVector& psi, GradVector& dpsi, - HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi) +template +void SplineC2COMPTargetT::evaluateVGHGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel - { - int first, last; - 
FairDivideAligned(2 * psi.size(), getAlignment(), - omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vghgh( - SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); - assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, - last / 2); - } + { + int first, last; + FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); + + spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); + assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, last / 2); + } } -template -void -SplineC2COMPTargetT::evaluate_notranspose(const ParticleSetT& P, - int first, int last, ValueMatrix& logdet, GradMatrix& dlogdet, - ValueMatrix& d2logdet) +template +void SplineC2COMPTargetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) { - // chunk the [first, last) loop into blocks to save temporary memory usage - const int block_size = 16; - - // reference vectors refer to the rows of matrices - std::vector multi_psi_v; - std::vector multi_dpsi_v; - std::vector multi_d2psi_v; - RefVector psi_v_list; - RefVector dpsi_v_list; - RefVector d2psi_v_list; - - multi_psi_v.reserve(block_size); - multi_dpsi_v.reserve(block_size); - multi_d2psi_v.reserve(block_size); - psi_v_list.reserve(block_size); - dpsi_v_list.reserve(block_size); - d2psi_v_list.reserve(block_size); - - for (int iat = first, i = 0; iat < last; - iat += block_size, i += block_size) { - const int actual_block_size = std::min(last - iat, block_size); - multi_pos_copy.resize(actual_block_size * 6); - multi_psi_v.clear(); - multi_dpsi_v.clear(); - multi_d2psi_v.clear(); - psi_v_list.clear(); - dpsi_v_list.clear(); - d2psi_v_list.clear(); - - for (int ipos = 0; ipos < actual_block_size; ++ipos) { - // pack particle positions - const PointType& r = P.activeR(iat + ipos); - PointType 
ru(PrimLattice.toUnit_floor(r)); - multi_pos_copy[ipos * 6] = r[0]; - multi_pos_copy[ipos * 6 + 1] = r[1]; - multi_pos_copy[ipos * 6 + 2] = r[2]; - multi_pos_copy[ipos * 6 + 3] = ru[0]; - multi_pos_copy[ipos * 6 + 4] = ru[1]; - multi_pos_copy[ipos * 6 + 5] = ru[2]; - - multi_psi_v.emplace_back(logdet[i + ipos], logdet.cols()); - multi_dpsi_v.emplace_back(dlogdet[i + ipos], dlogdet.cols()); - multi_d2psi_v.emplace_back(d2logdet[i + ipos], d2logdet.cols()); - - psi_v_list.push_back(multi_psi_v[ipos]); - dpsi_v_list.push_back(multi_dpsi_v[ipos]); - d2psi_v_list.push_back(multi_d2psi_v[ipos]); - } - - evaluateVGLMultiPos(multi_pos_copy, offload_scratch, results_scratch, - psi_v_list, dpsi_v_list, d2psi_v_list); + // chunk the [first, last) loop into blocks to save temporary memory usage + const int block_size = 16; + + // reference vectors refer to the rows of matrices + std::vector multi_psi_v; + std::vector multi_dpsi_v; + std::vector multi_d2psi_v; + RefVector psi_v_list; + RefVector dpsi_v_list; + RefVector d2psi_v_list; + + multi_psi_v.reserve(block_size); + multi_dpsi_v.reserve(block_size); + multi_d2psi_v.reserve(block_size); + psi_v_list.reserve(block_size); + dpsi_v_list.reserve(block_size); + d2psi_v_list.reserve(block_size); + + for (int iat = first, i = 0; iat < last; iat += block_size, i += block_size) + { + const int actual_block_size = std::min(last - iat, block_size); + multi_pos_copy.resize(actual_block_size * 6); + multi_psi_v.clear(); + multi_dpsi_v.clear(); + multi_d2psi_v.clear(); + psi_v_list.clear(); + dpsi_v_list.clear(); + d2psi_v_list.clear(); + + for (int ipos = 0; ipos < actual_block_size; ++ipos) + { + // pack particle positions + const PointType& r = P.activeR(iat + ipos); + PointType ru(PrimLattice.toUnit_floor(r)); + multi_pos_copy[ipos * 6] = r[0]; + multi_pos_copy[ipos * 6 + 1] = r[1]; + multi_pos_copy[ipos * 6 + 2] = r[2]; + multi_pos_copy[ipos * 6 + 3] = ru[0]; + multi_pos_copy[ipos * 6 + 4] = ru[1]; + multi_pos_copy[ipos * 6 + 5] = 
ru[2]; + + multi_psi_v.emplace_back(logdet[i + ipos], logdet.cols()); + multi_dpsi_v.emplace_back(dlogdet[i + ipos], dlogdet.cols()); + multi_d2psi_v.emplace_back(d2logdet[i + ipos], d2logdet.cols()); + + psi_v_list.push_back(multi_psi_v[ipos]); + dpsi_v_list.push_back(multi_dpsi_v[ipos]); + d2psi_v_list.push_back(multi_d2psi_v[ipos]); } + + evaluateVGLMultiPos(multi_pos_copy, offload_scratch, results_scratch, psi_v_list, dpsi_v_list, d2psi_v_list); + } } template class SplineC2COMPTargetT>; diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.h index 86c20dfd5da..11dddeef37f 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.h @@ -40,337 +40,289 @@ namespace qmcplusplus * The internal storage of complex spline coefficients uses double sized real * arrays of ST type, aligned and padded. All the output orbitals are complex. */ -template +template class SplineC2COMPTargetT : public BsplineSetT { public: - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = ST; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - // types for evaluation results - using ComplexT = typename BsplineSetT::ValueType; - using typename BsplineSetT::ValueType; - using typename BsplineSetT::RealType; - using typename BsplineSetT::GradType; - using typename BsplineSetT::GGGVector; - using typename BsplineSetT::GradVector; - using typename BsplineSetT::GradMatrix; - using typename BsplineSetT::HessVector; - using typename BsplineSetT::ValueVector; - using typename BsplineSetT::ValueMatrix; - using typename BsplineSetT::OffloadMWVGLArray; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - using ghContainer_type = VectorSoaContainer; - - template - using OffloadVector = 
Vector>; - template - using OffloadPosVector = VectorSoaContainer>; + using SplineType = typename bspline_traits::SplineType; + using BCType = typename bspline_traits::BCType; + using DataType = ST; + using PointType = TinyVector; + using SingleSplineType = UBspline_3d_d; + // types for evaluation results + using ComplexT = typename BsplineSetT::ValueType; + using typename BsplineSetT::ValueType; + using typename BsplineSetT::RealType; + using typename BsplineSetT::GradType; + using typename BsplineSetT::GGGVector; + using typename BsplineSetT::GradVector; + using typename BsplineSetT::GradMatrix; + using typename BsplineSetT::HessVector; + using typename BsplineSetT::ValueVector; + using typename BsplineSetT::ValueMatrix; + using typename BsplineSetT::OffloadMWVGLArray; + + using vContainer_type = Vector>; + using gContainer_type = VectorSoaContainer; + using hContainer_type = VectorSoaContainer; + using ghContainer_type = VectorSoaContainer; + + template + using OffloadVector = Vector>; + template + using OffloadPosVector = VectorSoaContainer>; private: - /// timer for offload portion - NewTimer& offload_timer_; - /// primitive cell - CrystalLattice PrimLattice; - ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to - /// CartesianUnit, e.g. 
Hessian - Tensor GGt; - /// multi bspline set - std::shared_ptr< - MultiBspline, OffloadAllocator>> - SplineInst; - - std::shared_ptr> mKK; - std::shared_ptr> myKcart; - std::shared_ptr> GGt_offload; - std::shared_ptr> PrimLattice_G_offload; - - ResourceHandle> mw_mem_handle_; - - /// team private ratios for reduction, numVP x numTeams - Matrix> ratios_private; - /// offload scratch space, dynamically resized to the maximal need - Vector> offload_scratch; - /// result scratch space, dynamically resized to the maximal need - Vector> results_scratch; - /// psiinv and position scratch space, used to avoid allocation on the fly - /// and faster transfer - Vector> psiinv_pos_copy; - /// position scratch space, used to avoid allocation on the fly and faster - /// transfer - Vector> multi_pos_copy; - - void - evaluateVGLMultiPos( - const Vector>& multi_pos_copy, - Vector>& offload_scratch, - Vector>& results_scratch, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const; + /// timer for offload portion + NewTimer& offload_timer_; + /// primitive cell + CrystalLattice PrimLattice; + ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to + /// CartesianUnit, e.g. 
Hessian + Tensor GGt; + /// multi bspline set + std::shared_ptr, OffloadAllocator>> SplineInst; + + std::shared_ptr> mKK; + std::shared_ptr> myKcart; + std::shared_ptr> GGt_offload; + std::shared_ptr> PrimLattice_G_offload; + + ResourceHandle> mw_mem_handle_; + + /// team private ratios for reduction, numVP x numTeams + Matrix> ratios_private; + /// offload scratch space, dynamically resized to the maximal need + Vector> offload_scratch; + /// result scratch space, dynamically resized to the maximal need + Vector> results_scratch; + /// psiinv and position scratch space, used to avoid allocation on the fly + /// and faster transfer + Vector> psiinv_pos_copy; + /// position scratch space, used to avoid allocation on the fly and faster + /// transfer + Vector> multi_pos_copy; + + void evaluateVGLMultiPos(const Vector>& multi_pos_copy, + Vector>& offload_scratch, + Vector>& results_scratch, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const; protected: - /// intermediate result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; + /// intermediate result vectors + vContainer_type myV; + vContainer_type myL; + gContainer_type myG; + hContainer_type myH; + ghContainer_type mygH; public: - SplineC2COMPTargetT(const std::string& my_name) : - BsplineSetT(my_name), - offload_timer_( - createGlobalTimer("SplineC2COMPTarget::offload", timer_level_fine)), + SplineC2COMPTargetT(const std::string& my_name) + : BsplineSetT(my_name), + offload_timer_(createGlobalTimer("SplineC2COMPTarget::offload", timer_level_fine)), GGt_offload(std::make_shared>(9)), PrimLattice_G_offload(std::make_shared>(9)) - { - } - - SplineC2COMPTargetT(const SplineC2COMPTargetT& in); - - virtual std::string - getClassName() const override - { - return "SplineC2COMPTarget"; - } - virtual std::string - getKeyword() const override - { - return "SplineC2C"; - } - bool - isComplex() const 
override - { - return true; - }; - virtual bool - isOMPoffload() const override - { - return true; - } - - void - createResource(ResourceCollection& collection) const override - { - auto resource_index = collection.addResource( - std::make_unique>()); - } - - void - acquireResource(ResourceCollection& collection, - const RefVectorWithLeader>& spo_list) const override - { - assert(this == &spo_list.getLeader()); - auto& phi_leader = - spo_list.template getCastedLeader(); - phi_leader.mw_mem_handle_ = - collection - .lendResource>(); - } - - void - releaseResource(ResourceCollection& collection, - const RefVectorWithLeader>& spo_list) const override - { - assert(this == &spo_list.getLeader()); - auto& phi_leader = - spo_list.template getCastedLeader(); - collection.takebackResource(phi_leader.mw_mem_handle_); - } - - std::unique_ptr> - makeClone() const override - { - return std::make_unique(*this); - } - - inline void - resizeStorage(size_t n, size_t nvals) - { - this->init_base(n); - size_t npad = getAlignedSize(2 * n); - myV.resize(npad); - myG.resize(npad); - myL.resize(npad); - myH.resize(npad); - mygH.resize(npad); - } - - void - bcast_tables(Communicate* comm) - { - chunked_bcast(comm, SplineInst->getSplinePtr()); - } - - void - gather_tables(Communicate* comm) - { - if (comm->size() == 1) - return; - const int Nbands = this->kPoints.size(); - const int Nbandgroups = comm->size(); - this->offset.resize(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, this->offset); - - for (size_t ib = 0; ib < this->offset.size(); ib++) - this->offset[ib] *= 2; - gatherv(comm, SplineInst->getSplinePtr(), - SplineInst->getSplinePtr()->z_stride, this->offset); - } - - template - void - create_spline(GT& xyz_g, BCT& xyz_bc) - { - resize_kpoints(); - SplineInst = std::make_shared, - OffloadAllocator>>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - - app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) - << " MB allocated " - << "for the coefficients in 3D 
spline orbital representation" - << std::endl; - } - - /// this routine can not be called from threaded region - void - finalizeConstruction() override - { - // map the SplineInst->getSplinePtr() structure to GPU - auto* MultiSpline = SplineInst->getSplinePtr(); - auto* restrict coefs = MultiSpline->coefs; - // attach pointers on the device to achieve deep copy - PRAGMA_OFFLOAD("omp target \ + {} + + SplineC2COMPTargetT(const SplineC2COMPTargetT& in); + + virtual std::string getClassName() const override { return "SplineC2COMPTarget"; } + virtual std::string getKeyword() const override { return "SplineC2C"; } + bool isComplex() const override { return true; }; + virtual bool isOMPoffload() const override { return true; } + + void createResource(ResourceCollection& collection) const override + { + auto resource_index = collection.addResource(std::make_unique>()); + } + + void acquireResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const override + { + assert(this == &spo_list.getLeader()); + auto& phi_leader = spo_list.template getCastedLeader(); + phi_leader.mw_mem_handle_ = collection.lendResource>(); + } + + void releaseResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const override + { + assert(this == &spo_list.getLeader()); + auto& phi_leader = spo_list.template getCastedLeader(); + collection.takebackResource(phi_leader.mw_mem_handle_); + } + + std::unique_ptr> makeClone() const override { return std::make_unique(*this); } + + inline void resizeStorage(size_t n, size_t nvals) + { + this->init_base(n); + size_t npad = getAlignedSize(2 * n); + myV.resize(npad); + myG.resize(npad); + myL.resize(npad); + myH.resize(npad); + mygH.resize(npad); + } + + void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } + + void gather_tables(Communicate* comm) + { + if (comm->size() == 1) + return; + const int Nbands = this->kPoints.size(); + const int Nbandgroups = comm->size(); + 
this->offset.resize(Nbandgroups + 1, 0); + FairDivideLow(Nbands, Nbandgroups, this->offset); + + for (size_t ib = 0; ib < this->offset.size(); ib++) + this->offset[ib] *= 2; + gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, this->offset); + } + + template + void create_spline(GT& xyz_g, BCT& xyz_bc) + { + resize_kpoints(); + SplineInst = std::make_shared, OffloadAllocator>>(); + SplineInst->create(xyz_g, xyz_bc, myV.size()); + + app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " + << "for the coefficients in 3D spline orbital representation" << std::endl; + } + + /// this routine can not be called from threaded region + void finalizeConstruction() override + { + // map the SplineInst->getSplinePtr() structure to GPU + auto* MultiSpline = SplineInst->getSplinePtr(); + auto* restrict coefs = MultiSpline->coefs; + // attach pointers on the device to achieve deep copy + PRAGMA_OFFLOAD("omp target \ map(always, to: MultiSpline[0:1], \ coefs[0:MultiSpline->coefs_size])") - { - MultiSpline->coefs = coefs; - } - - // transfer static data to GPU - auto* mKK_ptr = mKK->data(); - PRAGMA_OFFLOAD("omp target update to(mKK_ptr[0:mKK->size()])") - auto* myKcart_ptr = myKcart->data(); - PRAGMA_OFFLOAD( - "omp target update to(myKcart_ptr[0:myKcart->capacity()*3])") - for (size_t i = 0; i < 9; i++) { - (*GGt_offload)[i] = GGt[i]; - (*PrimLattice_G_offload)[i] = PrimLattice.G[i]; - } - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - PRAGMA_OFFLOAD("omp target update to(PrimLattice_G_ptr[0:9])") - auto* GGt_ptr = GGt_offload->data(); - PRAGMA_OFFLOAD("omp target update to(GGt_ptr[0:9])") + { + MultiSpline->coefs = coefs; } - inline void - flush_zero() + // transfer static data to GPU + auto* mKK_ptr = mKK->data(); + PRAGMA_OFFLOAD("omp target update to(mKK_ptr[0:mKK->size()])") + auto* myKcart_ptr = myKcart->data(); + PRAGMA_OFFLOAD("omp target update to(myKcart_ptr[0:myKcart->capacity()*3])") + for (size_t i 
= 0; i < 9; i++) { - SplineInst->flush_zero(); + (*GGt_offload)[i] = GGt[i]; + (*PrimLattice_G_offload)[i] = PrimLattice.G[i]; } - - /** remap kPoints to pack the double copy */ - inline void - resize_kpoints() + auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); + PRAGMA_OFFLOAD("omp target update to(PrimLattice_G_ptr[0:9])") + auto* GGt_ptr = GGt_offload->data(); + PRAGMA_OFFLOAD("omp target update to(GGt_ptr[0:9])") + } + + inline void flush_zero() { SplineInst->flush_zero(); } + + /** remap kPoints to pack the double copy */ + inline void resize_kpoints() + { + const size_t nk = this->kPoints.size(); + mKK = std::make_shared>(nk); + myKcart = std::make_shared>(nk); + for (size_t i = 0; i < nk; ++i) { - const size_t nk = this->kPoints.size(); - mKK = std::make_shared>(nk); - myKcart = std::make_shared>(nk); - for (size_t i = 0; i < nk; ++i) { - (*mKK)[i] = -dot(this->kPoints[i], this->kPoints[i]); - (*myKcart)(i) = this->kPoints[i]; - } + (*mKK)[i] = -dot(this->kPoints[i], this->kPoints[i]); + (*myKcart)(i) = this->kPoints[i]; } + } - void - set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, - int twist, int ispline, int level); + void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); - bool - read_splines(hdf_archive& h5f); + bool read_splines(hdf_archive& h5f); - bool - write_splines(hdf_archive& h5f); + bool write_splines(hdf_archive& h5f); - void - assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, - int first, int last) const; + void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const; - virtual void - evaluateValue( - const ParticleSetT& P, const int iat, ValueVector& psi) override; + virtual void evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) override; - virtual void - evaluateDetRatios(const VirtualParticleSetT& VP, ValueVector& psi, - const ValueVector& psiinv, std::vector& ratios) 
override; + virtual void evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) override; - virtual void - mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const override; + virtual void mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const override; - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in + /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in * cartesian */ - void - assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, - ValueVector& d2psi); - - virtual void - evaluateVGL(const ParticleSetT& P, const int iat, ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi) override; - - virtual void - mw_evaluateVGL(const RefVectorWithLeader>& sa_list, - const RefVectorWithLeader>& P_list, int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const override; - - virtual void - mw_evaluateVGLandDetRatioGrads( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, - std::vector& grads) const override; - - void - assign_vgh(const PointType& r, ValueVector& psi, GradVector& dpsi, - HessVector& grad_grad_psi, int first, int last) const; - - virtual void - evaluateVGH(const ParticleSetT& P, const int iat, ValueVector& psi, - GradVector& dpsi, HessVector& grad_grad_psi) override; - - void - assign_vghgh(const PointType& r, ValueVector& psi, GradVector& dpsi, - HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, int first = 0, - int last = -1) const; - - virtual void - 
evaluateVGHGH(const ParticleSetT& P, const int iat, ValueVector& psi, - GradVector& dpsi, HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - virtual void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - - template - friend class SplineSetReaderT; - template - friend class BsplineReaderBaseT; + void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); + + virtual void evaluateVGL(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) override; + + virtual void mw_evaluateVGL(const RefVectorWithLeader>& sa_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const override; + + virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const override; + + void assign_vgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + int first, + int last) const; + + virtual void evaluateVGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) override; + + void assign_vghgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi, + int first = 0, + int last = -1) const; + + virtual void evaluateVGHGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) override; + + virtual void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) override; + + template + friend 
class SplineSetReaderT; + template + friend class BsplineReaderBaseT; }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp index ce4855d11bc..dc68edbb82d 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp @@ -33,26 +33,26 @@ inline void SplineC2CT::set_spline(SingleSplineType* spline_r, int ispline, int level) { - SplineInst->copy_spline(spline_r, 2 * ispline); - SplineInst->copy_spline(spline_i, 2 * ispline + 1); + SplineInst->copy_spline(spline_r, 2 * ispline); + SplineInst->copy_spline(spline_i, 2 * ispline + 1); } template bool SplineC2CT::read_splines(hdf_archive& h5f) { - std::ostringstream o; - o << "spline_" << this->MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); + std::ostringstream o; + o << "spline_" << this->MyIndex; + einspline_engine bigtable(SplineInst->getSplinePtr()); + return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); } template bool SplineC2CT::write_splines(hdf_archive& h5f) { - std::ostringstream o; - o << "spline_" << this->MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); + std::ostringstream o; + o << "spline_" << this->MyIndex; + einspline_engine bigtable(SplineInst->getSplinePtr()); + return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); } template @@ -62,7 +62,7 @@ void SplineC2CT::storeParamsBeforeRotation() const auto coefs_tot_size = spline_ptr->coefs_size; coef_copy_ = std::make_shared>(coefs_tot_size); - std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin()); + std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin()); } /* @@ -190,8 +190,8 @@ inline void SplineC2CT::assign_v(const PointType& r, template void SplineC2CT::evaluateValue(const ParticleSetT& P, 
const int iat, ValueVector& psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel { @@ -211,46 +211,44 @@ void SplineC2CT::evaluateDetRatios(const VirtualParticleSetT& VP, const ValueVector& psiinv, std::vector& ratios) { - const bool need_resize = ratios_private.rows() < VP.getTotalNum(); + const bool need_resize = ratios_private.rows() < VP.getTotalNum(); #pragma omp parallel + { + int tid = omp_get_thread_num(); + // initialize thread private ratios + if (need_resize) { - int tid = omp_get_thread_num(); - // initialize thread private ratios - if (need_resize) - { - if (tid == 0) // just like #pragma omp master, but one fewer call to - // the runtime - ratios_private.resize(VP.getTotalNum(), omp_get_num_threads()); + if (tid == 0) // just like #pragma omp master, but one fewer call to + // the runtime + ratios_private.resize(VP.getTotalNum(), omp_get_num_threads()); #pragma omp barrier - } - int first, last; - // Factor of 2 because psi is complex and the spline storage and - // evaluation uses a real type - FairDivideAligned(2 * psi.size(), getAlignment(), - omp_get_num_threads(), tid, first, last); - const int first_cplx = first / 2; - const int last_cplx = - this->kPoints.size() < last / 2 ? 
this->kPoints.size() : last / 2; - - for (int iat = 0; iat < VP.getTotalNum(); ++iat) { - const PointType& r = VP.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - - spline2::evaluate3d( - SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(r, myV, psi, first_cplx, last_cplx); - ratios_private[iat][tid] = simd::dot(psi.data() + first_cplx, - psiinv.data() + first_cplx, last_cplx - first_cplx); - } } + int first, last; + // Factor of 2 because psi is complex and the spline storage and + // evaluation uses a real type + FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), tid, first, last); + const int first_cplx = first / 2; + const int last_cplx = this->kPoints.size() < last / 2 ? this->kPoints.size() : last / 2; - // do the reduction manually - for (int iat = 0; iat < VP.getTotalNum(); ++iat) { - ratios[iat] = ComplexT(0); - for (int tid = 0; tid < ratios_private.cols(); tid++) - ratios[iat] += ratios_private[iat][tid]; + for (int iat = 0; iat < VP.getTotalNum(); ++iat) + { + const PointType& r = VP.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); + + spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); + assign_v(r, myV, psi, first_cplx, last_cplx); + ratios_private[iat][tid] = simd::dot(psi.data() + first_cplx, psiinv.data() + first_cplx, last_cplx - first_cplx); } + } + + // do the reduction manually + for (int iat = 0; iat < VP.getTotalNum(); ++iat) + { + ratios[iat] = ComplexT(0); + for (int tid = 0; tid < ratios_private.cols(); tid++) + ratios[iat] += ratios_private[iat][tid]; + } } /** assign_vgl @@ -354,7 +352,7 @@ inline void SplineC2CT::assign_vgl_from_l(const PointType& r, const ST* restrict g1 = myG.data(1); const ST* restrict g2 = myG.data(2); - const size_t N = this->last_spo - this->first_spo; + const size_t N = this->last_spo - this->first_spo; #pragma omp simd for (size_t j = 0; j < N; ++j) { @@ -407,8 +405,8 @@ void SplineC2CT::evaluateVGL(const ParticleSetT& P, GradVector& 
dpsi, ValueVector& d2psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel { @@ -548,8 +546,8 @@ void SplineC2CT::evaluateVGH(const ParticleSetT& P, GradVector& dpsi, HessVector& grad_grad_psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel { @@ -809,8 +807,8 @@ void SplineC2CT::evaluateVGHGH(const ParticleSetT& P, HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel { int first, last; diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h index e48a285ef1f..a4065d74c26 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h @@ -35,98 +35,75 @@ namespace qmcplusplus * The internal storage of complex spline coefficients uses double sized real * arrays of ST type, aligned and padded. All the output orbitals are complex. 
*/ -template +template class SplineC2CT : public BsplineSetT { public: - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = ST; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - - // types for evaluation results - using ComplexT = typename BsplineSetT::ValueType; - using typename BsplineSetT::IndexType; - using typename BsplineSetT::ValueType; - using typename BsplineSetT::RealType; - using typename BsplineSetT::GGGVector; - using typename BsplineSetT::GradVector; - using typename BsplineSetT::HessVector; - using typename BsplineSetT::ValueVector; - using typename BsplineSetT::ValueMatrix; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - using ghContainer_type = VectorSoaContainer; + using SplineType = typename bspline_traits::SplineType; + using BCType = typename bspline_traits::BCType; + using DataType = ST; + using PointType = TinyVector; + using SingleSplineType = UBspline_3d_d; + + // types for evaluation results + using ComplexT = typename BsplineSetT::ValueType; + using typename BsplineSetT::IndexType; + using typename BsplineSetT::ValueType; + using typename BsplineSetT::RealType; + using typename BsplineSetT::GGGVector; + using typename BsplineSetT::GradVector; + using typename BsplineSetT::HessVector; + using typename BsplineSetT::ValueVector; + using typename BsplineSetT::ValueMatrix; + + using vContainer_type = Vector>; + using gContainer_type = VectorSoaContainer; + using hContainer_type = VectorSoaContainer; + using ghContainer_type = VectorSoaContainer; private: - /// primitive cell - CrystalLattice PrimLattice; - ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to - ///CartesianUnit, e.g. 
Hessian - Tensor GGt; - /// multi bspline set - std::shared_ptr> SplineInst; + /// primitive cell + CrystalLattice PrimLattice; + ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to + ///CartesianUnit, e.g. Hessian + Tensor GGt; + /// multi bspline set + std::shared_ptr> SplineInst; - /// Copy of original splines for orbital rotation - std::shared_ptr> coef_copy_; + /// Copy of original splines for orbital rotation + std::shared_ptr> coef_copy_; - vContainer_type mKK; - VectorSoaContainer myKcart; + vContainer_type mKK; + VectorSoaContainer myKcart; - /// thread private ratios for reduction when using nested threading, numVP x - /// numThread - Matrix ratios_private; + /// thread private ratios for reduction when using nested threading, numVP x + /// numThread + Matrix ratios_private; protected: - /// intermediate result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; + /// intermediate result vectors + vContainer_type myV; + vContainer_type myL; + gContainer_type myG; + hContainer_type myH; + ghContainer_type mygH; public: - SplineC2CT(const std::string& my_name) : BsplineSetT(my_name) - { - } + SplineC2CT(const std::string& my_name) : BsplineSetT(my_name) {} - SplineC2CT(const SplineC2CT& in); - virtual std::string - getClassName() const override - { - return "SplineC2C"; - } - virtual std::string - getKeyword() const override - { - return "SplineC2C"; - } - bool - isComplex() const override - { - return true; - }; + SplineC2CT(const SplineC2CT& in); + virtual std::string getClassName() const override { return "SplineC2C"; } + virtual std::string getKeyword() const override { return "SplineC2C"; } + bool isComplex() const override { return true; }; - std::unique_ptr> - makeClone() const override - { - return std::make_unique(*this); - } + std::unique_ptr> makeClone() const override { return std::make_unique(*this); } - bool - isRotationSupported() const override - { - return 
true; - } + bool isRotationSupported() const override { return true; } - /// Store an original copy of the spline coefficients for orbital rotation - void - storeParamsBeforeRotation() override; + /// Store an original copy of the spline coefficients for orbital rotation + void storeParamsBeforeRotation() override; - /* + /* Implements orbital rotations via [1,2]. Should be called by RotatedSPOs::apply_rotation() This implementation requires that NSPOs > Nelec. In other words, @@ -136,135 +113,122 @@ class SplineC2CT : public BsplineSetT [2] Toulouse & Umrigar, JCP 126, (2007) [3] Townsend et al., PRB 102, (2020) */ - void - applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) override; - - inline void - resizeStorage(size_t n, size_t nvals) - { - this->init_base(n); - size_t npad = getAlignedSize(2 * n); - myV.resize(npad); - myG.resize(npad); - myL.resize(npad); - myH.resize(npad); - mygH.resize(npad); - } - - void - bcast_tables(Communicate* comm) - { - chunked_bcast(comm, SplineInst->getSplinePtr()); - } - - void - gather_tables(Communicate* comm) - { - if (comm->size() == 1) - return; - const int Nbands = this->kPoints.size(); - const int Nbandgroups = comm->size(); - this->offset.resize(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, this->offset); - for (size_t ib = 0; ib < this->offset.size(); ib++) - this->offset[ib] *= 2; - gatherv(comm, SplineInst->getSplinePtr(), - SplineInst->getSplinePtr()->z_stride, this->offset); - } - - template - void - create_spline(GT& xyz_g, BCT& xyz_bc) - { - resize_kpoints(); - SplineInst = std::make_shared>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) - << " MB allocated " - << "for the coefficients in 3D spline orbital representation" - << std::endl; - } - - inline void - flush_zero() - { - SplineInst->flush_zero(); - } - - /** remap kPoints to pack the double copy */ - inline void - resize_kpoints() + void applyRotation(const 
ValueMatrix& rot_mat, bool use_stored_copy) override; + + inline void resizeStorage(size_t n, size_t nvals) + { + this->init_base(n); + size_t npad = getAlignedSize(2 * n); + myV.resize(npad); + myG.resize(npad); + myL.resize(npad); + myH.resize(npad); + mygH.resize(npad); + } + + void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } + + void gather_tables(Communicate* comm) + { + if (comm->size() == 1) + return; + const int Nbands = this->kPoints.size(); + const int Nbandgroups = comm->size(); + this->offset.resize(Nbandgroups + 1, 0); + FairDivideLow(Nbands, Nbandgroups, this->offset); + for (size_t ib = 0; ib < this->offset.size(); ib++) + this->offset[ib] *= 2; + gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, this->offset); + } + + template + void create_spline(GT& xyz_g, BCT& xyz_bc) + { + resize_kpoints(); + SplineInst = std::make_shared>(); + SplineInst->create(xyz_g, xyz_bc, myV.size()); + app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " + << "for the coefficients in 3D spline orbital representation" << std::endl; + } + + inline void flush_zero() { SplineInst->flush_zero(); } + + /** remap kPoints to pack the double copy */ + inline void resize_kpoints() + { + const size_t nk = this->kPoints.size(); + mKK.resize(nk); + myKcart.resize(nk); + for (size_t i = 0; i < nk; ++i) { - const size_t nk = this->kPoints.size(); - mKK.resize(nk); - myKcart.resize(nk); - for (size_t i = 0; i < nk; ++i) { - mKK[i] = -dot(this->kPoints[i], this->kPoints[i]); - myKcart(i) = this->kPoints[i]; - } + mKK[i] = -dot(this->kPoints[i], this->kPoints[i]); + myKcart(i) = this->kPoints[i]; } + } - void - set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, - int twist, int ispline, int level); + void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); - bool - read_splines(hdf_archive& h5f); + bool 
read_splines(hdf_archive& h5f); - bool - write_splines(hdf_archive& h5f); + bool write_splines(hdf_archive& h5f); - void - assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, - int first, int last) const; + void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const; - void - evaluateValue( - const ParticleSetT& P, const int iat, ValueVector& psi) override; + void evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) override; - void - evaluateDetRatios(const VirtualParticleSetT& VP, ValueVector& psi, - const ValueVector& psiinv, std::vector& ratios) override; + void evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) override; - /** assign_vgl + /** assign_vgl */ - void - assign_vgl(const PointType& r, ValueVector& psi, GradVector& dpsi, - ValueVector& d2psi, int first, int last) const; + void assign_vgl(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last) + const; - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in + /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in * cartesian */ - void - assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, - ValueVector& d2psi); - - void - evaluateVGL(const ParticleSetT& P, const int iat, ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi) override; - - void - assign_vgh(const PointType& r, ValueVector& psi, GradVector& dpsi, - HessVector& grad_grad_psi, int first, int last) const; - - void - evaluateVGH(const ParticleSetT& P, const int iat, ValueVector& psi, - GradVector& dpsi, HessVector& grad_grad_psi) override; - - void - assign_vghgh(const PointType& r, ValueVector& psi, GradVector& dpsi, - HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, int first = 0, - int last = -1) const; - - void - evaluateVGHGH(const ParticleSetT& P, const int iat, 
ValueVector& psi, - GradVector& dpsi, HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - template - friend class SplineSetReaderT; - template - friend class BsplineReaderBaseT; + void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); + + void evaluateVGL(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) override; + + void assign_vgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + int first, + int last) const; + + void evaluateVGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) override; + + void assign_vghgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi, + int first = 0, + int last = -1) const; + + void evaluateVGHGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) override; + + template + friend class SplineSetReaderT; + template + friend class BsplineReaderBaseT; }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.cpp index 1e3e02cd6af..8e6a4dd7bf8 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.cpp @@ -19,1911 +19,1694 @@ namespace qmcplusplus { -template -SplineC2ROMPTargetT::SplineC2ROMPTargetT( - const SplineC2ROMPTargetT& in) = default; - -template -inline void -SplineC2ROMPTargetT::set_spline(SingleSplineType* spline_r, - SingleSplineType* spline_i, int twist, int ispline, int level) +template +SplineC2ROMPTargetT::SplineC2ROMPTargetT(const SplineC2ROMPTargetT& in) = default; + +template +inline void SplineC2ROMPTargetT::set_spline(SingleSplineType* spline_r, + SingleSplineType* 
spline_i, + int twist, + int ispline, + int level) { - SplineInst->copy_spline(spline_r, 2 * ispline); - SplineInst->copy_spline(spline_i, 2 * ispline + 1); + SplineInst->copy_spline(spline_r, 2 * ispline); + SplineInst->copy_spline(spline_i, 2 * ispline + 1); } -template -bool -SplineC2ROMPTargetT::read_splines(hdf_archive& h5f) +template +bool SplineC2ROMPTargetT::read_splines(hdf_archive& h5f) { - std::ostringstream o; - o << "spline_" << this->MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); + std::ostringstream o; + o << "spline_" << this->MyIndex; + einspline_engine bigtable(SplineInst->getSplinePtr()); + return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); } -template -bool -SplineC2ROMPTargetT::write_splines(hdf_archive& h5f) +template +bool SplineC2ROMPTargetT::write_splines(hdf_archive& h5f) { - std::ostringstream o; - o << "spline_" << this->MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); + std::ostringstream o; + o << "spline_" << this->MyIndex; + einspline_engine bigtable(SplineInst->getSplinePtr()); + return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); } -template -inline void -SplineC2ROMPTargetT::assign_v(const PointType& r, - const vContainer_type& myV, ValueVector& psi, int first, int last) const +template +inline void SplineC2ROMPTargetT::assign_v(const PointType& r, + const vContainer_type& myV, + ValueVector& psi, + int first, + int last) const { - // protect last - last = last > this->kPoints.size() ? this->kPoints.size() : last; + // protect last + last = last > this->kPoints.size() ? 
this->kPoints.size() : last; - const ST x = r[0], y = r[1], z = r[2]; - const ST* restrict kx = myKcart->data(0); - const ST* restrict ky = myKcart->data(1); - const ST* restrict kz = myKcart->data(2); + const ST x = r[0], y = r[1], z = r[2]; + const ST* restrict kx = myKcart->data(0); + const ST* restrict ky = myKcart->data(1); + const ST* restrict kz = myKcart->data(2); - TT* restrict psi_s = psi.data() + this->first_spo; + TT* restrict psi_s = psi.data() + this->first_spo; #pragma omp simd - for (size_t j = first; j < std::min(nComplexBands, last); j++) { - ST s, c; - const size_t jr = j << 1; - const size_t ji = jr + 1; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); - psi_s[jr] = val_r * c - val_i * s; - psi_s[ji] = val_i * c + val_r * s; - } - - psi_s += nComplexBands; + for (size_t j = first; j < std::min(nComplexBands, last); j++) + { + ST s, c; + const size_t jr = j << 1; + const size_t ji = jr + 1; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); + psi_s[jr] = val_r * c - val_i * s; + psi_s[ji] = val_i * c + val_r * s; + } + + psi_s += nComplexBands; #pragma omp simd - for (size_t j = std::max(nComplexBands, first); j < last; j++) { - ST s, c; - const ST val_r = myV[2 * j]; - const ST val_i = myV[2 * j + 1]; - omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); - psi_s[j] = val_r * c - val_i * s; - } + for (size_t j = std::max(nComplexBands, first); j < last; j++) + { + ST s, c; + const ST val_r = myV[2 * j]; + const ST val_i = myV[2 * j + 1]; + omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); + psi_s[j] = val_r * c - val_i * s; + } } -template -void -SplineC2ROMPTargetT::evaluateValue( - const ParticleSetT& P, const int iat, ValueVector& psi) +template +void SplineC2ROMPTargetT::evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) { - const PointType& r = 
P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); - if (true) { + if (true) + { #pragma omp parallel - { - int first, last; - FairDivideAligned(myV.size(), getAlignment(), - omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d( - SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(r, myV, psi, first / 2, last / 2); - } - } - else { - const size_t ChunkSizePerTeam = 512; - const int NumTeams = - (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - - const auto spline_padded_size = myV.size(); - const auto sposet_padded_size = getAlignedSize(this->OrbitalSetSize); - offload_scratch.resize(spline_padded_size); - results_scratch.resize(sposet_padded_size); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* offload_scratch_ptr = offload_scratch.data(); - auto* results_scratch_ptr = results_scratch.data(); - auto* psi_ptr = psi.data(); - const auto x = r[0], y = r[1], z = r[2]; - const auto rux = ru[0], ruy = ru[1], ruz = ru[2]; - const auto myKcart_padded_size = myKcart->capacity(); - auto* myKcart_ptr = myKcart->data(); - const size_t first_spo_local = this->first_spo; - const size_t nComplexBands_local = nComplexBands; - const auto requested_orb_size = psi.size(); - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \ - map(always, from: results_scratch_ptr[0:sposet_padded_size])") - for (int team_id = 0; team_id < NumTeams; team_id++) { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min( - first + ChunkSizePerTeam, spline_padded_size); - - int ix, iy, iz; - ST a[4], b[4], c[4]; - spline2::computeLocationAndFractional( - spline_ptr, rux, ruy, ruz, ix, iy, iz, a, b, c); - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; 
index++) - spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, - first + index, a, b, c, - offload_scratch_ptr + first + index); - const size_t first_cplx = first / 2; - const size_t last_cplx = last / 2; - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2R::assign_v(x, y, z, results_scratch_ptr, - offload_scratch_ptr, myKcart_ptr, myKcart_padded_size, - first_spo_local, nComplexBands_local, index); - } + { + int first, last; + FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - for (size_t i = 0; i < requested_orb_size; i++) - psi[i] = results_scratch[i]; - } + spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); + assign_v(r, myV, psi, first / 2, last / 2); } -} - -template -void -SplineC2ROMPTargetT::evaluateDetRatios( - const VirtualParticleSetT& VP, ValueVector& psi, - const ValueVector& psiinv, std::vector& ratios) -{ - const int nVP = VP.getTotalNum(); - psiinv_pos_copy.resize(psiinv.size() + nVP * 6); - - // stage psiinv to psiinv_pos_copy - std::copy_n(psiinv.data(), psiinv.size(), psiinv_pos_copy.data()); - - // pack particle positions - auto* restrict pos_scratch = psiinv_pos_copy.data() + psiinv.size(); - for (int iat = 0; iat < nVP; ++iat) { - const PointType& r = VP.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - pos_scratch[iat * 6] = r[0]; - pos_scratch[iat * 6 + 1] = r[1]; - pos_scratch[iat * 6 + 2] = r[2]; - pos_scratch[iat * 6 + 3] = ru[0]; - pos_scratch[iat * 6 + 4] = ru[1]; - pos_scratch[iat * 6 + 5] = ru[2]; - } - + } + else + { const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - ratios_private.resize(nVP, NumTeams); + const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + const auto spline_padded_size = myV.size(); const auto sposet_padded_size = getAlignedSize(this->OrbitalSetSize); - 
offload_scratch.resize(spline_padded_size * nVP); - results_scratch.resize(sposet_padded_size * nVP); + offload_scratch.resize(spline_padded_size); + results_scratch.resize(sposet_padded_size); // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); + const auto* spline_ptr = SplineInst->getSplinePtr(); auto* offload_scratch_ptr = offload_scratch.data(); auto* results_scratch_ptr = results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* myKcart_ptr = myKcart->data(); - auto* psiinv_ptr = psiinv_pos_copy.data(); - auto* ratios_private_ptr = ratios_private.data(); - const size_t first_spo_local = this->first_spo; + auto* psi_ptr = psi.data(); + const auto x = r[0], y = r[1], z = r[2]; + const auto rux = ru[0], ruy = ru[1], ruz = ru[2]; + const auto myKcart_padded_size = myKcart->capacity(); + auto* myKcart_ptr = myKcart->data(); + const size_t first_spo_local = this->first_spo; const size_t nComplexBands_local = nComplexBands; - const auto requested_orb_size = psiinv.size(); + const auto requested_orb_size = psi.size(); { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD( - "omp target teams distribute collapse(2) num_teams(NumTeams*nVP) \ + ScopedTimer offload(offload_timer_); + PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \ + map(always, from: results_scratch_ptr[0:sposet_padded_size])") + for (int team_id = 0; team_id < NumTeams; team_id++) + { + const size_t first = ChunkSizePerTeam * team_id; + const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size); + + int ix, iy, iz; + ST a[4], b[4], c[4]; + spline2::computeLocationAndFractional(spline_ptr, rux, ruy, ruz, ix, iy, iz, a, b, c); + + PRAGMA_OFFLOAD("omp parallel for") + for (int index = 0; index < last - first; index++) + spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, + offload_scratch_ptr + first + index); + const size_t 
first_cplx = first / 2; + const size_t last_cplx = last / 2; + PRAGMA_OFFLOAD("omp parallel for") + for (int index = first_cplx; index < last_cplx; index++) + C2R::assign_v(x, y, z, results_scratch_ptr, offload_scratch_ptr, myKcart_ptr, myKcart_padded_size, + first_spo_local, nComplexBands_local, index); + } + + for (size_t i = 0; i < requested_orb_size; i++) + psi[i] = results_scratch[i]; + } + } +} + +template +void SplineC2ROMPTargetT::evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) +{ + const int nVP = VP.getTotalNum(); + psiinv_pos_copy.resize(psiinv.size() + nVP * 6); + + // stage psiinv to psiinv_pos_copy + std::copy_n(psiinv.data(), psiinv.size(), psiinv_pos_copy.data()); + + // pack particle positions + auto* restrict pos_scratch = psiinv_pos_copy.data() + psiinv.size(); + for (int iat = 0; iat < nVP; ++iat) + { + const PointType& r = VP.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); + pos_scratch[iat * 6] = r[0]; + pos_scratch[iat * 6 + 1] = r[1]; + pos_scratch[iat * 6 + 2] = r[2]; + pos_scratch[iat * 6 + 3] = ru[0]; + pos_scratch[iat * 6 + 4] = ru[1]; + pos_scratch[iat * 6 + 5] = ru[2]; + } + + const size_t ChunkSizePerTeam = 512; + const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + ratios_private.resize(nVP, NumTeams); + const auto spline_padded_size = myV.size(); + const auto sposet_padded_size = getAlignedSize(this->OrbitalSetSize); + offload_scratch.resize(spline_padded_size * nVP); + results_scratch.resize(sposet_padded_size * nVP); + + // Ye: need to extract sizes and pointers before entering target region + const auto* spline_ptr = SplineInst->getSplinePtr(); + auto* offload_scratch_ptr = offload_scratch.data(); + auto* results_scratch_ptr = results_scratch.data(); + const auto myKcart_padded_size = myKcart->capacity(); + auto* myKcart_ptr = myKcart->data(); + auto* psiinv_ptr = psiinv_pos_copy.data(); + auto* ratios_private_ptr = 
ratios_private.data(); + const size_t first_spo_local = this->first_spo; + const size_t nComplexBands_local = nComplexBands; + const auto requested_orb_size = psiinv.size(); + + { + ScopedTimer offload(offload_timer_); + PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*nVP) \ map(always, to: psiinv_ptr[0:psiinv_pos_copy.size()]) \ map(always, from: ratios_private_ptr[0:NumTeams*nVP])") - for (int iat = 0; iat < nVP; iat++) - for (int team_id = 0; team_id < NumTeams; team_id++) { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min( - first + ChunkSizePerTeam, spline_padded_size); - - auto* restrict offload_scratch_iat_ptr = - offload_scratch_ptr + spline_padded_size * iat; - auto* restrict psi_iat_ptr = - results_scratch_ptr + sposet_padded_size * iat; - auto* restrict pos_scratch = psiinv_ptr + requested_orb_size; - - int ix, iy, iz; - ST a[4], b[4], c[4]; - spline2::computeLocationAndFractional(spline_ptr, - ST(pos_scratch[iat * 6 + 3]), ST(pos_scratch[iat * 6 + 4]), - ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c); - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, - first + index, a, b, c, - offload_scratch_iat_ptr + first + index); - const size_t first_cplx = first / 2; - const size_t last_cplx = last / 2; - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2R::assign_v(ST(pos_scratch[iat * 6]), - ST(pos_scratch[iat * 6 + 1]), - ST(pos_scratch[iat * 6 + 2]), psi_iat_ptr, - offload_scratch_iat_ptr, myKcart_ptr, - myKcart_padded_size, first_spo_local, + for (int iat = 0; iat < nVP; iat++) + for (int team_id = 0; team_id < NumTeams; team_id++) + { + const size_t first = ChunkSizePerTeam * team_id; + const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size); + + auto* restrict offload_scratch_iat_ptr = offload_scratch_ptr + 
spline_padded_size * iat; + auto* restrict psi_iat_ptr = results_scratch_ptr + sposet_padded_size * iat; + auto* restrict pos_scratch = psiinv_ptr + requested_orb_size; + + int ix, iy, iz; + ST a[4], b[4], c[4]; + spline2::computeLocationAndFractional(spline_ptr, ST(pos_scratch[iat * 6 + 3]), ST(pos_scratch[iat * 6 + 4]), + ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c); + + PRAGMA_OFFLOAD("omp parallel for") + for (int index = 0; index < last - first; index++) + spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, + offload_scratch_iat_ptr + first + index); + const size_t first_cplx = first / 2; + const size_t last_cplx = last / 2; + PRAGMA_OFFLOAD("omp parallel for") + for (int index = first_cplx; index < last_cplx; index++) + C2R::assign_v(ST(pos_scratch[iat * 6]), ST(pos_scratch[iat * 6 + 1]), ST(pos_scratch[iat * 6 + 2]), + psi_iat_ptr, offload_scratch_iat_ptr, myKcart_ptr, myKcart_padded_size, first_spo_local, nComplexBands_local, index); - const size_t first_real = first_cplx + - omptarget::min(nComplexBands_local, first_cplx); - const size_t last_real = omptarget::min( - last_cplx + omptarget::min(nComplexBands_local, last_cplx), - requested_orb_size); - TT sum(0); - PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)") - for (int i = first_real; i < last_real; i++) - sum += psi_iat_ptr[i] * psiinv_ptr[i]; - ratios_private_ptr[iat * NumTeams + team_id] = sum; - } - } - - // do the reduction manually - for (int iat = 0; iat < nVP; ++iat) { - ratios[iat] = TT(0); - for (int tid = 0; tid < NumTeams; tid++) - ratios[iat] += ratios_private[iat][tid]; - } + const size_t first_real = first_cplx + omptarget::min(nComplexBands_local, first_cplx); + const size_t last_real = + omptarget::min(last_cplx + omptarget::min(nComplexBands_local, last_cplx), requested_orb_size); + TT sum(0); + PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)") + for (int i = first_real; i < last_real; i++) + sum += psi_iat_ptr[i] * psiinv_ptr[i]; + 
ratios_private_ptr[iat * NumTeams + team_id] = sum; + } + } + + // do the reduction manually + for (int iat = 0; iat < nVP; ++iat) + { + ratios[iat] = TT(0); + for (int tid = 0; tid < NumTeams; tid++) + ratios[iat] += ratios_private[iat][tid]; + } } -template -void -SplineC2ROMPTargetT::mw_evaluateDetRatios( +template +void SplineC2ROMPTargetT::mw_evaluateDetRatios( const RefVectorWithLeader>& spo_list, const RefVectorWithLeader>& vp_list, const RefVector& psi_list, const std::vector& invRow_ptr_list, std::vector>& ratios_list) const { - assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.template getCastedLeader(); - auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); - auto& det_ratios_buffer_H2D = mw_mem.det_ratios_buffer_H2D; - auto& mw_ratios_private = mw_mem.mw_ratios_private; - auto& mw_offload_scratch = mw_mem.mw_offload_scratch; - auto& mw_results_scratch = mw_mem.mw_results_scratch; - const size_t nw = spo_list.size(); - const size_t requested_orb_size = phi_leader.size(); - - size_t mw_nVP = 0; - for (const VirtualParticleSetT& VP : vp_list) - mw_nVP += VP.getTotalNum(); - - const size_t packed_size = - nw * sizeof(ValueType*) + mw_nVP * (6 * sizeof(TT) + sizeof(int)); - det_ratios_buffer_H2D.resize(packed_size); - - // pack invRow_ptr_list to det_ratios_buffer_H2D - Vector ptr_buffer( - reinterpret_cast(det_ratios_buffer_H2D.data()), nw); - for (size_t iw = 0; iw < nw; iw++) - ptr_buffer[iw] = invRow_ptr_list[iw]; - - // pack particle positions - auto* pos_ptr = reinterpret_cast( - det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*)); - auto* ref_id_ptr = reinterpret_cast(det_ratios_buffer_H2D.data() + - nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(TT)); - size_t iVP = 0; - for (size_t iw = 0; iw < nw; iw++) { - const VirtualParticleSetT& VP = vp_list[iw]; - assert(ratios_list[iw].size() == VP.getTotalNum()); - for (size_t iat = 0; iat < VP.getTotalNum(); ++iat, ++iVP) { - ref_id_ptr[iVP] = iw; - const PointType& r = 
VP.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - pos_ptr[0] = r[0]; - pos_ptr[1] = r[1]; - pos_ptr[2] = r[2]; - pos_ptr[3] = ru[0]; - pos_ptr[4] = ru[1]; - pos_ptr[5] = ru[2]; - pos_ptr += 6; - } - } - - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - mw_ratios_private.resize(mw_nVP, NumTeams); - const auto spline_padded_size = myV.size(); - const auto sposet_padded_size = getAlignedSize(this->OrbitalSetSize); - mw_offload_scratch.resize(spline_padded_size * mw_nVP); - mw_results_scratch.resize(sposet_padded_size * mw_nVP); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* offload_scratch_ptr = mw_offload_scratch.data(); - auto* results_scratch_ptr = mw_results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* myKcart_ptr = myKcart->data(); - auto* buffer_H2D_ptr = det_ratios_buffer_H2D.data(); - auto* ratios_private_ptr = mw_ratios_private.data(); - const size_t first_spo_local = this->first_spo; - const size_t nComplexBands_local = nComplexBands; - + assert(this == &spo_list.getLeader()); + auto& phi_leader = spo_list.template getCastedLeader(); + auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); + auto& det_ratios_buffer_H2D = mw_mem.det_ratios_buffer_H2D; + auto& mw_ratios_private = mw_mem.mw_ratios_private; + auto& mw_offload_scratch = mw_mem.mw_offload_scratch; + auto& mw_results_scratch = mw_mem.mw_results_scratch; + const size_t nw = spo_list.size(); + const size_t requested_orb_size = phi_leader.size(); + + size_t mw_nVP = 0; + for (const VirtualParticleSetT& VP : vp_list) + mw_nVP += VP.getTotalNum(); + + const size_t packed_size = nw * sizeof(ValueType*) + mw_nVP * (6 * sizeof(TT) + sizeof(int)); + det_ratios_buffer_H2D.resize(packed_size); + + // pack invRow_ptr_list to det_ratios_buffer_H2D + Vector 
ptr_buffer(reinterpret_cast(det_ratios_buffer_H2D.data()), nw); + for (size_t iw = 0; iw < nw; iw++) + ptr_buffer[iw] = invRow_ptr_list[iw]; + + // pack particle positions + auto* pos_ptr = reinterpret_cast(det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*)); + auto* ref_id_ptr = + reinterpret_cast(det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(TT)); + size_t iVP = 0; + for (size_t iw = 0; iw < nw; iw++) + { + const VirtualParticleSetT& VP = vp_list[iw]; + assert(ratios_list[iw].size() == VP.getTotalNum()); + for (size_t iat = 0; iat < VP.getTotalNum(); ++iat, ++iVP) { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD( - "omp target teams distribute collapse(2) num_teams(NumTeams*mw_nVP) \ + ref_id_ptr[iVP] = iw; + const PointType& r = VP.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); + pos_ptr[0] = r[0]; + pos_ptr[1] = r[1]; + pos_ptr[2] = r[2]; + pos_ptr[3] = ru[0]; + pos_ptr[4] = ru[1]; + pos_ptr[5] = ru[2]; + pos_ptr += 6; + } + } + + const size_t ChunkSizePerTeam = 512; + const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + mw_ratios_private.resize(mw_nVP, NumTeams); + const auto spline_padded_size = myV.size(); + const auto sposet_padded_size = getAlignedSize(this->OrbitalSetSize); + mw_offload_scratch.resize(spline_padded_size * mw_nVP); + mw_results_scratch.resize(sposet_padded_size * mw_nVP); + + // Ye: need to extract sizes and pointers before entering target region + const auto* spline_ptr = SplineInst->getSplinePtr(); + auto* offload_scratch_ptr = mw_offload_scratch.data(); + auto* results_scratch_ptr = mw_results_scratch.data(); + const auto myKcart_padded_size = myKcart->capacity(); + auto* myKcart_ptr = myKcart->data(); + auto* buffer_H2D_ptr = det_ratios_buffer_H2D.data(); + auto* ratios_private_ptr = mw_ratios_private.data(); + const size_t first_spo_local = this->first_spo; + const size_t nComplexBands_local = nComplexBands; + + { + ScopedTimer offload(offload_timer_); + 
PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*mw_nVP) \ map(always, to: buffer_H2D_ptr[0:det_ratios_buffer_H2D.size()]) \ map(always, from: ratios_private_ptr[0:NumTeams*mw_nVP])") - for (int iat = 0; iat < mw_nVP; iat++) - for (int team_id = 0; team_id < NumTeams; team_id++) { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min( - first + ChunkSizePerTeam, spline_padded_size); - - auto* restrict offload_scratch_iat_ptr = - offload_scratch_ptr + spline_padded_size * iat; - auto* restrict psi_iat_ptr = - results_scratch_ptr + sposet_padded_size * iat; - auto* ref_id_ptr = reinterpret_cast(buffer_H2D_ptr + - nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(TT)); - auto* restrict psiinv_ptr = reinterpret_cast( - buffer_H2D_ptr)[ref_id_ptr[iat]]; - auto* restrict pos_scratch = reinterpret_cast( - buffer_H2D_ptr + nw * sizeof(ValueType*)); - - int ix, iy, iz; - ST a[4], b[4], c[4]; - spline2::computeLocationAndFractional(spline_ptr, - ST(pos_scratch[iat * 6 + 3]), ST(pos_scratch[iat * 6 + 4]), - ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c); - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, - first + index, a, b, c, - offload_scratch_iat_ptr + first + index); - const size_t first_cplx = first / 2; - const size_t last_cplx = last / 2; - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2R::assign_v(ST(pos_scratch[iat * 6]), - ST(pos_scratch[iat * 6 + 1]), - ST(pos_scratch[iat * 6 + 2]), psi_iat_ptr, - offload_scratch_iat_ptr, myKcart_ptr, - myKcart_padded_size, first_spo_local, + for (int iat = 0; iat < mw_nVP; iat++) + for (int team_id = 0; team_id < NumTeams; team_id++) + { + const size_t first = ChunkSizePerTeam * team_id; + const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size); + + auto* restrict offload_scratch_iat_ptr = 
offload_scratch_ptr + spline_padded_size * iat; + auto* restrict psi_iat_ptr = results_scratch_ptr + sposet_padded_size * iat; + auto* ref_id_ptr = reinterpret_cast(buffer_H2D_ptr + nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(TT)); + auto* restrict psiinv_ptr = reinterpret_cast(buffer_H2D_ptr)[ref_id_ptr[iat]]; + auto* restrict pos_scratch = reinterpret_cast(buffer_H2D_ptr + nw * sizeof(ValueType*)); + + int ix, iy, iz; + ST a[4], b[4], c[4]; + spline2::computeLocationAndFractional(spline_ptr, ST(pos_scratch[iat * 6 + 3]), ST(pos_scratch[iat * 6 + 4]), + ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c); + + PRAGMA_OFFLOAD("omp parallel for") + for (int index = 0; index < last - first; index++) + spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, + offload_scratch_iat_ptr + first + index); + const size_t first_cplx = first / 2; + const size_t last_cplx = last / 2; + PRAGMA_OFFLOAD("omp parallel for") + for (int index = first_cplx; index < last_cplx; index++) + C2R::assign_v(ST(pos_scratch[iat * 6]), ST(pos_scratch[iat * 6 + 1]), ST(pos_scratch[iat * 6 + 2]), + psi_iat_ptr, offload_scratch_iat_ptr, myKcart_ptr, myKcart_padded_size, first_spo_local, nComplexBands_local, index); - const size_t first_real = first_cplx + - omptarget::min(nComplexBands_local, first_cplx); - const size_t last_real = omptarget::min( - last_cplx + omptarget::min(nComplexBands_local, last_cplx), - requested_orb_size); - TT sum(0); - PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)") - for (int i = first_real; i < last_real; i++) - sum += psi_iat_ptr[i] * psiinv_ptr[i]; - ratios_private_ptr[iat * NumTeams + team_id] = sum; - } - } - - // do the reduction manually - iVP = 0; - for (size_t iw = 0; iw < nw; iw++) { - auto& ratios = ratios_list[iw]; - for (size_t iat = 0; iat < ratios.size(); iat++, iVP++) { - ratios[iat] = TT(0); - for (int tid = 0; tid < NumTeams; ++tid) - ratios[iat] += mw_ratios_private[iVP][tid]; - } + const size_t first_real = 
first_cplx + omptarget::min(nComplexBands_local, first_cplx); + const size_t last_real = + omptarget::min(last_cplx + omptarget::min(nComplexBands_local, last_cplx), requested_orb_size); + TT sum(0); + PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)") + for (int i = first_real; i < last_real; i++) + sum += psi_iat_ptr[i] * psiinv_ptr[i]; + ratios_private_ptr[iat * NumTeams + team_id] = sum; + } + } + + // do the reduction manually + iVP = 0; + for (size_t iw = 0; iw < nw; iw++) + { + auto& ratios = ratios_list[iw]; + for (size_t iat = 0; iat < ratios.size(); iat++, iVP++) + { + ratios[iat] = TT(0); + for (int tid = 0; tid < NumTeams; ++tid) + ratios[iat] += mw_ratios_private[iVP][tid]; } + } } /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in * cartesian */ -template -inline void -SplineC2ROMPTargetT::assign_vgl_from_l( - const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +template +inline void SplineC2ROMPTargetT::assign_vgl_from_l(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) { - constexpr ST two(2); - const ST x = r[0], y = r[1], z = r[2]; + constexpr ST two(2); + const ST x = r[0], y = r[1], z = r[2]; - const ST* restrict k0 = myKcart->data(0); - ASSUME_ALIGNED(k0); - const ST* restrict k1 = myKcart->data(1); - ASSUME_ALIGNED(k1); - const ST* restrict k2 = myKcart->data(2); - ASSUME_ALIGNED(k2); + const ST* restrict k0 = myKcart->data(0); + ASSUME_ALIGNED(k0); + const ST* restrict k1 = myKcart->data(1); + ASSUME_ALIGNED(k1); + const ST* restrict k2 = myKcart->data(2); + ASSUME_ALIGNED(k2); - const ST* restrict g0 = myG.data(0); - ASSUME_ALIGNED(g0); - const ST* restrict g1 = myG.data(1); - ASSUME_ALIGNED(g1); - const ST* restrict g2 = myG.data(2); - ASSUME_ALIGNED(g2); + const ST* restrict g0 = myG.data(0); + ASSUME_ALIGNED(g0); + const ST* restrict g1 = myG.data(1); + ASSUME_ALIGNED(g1); + const ST* restrict g2 = myG.data(2); + ASSUME_ALIGNED(g2); - const size_t N = 
this->kPoints.size(); + const size_t N = this->kPoints.size(); #pragma omp simd - for (size_t j = 0; j < nComplexBands; j++) { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - // phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - // dot(PrimLattice.G,myG[j]) - const ST dX_r = g0[jr]; - const ST dY_r = g1[jr]; - const ST dZ_r = g2[jr]; - - const ST dX_i = g0[ji]; - const ST dY_i = g1[ji]; - const ST dZ_i = g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const ST lap_r = myL[jr] + (*mKK)[j] * val_r + - two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = myL[ji] + (*mKK)[j] * val_i - - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - - const size_t psiIndex = this->first_spo + jr; - psi[psiIndex] = c * val_r - s * val_i; - psi[psiIndex + 1] = c * val_i + s * val_r; - d2psi[psiIndex] = c * lap_r - s * lap_i; - d2psi[psiIndex + 1] = c * lap_i + s * lap_r; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; - dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; - dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - } + for (size_t j = 0; j < nComplexBands; j++) + { + const size_t jr = j << 1; + const size_t ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g0[jr]; + const ST dY_r = g1[jr]; + const ST dZ_r = g2[jr]; + + const ST dX_i = g0[ji]; + const 
ST dY_i = g1[ji]; + const ST dZ_i = g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + + const ST lap_r = myL[jr] + (*mKK)[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); + const ST lap_i = myL[ji] + (*mKK)[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); + + const size_t psiIndex = this->first_spo + jr; + psi[psiIndex] = c * val_r - s * val_i; + psi[psiIndex + 1] = c * val_i + s * val_r; + d2psi[psiIndex] = c * lap_r - s * lap_i; + d2psi[psiIndex + 1] = c * lap_i + s * lap_r; + dpsi[psiIndex][0] = c * gX_r - s * gX_i; + dpsi[psiIndex][1] = c * gY_r - s * gY_i; + dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; + dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; + dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; + dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; + } #pragma omp simd - for (size_t j = nComplexBands; j < N; j++) { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - // phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - // dot(PrimLattice.G,myG[j]) - const ST dX_r = g0[jr]; - const ST dY_r = g1[jr]; - const ST dZ_r = g2[jr]; - - const ST dX_i = g0[ji]; - const ST dY_i = g1[ji]; - const ST dZ_i = g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - const size_t psiIndex = this->first_spo + nComplexBands + j; - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * 
gZ_r - s * gZ_i; - - const ST lap_r = myL[jr] + (*mKK)[j] * val_r + - two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = myL[ji] + (*mKK)[j] * val_i - - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - d2psi[psiIndex] = c * lap_r - s * lap_i; - } + for (size_t j = nComplexBands; j < N; j++) + { + const size_t jr = j << 1; + const size_t ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g0[jr]; + const ST dY_r = g1[jr]; + const ST dZ_r = g2[jr]; + + const ST dX_i = g0[ji]; + const ST dY_i = g1[ji]; + const ST dZ_i = g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + const size_t psiIndex = this->first_spo + nComplexBands + j; + psi[psiIndex] = c * val_r - s * val_i; + dpsi[psiIndex][0] = c * gX_r - s * gX_i; + dpsi[psiIndex][1] = c * gY_r - s * gY_i; + dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; + + const ST lap_r = myL[jr] + (*mKK)[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); + const ST lap_i = myL[ji] + (*mKK)[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); + d2psi[psiIndex] = c * lap_r - s * lap_i; + } } -template -void -SplineC2ROMPTargetT::evaluateVGL(const ParticleSetT& P, - const int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +template +void SplineC2ROMPTargetT::evaluateVGL(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - - 
const auto spline_padded_size = myV.size(); - const auto sposet_padded_size = getAlignedSize(this->OrbitalSetSize); - // for V(1)G(3)H(6) intermediate result - offload_scratch.resize(spline_padded_size * SoAFields3D::NUM_FIELDS); - // for V(1)G(3)L(1) final result - results_scratch.resize(sposet_padded_size * 5); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* offload_scratch_ptr = offload_scratch.data(); - auto* results_scratch_ptr = results_scratch.data(); - const auto x = r[0], y = r[1], z = r[2]; - const auto rux = ru[0], ruy = ru[1], ruz = ru[2]; - const auto myKcart_padded_size = myKcart->capacity(); - auto* mKK_ptr = mKK->data(); - auto* GGt_ptr = GGt_offload->data(); - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - auto* myKcart_ptr = myKcart->data(); - const size_t first_spo_local = this->first_spo; - const size_t nComplexBands_local = nComplexBands; - const auto requested_orb_size = psi.size(); - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \ + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); + + const size_t ChunkSizePerTeam = 512; + const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + + const auto spline_padded_size = myV.size(); + const auto sposet_padded_size = getAlignedSize(this->OrbitalSetSize); + // for V(1)G(3)H(6) intermediate result + offload_scratch.resize(spline_padded_size * SoAFields3D::NUM_FIELDS); + // for V(1)G(3)L(1) final result + results_scratch.resize(sposet_padded_size * 5); + + // Ye: need to extract sizes and pointers before entering target region + const auto* spline_ptr = SplineInst->getSplinePtr(); + auto* offload_scratch_ptr = offload_scratch.data(); + auto* results_scratch_ptr = results_scratch.data(); + const auto x = r[0], y = r[1], z = r[2]; + const auto rux = ru[0], ruy = ru[1], ruz = ru[2]; + const 
auto myKcart_padded_size = myKcart->capacity(); + auto* mKK_ptr = mKK->data(); + auto* GGt_ptr = GGt_offload->data(); + auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); + auto* myKcart_ptr = myKcart->data(); + const size_t first_spo_local = this->first_spo; + const size_t nComplexBands_local = nComplexBands; + const auto requested_orb_size = psi.size(); + + { + ScopedTimer offload(offload_timer_); + PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \ map(always, from: results_scratch_ptr[0:sposet_padded_size*5])") - for (int team_id = 0; team_id < NumTeams; team_id++) { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = - omptarget::min(first + ChunkSizePerTeam, spline_padded_size); - - int ix, iy, iz; - ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; - spline2::computeLocationAndFractional(spline_ptr, rux, ruy, ruz, ix, - iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c); - - const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], - PrimLattice_G_ptr[2], PrimLattice_G_ptr[3], - PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], - PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], - PrimLattice_G_ptr[8]}; - const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], - GGt_ptr[2] + GGt_ptr[6], GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], - GGt_ptr[8]}; - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) { - spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, - first + index, a, b, c, da, db, dc, d2a, d2b, d2c, - offload_scratch_ptr + first + index, spline_padded_size); - const int output_index = first + index; - offload_scratch_ptr[spline_padded_size * SoAFields3D::LAPL + - output_index] = - SymTrace(offload_scratch_ptr[spline_padded_size * - SoAFields3D::HESS00 + - output_index], - offload_scratch_ptr[spline_padded_size * - SoAFields3D::HESS01 + - output_index], - offload_scratch_ptr[spline_padded_size * - SoAFields3D::HESS02 + - output_index], - 
offload_scratch_ptr[spline_padded_size * - SoAFields3D::HESS11 + - output_index], - offload_scratch_ptr[spline_padded_size * - SoAFields3D::HESS12 + - output_index], - offload_scratch_ptr[spline_padded_size * - SoAFields3D::HESS22 + - output_index], - symGGt); - } - const size_t first_cplx = first / 2; - const size_t last_cplx = last / 2; - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2R::assign_vgl(x, y, z, results_scratch_ptr, - sposet_padded_size, mKK_ptr, offload_scratch_ptr, - spline_padded_size, G, myKcart_ptr, myKcart_padded_size, - first_spo_local, nComplexBands_local, index); - } - } - - for (size_t i = 0; i < requested_orb_size; i++) { - psi[i] = results_scratch[i]; - dpsi[i][0] = results_scratch[i + sposet_padded_size * 1]; - dpsi[i][1] = results_scratch[i + sposet_padded_size * 2]; - dpsi[i][2] = results_scratch[i + sposet_padded_size * 3]; - d2psi[i] = results_scratch[i + sposet_padded_size * 4]; + for (int team_id = 0; team_id < NumTeams; team_id++) + { + const size_t first = ChunkSizePerTeam * team_id; + const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size); + + int ix, iy, iz; + ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; + spline2::computeLocationAndFractional(spline_ptr, rux, ruy, ruz, ix, iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c); + + const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2], + PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], + PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]}; + const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6], + GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; + + PRAGMA_OFFLOAD("omp parallel for") + for (int index = 0; index < last - first; index++) + { + spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b, d2c, + offload_scratch_ptr + first + index, 
spline_padded_size); + const int output_index = first + index; + offload_scratch_ptr[spline_padded_size * SoAFields3D::LAPL + output_index] = + SymTrace(offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS00 + output_index], + offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS01 + output_index], + offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS02 + output_index], + offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS11 + output_index], + offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS12 + output_index], + offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS22 + output_index], symGGt); + } + const size_t first_cplx = first / 2; + const size_t last_cplx = last / 2; + PRAGMA_OFFLOAD("omp parallel for") + for (int index = first_cplx; index < last_cplx; index++) + C2R::assign_vgl(x, y, z, results_scratch_ptr, sposet_padded_size, mKK_ptr, offload_scratch_ptr, + spline_padded_size, G, myKcart_ptr, myKcart_padded_size, first_spo_local, nComplexBands_local, + index); } + } + + for (size_t i = 0; i < requested_orb_size; i++) + { + psi[i] = results_scratch[i]; + dpsi[i][0] = results_scratch[i + sposet_padded_size * 1]; + dpsi[i][1] = results_scratch[i + sposet_padded_size * 2]; + dpsi[i][2] = results_scratch[i + sposet_padded_size * 3]; + d2psi[i] = results_scratch[i + sposet_padded_size * 4]; + } } -template -void -SplineC2ROMPTargetT::evaluateVGLMultiPos( - const Vector>& multi_pos, - Vector>& offload_scratch, - Vector>& results_scratch, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const +template +void SplineC2ROMPTargetT::evaluateVGLMultiPos(const Vector>& multi_pos, + Vector>& offload_scratch, + Vector>& results_scratch, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const { - const size_t num_pos = psi_v_list.size(); - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / 
ChunkSizePerTeam; - const auto spline_padded_size = myV.size(); - const auto sposet_padded_size = getAlignedSize(this->OrbitalSetSize); - // for V(1)G(3)H(6) intermediate result - offload_scratch.resize( - spline_padded_size * num_pos * SoAFields3D::NUM_FIELDS); - // for V(1)G(3)L(1) final result - results_scratch.resize(sposet_padded_size * num_pos * 5); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* pos_copy_ptr = multi_pos.data(); - auto* offload_scratch_ptr = offload_scratch.data(); - auto* results_scratch_ptr = results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* mKK_ptr = mKK->data(); - auto* GGt_ptr = GGt_offload->data(); - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - auto* myKcart_ptr = myKcart->data(); - const size_t first_spo_local = this->first_spo; - const size_t nComplexBands_local = nComplexBands; - const auto requested_orb_size = psi_v_list[0].get().size(); - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD( - "omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \ + const size_t num_pos = psi_v_list.size(); + const size_t ChunkSizePerTeam = 512; + const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + const auto spline_padded_size = myV.size(); + const auto sposet_padded_size = getAlignedSize(this->OrbitalSetSize); + // for V(1)G(3)H(6) intermediate result + offload_scratch.resize(spline_padded_size * num_pos * SoAFields3D::NUM_FIELDS); + // for V(1)G(3)L(1) final result + results_scratch.resize(sposet_padded_size * num_pos * 5); + + // Ye: need to extract sizes and pointers before entering target region + const auto* spline_ptr = SplineInst->getSplinePtr(); + auto* pos_copy_ptr = multi_pos.data(); + auto* offload_scratch_ptr = offload_scratch.data(); + auto* results_scratch_ptr = results_scratch.data(); + const auto myKcart_padded_size = myKcart->capacity(); + 
auto* mKK_ptr = mKK->data(); + auto* GGt_ptr = GGt_offload->data(); + auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); + auto* myKcart_ptr = myKcart->data(); + const size_t first_spo_local = this->first_spo; + const size_t nComplexBands_local = nComplexBands; + const auto requested_orb_size = psi_v_list[0].get().size(); + + { + ScopedTimer offload(offload_timer_); + PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \ map(always, to: pos_copy_ptr[0:num_pos*6]) \ map(always, from: results_scratch_ptr[0:sposet_padded_size*num_pos*5])") - for (int iw = 0; iw < num_pos; iw++) - for (int team_id = 0; team_id < NumTeams; team_id++) { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min( - first + ChunkSizePerTeam, spline_padded_size); - - auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + - spline_padded_size * iw * SoAFields3D::NUM_FIELDS; - auto* restrict psi_iw_ptr = - results_scratch_ptr + sposet_padded_size * iw * 5; - - int ix, iy, iz; - ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], - d2c[4]; - spline2::computeLocationAndFractional(spline_ptr, - pos_copy_ptr[iw * 6 + 3], pos_copy_ptr[iw * 6 + 4], - pos_copy_ptr[iw * 6 + 5], ix, iy, iz, a, b, c, da, db, dc, - d2a, d2b, d2c); - - const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], - PrimLattice_G_ptr[2], PrimLattice_G_ptr[3], - PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], - PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], - PrimLattice_G_ptr[8]}; - const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], - GGt_ptr[2] + GGt_ptr[6], GGt_ptr[4], - GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) { - spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, - first + index, a, b, c, da, db, dc, d2a, d2b, d2c, - offload_scratch_iw_ptr + first + index, - spline_padded_size); - const int output_index = first + index; - 
offload_scratch_iw_ptr[spline_padded_size * - SoAFields3D::LAPL + - output_index] = - SymTrace(offload_scratch_iw_ptr[spline_padded_size * - SoAFields3D::HESS00 + - output_index], - offload_scratch_iw_ptr[spline_padded_size * - SoAFields3D::HESS01 + - output_index], - offload_scratch_iw_ptr[spline_padded_size * - SoAFields3D::HESS02 + - output_index], - offload_scratch_iw_ptr[spline_padded_size * - SoAFields3D::HESS11 + - output_index], - offload_scratch_iw_ptr[spline_padded_size * - SoAFields3D::HESS12 + - output_index], - offload_scratch_iw_ptr[spline_padded_size * - SoAFields3D::HESS22 + - output_index], - symGGt); - } - const size_t first_cplx = first / 2; - const size_t last_cplx = last / 2; - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2R::assign_vgl(pos_copy_ptr[iw * 6], - pos_copy_ptr[iw * 6 + 1], pos_copy_ptr[iw * 6 + 2], - psi_iw_ptr, sposet_padded_size, mKK_ptr, - offload_scratch_iw_ptr, spline_padded_size, G, - myKcart_ptr, myKcart_padded_size, first_spo_local, - nComplexBands_local, index); - } - } - - for (int iw = 0; iw < num_pos; ++iw) { - auto* restrict results_iw_ptr = - results_scratch_ptr + sposet_padded_size * iw * 5; - ValueVector& psi_v(psi_v_list[iw]); - GradVector& dpsi_v(dpsi_v_list[iw]); - ValueVector& d2psi_v(d2psi_v_list[iw]); - for (size_t i = 0; i < requested_orb_size; i++) { - psi_v[i] = results_iw_ptr[i]; - dpsi_v[i][0] = results_iw_ptr[i + sposet_padded_size]; - dpsi_v[i][1] = results_iw_ptr[i + sposet_padded_size * 2]; - dpsi_v[i][2] = results_iw_ptr[i + sposet_padded_size * 3]; - d2psi_v[i] = results_iw_ptr[i + sposet_padded_size * 4]; + for (int iw = 0; iw < num_pos; iw++) + for (int team_id = 0; team_id < NumTeams; team_id++) + { + const size_t first = ChunkSizePerTeam * team_id; + const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size); + + auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + spline_padded_size * iw * 
SoAFields3D::NUM_FIELDS; + auto* restrict psi_iw_ptr = results_scratch_ptr + sposet_padded_size * iw * 5; + + int ix, iy, iz; + ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; + spline2::computeLocationAndFractional(spline_ptr, pos_copy_ptr[iw * 6 + 3], pos_copy_ptr[iw * 6 + 4], + pos_copy_ptr[iw * 6 + 5], ix, iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c); + + const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2], + PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], + PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]}; + const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6], + GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; + + PRAGMA_OFFLOAD("omp parallel for") + for (int index = 0; index < last - first; index++) + { + spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b, + d2c, offload_scratch_iw_ptr + first + index, spline_padded_size); + const int output_index = first + index; + offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::LAPL + output_index] = + SymTrace(offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS00 + output_index], + offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS01 + output_index], + offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS02 + output_index], + offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS11 + output_index], + offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS12 + output_index], + offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS22 + output_index], symGGt); } + const size_t first_cplx = first / 2; + const size_t last_cplx = last / 2; + PRAGMA_OFFLOAD("omp parallel for") + for (int index = first_cplx; index < last_cplx; index++) + C2R::assign_vgl(pos_copy_ptr[iw * 6], pos_copy_ptr[iw * 6 + 1], pos_copy_ptr[iw * 6 + 2], psi_iw_ptr, + sposet_padded_size, mKK_ptr, offload_scratch_iw_ptr, 
spline_padded_size, G, myKcart_ptr, + myKcart_padded_size, first_spo_local, nComplexBands_local, index); + } + } + + for (int iw = 0; iw < num_pos; ++iw) + { + auto* restrict results_iw_ptr = results_scratch_ptr + sposet_padded_size * iw * 5; + ValueVector& psi_v(psi_v_list[iw]); + GradVector& dpsi_v(dpsi_v_list[iw]); + ValueVector& d2psi_v(d2psi_v_list[iw]); + for (size_t i = 0; i < requested_orb_size; i++) + { + psi_v[i] = results_iw_ptr[i]; + dpsi_v[i][0] = results_iw_ptr[i + sposet_padded_size]; + dpsi_v[i][1] = results_iw_ptr[i + sposet_padded_size * 2]; + dpsi_v[i][2] = results_iw_ptr[i + sposet_padded_size * 3]; + d2psi_v[i] = results_iw_ptr[i + sposet_padded_size * 4]; } + } } -template -void -SplineC2ROMPTargetT::mw_evaluateVGL( - const RefVectorWithLeader>& sa_list, - const RefVectorWithLeader>& P_list, int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const +template +void SplineC2ROMPTargetT::mw_evaluateVGL(const RefVectorWithLeader>& sa_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const { - assert(this == &sa_list.getLeader()); - auto& phi_leader = sa_list.template getCastedLeader(); - auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); - auto& mw_pos_copy = mw_mem.mw_pos_copy; - auto& mw_offload_scratch = mw_mem.mw_offload_scratch; - auto& mw_results_scratch = mw_mem.mw_results_scratch; - const int nwalkers = sa_list.size(); - mw_pos_copy.resize(nwalkers * 6); - - // pack particle positions - for (int iw = 0; iw < nwalkers; ++iw) { - const PointType& r = P_list[iw].activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - mw_pos_copy[iw * 6] = r[0]; - mw_pos_copy[iw * 6 + 1] = r[1]; - mw_pos_copy[iw * 6 + 2] = r[2]; - mw_pos_copy[iw * 6 + 3] = ru[0]; - mw_pos_copy[iw * 6 + 4] = ru[1]; - mw_pos_copy[iw * 6 + 5] = ru[2]; - } - - phi_leader.evaluateVGLMultiPos(mw_pos_copy, 
mw_offload_scratch, - mw_results_scratch, psi_v_list, dpsi_v_list, d2psi_v_list); + assert(this == &sa_list.getLeader()); + auto& phi_leader = sa_list.template getCastedLeader(); + auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); + auto& mw_pos_copy = mw_mem.mw_pos_copy; + auto& mw_offload_scratch = mw_mem.mw_offload_scratch; + auto& mw_results_scratch = mw_mem.mw_results_scratch; + const int nwalkers = sa_list.size(); + mw_pos_copy.resize(nwalkers * 6); + + // pack particle positions + for (int iw = 0; iw < nwalkers; ++iw) + { + const PointType& r = P_list[iw].activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); + mw_pos_copy[iw * 6] = r[0]; + mw_pos_copy[iw * 6 + 1] = r[1]; + mw_pos_copy[iw * 6 + 2] = r[2]; + mw_pos_copy[iw * 6 + 3] = ru[0]; + mw_pos_copy[iw * 6 + 4] = ru[1]; + mw_pos_copy[iw * 6 + 5] = ru[2]; + } + + phi_leader.evaluateVGLMultiPos(mw_pos_copy, mw_offload_scratch, mw_results_scratch, psi_v_list, dpsi_v_list, + d2psi_v_list); } -template -void -SplineC2ROMPTargetT::mw_evaluateVGLandDetRatioGrads( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, - std::vector& grads) const +template +void SplineC2ROMPTargetT::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const { - assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.template getCastedLeader(); - auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); - auto& buffer_H2D = mw_mem.buffer_H2D; - auto& rg_private = mw_mem.rg_private; - auto& mw_offload_scratch = mw_mem.mw_offload_scratch; - auto& mw_results_scratch = mw_mem.mw_results_scratch; - const int nwalkers = spo_list.size(); - buffer_H2D.resize(nwalkers, sizeof(ST) * 6 + sizeof(ValueType*)); - - // pack 
particle positions and invRow pointers. - for (int iw = 0; iw < nwalkers; ++iw) { - const PointType& r = P_list[iw].activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - Vector pos_copy(reinterpret_cast(buffer_H2D[iw]), 6); - - pos_copy[0] = r[0]; - pos_copy[1] = r[1]; - pos_copy[2] = r[2]; - pos_copy[3] = ru[0]; - pos_copy[4] = ru[1]; - pos_copy[5] = ru[2]; - - auto& invRow_ptr = *reinterpret_cast( - buffer_H2D[iw] + sizeof(ST) * 6); - invRow_ptr = invRow_ptr_list[iw]; - } - - const size_t num_pos = nwalkers; - const auto spline_padded_size = myV.size(); - const auto sposet_padded_size = getAlignedSize(this->OrbitalSetSize); - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - - mw_offload_scratch.resize( - spline_padded_size * num_pos * SoAFields3D::NUM_FIELDS); - // for V(1)G(3)L(1) final result - mw_results_scratch.resize(sposet_padded_size * num_pos * 5); - // per team ratio and grads - rg_private.resize(num_pos, NumTeams * 4); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* buffer_H2D_ptr = buffer_H2D.data(); - auto* offload_scratch_ptr = mw_offload_scratch.data(); - auto* results_scratch_ptr = mw_results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* mKK_ptr = mKK->data(); - auto* GGt_ptr = GGt_offload->data(); - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - auto* myKcart_ptr = myKcart->data(); - auto* phi_vgl_ptr = phi_vgl_v.data(); - auto* rg_private_ptr = rg_private.data(); - const size_t buffer_H2D_stride = buffer_H2D.cols(); - const size_t first_spo_local = this->first_spo; - const auto requested_orb_size = phi_vgl_v.size(2); - const size_t phi_vgl_stride = num_pos * requested_orb_size; - const size_t nComplexBands_local = nComplexBands; - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD( - "omp target teams distribute collapse(2) 
num_teams(NumTeams*num_pos) \ + assert(this == &spo_list.getLeader()); + auto& phi_leader = spo_list.template getCastedLeader(); + auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); + auto& buffer_H2D = mw_mem.buffer_H2D; + auto& rg_private = mw_mem.rg_private; + auto& mw_offload_scratch = mw_mem.mw_offload_scratch; + auto& mw_results_scratch = mw_mem.mw_results_scratch; + const int nwalkers = spo_list.size(); + buffer_H2D.resize(nwalkers, sizeof(ST) * 6 + sizeof(ValueType*)); + + // pack particle positions and invRow pointers. + for (int iw = 0; iw < nwalkers; ++iw) + { + const PointType& r = P_list[iw].activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); + Vector pos_copy(reinterpret_cast(buffer_H2D[iw]), 6); + + pos_copy[0] = r[0]; + pos_copy[1] = r[1]; + pos_copy[2] = r[2]; + pos_copy[3] = ru[0]; + pos_copy[4] = ru[1]; + pos_copy[5] = ru[2]; + + auto& invRow_ptr = *reinterpret_cast(buffer_H2D[iw] + sizeof(ST) * 6); + invRow_ptr = invRow_ptr_list[iw]; + } + + const size_t num_pos = nwalkers; + const auto spline_padded_size = myV.size(); + const auto sposet_padded_size = getAlignedSize(this->OrbitalSetSize); + const size_t ChunkSizePerTeam = 512; + const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + + mw_offload_scratch.resize(spline_padded_size * num_pos * SoAFields3D::NUM_FIELDS); + // for V(1)G(3)L(1) final result + mw_results_scratch.resize(sposet_padded_size * num_pos * 5); + // per team ratio and grads + rg_private.resize(num_pos, NumTeams * 4); + + // Ye: need to extract sizes and pointers before entering target region + const auto* spline_ptr = SplineInst->getSplinePtr(); + auto* buffer_H2D_ptr = buffer_H2D.data(); + auto* offload_scratch_ptr = mw_offload_scratch.data(); + auto* results_scratch_ptr = mw_results_scratch.data(); + const auto myKcart_padded_size = myKcart->capacity(); + auto* mKK_ptr = mKK->data(); + auto* GGt_ptr = GGt_offload->data(); + auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); + auto* 
myKcart_ptr = myKcart->data(); + auto* phi_vgl_ptr = phi_vgl_v.data(); + auto* rg_private_ptr = rg_private.data(); + const size_t buffer_H2D_stride = buffer_H2D.cols(); + const size_t first_spo_local = this->first_spo; + const auto requested_orb_size = phi_vgl_v.size(2); + const size_t phi_vgl_stride = num_pos * requested_orb_size; + const size_t nComplexBands_local = nComplexBands; + + { + ScopedTimer offload(offload_timer_); + PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \ map(always, to: buffer_H2D_ptr[:buffer_H2D.size()]) \ map(always, from: rg_private_ptr[0:rg_private.size()])") - for (int iw = 0; iw < num_pos; iw++) - for (int team_id = 0; team_id < NumTeams; team_id++) { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min( - first + ChunkSizePerTeam, spline_padded_size); - - auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + - spline_padded_size * iw * SoAFields3D::NUM_FIELDS; - auto* restrict psi_iw_ptr = - results_scratch_ptr + sposet_padded_size * iw * 5; - const auto* restrict pos_iw_ptr = reinterpret_cast( - buffer_H2D_ptr + buffer_H2D_stride * iw); - const auto* restrict invRow_iw_ptr = - *reinterpret_cast(buffer_H2D_ptr + - buffer_H2D_stride * iw + sizeof(ST) * 6); - - int ix, iy, iz; - ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], - d2c[4]; - spline2::computeLocationAndFractional(spline_ptr, pos_iw_ptr[3], - pos_iw_ptr[4], pos_iw_ptr[5], ix, iy, iz, a, b, c, da, db, - dc, d2a, d2b, d2c); - - const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], - PrimLattice_G_ptr[2], PrimLattice_G_ptr[3], - PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], - PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], - PrimLattice_G_ptr[8]}; - const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], - GGt_ptr[2] + GGt_ptr[6], GGt_ptr[4], - GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) { - 
spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, - first + index, a, b, c, da, db, dc, d2a, d2b, d2c, - offload_scratch_iw_ptr + first + index, - spline_padded_size); - const int output_index = first + index; - offload_scratch_iw_ptr[spline_padded_size * - SoAFields3D::LAPL + - output_index] = - SymTrace(offload_scratch_iw_ptr[spline_padded_size * - SoAFields3D::HESS00 + - output_index], - offload_scratch_iw_ptr[spline_padded_size * - SoAFields3D::HESS01 + - output_index], - offload_scratch_iw_ptr[spline_padded_size * - SoAFields3D::HESS02 + - output_index], - offload_scratch_iw_ptr[spline_padded_size * - SoAFields3D::HESS11 + - output_index], - offload_scratch_iw_ptr[spline_padded_size * - SoAFields3D::HESS12 + - output_index], - offload_scratch_iw_ptr[spline_padded_size * - SoAFields3D::HESS22 + - output_index], - symGGt); - } - const size_t first_cplx = first / 2; - const size_t last_cplx = last / 2; - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2R::assign_vgl(pos_iw_ptr[0], pos_iw_ptr[1], pos_iw_ptr[2], - psi_iw_ptr, sposet_padded_size, mKK_ptr, - offload_scratch_iw_ptr, spline_padded_size, G, - myKcart_ptr, myKcart_padded_size, first_spo_local, - nComplexBands_local, index); - - ValueType* restrict psi = psi_iw_ptr; - ValueType* restrict dpsi_x = psi_iw_ptr + sposet_padded_size; - ValueType* restrict dpsi_y = - psi_iw_ptr + sposet_padded_size * 2; - ValueType* restrict dpsi_z = - psi_iw_ptr + sposet_padded_size * 3; - ValueType* restrict d2psi = psi_iw_ptr + sposet_padded_size * 4; - - ValueType* restrict out_phi = - phi_vgl_ptr + iw * requested_orb_size; - ValueType* restrict out_dphi_x = out_phi + phi_vgl_stride; - ValueType* restrict out_dphi_y = out_dphi_x + phi_vgl_stride; - ValueType* restrict out_dphi_z = out_dphi_y + phi_vgl_stride; - ValueType* restrict out_d2phi = out_dphi_z + phi_vgl_stride; - - const size_t first_real = first_cplx + - omptarget::min(nComplexBands_local, first_cplx); - 
const size_t last_real = omptarget::min( - last_cplx + omptarget::min(nComplexBands_local, last_cplx), - requested_orb_size); - ValueType ratio(0), grad_x(0), grad_y(0), grad_z(0); - PRAGMA_OFFLOAD("omp parallel for \ + for (int iw = 0; iw < num_pos; iw++) + for (int team_id = 0; team_id < NumTeams; team_id++) + { + const size_t first = ChunkSizePerTeam * team_id; + const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size); + + auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + spline_padded_size * iw * SoAFields3D::NUM_FIELDS; + auto* restrict psi_iw_ptr = results_scratch_ptr + sposet_padded_size * iw * 5; + const auto* restrict pos_iw_ptr = reinterpret_cast(buffer_H2D_ptr + buffer_H2D_stride * iw); + const auto* restrict invRow_iw_ptr = + *reinterpret_cast(buffer_H2D_ptr + buffer_H2D_stride * iw + sizeof(ST) * 6); + + int ix, iy, iz; + ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; + spline2::computeLocationAndFractional(spline_ptr, pos_iw_ptr[3], pos_iw_ptr[4], pos_iw_ptr[5], ix, iy, iz, a, b, + c, da, db, dc, d2a, d2b, d2c); + + const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2], + PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], + PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]}; + const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6], + GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; + + PRAGMA_OFFLOAD("omp parallel for") + for (int index = 0; index < last - first; index++) + { + spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b, + d2c, offload_scratch_iw_ptr + first + index, spline_padded_size); + const int output_index = first + index; + offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::LAPL + output_index] = + SymTrace(offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS00 + output_index], + offload_scratch_iw_ptr[spline_padded_size * 
SoAFields3D::HESS01 + output_index], + offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS02 + output_index], + offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS11 + output_index], + offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS12 + output_index], + offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS22 + output_index], symGGt); + } + const size_t first_cplx = first / 2; + const size_t last_cplx = last / 2; + PRAGMA_OFFLOAD("omp parallel for") + for (int index = first_cplx; index < last_cplx; index++) + C2R::assign_vgl(pos_iw_ptr[0], pos_iw_ptr[1], pos_iw_ptr[2], psi_iw_ptr, sposet_padded_size, mKK_ptr, + offload_scratch_iw_ptr, spline_padded_size, G, myKcart_ptr, myKcart_padded_size, + first_spo_local, nComplexBands_local, index); + + ValueType* restrict psi = psi_iw_ptr; + ValueType* restrict dpsi_x = psi_iw_ptr + sposet_padded_size; + ValueType* restrict dpsi_y = psi_iw_ptr + sposet_padded_size * 2; + ValueType* restrict dpsi_z = psi_iw_ptr + sposet_padded_size * 3; + ValueType* restrict d2psi = psi_iw_ptr + sposet_padded_size * 4; + + ValueType* restrict out_phi = phi_vgl_ptr + iw * requested_orb_size; + ValueType* restrict out_dphi_x = out_phi + phi_vgl_stride; + ValueType* restrict out_dphi_y = out_dphi_x + phi_vgl_stride; + ValueType* restrict out_dphi_z = out_dphi_y + phi_vgl_stride; + ValueType* restrict out_d2phi = out_dphi_z + phi_vgl_stride; + + const size_t first_real = first_cplx + omptarget::min(nComplexBands_local, first_cplx); + const size_t last_real = + omptarget::min(last_cplx + omptarget::min(nComplexBands_local, last_cplx), requested_orb_size); + ValueType ratio(0), grad_x(0), grad_y(0), grad_z(0); + PRAGMA_OFFLOAD("omp parallel for \ reduction(+: ratio, grad_x, grad_y, grad_z)") - for (size_t j = first_real; j < last_real; j++) { - out_phi[j] = psi[j]; - out_dphi_x[j] = dpsi_x[j]; - out_dphi_y[j] = dpsi_y[j]; - out_dphi_z[j] = dpsi_z[j]; - out_d2phi[j] = d2psi[j]; - - ratio += psi[j] * 
invRow_iw_ptr[j]; - grad_x += dpsi_x[j] * invRow_iw_ptr[j]; - grad_y += dpsi_y[j] * invRow_iw_ptr[j]; - grad_z += dpsi_z[j] * invRow_iw_ptr[j]; - } - - rg_private_ptr[(iw * NumTeams + team_id) * 4] = ratio; - rg_private_ptr[(iw * NumTeams + team_id) * 4 + 1] = grad_x; - rg_private_ptr[(iw * NumTeams + team_id) * 4 + 2] = grad_y; - rg_private_ptr[(iw * NumTeams + team_id) * 4 + 3] = grad_z; - } - } - - for (int iw = 0; iw < num_pos; iw++) { - ValueType ratio(0); - for (int team_id = 0; team_id < NumTeams; team_id++) - ratio += rg_private[iw][team_id * 4]; - ratios[iw] = ratio; - - ValueType grad_x(0), grad_y(0), grad_z(0); - for (int team_id = 0; team_id < NumTeams; team_id++) { - grad_x += rg_private[iw][team_id * 4 + 1]; - grad_y += rg_private[iw][team_id * 4 + 2]; - grad_z += rg_private[iw][team_id * 4 + 3]; + for (size_t j = first_real; j < last_real; j++) + { + out_phi[j] = psi[j]; + out_dphi_x[j] = dpsi_x[j]; + out_dphi_y[j] = dpsi_y[j]; + out_dphi_z[j] = dpsi_z[j]; + out_d2phi[j] = d2psi[j]; + + ratio += psi[j] * invRow_iw_ptr[j]; + grad_x += dpsi_x[j] * invRow_iw_ptr[j]; + grad_y += dpsi_y[j] * invRow_iw_ptr[j]; + grad_z += dpsi_z[j] * invRow_iw_ptr[j]; } - grads[iw] = GradType{grad_x / ratio, grad_y / ratio, grad_z / ratio}; + + rg_private_ptr[(iw * NumTeams + team_id) * 4] = ratio; + rg_private_ptr[(iw * NumTeams + team_id) * 4 + 1] = grad_x; + rg_private_ptr[(iw * NumTeams + team_id) * 4 + 2] = grad_y; + rg_private_ptr[(iw * NumTeams + team_id) * 4 + 3] = grad_z; + } + } + + for (int iw = 0; iw < num_pos; iw++) + { + ValueType ratio(0); + for (int team_id = 0; team_id < NumTeams; team_id++) + ratio += rg_private[iw][team_id * 4]; + ratios[iw] = ratio; + + ValueType grad_x(0), grad_y(0), grad_z(0); + for (int team_id = 0; team_id < NumTeams; team_id++) + { + grad_x += rg_private[iw][team_id * 4 + 1]; + grad_y += rg_private[iw][team_id * 4 + 2]; + grad_z += rg_private[iw][team_id * 4 + 3]; } + grads[iw] = GradType{grad_x / ratio, grad_y / ratio, grad_z / 
ratio}; + } } -template -void -SplineC2ROMPTargetT::assign_vgh(const PointType& r, ValueVector& psi, - GradVector& dpsi, HessVector& grad_grad_psi, int first, int last) const +template +void SplineC2ROMPTargetT::assign_vgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + int first, + int last) const { - // protect last - last = last > this->kPoints.size() ? this->kPoints.size() : last; - - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), - g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), - g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart->data(0); - const ST* restrict k1 = myKcart->data(1); - const ST* restrict k2 = myKcart->data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); + // protect last + last = last > this->kPoints.size() ? 
this->kPoints.size() : last; + + const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), + g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), + g22 = PrimLattice.G(8); + const ST x = r[0], y = r[1], z = r[2]; + + const ST* restrict k0 = myKcart->data(0); + const ST* restrict k1 = myKcart->data(1); + const ST* restrict k2 = myKcart->data(2); + + const ST* restrict g0 = myG.data(0); + const ST* restrict g1 = myG.data(1); + const ST* restrict g2 = myG.data(2); + const ST* restrict h00 = myH.data(0); + const ST* restrict h01 = myH.data(1); + const ST* restrict h02 = myH.data(2); + const ST* restrict h11 = myH.data(3); + const ST* restrict h12 = myH.data(4); + const ST* restrict h22 = myH.data(5); #pragma omp simd - for (size_t j = first; j < std::min(nComplexBands, last); j++) { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - // phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - // dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = this->first_spo + jr; - - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = 
c * gZ_r - s * gZ_i; - - psi[psiIndex + 1] = c * val_i + s * val_r; - dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; - dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; - dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - - const ST h_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g00, g01, g02, g00, g01, g02) + - kX * (gX_i + dX_i); - const ST h_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g00, g01, g02, g10, g11, g12) + - kX * (gY_i + dY_i); - const ST h_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g00, g01, g02, g20, g21, g22) + - kX * (gZ_i + dZ_i); - const ST h_yx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g10, g11, g12, g00, g01, g02) + - kY * (gX_i + dX_i); - const ST h_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g10, g11, g12, g10, g11, g12) + - kY * (gY_i + dY_i); - const ST h_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g10, g11, g12, g20, g21, g22) + - kY * (gZ_i + dZ_i); - const ST h_zx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g20, g21, g22, g00, g01, g02) + - kZ * (gX_i + dX_i); - const ST h_zy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g20, g21, g22, g10, g11, g12) + - kZ * (gY_i + dY_i); - const ST h_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g20, g21, g22, g20, g21, g22) + - kZ * (gZ_i + dZ_i); - - const ST h_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g00, g01, g02, g00, g01, g02) - - kX * (gX_r + dX_r); - const ST h_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g00, g01, g02, g10, g11, g12) - - kX * (gY_r + dY_r); - const ST h_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g00, g01, g02, g20, g21, g22) - - kX * (gZ_r + dZ_r); - const ST h_yx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g10, g11, g12, g00, g01, g02) - - kY * (gX_r + dX_r); - const ST h_yy_i 
= v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g10, g11, g12, g10, g11, g12) - - kY * (gY_r + dY_r); - const ST h_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g10, g11, g12, g20, g21, g22) - - kY * (gZ_r + dZ_r); - const ST h_zx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g20, g21, g22, g00, g01, g02) - - kZ * (gX_r + dX_r); - const ST h_zy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g20, g21, g22, g10, g11, g12) - - kZ * (gY_r + dY_r); - const ST h_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g20, g21, g22, g20, g21, g22) - - kZ * (gZ_r + dZ_r); - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i; - grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - - grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r; - grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r; - grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r; - grad_grad_psi[psiIndex + 1][3] = c * h_yx_i + s * h_yx_r; - grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r; - grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r; - grad_grad_psi[psiIndex + 1][6] = c * h_zx_i + s * h_zx_r; - grad_grad_psi[psiIndex + 1][7] = c * h_zy_i + s * h_zy_r; - grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r; - } + for (size_t j = first; j < std::min(nComplexBands, last); j++) + { + int jr = j << 1; + int ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + omptarget::sincos(-(x * 
kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; + const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; + const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; + + const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; + const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; + const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + + const size_t psiIndex = this->first_spo + jr; + + psi[psiIndex] = c * val_r - s * val_i; + dpsi[psiIndex][0] = c * gX_r - s * gX_i; + dpsi[psiIndex][1] = c * gY_r - s * gY_i; + dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; + + psi[psiIndex + 1] = c * val_i + s * val_r; + dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; + dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; + dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; + + const ST h_xx_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i); + const ST h_xy_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i); + const ST h_xz_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i); + const ST h_yx_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i); + const ST h_yy_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i); + const ST h_yz_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i); + const ST h_zx_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], 
h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i); + const ST h_zy_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i); + const ST h_zz_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i); + + const ST h_xx_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r); + const ST h_xy_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r); + const ST h_xz_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r); + const ST h_yx_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r); + const ST h_yy_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r); + const ST h_yz_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r); + const ST h_zx_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r); + const ST h_zy_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r); + const ST h_zz_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r); + + grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; + grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; + grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; + grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i; + grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; + grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; + grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i; + grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i; + 
grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; + + grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r; + grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r; + grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r; + grad_grad_psi[psiIndex + 1][3] = c * h_yx_i + s * h_yx_r; + grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r; + grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r; + grad_grad_psi[psiIndex + 1][6] = c * h_zx_i + s * h_zx_r; + grad_grad_psi[psiIndex + 1][7] = c * h_zy_i + s * h_zy_r; + grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r; + } #pragma omp simd - for (size_t j = std::max(nComplexBands, first); j < last; j++) { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - // phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - // dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = this->first_spo + nComplexBands + j; - - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - const ST h_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g00, g01, g02, g00, g01, g02) + - kX * (gX_i + dX_i); - const ST h_xy_r = 
v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g00, g01, g02, g10, g11, g12) + - kX * (gY_i + dY_i); - const ST h_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g00, g01, g02, g20, g21, g22) + - kX * (gZ_i + dZ_i); - const ST h_yx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g10, g11, g12, g00, g01, g02) + - kY * (gX_i + dX_i); - const ST h_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g10, g11, g12, g10, g11, g12) + - kY * (gY_i + dY_i); - const ST h_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g10, g11, g12, g20, g21, g22) + - kY * (gZ_i + dZ_i); - const ST h_zx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g20, g21, g22, g00, g01, g02) + - kZ * (gX_i + dX_i); - const ST h_zy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g20, g21, g22, g10, g11, g12) + - kZ * (gY_i + dY_i); - const ST h_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g20, g21, g22, g20, g21, g22) + - kZ * (gZ_i + dZ_i); - - const ST h_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g00, g01, g02, g00, g01, g02) - - kX * (gX_r + dX_r); - const ST h_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g00, g01, g02, g10, g11, g12) - - kX * (gY_r + dY_r); - const ST h_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g00, g01, g02, g20, g21, g22) - - kX * (gZ_r + dZ_r); - const ST h_yx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g10, g11, g12, g00, g01, g02) - - kY * (gX_r + dX_r); - const ST h_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g10, g11, g12, g10, g11, g12) - - kY * (gY_r + dY_r); - const ST h_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g10, g11, g12, g20, g21, g22) - - kY * (gZ_r + dZ_r); - const ST h_zx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g20, g21, g22, g00, g01, g02) - 
- kZ * (gX_r + dX_r); - const ST h_zy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g20, g21, g22, g10, g11, g12) - - kZ * (gY_r + dY_r); - const ST h_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g20, g21, g22, g20, g21, g22) - - kZ * (gZ_r + dZ_r); - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i; - grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - } + for (size_t j = std::max(nComplexBands, first); j < last; j++) + { + int jr = j << 1; + int ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; + const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; + const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; + + const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; + const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; + const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + + const size_t psiIndex = this->first_spo + nComplexBands + j; + + psi[psiIndex] = c * val_r - s * val_i; + dpsi[psiIndex][0] = c * gX_r - s * gX_i; + dpsi[psiIndex][1] = c * gY_r - s * gY_i; + 
dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; + + const ST h_xx_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i); + const ST h_xy_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i); + const ST h_xz_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i); + const ST h_yx_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i); + const ST h_yy_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i); + const ST h_yz_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i); + const ST h_zx_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i); + const ST h_zy_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i); + const ST h_zz_r = + v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i); + + const ST h_xx_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r); + const ST h_xy_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r); + const ST h_xz_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r); + const ST h_yx_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r); + const ST h_yy_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r); + const ST h_yz_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], 
h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r); + const ST h_zx_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r); + const ST h_zy_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r); + const ST h_zz_i = + v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r); + + grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; + grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; + grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; + grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i; + grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; + grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; + grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i; + grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i; + grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; + } } -template -void -SplineC2ROMPTargetT::evaluateVGH(const ParticleSetT& P, - const int iat, ValueVector& psi, GradVector& dpsi, - HessVector& grad_grad_psi) +template +void SplineC2ROMPTargetT::evaluateVGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel - { - int first, last; - FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), - omp_get_thread_num(), first, last); + { + int first, last; + FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - spline2::evaluate3d_vgh( - SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2); - } + spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); + 
assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2); + } } -template -void -SplineC2ROMPTargetT::assign_vghgh(const PointType& r, ValueVector& psi, - GradVector& dpsi, HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, - int first, int last) const +template +void SplineC2ROMPTargetT::assign_vghgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi, + int first, + int last) const { - // protect last - last = last < 0 ? this->kPoints.size() : - (last > this->kPoints.size() ? this->kPoints.size() : last); - - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), - g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), - g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart->data(0); - const ST* restrict k1 = myKcart->data(1); - const ST* restrict k2 = myKcart->data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - - const ST* restrict gh000 = mygH.data(0); - const ST* restrict gh001 = mygH.data(1); - const ST* restrict gh002 = mygH.data(2); - const ST* restrict gh011 = mygH.data(3); - const ST* restrict gh012 = mygH.data(4); - const ST* restrict gh022 = mygH.data(5); - const ST* restrict gh111 = mygH.data(6); - const ST* restrict gh112 = mygH.data(7); - const ST* restrict gh122 = mygH.data(8); - const ST* restrict gh222 = mygH.data(9); + // protect last + last = last < 0 ? this->kPoints.size() : (last > this->kPoints.size() ? 
this->kPoints.size() : last); + + const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), + g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), + g22 = PrimLattice.G(8); + const ST x = r[0], y = r[1], z = r[2]; + + const ST* restrict k0 = myKcart->data(0); + const ST* restrict k1 = myKcart->data(1); + const ST* restrict k2 = myKcart->data(2); + + const ST* restrict g0 = myG.data(0); + const ST* restrict g1 = myG.data(1); + const ST* restrict g2 = myG.data(2); + const ST* restrict h00 = myH.data(0); + const ST* restrict h01 = myH.data(1); + const ST* restrict h02 = myH.data(2); + const ST* restrict h11 = myH.data(3); + const ST* restrict h12 = myH.data(4); + const ST* restrict h22 = myH.data(5); + + const ST* restrict gh000 = mygH.data(0); + const ST* restrict gh001 = mygH.data(1); + const ST* restrict gh002 = mygH.data(2); + const ST* restrict gh011 = mygH.data(3); + const ST* restrict gh012 = mygH.data(4); + const ST* restrict gh022 = mygH.data(5); + const ST* restrict gh111 = mygH.data(6); + const ST* restrict gh112 = mygH.data(7); + const ST* restrict gh122 = mygH.data(8); + const ST* restrict gh222 = mygH.data(9); // SIMD doesn't work quite right yet. Comment out until further debugging. 
#pragma omp simd - for (size_t j = first; j < std::min(nComplexBands, last); j++) { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - // phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - // dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = this->first_spo + jr; - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - psi[psiIndex + 1] = c * val_i + s * val_r; - dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; - dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; - dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - - // intermediates for computation of hessian. \partial_i \partial_j phi - // in cartesian coordinates. 
- const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g00, g01, g02, g00, g01, g02); - const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g00, g01, g02, g10, g11, g12); - const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g00, g01, g02, g20, g21, g22); - const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g10, g11, g12, g10, g11, g12); - const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g10, g11, g12, g20, g21, g22); - const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g20, g21, g22, g20, g21, g22); - - const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g00, g01, g02, g00, g01, g02); - const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g00, g01, g02, g10, g11, g12); - const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g00, g01, g02, g20, g21, g22); - const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g10, g11, g12, g10, g11, g12); - const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g10, g11, g12, g20, g21, g22); - const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g20, g21, g22, g20, g21, g22); - - const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; - const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; - const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; - const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; - const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; - const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; - - const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; - const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; - const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; - const ST h_yy_i = f_yy_i - 
2 * kY * dY_r - kY * kY * val_i; - const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; - const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - - grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r; - grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r; - grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r; - grad_grad_psi[psiIndex + 1][3] = c * h_xy_i + s * h_xy_r; - grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r; - grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r; - grad_grad_psi[psiIndex + 1][6] = c * h_xz_i + s * h_xz_r; - grad_grad_psi[psiIndex + 1][7] = c * h_yz_i + s * h_yz_r; - grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r; - - // These are the real and imaginary components of the third SPO - // derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, - // and z, and so on. 
- - const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], 
gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - // Here is where we build up the components of the physical hessian - // gradient, namely, 
d^3/dx^3(e^{-ik*r}\phi(r) - const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - - kX * kX * kX * val_i; - const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + - kX * kX * kX * val_r; - const ST gh_xxy_r = f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; - const ST gh_xxy_i = f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; - const ST gh_xxz_r = f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; - const ST gh_xxz_i = f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; - const ST gh_xyy_r = f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; - const ST gh_xyy_i = f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; - const ST gh_xyz_r = f3_xyz_r + - (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - - (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - - kX * kY * kZ * val_i; - const ST gh_xyz_i = f3_xyz_i - - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - - (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + - kX * kY * kZ * val_r; - const ST gh_xzz_r = f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; - const ST gh_xzz_i = f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; - const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - - kY * kY * kY * val_i; - const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + - kY * kY * kY * val_r; - const ST gh_yyz_r = f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; - const ST gh_yyz_i = f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - - (kY * kY * dZ_i + 2 * kY 
* kZ * dY_i) + kY * kY * kZ * val_r; - const ST gh_yzz_r = f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; - const ST gh_yzz_i = f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; - const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - - kZ * kZ * kZ * val_i; - const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + - kZ * kZ * kZ * val_r; - - grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i; - grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i; - - grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i; - grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i; - - grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i; - 
grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i; - - grad_grad_grad_psi[psiIndex + 1][0][0] = c * gh_xxx_i + s * gh_xxx_r; - grad_grad_grad_psi[psiIndex + 1][0][1] = c * gh_xxy_i + s * gh_xxy_r; - grad_grad_grad_psi[psiIndex + 1][0][2] = c * gh_xxz_i + s * gh_xxz_r; - grad_grad_grad_psi[psiIndex + 1][0][3] = c * gh_xxy_i + s * gh_xxy_r; - grad_grad_grad_psi[psiIndex + 1][0][4] = c * gh_xyy_i + s * gh_xyy_r; - grad_grad_grad_psi[psiIndex + 1][0][5] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][0][6] = c * gh_xxz_i + s * gh_xxz_r; - grad_grad_grad_psi[psiIndex + 1][0][7] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][0][8] = c * gh_xzz_i + s * gh_xzz_r; - - grad_grad_grad_psi[psiIndex + 1][1][0] = c * gh_xxy_i + s * gh_xxy_r; - grad_grad_grad_psi[psiIndex + 1][1][1] = c * gh_xyy_i + s * gh_xyy_r; - grad_grad_grad_psi[psiIndex + 1][1][2] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][1][3] = c * gh_xyy_i + s * gh_xyy_r; - grad_grad_grad_psi[psiIndex + 1][1][4] = c * gh_yyy_i + s * gh_yyy_r; - grad_grad_grad_psi[psiIndex + 1][1][5] = c * gh_yyz_i + s * gh_yyz_r; - grad_grad_grad_psi[psiIndex + 1][1][6] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][1][7] = c * gh_yyz_i + s * gh_yyz_r; - grad_grad_grad_psi[psiIndex + 1][1][8] = c * gh_yzz_i + s * gh_yzz_r; - - grad_grad_grad_psi[psiIndex + 1][2][0] = c * gh_xxz_i + s * gh_xxz_r; - grad_grad_grad_psi[psiIndex + 1][2][1] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][2][2] = c * gh_xzz_i + s * gh_xzz_r; - grad_grad_grad_psi[psiIndex + 1][2][3] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][2][4] = c * gh_yyz_i + s * gh_yyz_r; 
- grad_grad_grad_psi[psiIndex + 1][2][5] = c * gh_yzz_i + s * gh_yzz_r; - grad_grad_grad_psi[psiIndex + 1][2][6] = c * gh_xzz_i + s * gh_xzz_r; - grad_grad_grad_psi[psiIndex + 1][2][7] = c * gh_yzz_i + s * gh_yzz_r; - grad_grad_grad_psi[psiIndex + 1][2][8] = c * gh_zzz_i + s * gh_zzz_r; - } + for (size_t j = first; j < std::min(nComplexBands, last); j++) + { + int jr = j << 1; + int ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; + const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; + const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; + + const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; + const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; + const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + + const size_t psiIndex = this->first_spo + jr; + psi[psiIndex] = c * val_r - s * val_i; + dpsi[psiIndex][0] = c * gX_r - s * gX_i; + dpsi[psiIndex][1] = c * gY_r - s * gY_i; + dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; + + psi[psiIndex + 1] = c * val_i + s * val_r; + dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; + dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; + dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; + + // intermediates for computation of hessian. \partial_i \partial_j phi + // in cartesian coordinates. 
+ const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); + const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); + const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); + const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12); + const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22); + const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22); + + const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02); + const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12); + const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22); + const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12); + const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22); + const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22); + + const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; + const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; + const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; + const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; + const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; + const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; + + const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; + const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; + const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; + const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY 
* val_i; + const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; + const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; + + grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; + grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; + grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; + grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i; + grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; + grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; + grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i; + grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i; + grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; + + grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r; + grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r; + grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r; + grad_grad_psi[psiIndex + 1][3] = c * h_xy_i + s * h_xy_r; + grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r; + grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r; + grad_grad_psi[psiIndex + 1][6] = c * h_xz_i + s * h_xz_r; + grad_grad_psi[psiIndex + 1][7] = c * h_yz_i + s * h_yz_r; + grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r; + + // These are the real and imaginary components of the third SPO + // derivative. _xxx denotes + // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, + // and z, and so on. 
+ + const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); + const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); + const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); + const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); + const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); + const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); + const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); + const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); + const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); + const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); + + const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], 
gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); + const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); + const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); + const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); + const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); + const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); + const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); + const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); + const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); + const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); + + // Here is where we build up the components of the physical hessian + // gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) + const ST gh_xxx_r = 
f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; + const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; + const ST gh_xxy_r = + f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; + const ST gh_xxy_i = + f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; + const ST gh_xxz_r = + f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; + const ST gh_xxz_i = + f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; + const ST gh_xyy_r = + f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; + const ST gh_xyy_i = + f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; + const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - + (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i; + const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - + (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r; + const ST gh_xzz_r = + f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; + const ST gh_xzz_i = + f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; + const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i; + const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r; + const ST gh_yyz_r = + f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; + const ST gh_yyz_i = + f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r; + const ST gh_yzz_r = + 
f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; + const ST gh_yzz_i = + f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; + const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i; + const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r; + + grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i; + grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i; + grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i; + grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i; + grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i; + grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i; + grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i; + grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i; + grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i; + + grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i; + grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i; + grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i; + grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i; + grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i; + grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i; + grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i; + grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i; + grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i; + + grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i; + grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i; + grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i; + grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i; + grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i; + 
grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i; + grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i; + grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i; + grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i; + + grad_grad_grad_psi[psiIndex + 1][0][0] = c * gh_xxx_i + s * gh_xxx_r; + grad_grad_grad_psi[psiIndex + 1][0][1] = c * gh_xxy_i + s * gh_xxy_r; + grad_grad_grad_psi[psiIndex + 1][0][2] = c * gh_xxz_i + s * gh_xxz_r; + grad_grad_grad_psi[psiIndex + 1][0][3] = c * gh_xxy_i + s * gh_xxy_r; + grad_grad_grad_psi[psiIndex + 1][0][4] = c * gh_xyy_i + s * gh_xyy_r; + grad_grad_grad_psi[psiIndex + 1][0][5] = c * gh_xyz_i + s * gh_xyz_r; + grad_grad_grad_psi[psiIndex + 1][0][6] = c * gh_xxz_i + s * gh_xxz_r; + grad_grad_grad_psi[psiIndex + 1][0][7] = c * gh_xyz_i + s * gh_xyz_r; + grad_grad_grad_psi[psiIndex + 1][0][8] = c * gh_xzz_i + s * gh_xzz_r; + + grad_grad_grad_psi[psiIndex + 1][1][0] = c * gh_xxy_i + s * gh_xxy_r; + grad_grad_grad_psi[psiIndex + 1][1][1] = c * gh_xyy_i + s * gh_xyy_r; + grad_grad_grad_psi[psiIndex + 1][1][2] = c * gh_xyz_i + s * gh_xyz_r; + grad_grad_grad_psi[psiIndex + 1][1][3] = c * gh_xyy_i + s * gh_xyy_r; + grad_grad_grad_psi[psiIndex + 1][1][4] = c * gh_yyy_i + s * gh_yyy_r; + grad_grad_grad_psi[psiIndex + 1][1][5] = c * gh_yyz_i + s * gh_yyz_r; + grad_grad_grad_psi[psiIndex + 1][1][6] = c * gh_xyz_i + s * gh_xyz_r; + grad_grad_grad_psi[psiIndex + 1][1][7] = c * gh_yyz_i + s * gh_yyz_r; + grad_grad_grad_psi[psiIndex + 1][1][8] = c * gh_yzz_i + s * gh_yzz_r; + + grad_grad_grad_psi[psiIndex + 1][2][0] = c * gh_xxz_i + s * gh_xxz_r; + grad_grad_grad_psi[psiIndex + 1][2][1] = c * gh_xyz_i + s * gh_xyz_r; + grad_grad_grad_psi[psiIndex + 1][2][2] = c * gh_xzz_i + s * gh_xzz_r; + grad_grad_grad_psi[psiIndex + 1][2][3] = c * gh_xyz_i + s * gh_xyz_r; + grad_grad_grad_psi[psiIndex + 1][2][4] = c * gh_yyz_i + s * gh_yyz_r; + grad_grad_grad_psi[psiIndex + 1][2][5] = c * gh_yzz_i + s * 
gh_yzz_r; + grad_grad_grad_psi[psiIndex + 1][2][6] = c * gh_xzz_i + s * gh_xzz_r; + grad_grad_grad_psi[psiIndex + 1][2][7] = c * gh_yzz_i + s * gh_yzz_r; + grad_grad_grad_psi[psiIndex + 1][2][8] = c * gh_zzz_i + s * gh_zzz_r; + } #pragma omp simd - for (size_t j = std::max(nComplexBands, first); j < last; j++) { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - // phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - // dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = this->first_spo + nComplexBands + j; - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - // intermediates for computation of hessian. \partial_i \partial_j phi - // in cartesian coordinates. 
- const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g00, g01, g02, g00, g01, g02); - const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g00, g01, g02, g10, g11, g12); - const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g00, g01, g02, g20, g21, g22); - const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g10, g11, g12, g10, g11, g12); - const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g10, g11, g12, g20, g21, g22); - const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], - h22[jr], g20, g21, g22, g20, g21, g22); - - const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g00, g01, g02, g00, g01, g02); - const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g00, g01, g02, g10, g11, g12); - const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g00, g01, g02, g20, g21, g22); - const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g10, g11, g12, g10, g11, g12); - const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g10, g11, g12, g20, g21, g22); - const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], - h22[ji], g20, g21, g22, g20, g21, g22); - - const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; - const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; - const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; - const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; - const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; - const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; - - const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; - const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; - const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; - const ST h_yy_i = f_yy_i - 
2 * kY * dY_r - kY * kY * val_i; - const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; - const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - - // These are the real and imaginary components of the third SPO - // derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, - // and z, and so on. - - const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g00, g01, 
g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], - gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], - gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_i = 
t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], - gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], - gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - // Here is where we build up the components of the physical hessian - // gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) - const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - - kX * kX * kX * val_i; - const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + - kX * kX * kX * val_r; - const ST gh_xxy_r = f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; - const ST gh_xxy_i = f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; - const ST gh_xxz_r = f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; - const ST gh_xxz_i = f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; - const ST gh_xyy_r = f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; - const ST gh_xyy_i = f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; - const ST gh_xyz_r = f3_xyz_r + - (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - - (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - - kX * kY * 
kZ * val_i; - const ST gh_xyz_i = f3_xyz_i - - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - - (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + - kX * kY * kZ * val_r; - const ST gh_xzz_r = f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; - const ST gh_xzz_i = f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; - const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - - kY * kY * kY * val_i; - const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + - kY * kY * kY * val_r; - const ST gh_yyz_r = f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; - const ST gh_yyz_i = f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r; - const ST gh_yzz_r = f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; - const ST gh_yzz_i = f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; - const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - - kZ * kZ * kZ * val_i; - const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + - kZ * kZ * kZ * val_r; - //[x][xx] //These are the unique entries - grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i; - grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i; - - 
grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i; - grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i; - - grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i; - } + for (size_t j = std::max(nComplexBands, first); j < last; j++) + { + int jr = j << 1; + int ji = jr + 1; + + const ST kX = k0[j]; + const ST kY = k1[j]; + const ST kZ = k2[j]; + const ST val_r = myV[jr]; + const ST val_i = myV[ji]; + + // phase + ST s, c; + omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); + + // dot(PrimLattice.G,myG[j]) + const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; + const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; + const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; + + const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; + const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; + const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; + + // \f$\nabla \psi_r + {\bf k}\psi_i\f$ + 
const ST gX_r = dX_r + val_i * kX; + const ST gY_r = dY_r + val_i * kY; + const ST gZ_r = dZ_r + val_i * kZ; + const ST gX_i = dX_i - val_r * kX; + const ST gY_i = dY_i - val_r * kY; + const ST gZ_i = dZ_i - val_r * kZ; + + const size_t psiIndex = this->first_spo + nComplexBands + j; + psi[psiIndex] = c * val_r - s * val_i; + dpsi[psiIndex][0] = c * gX_r - s * gX_i; + dpsi[psiIndex][1] = c * gY_r - s * gY_i; + dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; + + // intermediates for computation of hessian. \partial_i \partial_j phi + // in cartesian coordinates. + const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); + const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); + const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); + const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12); + const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22); + const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22); + + const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02); + const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12); + const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22); + const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12); + const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22); + const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22); + + const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; + const ST h_xy_r = f_xy_r + (kX * dY_i 
+ kY * dX_i) - kX * kY * val_r; + const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; + const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; + const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; + const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; + + const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; + const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; + const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; + const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i; + const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; + const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; + + grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; + grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; + grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; + grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i; + grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; + grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; + grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i; + grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i; + grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; + + // These are the real and imaginary components of the third SPO + // derivative. _xxx denotes + // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, + // and z, and so on. 
+ + const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); + const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); + const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); + const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); + const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); + const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); + const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); + const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); + const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); + const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], + gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); + + const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], 
gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); + const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); + const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); + const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); + const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); + const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); + const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); + const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); + const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); + const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], + gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); + + // Here is where we build up the components of the physical hessian + // gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) + const ST gh_xxx_r = 
f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; + const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; + const ST gh_xxy_r = + f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; + const ST gh_xxy_i = + f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; + const ST gh_xxz_r = + f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; + const ST gh_xxz_i = + f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; + const ST gh_xyy_r = + f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; + const ST gh_xyy_i = + f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; + const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - + (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i; + const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - + (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r; + const ST gh_xzz_r = + f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; + const ST gh_xzz_i = + f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; + const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i; + const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r; + const ST gh_yyz_r = + f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; + const ST gh_yyz_i = + f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r; + const ST gh_yzz_r = + 
f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; + const ST gh_yzz_i = + f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; + const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i; + const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r; + //[x][xx] //These are the unique entries + grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i; + grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i; + grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i; + grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i; + grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i; + grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i; + grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i; + grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i; + grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i; + + grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i; + grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i; + grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i; + grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i; + grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i; + grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i; + grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i; + grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i; + grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i; + + grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i; + grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i; + grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i; + grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i; + 
grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i; + grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i; + grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i; + grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i; + grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i; + } } -template -void -SplineC2ROMPTargetT::evaluateVGHGH(const ParticleSetT& P, - const int iat, ValueVector& psi, GradVector& dpsi, - HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi) +template +void SplineC2ROMPTargetT::evaluateVGHGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel - { - int first, last; - FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), - omp_get_thread_num(), first, last); - - spline2::evaluate3d_vghgh( - SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); - assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, - last / 2); - } + { + int first, last; + FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); + + spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); + assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, last / 2); + } } -template -void -SplineC2ROMPTargetT::evaluate_notranspose(const ParticleSetT& P, - int first, int last, ValueMatrix& logdet, GradMatrix& dlogdet, - ValueMatrix& d2logdet) +template +void SplineC2ROMPTargetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) { - // chunk the [first, last) loop into blocks to save temporary memory usage - const int 
block_size = 16; - - // reference vectors refer to the rows of matrices - std::vector multi_psi_v; - std::vector multi_dpsi_v; - std::vector multi_d2psi_v; - RefVector psi_v_list; - RefVector dpsi_v_list; - RefVector d2psi_v_list; - - multi_psi_v.reserve(block_size); - multi_dpsi_v.reserve(block_size); - multi_d2psi_v.reserve(block_size); - psi_v_list.reserve(block_size); - dpsi_v_list.reserve(block_size); - d2psi_v_list.reserve(block_size); - - for (int iat = first, i = 0; iat < last; - iat += block_size, i += block_size) { - const int actual_block_size = std::min(last - iat, block_size); - multi_pos_copy.resize(actual_block_size * 6); - multi_psi_v.clear(); - multi_dpsi_v.clear(); - multi_d2psi_v.clear(); - psi_v_list.clear(); - dpsi_v_list.clear(); - d2psi_v_list.clear(); - - for (int ipos = 0; ipos < actual_block_size; ++ipos) { - // pack particle positions - const PointType& r = P.activeR(iat + ipos); - PointType ru(PrimLattice.toUnit_floor(r)); - multi_pos_copy[ipos * 6] = r[0]; - multi_pos_copy[ipos * 6 + 1] = r[1]; - multi_pos_copy[ipos * 6 + 2] = r[2]; - multi_pos_copy[ipos * 6 + 3] = ru[0]; - multi_pos_copy[ipos * 6 + 4] = ru[1]; - multi_pos_copy[ipos * 6 + 5] = ru[2]; - - multi_psi_v.emplace_back(logdet[i + ipos], this->OrbitalSetSize); - multi_dpsi_v.emplace_back(dlogdet[i + ipos], this->OrbitalSetSize); - multi_d2psi_v.emplace_back(d2logdet[i + ipos], this->OrbitalSetSize); - - psi_v_list.push_back(multi_psi_v[ipos]); - dpsi_v_list.push_back(multi_dpsi_v[ipos]); - d2psi_v_list.push_back(multi_d2psi_v[ipos]); - } - - evaluateVGLMultiPos(multi_pos_copy, offload_scratch, results_scratch, - psi_v_list, dpsi_v_list, d2psi_v_list); + // chunk the [first, last) loop into blocks to save temporary memory usage + const int block_size = 16; + + // reference vectors refer to the rows of matrices + std::vector multi_psi_v; + std::vector multi_dpsi_v; + std::vector multi_d2psi_v; + RefVector psi_v_list; + RefVector dpsi_v_list; + RefVector d2psi_v_list; + + 
multi_psi_v.reserve(block_size); + multi_dpsi_v.reserve(block_size); + multi_d2psi_v.reserve(block_size); + psi_v_list.reserve(block_size); + dpsi_v_list.reserve(block_size); + d2psi_v_list.reserve(block_size); + + for (int iat = first, i = 0; iat < last; iat += block_size, i += block_size) + { + const int actual_block_size = std::min(last - iat, block_size); + multi_pos_copy.resize(actual_block_size * 6); + multi_psi_v.clear(); + multi_dpsi_v.clear(); + multi_d2psi_v.clear(); + psi_v_list.clear(); + dpsi_v_list.clear(); + d2psi_v_list.clear(); + + for (int ipos = 0; ipos < actual_block_size; ++ipos) + { + // pack particle positions + const PointType& r = P.activeR(iat + ipos); + PointType ru(PrimLattice.toUnit_floor(r)); + multi_pos_copy[ipos * 6] = r[0]; + multi_pos_copy[ipos * 6 + 1] = r[1]; + multi_pos_copy[ipos * 6 + 2] = r[2]; + multi_pos_copy[ipos * 6 + 3] = ru[0]; + multi_pos_copy[ipos * 6 + 4] = ru[1]; + multi_pos_copy[ipos * 6 + 5] = ru[2]; + + multi_psi_v.emplace_back(logdet[i + ipos], this->OrbitalSetSize); + multi_dpsi_v.emplace_back(dlogdet[i + ipos], this->OrbitalSetSize); + multi_d2psi_v.emplace_back(d2logdet[i + ipos], this->OrbitalSetSize); + + psi_v_list.push_back(multi_psi_v[ipos]); + dpsi_v_list.push_back(multi_dpsi_v[ipos]); + d2psi_v_list.push_back(multi_d2psi_v[ipos]); } + + evaluateVGLMultiPos(multi_pos_copy, offload_scratch, results_scratch, psi_v_list, dpsi_v_list, d2psi_v_list); + } } template class SplineC2ROMPTargetT; diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.h index 0d3aef1f2d0..bf785a03926 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.h @@ -42,339 +42,292 @@ namespace qmcplusplus * orbital. All the output orbitals are real (C2R). The maximal number of output * orbitals is OrbitalSetSize. 
*/ -template +template class SplineC2ROMPTargetT : public BsplineSetT { public: - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = ST; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - // types for evaluation results - using TT = typename BsplineSetT::ValueType; - using typename BsplineSetT::ValueType; - using typename BsplineSetT::GradType; - using typename BsplineSetT::GGGVector; - using typename BsplineSetT::GradVector; - using typename BsplineSetT::GradMatrix; - using typename BsplineSetT::HessVector; - using typename BsplineSetT::ValueVector; - using typename BsplineSetT::ValueMatrix; - using typename BsplineSetT::OffloadMWVGLArray; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - using ghContainer_type = VectorSoaContainer; - - template - using OffloadVector = Vector>; - template - using OffloadPosVector = VectorSoaContainer>; + using SplineType = typename bspline_traits::SplineType; + using BCType = typename bspline_traits::BCType; + using DataType = ST; + using PointType = TinyVector; + using SingleSplineType = UBspline_3d_d; + // types for evaluation results + using TT = typename BsplineSetT::ValueType; + using typename BsplineSetT::ValueType; + using typename BsplineSetT::GradType; + using typename BsplineSetT::GGGVector; + using typename BsplineSetT::GradVector; + using typename BsplineSetT::GradMatrix; + using typename BsplineSetT::HessVector; + using typename BsplineSetT::ValueVector; + using typename BsplineSetT::ValueMatrix; + using typename BsplineSetT::OffloadMWVGLArray; + + using vContainer_type = Vector>; + using gContainer_type = VectorSoaContainer; + using hContainer_type = VectorSoaContainer; + using ghContainer_type = VectorSoaContainer; + + template + using OffloadVector = Vector>; + template + using OffloadPosVector = VectorSoaContainer>; private: - /// timer 
for offload portion - NewTimer& offload_timer_; - /// primitive cell - CrystalLattice PrimLattice; - ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to - /// CartesianUnit, e.g. Hessian - Tensor GGt; - /// number of complex bands - int nComplexBands; - /// multi bspline set - std::shared_ptr< - MultiBspline, OffloadAllocator>> - SplineInst; - - std::shared_ptr> mKK; - std::shared_ptr> myKcart; - std::shared_ptr> GGt_offload; - std::shared_ptr> PrimLattice_G_offload; - - ResourceHandle> mw_mem_handle_; - - /// team private ratios for reduction, numVP x numTeams - Matrix> ratios_private; - /// offload scratch space, dynamically resized to the maximal need - Vector> offload_scratch; - /// result scratch space, dynamically resized to the maximal need - Vector> results_scratch; - /// psiinv and position scratch space, used to avoid allocation on the fly - /// and faster transfer - Vector> psiinv_pos_copy; - /// position scratch space, used to avoid allocation on the fly and faster - /// transfer - Vector> multi_pos_copy; - - void - evaluateVGLMultiPos( - const Vector>& multi_pos_copy, - Vector>& offload_scratch, - Vector>& results_scratch, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const; + /// timer for offload portion + NewTimer& offload_timer_; + /// primitive cell + CrystalLattice PrimLattice; + ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to + /// CartesianUnit, e.g. 
Hessian + Tensor GGt; + /// number of complex bands + int nComplexBands; + /// multi bspline set + std::shared_ptr, OffloadAllocator>> SplineInst; + + std::shared_ptr> mKK; + std::shared_ptr> myKcart; + std::shared_ptr> GGt_offload; + std::shared_ptr> PrimLattice_G_offload; + + ResourceHandle> mw_mem_handle_; + + /// team private ratios for reduction, numVP x numTeams + Matrix> ratios_private; + /// offload scratch space, dynamically resized to the maximal need + Vector> offload_scratch; + /// result scratch space, dynamically resized to the maximal need + Vector> results_scratch; + /// psiinv and position scratch space, used to avoid allocation on the fly + /// and faster transfer + Vector> psiinv_pos_copy; + /// position scratch space, used to avoid allocation on the fly and faster + /// transfer + Vector> multi_pos_copy; + + void evaluateVGLMultiPos(const Vector>& multi_pos_copy, + Vector>& offload_scratch, + Vector>& results_scratch, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const; protected: - /// intermediate result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; + /// intermediate result vectors + vContainer_type myV; + vContainer_type myL; + gContainer_type myG; + hContainer_type myH; + ghContainer_type mygH; public: - SplineC2ROMPTargetT(const std::string& my_name) : - BsplineSetT(my_name), - offload_timer_( - createGlobalTimer("SplineC2ROMPTarget::offload", timer_level_fine)), + SplineC2ROMPTargetT(const std::string& my_name) + : BsplineSetT(my_name), + offload_timer_(createGlobalTimer("SplineC2ROMPTarget::offload", timer_level_fine)), nComplexBands(0), GGt_offload(std::make_shared>(9)), PrimLattice_G_offload(std::make_shared>(9)) - { - } - - SplineC2ROMPTargetT(const SplineC2ROMPTargetT& in); - - virtual std::string - getClassName() const override - { - return "SplineC2ROMPTarget"; - } - virtual std::string - getKeyword() 
const override - { - return "SplineC2R"; - } - bool - isComplex() const override - { - return true; - }; - virtual bool - isOMPoffload() const override - { - return true; - } - - void - createResource(ResourceCollection& collection) const override - { - auto resource_index = collection.addResource( - std::make_unique>()); - } - - void - acquireResource(ResourceCollection& collection, - const RefVectorWithLeader>& spo_list) const override - { - assert(this == &spo_list.getLeader()); - auto& phi_leader = - spo_list.template getCastedLeader(); - phi_leader.mw_mem_handle_ = - collection.lendResource>(); - } - - void - releaseResource(ResourceCollection& collection, - const RefVectorWithLeader>& spo_list) const override - { - assert(this == &spo_list.getLeader()); - auto& phi_leader = - spo_list.template getCastedLeader(); - collection.takebackResource(phi_leader.mw_mem_handle_); - } - - std::unique_ptr> - makeClone() const override - { - return std::make_unique(*this); - } - - inline void - resizeStorage(size_t n, size_t nvals) - { - this->init_base(n); - size_t npad = getAlignedSize(2 * n); - myV.resize(npad); - myG.resize(npad); - myL.resize(npad); - myH.resize(npad); - mygH.resize(npad); - } - - void - bcast_tables(Communicate* comm) - { - chunked_bcast(comm, SplineInst->getSplinePtr()); - } - - void - gather_tables(Communicate* comm) - { - if (comm->size() == 1) - return; - const int Nbands = this->kPoints.size(); - const int Nbandgroups = comm->size(); - this->offset.resize(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, this->offset); - - for (size_t ib = 0; ib < this->offset.size(); ib++) - this->offset[ib] = this->offset[ib] * 2; - gatherv(comm, SplineInst->getSplinePtr(), - SplineInst->getSplinePtr()->z_stride, this->offset); - } - - template - void - create_spline(GT& xyz_g, BCT& xyz_bc) - { - resize_kpoints(); - SplineInst = std::make_shared, - OffloadAllocator>>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - - app_log() << "MEMORY " << 
SplineInst->sizeInByte() / (1 << 20) - << " MB allocated " - << "for the coefficients in 3D spline orbital representation" - << std::endl; - } - - /// this routine can not be called from threaded region - void - finalizeConstruction() override - { - // map the SplineInst->getSplinePtr() structure to GPU - auto* MultiSpline = SplineInst->getSplinePtr(); - auto* restrict coefs = MultiSpline->coefs; - // attach pointers on the device to achieve deep copy - PRAGMA_OFFLOAD("omp target \ + {} + + SplineC2ROMPTargetT(const SplineC2ROMPTargetT& in); + + virtual std::string getClassName() const override { return "SplineC2ROMPTarget"; } + virtual std::string getKeyword() const override { return "SplineC2R"; } + bool isComplex() const override { return true; }; + virtual bool isOMPoffload() const override { return true; } + + void createResource(ResourceCollection& collection) const override + { + auto resource_index = collection.addResource(std::make_unique>()); + } + + void acquireResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const override + { + assert(this == &spo_list.getLeader()); + auto& phi_leader = spo_list.template getCastedLeader(); + phi_leader.mw_mem_handle_ = collection.lendResource>(); + } + + void releaseResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const override + { + assert(this == &spo_list.getLeader()); + auto& phi_leader = spo_list.template getCastedLeader(); + collection.takebackResource(phi_leader.mw_mem_handle_); + } + + std::unique_ptr> makeClone() const override { return std::make_unique(*this); } + + inline void resizeStorage(size_t n, size_t nvals) + { + this->init_base(n); + size_t npad = getAlignedSize(2 * n); + myV.resize(npad); + myG.resize(npad); + myL.resize(npad); + myH.resize(npad); + mygH.resize(npad); + } + + void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } + + void gather_tables(Communicate* comm) + { + if (comm->size() == 1) + 
return; + const int Nbands = this->kPoints.size(); + const int Nbandgroups = comm->size(); + this->offset.resize(Nbandgroups + 1, 0); + FairDivideLow(Nbands, Nbandgroups, this->offset); + + for (size_t ib = 0; ib < this->offset.size(); ib++) + this->offset[ib] = this->offset[ib] * 2; + gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, this->offset); + } + + template + void create_spline(GT& xyz_g, BCT& xyz_bc) + { + resize_kpoints(); + SplineInst = std::make_shared, OffloadAllocator>>(); + SplineInst->create(xyz_g, xyz_bc, myV.size()); + + app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " + << "for the coefficients in 3D spline orbital representation" << std::endl; + } + + /// this routine can not be called from threaded region + void finalizeConstruction() override + { + // map the SplineInst->getSplinePtr() structure to GPU + auto* MultiSpline = SplineInst->getSplinePtr(); + auto* restrict coefs = MultiSpline->coefs; + // attach pointers on the device to achieve deep copy + PRAGMA_OFFLOAD("omp target \ map(always, to: MultiSpline[0:1], \ coefs[0:MultiSpline->coefs_size])") - { - MultiSpline->coefs = coefs; - } - - // transfer static data to GPU - auto* mKK_ptr = mKK->data(); - PRAGMA_OFFLOAD("omp target update to(mKK_ptr[0:mKK->size()])") - auto* myKcart_ptr = myKcart->data(); - PRAGMA_OFFLOAD( - "omp target update to(myKcart_ptr[0:myKcart->capacity()*3])") - for (size_t i = 0; i < 9; i++) { - (*GGt_offload)[i] = GGt[i]; - (*PrimLattice_G_offload)[i] = PrimLattice.G[i]; - } - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - PRAGMA_OFFLOAD("omp target update to(PrimLattice_G_ptr[0:9])") - auto* GGt_ptr = GGt_offload->data(); - PRAGMA_OFFLOAD("omp target update to(GGt_ptr[0:9])") + { + MultiSpline->coefs = coefs; } - inline void - flush_zero() + // transfer static data to GPU + auto* mKK_ptr = mKK->data(); + PRAGMA_OFFLOAD("omp target update to(mKK_ptr[0:mKK->size()])") + auto* myKcart_ptr = 
myKcart->data(); + PRAGMA_OFFLOAD("omp target update to(myKcart_ptr[0:myKcart->capacity()*3])") + for (size_t i = 0; i < 9; i++) { - SplineInst->flush_zero(); + (*GGt_offload)[i] = GGt[i]; + (*PrimLattice_G_offload)[i] = PrimLattice.G[i]; } - - /** remap kPoints to pack the double copy */ - inline void - resize_kpoints() + auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); + PRAGMA_OFFLOAD("omp target update to(PrimLattice_G_ptr[0:9])") + auto* GGt_ptr = GGt_offload->data(); + PRAGMA_OFFLOAD("omp target update to(GGt_ptr[0:9])") + } + + inline void flush_zero() { SplineInst->flush_zero(); } + + /** remap kPoints to pack the double copy */ + inline void resize_kpoints() + { + nComplexBands = this->remap_kpoints(); + const int nk = this->kPoints.size(); + mKK = std::make_shared>(nk); + myKcart = std::make_shared>(nk); + for (size_t i = 0; i < nk; ++i) { - nComplexBands = this->remap_kpoints(); - const int nk = this->kPoints.size(); - mKK = std::make_shared>(nk); - myKcart = std::make_shared>(nk); - for (size_t i = 0; i < nk; ++i) { - (*mKK)[i] = -dot(this->kPoints[i], this->kPoints[i]); - (*myKcart)(i) = this->kPoints[i]; - } + (*mKK)[i] = -dot(this->kPoints[i], this->kPoints[i]); + (*myKcart)(i) = this->kPoints[i]; } + } - void - set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, - int twist, int ispline, int level); + void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); - bool - read_splines(hdf_archive& h5f); + bool read_splines(hdf_archive& h5f); - bool - write_splines(hdf_archive& h5f); + bool write_splines(hdf_archive& h5f); - void - assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, - int first, int last) const; + void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const; - virtual void - evaluateValue( - const ParticleSetT& P, const int iat, ValueVector& psi) override; + virtual void evaluateValue(const 
ParticleSetT& P, const int iat, ValueVector& psi) override; - virtual void - evaluateDetRatios(const VirtualParticleSetT& VP, ValueVector& psi, - const ValueVector& psiinv, std::vector& ratios) override; + virtual void evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) override; - virtual void - mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const override; + virtual void mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const override; - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in + /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in * cartesian */ - void - assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, - ValueVector& d2psi); - - virtual void - evaluateVGL(const ParticleSetT& P, const int iat, ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi) override; - - virtual void - mw_evaluateVGL(const RefVectorWithLeader>& sa_list, - const RefVectorWithLeader>& P_list, int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const override; - - virtual void - mw_evaluateVGLandDetRatioGrads( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, - std::vector& grads) const override; - - void - assign_vgh(const PointType& r, ValueVector& psi, GradVector& dpsi, - HessVector& grad_grad_psi, int first, int last) const; - - virtual void - evaluateVGH(const ParticleSetT& P, const int iat, ValueVector& psi, - GradVector& dpsi, HessVector& grad_grad_psi) override; - - 
void - assign_vghgh(const PointType& r, ValueVector& psi, GradVector& dpsi, - HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, int first = 0, - int last = -1) const; - - virtual void - evaluateVGHGH(const ParticleSetT& P, const int iat, ValueVector& psi, - GradVector& dpsi, HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - virtual void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - - template - friend class SplineSetReaderT; - template - friend class BsplineReaderBaseT; + void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); + + virtual void evaluateVGL(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) override; + + virtual void mw_evaluateVGL(const RefVectorWithLeader>& sa_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const override; + + virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const override; + + void assign_vgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + int first, + int last) const; + + virtual void evaluateVGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) override; + + void assign_vghgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi, + int first = 0, + int last = -1) const; + + virtual void evaluateVGHGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) 
override; + + virtual void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) override; + + template + friend class SplineSetReaderT; + template + friend class BsplineReaderBaseT; }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.h index b7cf9e109dd..3aef6d2f2f4 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.h @@ -44,212 +44,180 @@ namespace qmcplusplus * orbital. All the output orbitals are real (C2R). The maximal number of output * orbitals is OrbitalSetSize. */ -template +template class SplineC2RT : public BsplineSetT { public: - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = ST; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - // types for evaluation results - using TT = typename BsplineSetT::ValueType; - using typename BsplineSetT::GGGVector; - using typename BsplineSetT::GradVector; - using typename BsplineSetT::HessVector; - using typename BsplineSetT::ValueVector; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - using ghContainer_type = VectorSoaContainer; + using SplineType = typename bspline_traits::SplineType; + using BCType = typename bspline_traits::BCType; + using DataType = ST; + using PointType = TinyVector; + using SingleSplineType = UBspline_3d_d; + // types for evaluation results + using TT = typename BsplineSetT::ValueType; + using typename BsplineSetT::GGGVector; + using typename BsplineSetT::GradVector; + using typename BsplineSetT::HessVector; + using typename BsplineSetT::ValueVector; + + using vContainer_type = Vector>; + using gContainer_type = VectorSoaContainer; + using hContainer_type = VectorSoaContainer; + 
using ghContainer_type = VectorSoaContainer; private: - /// primitive cell - CrystalLattice PrimLattice; - ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to - ///CartesianUnit, e.g. Hessian - Tensor GGt; - /// number of complex bands - int nComplexBands; - /// multi bspline set - std::shared_ptr> SplineInst; - - vContainer_type mKK; - VectorSoaContainer myKcart; - - /// thread private ratios for reduction when using nested threading, numVP x - /// numThread - Matrix ratios_private; + /// primitive cell + CrystalLattice PrimLattice; + ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to + ///CartesianUnit, e.g. Hessian + Tensor GGt; + /// number of complex bands + int nComplexBands; + /// multi bspline set + std::shared_ptr> SplineInst; + + vContainer_type mKK; + VectorSoaContainer myKcart; + + /// thread private ratios for reduction when using nested threading, numVP x + /// numThread + Matrix ratios_private; protected: - /// intermediate result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; + /// intermediate result vectors + vContainer_type myV; + vContainer_type myL; + gContainer_type myG; + hContainer_type myH; + ghContainer_type mygH; public: - SplineC2RT(const std::string& my_name) : - BsplineSetT(my_name), - nComplexBands(0) + SplineC2RT(const std::string& my_name) : BsplineSetT(my_name), nComplexBands(0) {} + + SplineC2RT(const SplineC2RT& in); + virtual std::string getClassName() const override { return "SplineC2R"; } + virtual std::string getKeyword() const override { return "SplineC2R"; } + bool isComplex() const override { return true; }; + + std::unique_ptr> makeClone() const override { return std::make_unique(*this); } + + inline void resizeStorage(size_t n, size_t nvals) + { + this->init_base(n); + size_t npad = getAlignedSize(2 * n); + myV.resize(npad); + myG.resize(npad); + myL.resize(npad); + myH.resize(npad); + mygH.resize(npad); + } + + void 
bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } + + void gather_tables(Communicate* comm) + { + if (comm->size() == 1) + return; + const int Nbands = this->kPoints.size(); + const int Nbandgroups = comm->size(); + this->offset.resize(Nbandgroups + 1, 0); + FairDivideLow(Nbands, Nbandgroups, this->offset); + + for (size_t ib = 0; ib < this->offset.size(); ib++) + this->offset[ib] = this->offset[ib] * 2; + gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, this->offset); + } + + template + void create_spline(GT& xyz_g, BCT& xyz_bc) + { + resize_kpoints(); + SplineInst = std::make_shared>(); + SplineInst->create(xyz_g, xyz_bc, myV.size()); + + app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " + << "for the coefficients in 3D spline orbital representation" << std::endl; + } + + inline void flush_zero() { SplineInst->flush_zero(); } + + /** remap kPoints to pack the double copy */ + inline void resize_kpoints() + { + nComplexBands = this->remap_kpoints(); + const int nk = this->kPoints.size(); + mKK.resize(nk); + myKcart.resize(nk); + for (size_t i = 0; i < nk; ++i) { + mKK[i] = -dot(this->kPoints[i], this->kPoints[i]); + myKcart(i) = this->kPoints[i]; } + } - SplineC2RT(const SplineC2RT& in); - virtual std::string - getClassName() const override - { - return "SplineC2R"; - } - virtual std::string - getKeyword() const override - { - return "SplineC2R"; - } - bool - isComplex() const override - { - return true; - }; - - std::unique_ptr> - makeClone() const override - { - return std::make_unique(*this); - } - - inline void - resizeStorage(size_t n, size_t nvals) - { - this->init_base(n); - size_t npad = getAlignedSize(2 * n); - myV.resize(npad); - myG.resize(npad); - myL.resize(npad); - myH.resize(npad); - mygH.resize(npad); - } - - void - bcast_tables(Communicate* comm) - { - chunked_bcast(comm, SplineInst->getSplinePtr()); - } - - void - gather_tables(Communicate* comm) - 
{ - if (comm->size() == 1) - return; - const int Nbands = this->kPoints.size(); - const int Nbandgroups = comm->size(); - this->offset.resize(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, this->offset); - - for (size_t ib = 0; ib < this->offset.size(); ib++) - this->offset[ib] = this->offset[ib] * 2; - gatherv(comm, SplineInst->getSplinePtr(), - SplineInst->getSplinePtr()->z_stride, this->offset); - } - - template - void - create_spline(GT& xyz_g, BCT& xyz_bc) - { - resize_kpoints(); - SplineInst = std::make_shared>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - - app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) - << " MB allocated " - << "for the coefficients in 3D spline orbital representation" - << std::endl; - } - - inline void - flush_zero() - { - SplineInst->flush_zero(); - } - - /** remap kPoints to pack the double copy */ - inline void - resize_kpoints() - { - nComplexBands = this->remap_kpoints(); - const int nk = this->kPoints.size(); - mKK.resize(nk); - myKcart.resize(nk); - for (size_t i = 0; i < nk; ++i) { - mKK[i] = -dot(this->kPoints[i], this->kPoints[i]); - myKcart(i) = this->kPoints[i]; - } - } - - void - set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, - int twist, int ispline, int level); + void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); - bool - read_splines(hdf_archive& h5f); + bool read_splines(hdf_archive& h5f); - bool - write_splines(hdf_archive& h5f); + bool write_splines(hdf_archive& h5f); - void - assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, - int first, int last) const; + void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const; - void - evaluateValue( - const ParticleSetT& P, const int iat, ValueVector& psi) override; + void evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) override; - void - evaluateDetRatios(const 
VirtualParticleSetT& VP, ValueVector& psi, - const ValueVector& psiinv, std::vector& ratios) override; + void evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) override; - /** assign_vgl + /** assign_vgl */ - void - assign_vgl(const PointType& r, ValueVector& psi, GradVector& dpsi, - ValueVector& d2psi, int first, int last) const; + void assign_vgl(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last) + const; - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in + /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in * cartesian */ - void - assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, - ValueVector& d2psi); - - void - evaluateVGL(const ParticleSetT& P, const int iat, ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi) override; - - void - assign_vgh(const PointType& r, ValueVector& psi, GradVector& dpsi, - HessVector& grad_grad_psi, int first, int last) const; - - void - evaluateVGH(const ParticleSetT& P, const int iat, ValueVector& psi, - GradVector& dpsi, HessVector& grad_grad_psi) override; - - void - assign_vghgh(const PointType& r, ValueVector& psi, GradVector& dpsi, - HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, int first = 0, - int last = -1) const; - - void - evaluateVGHGH(const ParticleSetT& P, const int iat, ValueVector& psi, - GradVector& dpsi, HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - template - friend class SplineSetReaderT; - template - friend class BsplineReaderBaseT; + void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); + + void evaluateVGL(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) override; + + void assign_vgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + int 
first, + int last) const; + + void evaluateVGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) override; + + void assign_vghgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi, + int first = 0, + int last = -1) const; + + void evaluateVGHGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) override; + + template + friend class SplineSetReaderT; + template + friend class BsplineReaderBaseT; }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h index 88265ffbec5..27055eb84a9 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h @@ -33,95 +33,72 @@ namespace qmcplusplus * Requires temporage storage and multiplication of the sign of the real part of * the phase Internal storage ST type arrays are aligned and padded. 
*/ -template +template class SplineR2RT : public BsplineSetT { public: - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = ST; - using RealType = typename SPOSetT::RealType; - using IndexType = typename SPOSetT::IndexType; - using FullPrecValueType = double; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - - // types for evaluation results - using TT = typename BsplineSetT::ValueType; - using GGGVector = typename BsplineSetT::GGGVector; - using ValueMatrix = typename BsplineSetT::ValueMatrix; - using GradVector = typename BsplineSetT::GradVector; - using HessVector = typename BsplineSetT::HessVector; - using ValueVector = typename BsplineSetT::ValueVector; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - using ghContainer_type = VectorSoaContainer; + using SplineType = typename bspline_traits::SplineType; + using BCType = typename bspline_traits::BCType; + using DataType = ST; + using RealType = typename SPOSetT::RealType; + using IndexType = typename SPOSetT::IndexType; + using FullPrecValueType = double; + using PointType = TinyVector; + using SingleSplineType = UBspline_3d_d; + + // types for evaluation results + using TT = typename BsplineSetT::ValueType; + using GGGVector = typename BsplineSetT::GGGVector; + using ValueMatrix = typename BsplineSetT::ValueMatrix; + using GradVector = typename BsplineSetT::GradVector; + using HessVector = typename BsplineSetT::HessVector; + using ValueVector = typename BsplineSetT::ValueVector; + + using vContainer_type = Vector>; + using gContainer_type = VectorSoaContainer; + using hContainer_type = VectorSoaContainer; + using ghContainer_type = VectorSoaContainer; private: - bool IsGamma; - ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to - /// CartesianUnit, e.g. 
Hessian - Tensor GGt; - /// multi bspline set - std::shared_ptr> SplineInst; + bool IsGamma; + ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to + /// CartesianUnit, e.g. Hessian + Tensor GGt; + /// multi bspline set + std::shared_ptr> SplineInst; - /// Copy of original splines for orbital rotation - std::shared_ptr> coef_copy_; + /// Copy of original splines for orbital rotation + std::shared_ptr> coef_copy_; - /// thread private ratios for reduction when using nested threading, numVP x - /// numThread - Matrix ratios_private; + /// thread private ratios for reduction when using nested threading, numVP x + /// numThread + Matrix ratios_private; protected: - /// primitive cell - CrystalLattice PrimLattice; - /// intermediate result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; + /// primitive cell + CrystalLattice PrimLattice; + /// intermediate result vectors + vContainer_type myV; + vContainer_type myL; + gContainer_type myG; + hContainer_type myH; + ghContainer_type mygH; public: - SplineR2RT(const std::string& my_name) : BsplineSetT(my_name) - { - } - - SplineR2RT(const SplineR2RT& in); - virtual std::string - getClassName() const override - { - return "SplineR2RT"; - } - virtual std::string - getKeyword() const override - { - return "SplineR2RT"; - } - bool - isComplex() const override - { - return false; - }; - bool - isRotationSupported() const override - { - return true; - } - - std::unique_ptr> - makeClone() const override - { - return std::make_unique>(*this); - } - - /// Store an original copy of the spline coefficients for orbital rotation - void - storeParamsBeforeRotation() override; - - /* + SplineR2RT(const std::string& my_name) : BsplineSetT(my_name) {} + + SplineR2RT(const SplineR2RT& in); + virtual std::string getClassName() const override { return "SplineR2RT"; } + virtual std::string getKeyword() const override { return "SplineR2RT"; } + bool 
isComplex() const override { return false; }; + bool isRotationSupported() const override { return true; } + + std::unique_ptr> makeClone() const override { return std::make_unique>(*this); } + + /// Store an original copy of the spline coefficients for orbital rotation + void storeParamsBeforeRotation() override; + + /* Implements orbital rotations via [1,2]. Should be called by RotatedSPOs::apply_rotation() @@ -134,139 +111,120 @@ class SplineR2RT : public BsplineSetT [2] Toulouse & Umrigar, JCP 126, (2007) [3] Townsend et al., PRB 102, (2020) */ - void - applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) override; - - inline void - resizeStorage(size_t n, size_t nvals) - { - this->init_base(n); - const size_t npad = getAlignedSize(n); - this->myV.resize(npad); - this->myG.resize(npad); - this->myL.resize(npad); - this->myH.resize(npad); - this->mygH.resize(npad); - - IsGamma = ((this->HalfG[0] == 0) && (this->HalfG[1] == 0) && - (this->HalfG[2] == 0)); - } - - void - bcast_tables(Communicate* comm) - { - chunked_bcast(comm, SplineInst->getSplinePtr()); - } - - void - gather_tables(Communicate* comm) - { - if (comm->size() == 1) - return; - const int Nbands = this->kPoints.size(); - const int Nbandgroups = comm->size(); - this->offset.resize(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, this->offset); - gatherv(comm, SplineInst->getSplinePtr(), - SplineInst->getSplinePtr()->z_stride, this->offset); - } - - template - void - create_spline(GT& xyz_g, BCT& xyz_bc) - { - GGt = dot(transpose(PrimLattice.G), PrimLattice.G); - SplineInst = std::make_shared>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - - app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) - << " MB allocated " - << "for the coefficients in 3D spline orbital representation" - << std::endl; - } - - inline void - flush_zero() - { - SplineInst->flush_zero(); - } - - void - set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, - int twist, int ispline, 
int level); - - bool - read_splines(hdf_archive& h5f); - - bool - write_splines(hdf_archive& h5f); - - /** convert position in PrimLattice unit and return sign */ - inline int - convertPos(const PointType& r, PointType& ru) - { - ru = PrimLattice.toUnit(r); - int bc_sign = 0; - for (int i = 0; i < this->D; i++) - if (-std::numeric_limits::epsilon() < ru[i] && ru[i] < 0) - ru[i] = ST(0.0); - else { - ST img = std::floor(ru[i]); - ru[i] -= img; - bc_sign += this->HalfG[i] * (int)img; - } - return bc_sign; - } - - void - assign_v(int bc_sign, const vContainer_type& myV, ValueVector& psi, - int first, int last) const; - - void - evaluateValue( - const ParticleSetT& P, const int iat, ValueVector& psi) override; - - void - evaluateDetRatios(const VirtualParticleSetT& VP, ValueVector& psi, - const ValueVector& psiinv, std::vector& ratios) override; - - void - assign_vgl(int bc_sign, ValueVector& psi, GradVector& dpsi, - ValueVector& d2psi, int first, int last) const; - - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in + void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) override; + + inline void resizeStorage(size_t n, size_t nvals) + { + this->init_base(n); + const size_t npad = getAlignedSize(n); + this->myV.resize(npad); + this->myG.resize(npad); + this->myL.resize(npad); + this->myH.resize(npad); + this->mygH.resize(npad); + + IsGamma = ((this->HalfG[0] == 0) && (this->HalfG[1] == 0) && (this->HalfG[2] == 0)); + } + + void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } + + void gather_tables(Communicate* comm) + { + if (comm->size() == 1) + return; + const int Nbands = this->kPoints.size(); + const int Nbandgroups = comm->size(); + this->offset.resize(Nbandgroups + 1, 0); + FairDivideLow(Nbands, Nbandgroups, this->offset); + gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, this->offset); + } + + template + void create_spline(GT& xyz_g, BCT& xyz_bc) + { + 
GGt = dot(transpose(PrimLattice.G), PrimLattice.G); + SplineInst = std::make_shared>(); + SplineInst->create(xyz_g, xyz_bc, myV.size()); + + app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " + << "for the coefficients in 3D spline orbital representation" << std::endl; + } + + inline void flush_zero() { SplineInst->flush_zero(); } + + void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); + + bool read_splines(hdf_archive& h5f); + + bool write_splines(hdf_archive& h5f); + + /** convert position in PrimLattice unit and return sign */ + inline int convertPos(const PointType& r, PointType& ru) + { + ru = PrimLattice.toUnit(r); + int bc_sign = 0; + for (int i = 0; i < this->D; i++) + if (-std::numeric_limits::epsilon() < ru[i] && ru[i] < 0) + ru[i] = ST(0.0); + else + { + ST img = std::floor(ru[i]); + ru[i] -= img; + bc_sign += this->HalfG[i] * (int)img; + } + return bc_sign; + } + + void assign_v(int bc_sign, const vContainer_type& myV, ValueVector& psi, int first, int last) const; + + void evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) override; + + void evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) override; + + void assign_vgl(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last) const; + + /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in * cartesian */ - void - assign_vgl_from_l( - int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - void - evaluateVGL(const ParticleSetT& P, const int iat, ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi) override; - - void - assign_vgh(int bc_sign, ValueVector& psi, GradVector& dpsi, - HessVector& grad_grad_psi, int first, int last) const; - - void - evaluateVGH(const ParticleSetT& P, const int iat, ValueVector& psi, - GradVector& dpsi, HessVector& 
grad_grad_psi) override; - - void - assign_vghgh(int bc_sign, ValueVector& psi, GradVector& dpsi, - HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, int first = 0, - int last = -1) const; - - void - evaluateVGHGH(const ParticleSetT& P, const int iat, ValueVector& psi, - GradVector& dpsi, HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - template - friend class SplineSetReaderT; - template - friend class BsplineReaderBaseT; + void assign_vgl_from_l(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); + + void evaluateVGL(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) override; + + void assign_vgh(int bc_sign, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi, int first, int last) + const; + + void evaluateVGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) override; + + void assign_vghgh(int bc_sign, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi, + int first = 0, + int last = -1) const; + + void evaluateVGHGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) override; + + template + friend class SplineSetReaderT; + template + friend class BsplineReaderBaseT; }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h b/src/QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h index 3fa31272e17..45c717f152d 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h @@ -31,296 +31,268 @@ namespace qmcplusplus { /** General SplineSetReader to handle any unitcell */ -template +template class SplineSetReaderT : public BsplineReaderBaseT { public: - using splineset_t = SA; - using DataType = typename splineset_t::DataType; - using SplineType = 
typename splineset_t::SplineType; - using ValueType = typename splineset_t::ValueType; + using splineset_t = SA; + using DataType = typename splineset_t::DataType; + using SplineType = typename splineset_t::SplineType; + using ValueType = typename splineset_t::ValueType; - Array, 3> FFTbox; - Array splineData_r, splineData_i; - double rotate_phase_r, rotate_phase_i; - UBspline_3d_d* spline_r; - UBspline_3d_d* spline_i; - splineset_t* bspline; - fftw_plan FFTplan; + Array, 3> FFTbox; + Array splineData_r, splineData_i; + double rotate_phase_r, rotate_phase_i; + UBspline_3d_d* spline_r; + UBspline_3d_d* spline_i; + splineset_t* bspline; + fftw_plan FFTplan; - SplineSetReaderT(EinsplineSetBuilderT* e) : - BsplineReaderBaseT(e), - spline_r(nullptr), - spline_i(nullptr), - bspline(nullptr), - FFTplan(nullptr) - { - } + SplineSetReaderT(EinsplineSetBuilderT* e) + : BsplineReaderBaseT(e), spline_r(nullptr), spline_i(nullptr), bspline(nullptr), FFTplan(nullptr) + {} - ~SplineSetReaderT() override - { - clear(); - } + ~SplineSetReaderT() override { clear(); } - void - clear() - { - einspline::destroy(spline_r); - einspline::destroy(spline_i); - if (FFTplan != nullptr) - fftw_destroy_plan(FFTplan); - FFTplan = nullptr; - } + void clear() + { + einspline::destroy(spline_r); + einspline::destroy(spline_i); + if (FFTplan != nullptr) + fftw_destroy_plan(FFTplan); + FFTplan = nullptr; + } - // set info for Hybrid - virtual void - initialize_hybridrep_atomic_centers() - { - } - // transform cG to radial functions - virtual void - create_atomic_centers_Gspace(Vector>& cG, - Communicate& band_group_comm, int iorb) - { - } + // set info for Hybrid + virtual void initialize_hybridrep_atomic_centers() {} + // transform cG to radial functions + virtual void create_atomic_centers_Gspace(Vector>& cG, Communicate& band_group_comm, int iorb) {} - std::unique_ptr> - create_spline_set(const std::string& my_name, int spin, - const BandInfoGroup& bandgroup) override - { - ReportEngine 
PRE("SplineSetReader", "create_spline_set(spin,SPE*)"); - // Timer c_prep, c_unpack,c_fft, c_phase, c_spline, c_newphase, c_h5, - // c_init; double t_prep=0.0, t_unpack=0.0, t_fft=0.0, t_phase=0.0, - // t_spline=0.0, t_newphase=0.0, t_h5=0.0, t_init=0.0; - bspline = new splineset_t(my_name); - app_log() << " ClassName = " << bspline->getClassName() << std::endl; - if (bspline->isComplex()) - app_log() << " Using complex einspline table" << std::endl; - else - app_log() << " Using real einspline table" << std::endl; + std::unique_ptr> create_spline_set(const std::string& my_name, + int spin, + const BandInfoGroup& bandgroup) override + { + ReportEngine PRE("SplineSetReader", "create_spline_set(spin,SPE*)"); + // Timer c_prep, c_unpack,c_fft, c_phase, c_spline, c_newphase, c_h5, + // c_init; double t_prep=0.0, t_unpack=0.0, t_fft=0.0, t_phase=0.0, + // t_spline=0.0, t_newphase=0.0, t_h5=0.0, t_init=0.0; + bspline = new splineset_t(my_name); + app_log() << " ClassName = " << bspline->getClassName() << std::endl; + if (bspline->isComplex()) + app_log() << " Using complex einspline table" << std::endl; + else + app_log() << " Using real einspline table" << std::endl; - // set info for Hybrid - this->initialize_hybridrep_atomic_centers(); + // set info for Hybrid + this->initialize_hybridrep_atomic_centers(); - // baseclass handles twists - this->check_twists(bspline, bandgroup); + // baseclass handles twists + this->check_twists(bspline, bandgroup); - Ugrid xyz_grid[3]; + Ugrid xyz_grid[3]; - typename splineset_t::BCType xyz_bc[3]; - bool havePsig = this->set_grid(bspline->HalfG, xyz_grid, xyz_bc); - if (!havePsig) - this->myComm->barrier_and_abort( - "SplineSetReader needs psi_g. Set precision=\"double\"."); - bspline->create_spline(xyz_grid, xyz_bc); + typename splineset_t::BCType xyz_bc[3]; + bool havePsig = this->set_grid(bspline->HalfG, xyz_grid, xyz_bc); + if (!havePsig) + this->myComm->barrier_and_abort("SplineSetReader needs psi_g. 
Set precision=\"double\"."); + bspline->create_spline(xyz_grid, xyz_bc); - std::ostringstream oo; - oo << bandgroup.myName << ".g" << this->MeshSize[0] << "x" - << this->MeshSize[1] << "x" << this->MeshSize[2] << ".h5"; + std::ostringstream oo; + oo << bandgroup.myName << ".g" << this->MeshSize[0] << "x" << this->MeshSize[1] << "x" << this->MeshSize[2] + << ".h5"; - const std::string splinefile(oo.str()); - bool root = (this->myComm->rank() == 0); - int foundspline = 0; - Timer now; - if (root) { - now.restart(); - hdf_archive h5f(this->myComm); - foundspline = h5f.open(splinefile, H5F_ACC_RDONLY); - if (foundspline) { - std::string aname("none"); - foundspline = h5f.readEntry(aname, "class_name"); - foundspline = - (aname.find(bspline->getKeyword()) != std::string::npos); - } - if (foundspline) { - int sizeD = 0; - foundspline = h5f.readEntry(sizeD, "sizeof"); - foundspline = (sizeD == sizeof(DataType)); - } - if (foundspline) { - foundspline = bspline->read_splines(h5f); - if (foundspline) - app_log() << " Successfully restored coefficients from " - << splinefile << ". The reading time is " - << now.elapsed() << " sec." << std::endl; - } - h5f.close(); - } - this->myComm->bcast(foundspline); - if (foundspline) { - now.restart(); - bspline->bcast_tables(this->myComm); - app_log() << " SplineSetReader bcast the full table " - << now.elapsed() << " sec." 
<< std::endl; - app_log().flush(); - } - else { - bspline->flush_zero(); - - int nx = this->MeshSize[0]; - int ny = this->MeshSize[1]; - int nz = this->MeshSize[2]; - if (havePsig) // perform FFT using FFTW - { - FFTbox.resize(nx, ny, nz); - FFTplan = fftw_plan_dft_3d(nx, ny, nz, - reinterpret_cast(FFTbox.data()), - reinterpret_cast(FFTbox.data()), +1, - FFTW_ESTIMATE); - splineData_r.resize(nx, ny, nz); - if (bspline->isComplex()) - splineData_i.resize(nx, ny, nz); + const std::string splinefile(oo.str()); + bool root = (this->myComm->rank() == 0); + int foundspline = 0; + Timer now; + if (root) + { + now.restart(); + hdf_archive h5f(this->myComm); + foundspline = h5f.open(splinefile, H5F_ACC_RDONLY); + if (foundspline) + { + std::string aname("none"); + foundspline = h5f.readEntry(aname, "class_name"); + foundspline = (aname.find(bspline->getKeyword()) != std::string::npos); + } + if (foundspline) + { + int sizeD = 0; + foundspline = h5f.readEntry(sizeD, "sizeof"); + foundspline = (sizeD == sizeof(DataType)); + } + if (foundspline) + { + foundspline = bspline->read_splines(h5f); + if (foundspline) + app_log() << " Successfully restored coefficients from " << splinefile << ". The reading time is " + << now.elapsed() << " sec." << std::endl; + } + h5f.close(); + } + this->myComm->bcast(foundspline); + if (foundspline) + { + now.restart(); + bspline->bcast_tables(this->myComm); + app_log() << " SplineSetReader bcast the full table " << now.elapsed() << " sec." 
<< std::endl; + app_log().flush(); + } + else + { + bspline->flush_zero(); - TinyVector start(0.0); - TinyVector end(1.0); - spline_r = einspline::create( - spline_r, start, end, this->MeshSize, bspline->HalfG); - if (bspline->isComplex()) - spline_i = einspline::create( - spline_i, start, end, this->MeshSize, bspline->HalfG); + int nx = this->MeshSize[0]; + int ny = this->MeshSize[1]; + int nz = this->MeshSize[2]; + if (havePsig) // perform FFT using FFTW + { + FFTbox.resize(nx, ny, nz); + FFTplan = fftw_plan_dft_3d(nx, ny, nz, reinterpret_cast(FFTbox.data()), + reinterpret_cast(FFTbox.data()), +1, FFTW_ESTIMATE); + splineData_r.resize(nx, ny, nz); + if (bspline->isComplex()) + splineData_i.resize(nx, ny, nz); - now.restart(); - initialize_spline_pio_gather(spin, bandgroup); - app_log() << " SplineSetReader initialize_spline_pio " - << now.elapsed() << " sec" << std::endl; + TinyVector start(0.0); + TinyVector end(1.0); + spline_r = einspline::create(spline_r, start, end, this->MeshSize, bspline->HalfG); + if (bspline->isComplex()) + spline_i = einspline::create(spline_i, start, end, this->MeshSize, bspline->HalfG); - fftw_destroy_plan(FFTplan); - FFTplan = NULL; - } - else // why, don't know - initialize_spline_psi_r(spin, bandgroup); - if (this->saveSplineCoefs && root) { - now.restart(); - hdf_archive h5f; - h5f.create(splinefile); - std::string classname = bspline->getClassName(); - h5f.write(classname, "class_name"); - int sizeD = sizeof(DataType); - h5f.write(sizeD, "sizeof"); - bspline->write_splines(h5f); - h5f.close(); - app_log() << " Stored spline coefficients in " << splinefile - << " for potential reuse. The writing time is " - << now.elapsed() << " sec." 
<< std::endl; - } - } + now.restart(); + initialize_spline_pio_gather(spin, bandgroup); + app_log() << " SplineSetReader initialize_spline_pio " << now.elapsed() << " sec" << std::endl; - clear(); - return std::unique_ptr>{bspline}; + fftw_destroy_plan(FFTplan); + FFTplan = NULL; + } + else // why, don't know + initialize_spline_psi_r(spin, bandgroup); + if (this->saveSplineCoefs && root) + { + now.restart(); + hdf_archive h5f; + h5f.create(splinefile); + std::string classname = bspline->getClassName(); + h5f.write(classname, "class_name"); + int sizeD = sizeof(DataType); + h5f.write(sizeD, "sizeof"); + bspline->write_splines(h5f); + h5f.close(); + app_log() << " Stored spline coefficients in " << splinefile << " for potential reuse. The writing time is " + << now.elapsed() << " sec." << std::endl; + } } - /** fft and spline cG + clear(); + return std::unique_ptr>{bspline}; + } + + /** fft and spline cG * @param cG psi_g to be processed * @param ti twist index * @param iorb orbital index * * Perform FFT and spline to spline_r and spline_i */ - inline void - fft_spline(Vector>& cG, int ti) + inline void fft_spline(Vector>& cG, int ti) + { + unpack4fftw(cG, this->mybuilder->Gvecs[0], this->MeshSize, FFTbox); + fftw_execute(FFTplan); + if (bspline->isComplex()) { - unpack4fftw(cG, this->mybuilder->Gvecs[0], this->MeshSize, FFTbox); - fftw_execute(FFTplan); - if (bspline->isComplex()) { - if (this->rotate) - fix_phase_rotate_c2c(FFTbox, splineData_r, splineData_i, - this->mybuilder->primcell_kpoints[ti], rotate_phase_r, - rotate_phase_i); - else { - split_real_components_c2c(FFTbox, splineData_r, splineData_i); - rotate_phase_r = 1.0; - rotate_phase_i = 0.0; - } - einspline::set(spline_r, splineData_r.data()); - einspline::set(spline_i, splineData_i.data()); - } - else { - fix_phase_rotate_c2r(FFTbox, splineData_r, - this->mybuilder->primcell_kpoints[ti], rotate_phase_r, - rotate_phase_i); - einspline::set(spline_r, splineData_r.data()); - } + if (this->rotate) + 
fix_phase_rotate_c2c(FFTbox, splineData_r, splineData_i, this->mybuilder->primcell_kpoints[ti], rotate_phase_r, + rotate_phase_i); + else + { + split_real_components_c2c(FFTbox, splineData_r, splineData_i); + rotate_phase_r = 1.0; + rotate_phase_i = 0.0; + } + einspline::set(spline_r, splineData_r.data()); + einspline::set(spline_i, splineData_i.data()); } + else + { + fix_phase_rotate_c2r(FFTbox, splineData_r, this->mybuilder->primcell_kpoints[ti], rotate_phase_r, rotate_phase_i); + einspline::set(spline_r, splineData_r.data()); + } + } - /** initialize the splines + /** initialize the splines */ - void - initialize_spline_pio_gather(int spin, const BandInfoGroup& bandgroup) - { - // distribute bands over processor groups - int Nbands = bandgroup.getNumDistinctOrbitals(); - const int Nprocs = this->myComm->size(); - const int Nbandgroups = std::min(Nbands, Nprocs); - Communicate band_group_comm(*this->myComm, Nbandgroups); - std::vector band_groups(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, band_groups); - int iorb_first = band_groups[band_group_comm.getGroupID()]; - int iorb_last = band_groups[band_group_comm.getGroupID() + 1]; + void initialize_spline_pio_gather(int spin, const BandInfoGroup& bandgroup) + { + // distribute bands over processor groups + int Nbands = bandgroup.getNumDistinctOrbitals(); + const int Nprocs = this->myComm->size(); + const int Nbandgroups = std::min(Nbands, Nprocs); + Communicate band_group_comm(*this->myComm, Nbandgroups); + std::vector band_groups(Nbandgroups + 1, 0); + FairDivideLow(Nbands, Nbandgroups, band_groups); + int iorb_first = band_groups[band_group_comm.getGroupID()]; + int iorb_last = band_groups[band_group_comm.getGroupID() + 1]; - app_log() << "Start transforming plane waves to 3D B-Splines." 
- << std::endl; - hdf_archive h5f(&band_group_comm, false); - Vector> cG(this->mybuilder->Gvecs[0].size()); - const std::vector& cur_bands = bandgroup.myBands; - if (band_group_comm.isGroupLeader()) - h5f.open(this->mybuilder->H5FileName, H5F_ACC_RDONLY); - for (int iorb = iorb_first; iorb < iorb_last; iorb++) { - if (band_group_comm.isGroupLeader()) { - int iorb_h5 = bspline->BandIndexMap[iorb]; - int ti = cur_bands[iorb_h5].TwistIndex; - std::string s = - this->psi_g_path(ti, spin, cur_bands[iorb_h5].BandIndex); - if (!h5f.readEntry(cG, s)) { - std::ostringstream msg; - msg << "SplineSetReader Failed to read band(s) from h5 " - "file. " - << "Attempted dataset " << s << " with " << cG.size() - << " complex numbers." << std::endl; - throw std::runtime_error(msg.str()); - } - double total_norm = compute_norm(cG); - if ((this->checkNorm) && - (std::abs(total_norm - 1.0) > PW_COEFF_NORM_TOLERANCE)) { - std::ostringstream msg; - msg << "SplineSetReader The orbital " << iorb_h5 - << " has a wrong norm " << total_norm - << ", computed from plane wave coefficients!" - << std::endl - << "This may indicate a problem with the HDF5 library " - "versions used " - << "during wavefunction conversion or read." - << std::endl; - throw std::runtime_error(msg.str()); - } - fft_spline(cG, ti); - bspline->set_spline( - spline_r, spline_i, cur_bands[iorb_h5].TwistIndex, iorb, 0); - } - this->create_atomic_centers_Gspace(cG, band_group_comm, iorb); + app_log() << "Start transforming plane waves to 3D B-Splines." 
<< std::endl; + hdf_archive h5f(&band_group_comm, false); + Vector> cG(this->mybuilder->Gvecs[0].size()); + const std::vector& cur_bands = bandgroup.myBands; + if (band_group_comm.isGroupLeader()) + h5f.open(this->mybuilder->H5FileName, H5F_ACC_RDONLY); + for (int iorb = iorb_first; iorb < iorb_last; iorb++) + { + if (band_group_comm.isGroupLeader()) + { + int iorb_h5 = bspline->BandIndexMap[iorb]; + int ti = cur_bands[iorb_h5].TwistIndex; + std::string s = this->psi_g_path(ti, spin, cur_bands[iorb_h5].BandIndex); + if (!h5f.readEntry(cG, s)) + { + std::ostringstream msg; + msg << "SplineSetReader Failed to read band(s) from h5 " + "file. " + << "Attempted dataset " << s << " with " << cG.size() << " complex numbers." << std::endl; + throw std::runtime_error(msg.str()); } - - this->myComm->barrier(); - Timer now; - if (band_group_comm.isGroupLeader()) { - now.restart(); - bspline->gather_tables(band_group_comm.getGroupLeaderComm()); - app_log() << " Time to gather the table = " << now.elapsed() - << std::endl; + double total_norm = compute_norm(cG); + if ((this->checkNorm) && (std::abs(total_norm - 1.0) > PW_COEFF_NORM_TOLERANCE)) + { + std::ostringstream msg; + msg << "SplineSetReader The orbital " << iorb_h5 << " has a wrong norm " << total_norm + << ", computed from plane wave coefficients!" << std::endl + << "This may indicate a problem with the HDF5 library " + "versions used " + << "during wavefunction conversion or read." 
<< std::endl; + throw std::runtime_error(msg.str()); } - now.restart(); - bspline->bcast_tables(this->myComm); - app_log() << " Time to bcast the table = " << now.elapsed() - << std::endl; + fft_spline(cG, ti); + bspline->set_spline(spline_r, spline_i, cur_bands[iorb_h5].TwistIndex, iorb, 0); + } + this->create_atomic_centers_Gspace(cG, band_group_comm, iorb); } - void - initialize_spline_psi_r(int spin, const BandInfoGroup& bandgroup) + this->myComm->barrier(); + Timer now; + if (band_group_comm.isGroupLeader()) { - // old implementation buried in the history - this->myComm->barrier_and_abort( - "SplineSetReaderP initialize_spline_psi_r " - "implementation not finished."); + now.restart(); + bspline->gather_tables(band_group_comm.getGroupLeaderComm()); + app_log() << " Time to gather the table = " << now.elapsed() << std::endl; } + now.restart(); + bspline->bcast_tables(this->myComm); + app_log() << " Time to bcast the table = " << now.elapsed() << std::endl; + } + + void initialize_spline_psi_r(int spin, const BandInfoGroup& bandgroup) + { + // old implementation buried in the history + this->myComm->barrier_and_abort("SplineSetReaderP initialize_spline_psi_r " + "implementation not finished."); + } }; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h b/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h index 898d8f2a2e4..2f991b228e5 100644 --- a/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h +++ b/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h @@ -18,42 +18,42 @@ namespace qmcplusplus { /// forward declaration -template +template class BsplineReaderBaseT; -template +template class EinsplineSetBuilderT; /** create a reader which handles complex (double size real) splines, C2R or C2C * case spline storage and computation precision is double */ -template -std::unique_ptr> -createBsplineComplexDoubleT( - EinsplineSetBuilderT* e, bool hybrid_rep, const std::string& useGPU); 
+template +std::unique_ptr> createBsplineComplexDoubleT(EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU); /** create a reader which handles complex (double size real) splines, C2R or C2C * case spline storage and computation precision is float */ -template -std::unique_ptr> -createBsplineComplexSingleT( - EinsplineSetBuilderT* e, bool hybrid_rep, const std::string& useGPU); +template +std::unique_ptr> createBsplineComplexSingleT(EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU); /** create a reader which handles real splines, R2R case * spline storage and computation precision is double */ -template -std::unique_ptr> -createBsplineRealDoubleT( - EinsplineSetBuilderT* e, bool hybrid_rep, const std::string& useGPU); +template +std::unique_ptr> createBsplineRealDoubleT(EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU); /** create a reader which handles real splines, R2R case * spline storage and computation precision is float */ -template -std::unique_ptr> -createBsplineRealSingleT( - EinsplineSetBuilderT* e, bool hybrid_rep, const std::string& useGPU); +template +std::unique_ptr> createBsplineRealSingleT(EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU); } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/CompositeSPOSetT.cpp b/src/QMCWaveFunctions/CompositeSPOSetT.cpp index 51b01d756f0..0f4da56d1ac 100644 --- a/src/QMCWaveFunctions/CompositeSPOSetT.cpp +++ b/src/QMCWaveFunctions/CompositeSPOSetT.cpp @@ -33,182 +33,187 @@ namespace MatrixOperators * @todo smater and more efficient matrix, move up for others * The columns [0,M1) are inserted into [offset_c,offset_c+M1). 
*/ -template -inline void -insert_columns(const MAT1& small, MAT2& big, int offset_c) +template +inline void insert_columns(const MAT1& small, MAT2& big, int offset_c) { - const int c = small.cols(); - for (int i = 0; i < small.rows(); ++i) - std::copy(small[i], small[i] + c, big[i] + offset_c); + const int c = small.cols(); + for (int i = 0; i < small.rows(); ++i) + std::copy(small[i], small[i] + c, big[i] + offset_c); } } // namespace MatrixOperators -template -CompositeSPOSetT::CompositeSPOSetT(const std::string& my_name) : - SPOSetT(my_name) +template +CompositeSPOSetT::CompositeSPOSetT(const std::string& my_name) : SPOSetT(my_name) { - this->OrbitalSetSize = 0; - component_offsets.reserve(4); + this->OrbitalSetSize = 0; + component_offsets.reserve(4); } -template -CompositeSPOSetT::CompositeSPOSetT(const CompositeSPOSetT& other) : - SPOSetT(other) +template +CompositeSPOSetT::CompositeSPOSetT(const CompositeSPOSetT& other) : SPOSetT(other) { - for (auto& element : other.components) { - this->add(element->makeClone()); - } + for (auto& element : other.components) + { + this->add(element->makeClone()); + } } -template +template CompositeSPOSetT::~CompositeSPOSetT() = default; -template -void -CompositeSPOSetT::add(std::unique_ptr> component) +template +void CompositeSPOSetT::add(std::unique_ptr> component) { - if (components.empty()) - component_offsets.push_back(0); // add 0 + if (components.empty()) + component_offsets.push_back(0); // add 0 - int norbs = component->size(); - components.push_back(std::move(component)); - component_values.emplace_back(norbs); - component_gradients.emplace_back(norbs); - component_laplacians.emplace_back(norbs); + int norbs = component->size(); + components.push_back(std::move(component)); + component_values.emplace_back(norbs); + component_gradients.emplace_back(norbs); + component_laplacians.emplace_back(norbs); - this->OrbitalSetSize += norbs; - component_offsets.push_back(this->OrbitalSetSize); + this->OrbitalSetSize += 
norbs; + component_offsets.push_back(this->OrbitalSetSize); } -template -void -CompositeSPOSetT::report() +template +void CompositeSPOSetT::report() { - app_log() << "CompositeSPOSetT" << std::endl; - app_log() << " ncomponents = " << components.size() << std::endl; - app_log() << " components" << std::endl; - for (int i = 0; i < components.size(); ++i) { - app_log() << " " << i << std::endl; - components[i]->basic_report(" "); - } + app_log() << "CompositeSPOSetT" << std::endl; + app_log() << " ncomponents = " << components.size() << std::endl; + app_log() << " components" << std::endl; + for (int i = 0; i < components.size(); ++i) + { + app_log() << " " << i << std::endl; + components[i]->basic_report(" "); + } } -template -std::unique_ptr> -CompositeSPOSetT::makeClone() const +template +std::unique_ptr> CompositeSPOSetT::makeClone() const { - return std::make_unique>(*this); + return std::make_unique>(*this); } -template -void -CompositeSPOSetT::evaluateValue( - const ParticleSetT& P, int iat, ValueVector& psi) +template +void CompositeSPOSetT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) { - int n = 0; - for (int c = 0; c < components.size(); ++c) { - SPOSetT& component = *components[c]; - ValueVector& values = component_values[c]; - component.evaluateValue(P, iat, values); - std::copy(values.begin(), values.end(), psi.begin() + n); - n += component.size(); - } + int n = 0; + for (int c = 0; c < components.size(); ++c) + { + SPOSetT& component = *components[c]; + ValueVector& values = component_values[c]; + component.evaluateValue(P, iat, values); + std::copy(values.begin(), values.end(), psi.begin() + n); + n += component.size(); + } } -template -void -CompositeSPOSetT::evaluateVGL(const ParticleSetT& P, int iat, - ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +template +void CompositeSPOSetT::evaluateVGL(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) { - int n = 0; - for (int c = 0; 
c < components.size(); ++c) { - SPOSetT& component = *components[c]; - ValueVector& values = component_values[c]; - GradVector& gradients = component_gradients[c]; - ValueVector& laplacians = component_laplacians[c]; - component.evaluateVGL(P, iat, values, gradients, laplacians); - std::copy(values.begin(), values.end(), psi.begin() + n); - std::copy(gradients.begin(), gradients.end(), dpsi.begin() + n); - std::copy(laplacians.begin(), laplacians.end(), d2psi.begin() + n); - n += component.size(); - } + int n = 0; + for (int c = 0; c < components.size(); ++c) + { + SPOSetT& component = *components[c]; + ValueVector& values = component_values[c]; + GradVector& gradients = component_gradients[c]; + ValueVector& laplacians = component_laplacians[c]; + component.evaluateVGL(P, iat, values, gradients, laplacians); + std::copy(values.begin(), values.end(), psi.begin() + n); + std::copy(gradients.begin(), gradients.end(), dpsi.begin() + n); + std::copy(laplacians.begin(), laplacians.end(), d2psi.begin() + n); + n += component.size(); + } } -template -void -CompositeSPOSetT::evaluate_notranspose(const ParticleSetT& P, int first, - int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) +template +void CompositeSPOSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) { - const int nat = last - first; - for (int c = 0; c < components.size(); ++c) { - int norb = components[c]->size(); - ValueMatrix v(nat, norb); - GradMatrix g(nat, norb); - ValueMatrix l(nat, norb); - components[c]->evaluate_notranspose(P, first, last, v, g, l); - int n = component_offsets[c]; - MatrixOperators::insert_columns(v, logdet, n); - MatrixOperators::insert_columns(g, dlogdet, n); - MatrixOperators::insert_columns(l, d2logdet, n); - } + const int nat = last - first; + for (int c = 0; c < components.size(); ++c) + { + int norb = components[c]->size(); + ValueMatrix v(nat, norb); + GradMatrix 
g(nat, norb); + ValueMatrix l(nat, norb); + components[c]->evaluate_notranspose(P, first, last, v, g, l); + int n = component_offsets[c]; + MatrixOperators::insert_columns(v, logdet, n); + MatrixOperators::insert_columns(g, dlogdet, n); + MatrixOperators::insert_columns(l, d2logdet, n); + } } -template -void -CompositeSPOSetT::evaluate_notranspose(const ParticleSetT& P, int first, - int last, ValueMatrix& logdet, GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) +template +void CompositeSPOSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet) { - const int nat = last - first; - for (int c = 0; c < components.size(); ++c) { - int norb = components[c]->size(); - ValueMatrix v(nat, norb); - GradMatrix g(nat, norb); - HessMatrix h(nat, norb); - components[c]->evaluate_notranspose(P, first, last, v, g, h); - int n = component_offsets[c]; - MatrixOperators::insert_columns(v, logdet, n); - MatrixOperators::insert_columns(g, dlogdet, n); - MatrixOperators::insert_columns(h, grad_grad_logdet, n); - } + const int nat = last - first; + for (int c = 0; c < components.size(); ++c) + { + int norb = components[c]->size(); + ValueMatrix v(nat, norb); + GradMatrix g(nat, norb); + HessMatrix h(nat, norb); + components[c]->evaluate_notranspose(P, first, last, v, g, h); + int n = component_offsets[c]; + MatrixOperators::insert_columns(v, logdet, n); + MatrixOperators::insert_columns(g, dlogdet, n); + MatrixOperators::insert_columns(h, grad_grad_logdet, n); + } } -template -void -CompositeSPOSetT::evaluate_notranspose(const ParticleSetT& P, int first, - int last, ValueMatrix& logdet, GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, GGGMatrix& grad_grad_grad_logdet) +template +void CompositeSPOSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet, + GGGMatrix& grad_grad_grad_logdet) { - 
not_implemented( - "evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet,dddlogdet)"); + not_implemented("evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet,dddlogdet)"); } -template -std::unique_ptr> -CompositeSPOSetBuilderT::createSPOSetFromXML(xmlNodePtr cur) +template +std::unique_ptr> CompositeSPOSetBuilderT::createSPOSetFromXML(xmlNodePtr cur) { - std::vector spolist; - putContent(spolist, cur); - if (spolist.empty()) { - return nullptr; - } - - auto spo_now = std::make_unique>( - getXMLAttributeValue(cur, "name")); - for (int i = 0; i < spolist.size(); ++i) { - const SPOSetT* spo = sposet_builder_factory_.getSPOSet(spolist[i]); - if (spo) - spo_now->add(spo->makeClone()); - } - return (spo_now->size()) ? std::unique_ptr>{std::move(spo_now)} : - nullptr; + std::vector spolist; + putContent(spolist, cur); + if (spolist.empty()) + { + return nullptr; + } + + auto spo_now = std::make_unique>(getXMLAttributeValue(cur, "name")); + for (int i = 0; i < spolist.size(); ++i) + { + const SPOSetT* spo = sposet_builder_factory_.getSPOSet(spolist[i]); + if (spo) + spo_now->add(spo->makeClone()); + } + return (spo_now->size()) ? 
std::unique_ptr>{std::move(spo_now)} : nullptr; } -template -std::unique_ptr> -CompositeSPOSetBuilderT::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) +template +std::unique_ptr> CompositeSPOSetBuilderT::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) { - return createSPOSetFromXML(cur); + return createSPOSetFromXML(cur); } // Class concrete types from ValueType diff --git a/src/QMCWaveFunctions/CompositeSPOSetT.h b/src/QMCWaveFunctions/CompositeSPOSetT.h index ec597a7eb4e..33a27e47737 100644 --- a/src/QMCWaveFunctions/CompositeSPOSetT.h +++ b/src/QMCWaveFunctions/CompositeSPOSetT.h @@ -24,109 +24,97 @@ namespace qmcplusplus { -template +template class CompositeSPOSetT : public SPOSetT { public: - using ValueVector = typename SPOSetT::ValueVector; - using GradVector = typename SPOSetT::GradVector; - using ValueMatrix = typename SPOSetT::ValueMatrix; - using GradMatrix = typename SPOSetT::GradMatrix; - using HessMatrix = typename SPOSetT::HessMatrix; - using GGGMatrix = typename SPOSetT::GGGMatrix; - - /// component SPOSets - std::vector>> components; - /// temporary storage for values - std::vector component_values; - /// temporary storage for gradients - std::vector component_gradients; - /// temporary storage for laplacians - std::vector component_laplacians; - /// store the precomputed offsets - std::vector component_offsets; - - CompositeSPOSetT(const std::string& my_name); - /** + using ValueVector = typename SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + using HessMatrix = typename SPOSetT::HessMatrix; + using GGGMatrix = typename SPOSetT::GGGMatrix; + + /// component SPOSets + std::vector>> components; + /// temporary storage for values + std::vector component_values; + /// temporary storage for gradients + std::vector component_gradients; + /// temporary storage for laplacians + std::vector component_laplacians; + 
/// store the precomputed offsets + std::vector component_offsets; + + CompositeSPOSetT(const std::string& my_name); + /** * @TODO: do we want template copy constructor * (i.e., copy from other with different type argument)? */ - CompositeSPOSetT(const CompositeSPOSetT& other); - ~CompositeSPOSetT() override; - - std::string - getClassName() const override - { - return "CompositeSPOSetT"; - } - - /// add a sposet component to this composite sposet - void - add(std::unique_ptr> component); - - /// print out component info - void - report(); - - // SPOSet interface methods - /// size is determined by component sposets and nothing else - inline void - setOrbitalSetSize(int norbs) override - { - } - - std::unique_ptr> - makeClone() const override; - - void - evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override; - - void - evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi) override; - - /// unimplemented functions call this to abort - inline void - not_implemented(const std::string& method) - { - APP_ABORT("CompositeSPOSetT::" + method + " has not been implemented"); - } - - // methods to be implemented in the future (possibly) - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, - HessMatrix& ddlogdet) override; - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& ddlogdet, - GGGMatrix& dddlogdet) override; + CompositeSPOSetT(const CompositeSPOSetT& other); + ~CompositeSPOSetT() override; + + std::string getClassName() const override { return "CompositeSPOSetT"; } + + /// add a sposet component to this composite sposet + void add(std::unique_ptr> component); + + /// print out component info + void report(); + 
+ // SPOSet interface methods + /// size is determined by component sposets and nothing else + inline void setOrbitalSetSize(int norbs) override {} + + std::unique_ptr> makeClone() const override; + + void evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override; + + void evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; + + /// unimplemented functions call this to abort + inline void not_implemented(const std::string& method) + { + APP_ABORT("CompositeSPOSetT::" + method + " has not been implemented"); + } + + // methods to be implemented in the future (possibly) + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) override; + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& ddlogdet) override; + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& ddlogdet, + GGGMatrix& dddlogdet) override; }; -template +template class CompositeSPOSetBuilderT : public SPOSetBuilderT { public: - CompositeSPOSetBuilderT( - Communicate* comm, const SPOSetBuilderFactoryT& factory) : - SPOSetBuilderT("Composite", comm), - sposet_builder_factory_(factory) - { - } - - // SPOSetBuilder interface - std::unique_ptr> - createSPOSetFromXML(xmlNodePtr cur) override; - - std::unique_ptr> - createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) override; - - /// reference to the sposet_builder_factory - const SPOSetBuilderFactoryT& sposet_builder_factory_; + CompositeSPOSetBuilderT(Communicate* comm, const SPOSetBuilderFactoryT& factory) + : SPOSetBuilderT("Composite", comm), sposet_builder_factory_(factory) + {} + + // SPOSetBuilder interface + std::unique_ptr> createSPOSetFromXML(xmlNodePtr cur) override; + + std::unique_ptr> createSPOSet(xmlNodePtr cur, 
SPOSetInputInfo& input) override; + + /// reference to the sposet_builder_factory + const SPOSetBuilderFactoryT& sposet_builder_factory_; }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/EinsplineSpinorSetBuilderT.h b/src/QMCWaveFunctions/EinsplineSpinorSetBuilderT.h index c55ef7fd627..26cc29e5594 100644 --- a/src/QMCWaveFunctions/EinsplineSpinorSetBuilderT.h +++ b/src/QMCWaveFunctions/EinsplineSpinorSetBuilderT.h @@ -32,7 +32,7 @@ class EinsplineSpinorSetBuilderT : public EinsplineSetBuilderT { using ParticleSet = ParticleSetT; using SPOSet = SPOSetT; - using PSetMap = std::map>; + using PSetMap = std::map>; public: ///constructor diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.cpp b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.cpp index b028eb039b5..c6de0f7c106 100644 --- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.cpp +++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.cpp @@ -6,12 +6,12 @@ namespace qmcplusplus { -template +template FreeOrbitalBuilderT::FreeOrbitalBuilderT(ParticleSetT& els, Communicate* comm, xmlNodePtr cur) : SPOSetBuilderT("PW", comm), targetPtcl(els) {} -template +template std::unique_ptr> FreeOrbitalBuilderT::createSPOSetFromXML(xmlNodePtr cur) { int norb = -1; @@ -89,7 +89,7 @@ std::unique_ptr> FreeOrbitalBuilderT::createSPOSetFromXML(xmlNodeP return sposet; } -template +template bool FreeOrbitalBuilderT::in_list(const int j, const std::vector l) { for (int i = 0; i < l.size(); i++) diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h index 06e4b730a3c..6322688327b 100644 --- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h +++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h @@ -5,12 +5,12 @@ namespace qmcplusplus { -template +template class FreeOrbitalBuilderT : public SPOSetBuilderT { public: - using RealType = typename SPOSetBuilderT::RealType; - using PosType = typename 
SPOSetBuilderT::PosType; + using RealType = typename SPOSetBuilderT::RealType; + using PosType = typename SPOSetBuilderT::PosType; FreeOrbitalBuilderT(ParticleSetT& els, Communicate* comm, xmlNodePtr cur); ~FreeOrbitalBuilderT() {} diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp index bc4bec54085..626320f7ac6 100644 --- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp +++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp @@ -25,643 +25,677 @@ namespace qmcplusplus { -template -void -FreeOrbitalT::evaluateVGL(const ParticleSetT& P, int iat, - ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec) +template +void FreeOrbitalT::evaluateVGL(const ParticleSetT& P, + int iat, + ValueVector& pvec, + GradVector& dpvec, + ValueVector& d2pvec) +{} + +template<> +void FreeOrbitalT::evaluateVGL(const ParticleSetT& P, + int iat, + ValueVector& pvec, + GradVector& dpvec, + ValueVector& d2pvec) { + const PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + pvec[j1] = coskr; + pvec[j2] = sinkr; + dpvec[j1] = -sinkr * kvecs[ik]; + dpvec[j2] = coskr * kvecs[ik]; + d2pvec[j1] = k2neg[ik] * coskr; + d2pvec[j2] = k2neg[ik] * sinkr; + } + pvec[0] = 1.0; + dpvec[0] = 0.0; + d2pvec[0] = 0.0; } -template <> -void -FreeOrbitalT::evaluateVGL(const ParticleSetT& P, int iat, - ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec) +template<> +void FreeOrbitalT::evaluateVGL(const ParticleSetT& P, + int iat, + ValueVector& pvec, + GradVector& dpvec, + ValueVector& d2pvec) { - const PosType& r = P.activeR(iat); - RealType sinkr, coskr; - for (int ik = mink; ik < maxk; ik++) { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - const int j2 = 2 * ik; - const int j1 = j2 - 1; - pvec[j1] = coskr; - pvec[j2] = sinkr; - dpvec[j1] = -sinkr * kvecs[ik]; - dpvec[j2] = coskr * kvecs[ik]; - 
d2pvec[j1] = k2neg[ik] * coskr; - d2pvec[j2] = k2neg[ik] * sinkr; - } - pvec[0] = 1.0; - dpvec[0] = 0.0; - d2pvec[0] = 0.0; + const PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + pvec[j1] = coskr; + pvec[j2] = sinkr; + dpvec[j1] = -sinkr * kvecs[ik]; + dpvec[j2] = coskr * kvecs[ik]; + d2pvec[j1] = k2neg[ik] * coskr; + d2pvec[j2] = k2neg[ik] * sinkr; + } + pvec[0] = 1.0; + dpvec[0] = 0.0; + d2pvec[0] = 0.0; } -template <> -void -FreeOrbitalT::evaluateVGL(const ParticleSetT& P, int iat, - ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec) +template<> +void FreeOrbitalT>::evaluateVGL(const ParticleSetT>& P, + int iat, + ValueVector& pvec, + GradVector& dpvec, + ValueVector& d2pvec) { - const PosType& r = P.activeR(iat); - RealType sinkr, coskr; - for (int ik = mink; ik < maxk; ik++) { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - const int j2 = 2 * ik; - const int j1 = j2 - 1; - pvec[j1] = coskr; - pvec[j2] = sinkr; - dpvec[j1] = -sinkr * kvecs[ik]; - dpvec[j2] = coskr * kvecs[ik]; - d2pvec[j1] = k2neg[ik] * coskr; - d2pvec[j2] = k2neg[ik] * sinkr; - } - pvec[0] = 1.0; - dpvec[0] = 0.0; - d2pvec[0] = 0.0; + const PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + + pvec[ik] = ValueType(coskr, sinkr); + dpvec[ik] = ValueType(-sinkr, coskr) * kvecs[ik]; + d2pvec[ik] = ValueType(k2neg[ik] * coskr, k2neg[ik] * sinkr); + } } -template <> -void -FreeOrbitalT>::evaluateVGL( - const ParticleSetT>& P, int iat, ValueVector& pvec, - GradVector& dpvec, ValueVector& d2pvec) +template<> +void FreeOrbitalT>::evaluateVGL(const ParticleSetT>& P, + int iat, + ValueVector& pvec, + GradVector& dpvec, + ValueVector& d2pvec) { - const PosType& r = P.activeR(iat); - RealType sinkr, coskr; - for (int ik = mink; ik < maxk; ik++) { - 
sincos(dot(kvecs[ik], r), &sinkr, &coskr); - - pvec[ik] = ValueType(coskr, sinkr); - dpvec[ik] = ValueType(-sinkr, coskr) * kvecs[ik]; - d2pvec[ik] = ValueType(k2neg[ik] * coskr, k2neg[ik] * sinkr); - } + const PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + + pvec[ik] = ValueType(coskr, sinkr); + dpvec[ik] = ValueType(-sinkr, coskr) * kvecs[ik]; + d2pvec[ik] = ValueType(k2neg[ik] * coskr, k2neg[ik] * sinkr); + } } -template <> -void -FreeOrbitalT>::evaluateVGL( - const ParticleSetT>& P, int iat, ValueVector& pvec, - GradVector& dpvec, ValueVector& d2pvec) +template<> +void FreeOrbitalT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& pvec) { - const PosType& r = P.activeR(iat); - RealType sinkr, coskr; - for (int ik = mink; ik < maxk; ik++) { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - - pvec[ik] = ValueType(coskr, sinkr); - dpvec[ik] = ValueType(-sinkr, coskr) * kvecs[ik]; - d2pvec[ik] = ValueType(k2neg[ik] * coskr, k2neg[ik] * sinkr); - } + const PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + pvec[j1] = coskr; + pvec[j2] = sinkr; + } + pvec[0] = 1.0; } -template <> -void -FreeOrbitalT::evaluateValue( - const ParticleSetT& P, int iat, ValueVector& pvec) +template<> +void FreeOrbitalT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& pvec) { - const PosType& r = P.activeR(iat); - RealType sinkr, coskr; - for (int ik = mink; ik < maxk; ik++) { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - const int j2 = 2 * ik; - const int j1 = j2 - 1; - pvec[j1] = coskr; - pvec[j2] = sinkr; - } - pvec[0] = 1.0; + const PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + pvec[j1] 
= coskr; + pvec[j2] = sinkr; + } + pvec[0] = 1.0; } -template <> -void -FreeOrbitalT::evaluateValue( - const ParticleSetT& P, int iat, ValueVector& pvec) +template<> +void FreeOrbitalT>::evaluateValue(const ParticleSetT>& P, + int iat, + ValueVector& pvec) { - const PosType& r = P.activeR(iat); - RealType sinkr, coskr; - for (int ik = mink; ik < maxk; ik++) { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - const int j2 = 2 * ik; - const int j1 = j2 - 1; - pvec[j1] = coskr; - pvec[j2] = sinkr; - } - pvec[0] = 1.0; + const PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + pvec[ik] = std::complex(coskr, sinkr); + } } -template <> -void -FreeOrbitalT>::evaluateValue( - const ParticleSetT>& P, int iat, ValueVector& pvec) +template<> +void FreeOrbitalT>::evaluateValue(const ParticleSetT>& P, + int iat, + ValueVector& pvec) { - const PosType& r = P.activeR(iat); - RealType sinkr, coskr; - for (int ik = mink; ik < maxk; ik++) { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - pvec[ik] = std::complex(coskr, sinkr); - } + const PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + pvec[ik] = std::complex(coskr, sinkr); + } } -template <> -void -FreeOrbitalT>::evaluateValue( - const ParticleSetT>& P, int iat, ValueVector& pvec) +template +void FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat) +{} + +template<> +void FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat) { + RealType sinkr, coskr; + float phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) + { + ValueVector p(phi[i], this->OrbitalSetSize); + GradVector dp(dphi[i], this->OrbitalSetSize); + HessVector hess(d2phi_mat[i], 
this->OrbitalSetSize); + const PosType& r = P.activeR(iat); - RealType sinkr, coskr; - for (int ik = mink; ik < maxk; ik++) { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - pvec[ik] = std::complex(coskr, sinkr); + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + p[j1] = coskr; + p[j2] = sinkr; + dp[j1] = -sinkr * kvecs[ik]; + dp[j2] = coskr * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) + { + hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; + hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) + { + hess[j1](la, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j2](la, lb) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j1](lb, la) = hess[j1](la, lb); + hess[j2](lb, la) = hess[j2](la, lb); + } + } } + p[0] = 1.0; + dp[0] = 0.0; + hess[0] = 0.0; + } } -template -void -FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, int first, - int last, ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat) +template<> +void FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat) { -} + RealType sinkr, coskr; + double phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) + { + ValueVector p(phi[i], this->OrbitalSetSize); + GradVector dp(dphi[i], this->OrbitalSetSize); + HessVector hess(d2phi_mat[i], this->OrbitalSetSize); -template <> -void -FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, - int first, int last, ValueMatrix& phi, GradMatrix& dphi, - HessMatrix& d2phi_mat) -{ - RealType sinkr, coskr; - float phi_of_r; - for (int iat = first, i = 0; iat < last; iat++, i++) { - ValueVector p(phi[i], this->OrbitalSetSize); - GradVector dp(dphi[i], this->OrbitalSetSize); - HessVector hess(d2phi_mat[i], this->OrbitalSetSize); - - const PosType& r = P.activeR(iat); - for (int ik = mink; ik 
< maxk; ik++) { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - const int j2 = 2 * ik; - const int j1 = j2 - 1; - p[j1] = coskr; - p[j2] = sinkr; - dp[j1] = -sinkr * kvecs[ik]; - dp[j2] = coskr * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) { - hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; - hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) { - hess[j1](la, lb) = - -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j2](la, lb) = - -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j1](lb, la) = hess[j1](la, lb); - hess[j2](lb, la) = hess[j2](la, lb); - } - } + const PosType& r = P.activeR(iat); + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + p[j1] = coskr; + p[j2] = sinkr; + dp[j1] = -sinkr * kvecs[ik]; + dp[j2] = coskr * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) + { + hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; + hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) + { + hess[j1](la, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j2](la, lb) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j1](lb, la) = hess[j1](la, lb); + hess[j2](lb, la) = hess[j2](la, lb); } - p[0] = 1.0; - dp[0] = 0.0; - hess[0] = 0.0; + } } + p[0] = 1.0; + dp[0] = 0.0; + hess[0] = 0.0; + } } -template <> -void -FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, - int first, int last, ValueMatrix& phi, GradMatrix& dphi, - HessMatrix& d2phi_mat) +template<> +void FreeOrbitalT>::evaluate_notranspose(const ParticleSetT>& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat) { - RealType sinkr, coskr; - double phi_of_r; - for (int iat = first, i = 0; iat < last; iat++, i++) { - ValueVector p(phi[i], this->OrbitalSetSize); - GradVector dp(dphi[i], this->OrbitalSetSize); - HessVector 
hess(d2phi_mat[i], this->OrbitalSetSize); - - const PosType& r = P.activeR(iat); - for (int ik = mink; ik < maxk; ik++) { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - const int j2 = 2 * ik; - const int j1 = j2 - 1; - p[j1] = coskr; - p[j2] = sinkr; - dp[j1] = -sinkr * kvecs[ik]; - dp[j2] = coskr * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) { - hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; - hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) { - hess[j1](la, lb) = - -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j2](la, lb) = - -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j1](lb, la) = hess[j1](la, lb); - hess[j2](lb, la) = hess[j2](la, lb); - } - } - } - p[0] = 1.0; - dp[0] = 0.0; - hess[0] = 0.0; - } -} + RealType sinkr, coskr; + std::complex phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) + { + ValueVector p(phi[i], this->OrbitalSetSize); + GradVector dp(dphi[i], this->OrbitalSetSize); + HessVector hess(d2phi_mat[i], this->OrbitalSetSize); -template <> -void -FreeOrbitalT>::evaluate_notranspose( - const ParticleSetT>& P, int first, int last, - ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat) -{ - RealType sinkr, coskr; - std::complex phi_of_r; - for (int iat = first, i = 0; iat < last; iat++, i++) { - ValueVector p(phi[i], this->OrbitalSetSize); - GradVector dp(dphi[i], this->OrbitalSetSize); - HessVector hess(d2phi_mat[i], this->OrbitalSetSize); - - const PosType& r = P.activeR(iat); - for (int ik = mink; ik < maxk; ik++) { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - - phi_of_r = std::complex(coskr, sinkr); - p[ik] = phi_of_r; - - dp[ik] = std::complex(-sinkr, coskr) * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) { - hess[ik](la, la) = - -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) { - hess[ik](la, lb) = - -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[ik](lb, la) = 
hess[ik](la, lb); - } - } + const PosType& r = P.activeR(iat); + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + + phi_of_r = std::complex(coskr, sinkr); + p[ik] = phi_of_r; + + dp[ik] = std::complex(-sinkr, coskr) * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) + { + hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) + { + hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[ik](lb, la) = hess[ik](la, lb); } + } } + } } -template <> -void -FreeOrbitalT>::evaluate_notranspose( - const ParticleSetT>& P, int first, int last, - ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat) +template<> +void FreeOrbitalT>::evaluate_notranspose(const ParticleSetT>& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat) { - RealType sinkr, coskr; - std::complex phi_of_r; - for (int iat = first, i = 0; iat < last; iat++, i++) { - ValueVector p(phi[i], this->OrbitalSetSize); - GradVector dp(dphi[i], this->OrbitalSetSize); - HessVector hess(d2phi_mat[i], this->OrbitalSetSize); - - const PosType& r = P.activeR(iat); - for (int ik = mink; ik < maxk; ik++) { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - - phi_of_r = std::complex(coskr, sinkr); - p[ik] = phi_of_r; - - dp[ik] = std::complex(-sinkr, coskr) * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) { - hess[ik](la, la) = - -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) { - hess[ik](la, lb) = - -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[ik](lb, la) = hess[ik](la, lb); - } - } + RealType sinkr, coskr; + std::complex phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) + { + ValueVector p(phi[i], this->OrbitalSetSize); + GradVector dp(dphi[i], this->OrbitalSetSize); + HessVector hess(d2phi_mat[i], this->OrbitalSetSize); + + const PosType& r = P.activeR(iat); + for (int ik = mink; ik < maxk; 
ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + + phi_of_r = std::complex(coskr, sinkr); + p[ik] = phi_of_r; + + dp[ik] = std::complex(-sinkr, coskr) * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) + { + hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) + { + hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[ik](lb, la) = hess[ik](la, lb); } + } } + } } -template -void -FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, int first, - int last, ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat, - GGGMatrix& d3phi_mat) +template +void FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat, + GGGMatrix& d3phi_mat) +{} + +template<> +void FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat, + GGGMatrix& d3phi_mat) { -} + RealType sinkr, coskr; + ValueType phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) + { + ValueVector p(phi[i], OrbitalSetSize); + GradVector dp(dphi[i], OrbitalSetSize); + HessVector hess(d2phi_mat[i], OrbitalSetSize); + GGGVector ggg(d3phi_mat[i], OrbitalSetSize); -template <> -void -FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, - int first, int last, ValueMatrix& phi, GradMatrix& dphi, - HessMatrix& d2phi_mat, GGGMatrix& d3phi_mat) -{ - RealType sinkr, coskr; - ValueType phi_of_r; - for (int iat = first, i = 0; iat < last; iat++, i++) { - ValueVector p(phi[i], OrbitalSetSize); - GradVector dp(dphi[i], OrbitalSetSize); - HessVector hess(d2phi_mat[i], OrbitalSetSize); - GGGVector ggg(d3phi_mat[i], OrbitalSetSize); - - const PosType& r = P.activeR(iat); - for (int ik = mink; ik < maxk; ik++) { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - const int j2 = 2 * ik; - const int j1 = j2 - 1; - p[j1] = coskr; - p[j2] = sinkr; - dp[j1] 
= -sinkr * kvecs[ik]; - dp[j2] = coskr * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) { - hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; - hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; - ggg[j1][la](la, la) = - sinkr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la]; - ggg[j2][la](la, la) = -coskr * (kvecs[ik])[la] * - (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) { - hess[j1](la, lb) = - -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j2](la, lb) = - -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j1](lb, la) = hess[j1](la, lb); - hess[j2](lb, la) = hess[j2](la, lb); - ggg[j1][la](lb, la) = sinkr * (kvecs[ik])[la] * - (kvecs[ik])[lb] * (kvecs[ik])[la]; - ggg[j2][la](lb, la) = -coskr * (kvecs[ik])[la] * - (kvecs[ik])[lb] * (kvecs[ik])[la]; - ggg[j1][la](la, lb) = ggg[j1][la](lb, la); - ggg[j2][la](la, lb) = ggg[j2][la](lb, la); - ggg[j1][lb](la, la) = ggg[j1][la](lb, la); - ggg[j2][lb](la, la) = ggg[j2][la](lb, la); - ggg[j1][la](lb, lb) = sinkr * (kvecs[ik])[la] * - (kvecs[ik])[lb] * (kvecs[ik])[lb]; - ggg[j2][la](lb, lb) = -coskr * (kvecs[ik])[la] * - (kvecs[ik])[lb] * (kvecs[ik])[lb]; - ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb); - ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb); - ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb); - ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb); - for (int lc = lb + 1; lc < OHMMS_DIM; lc++) { - ggg[j1][la](lb, lc) = sinkr * (kvecs[ik])[la] * - (kvecs[ik])[lb] * (kvecs[ik])[lc]; - ggg[j2][la](lb, lc) = -coskr * (kvecs[ik])[la] * - (kvecs[ik])[lb] * (kvecs[ik])[lc]; - ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc); - ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc); - ggg[j1][lb](la, lc) = ggg[j1][la](lb, lc); - ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc); - ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc); - ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc); - ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc); - ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc); - ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc); - 
ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc); - } - } - } + const PosType& r = P.activeR(iat); + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + p[j1] = coskr; + p[j2] = sinkr; + dp[j1] = -sinkr * kvecs[ik]; + dp[j2] = coskr * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) + { + hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; + hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; + ggg[j1][la](la, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la]; + ggg[j2][la](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) + { + hess[j1](la, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j2](la, lb) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j1](lb, la) = hess[j1](la, lb); + hess[j2](lb, la) = hess[j2](la, lb); + ggg[j1][la](lb, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la]; + ggg[j2][la](lb, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la]; + ggg[j1][la](la, lb) = ggg[j1][la](lb, la); + ggg[j2][la](la, lb) = ggg[j2][la](lb, la); + ggg[j1][lb](la, la) = ggg[j1][la](lb, la); + ggg[j2][lb](la, la) = ggg[j2][la](lb, la); + ggg[j1][la](lb, lb) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb]; + ggg[j2][la](lb, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb]; + ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb); + ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb); + ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb); + ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb); + for (int lc = lb + 1; lc < OHMMS_DIM; lc++) + { + ggg[j1][la](lb, lc) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc]; + ggg[j2][la](lb, lc) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc]; + ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc); + ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc); + ggg[j1][lb](la, lc) = ggg[j1][la](lb, lc); + 
ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc); + ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc); + ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc); + ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc); + ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc); + ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc); + ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc); + } } - - p[0] = 1.0; - dp[0] = 0.0; - hess[0] = 0.0; - ggg[0] = 0.0; + } } + + p[0] = 1.0; + dp[0] = 0.0; + hess[0] = 0.0; + ggg[0] = 0.0; + } } -template <> -void -FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, - int first, int last, ValueMatrix& phi, GradMatrix& dphi, - HessMatrix& d2phi_mat, GGGMatrix& d3phi_mat) +template<> +void FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat, + GGGMatrix& d3phi_mat) { - RealType sinkr, coskr; - ValueType phi_of_r; - for (int iat = first, i = 0; iat < last; iat++, i++) { - ValueVector p(phi[i], OrbitalSetSize); - GradVector dp(dphi[i], OrbitalSetSize); - HessVector hess(d2phi_mat[i], OrbitalSetSize); - GGGVector ggg(d3phi_mat[i], OrbitalSetSize); - - const PosType& r = P.activeR(iat); - for (int ik = mink; ik < maxk; ik++) { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - const int j2 = 2 * ik; - const int j1 = j2 - 1; - p[j1] = coskr; - p[j2] = sinkr; - dp[j1] = -sinkr * kvecs[ik]; - dp[j2] = coskr * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) { - hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; - hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; - ggg[j1][la](la, la) = - sinkr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la]; - ggg[j2][la](la, la) = -coskr * (kvecs[ik])[la] * - (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) { - hess[j1](la, lb) = - -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j2](la, lb) = - -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j1](lb, la) = hess[j1](la, lb); - hess[j2](lb, la) = hess[j2](la, lb); - 
ggg[j1][la](lb, la) = sinkr * (kvecs[ik])[la] * - (kvecs[ik])[lb] * (kvecs[ik])[la]; - ggg[j2][la](lb, la) = -coskr * (kvecs[ik])[la] * - (kvecs[ik])[lb] * (kvecs[ik])[la]; - ggg[j1][la](la, lb) = ggg[j1][la](lb, la); - ggg[j2][la](la, lb) = ggg[j2][la](lb, la); - ggg[j1][lb](la, la) = ggg[j1][la](lb, la); - ggg[j2][lb](la, la) = ggg[j2][la](lb, la); - ggg[j1][la](lb, lb) = sinkr * (kvecs[ik])[la] * - (kvecs[ik])[lb] * (kvecs[ik])[lb]; - ggg[j2][la](lb, lb) = -coskr * (kvecs[ik])[la] * - (kvecs[ik])[lb] * (kvecs[ik])[lb]; - ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb); - ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb); - ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb); - ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb); - for (int lc = lb + 1; lc < OHMMS_DIM; lc++) { - ggg[j1][la](lb, lc) = sinkr * (kvecs[ik])[la] * - (kvecs[ik])[lb] * (kvecs[ik])[lc]; - ggg[j2][la](lb, lc) = -coskr * (kvecs[ik])[la] * - (kvecs[ik])[lb] * (kvecs[ik])[lc]; - ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc); - ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc); - ggg[j1][lb](la, lc) = ggg[j1][la](lb, lc); - ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc); - ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc); - ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc); - ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc); - ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc); - ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc); - ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc); - } - } - } - } + RealType sinkr, coskr; + ValueType phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) + { + ValueVector p(phi[i], OrbitalSetSize); + GradVector dp(dphi[i], OrbitalSetSize); + HessVector hess(d2phi_mat[i], OrbitalSetSize); + GGGVector ggg(d3phi_mat[i], OrbitalSetSize); - p[0] = 1.0; - dp[0] = 0.0; - hess[0] = 0.0; - ggg[0] = 0.0; + const PosType& r = P.activeR(iat); + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + p[j1] = coskr; + p[j2] = sinkr; + dp[j1] = -sinkr * kvecs[ik]; + dp[j2] = 
coskr * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) + { + hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; + hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; + ggg[j1][la](la, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la]; + ggg[j2][la](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) + { + hess[j1](la, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j2](la, lb) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j1](lb, la) = hess[j1](la, lb); + hess[j2](lb, la) = hess[j2](la, lb); + ggg[j1][la](lb, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la]; + ggg[j2][la](lb, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la]; + ggg[j1][la](la, lb) = ggg[j1][la](lb, la); + ggg[j2][la](la, lb) = ggg[j2][la](lb, la); + ggg[j1][lb](la, la) = ggg[j1][la](lb, la); + ggg[j2][lb](la, la) = ggg[j2][la](lb, la); + ggg[j1][la](lb, lb) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb]; + ggg[j2][la](lb, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb]; + ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb); + ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb); + ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb); + ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb); + for (int lc = lb + 1; lc < OHMMS_DIM; lc++) + { + ggg[j1][la](lb, lc) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc]; + ggg[j2][la](lb, lc) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc]; + ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc); + ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc); + ggg[j1][lb](la, lc) = ggg[j1][la](lb, lc); + ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc); + ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc); + ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc); + ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc); + ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc); + ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc); + ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc); + } 
+ } + } } + + p[0] = 1.0; + dp[0] = 0.0; + hess[0] = 0.0; + ggg[0] = 0.0; + } } -template <> -void -FreeOrbitalT>::evaluate_notranspose( - const ParticleSetT>& P, int first, int last, - ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat, - GGGMatrix& d3phi_mat) +template<> +void FreeOrbitalT>::evaluate_notranspose(const ParticleSetT>& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat, + GGGMatrix& d3phi_mat) { - RealType sinkr, coskr; - ValueType phi_of_r; - for (int iat = first, i = 0; iat < last; iat++, i++) { - ValueVector p(phi[i], OrbitalSetSize); - GradVector dp(dphi[i], OrbitalSetSize); - HessVector hess(d2phi_mat[i], OrbitalSetSize); - GGGVector ggg(d3phi_mat[i], OrbitalSetSize); - - const PosType& r = P.activeR(iat); - for (int ik = mink; ik < maxk; ik++) { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - const ValueType compi(0, 1); - phi_of_r = ValueType(coskr, sinkr); - p[ik] = phi_of_r; - dp[ik] = compi * phi_of_r * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) { - hess[ik](la, la) = - -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) { - hess[ik](la, lb) = - -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[ik](lb, la) = hess[ik](la, lb); - } - } - for (int la = 0; la < OHMMS_DIM; la++) { - ggg[ik][la] = compi * (kvecs[ik])[la] * hess[ik]; - } + RealType sinkr, coskr; + ValueType phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) + { + ValueVector p(phi[i], OrbitalSetSize); + GradVector dp(dphi[i], OrbitalSetSize); + HessVector hess(d2phi_mat[i], OrbitalSetSize); + GGGVector ggg(d3phi_mat[i], OrbitalSetSize); + + const PosType& r = P.activeR(iat); + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const ValueType compi(0, 1); + phi_of_r = ValueType(coskr, sinkr); + p[ik] = phi_of_r; + dp[ik] = compi * phi_of_r * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) + { + hess[ik](la, la) = -phi_of_r * 
(kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) + { + hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[ik](lb, la) = hess[ik](la, lb); } + } + for (int la = 0; la < OHMMS_DIM; la++) + { + ggg[ik][la] = compi * (kvecs[ik])[la] * hess[ik]; + } } + } } -template <> -void -FreeOrbitalT>::evaluate_notranspose( - const ParticleSetT>& P, int first, int last, - ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat, - GGGMatrix& d3phi_mat) +template<> +void FreeOrbitalT>::evaluate_notranspose(const ParticleSetT>& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat, + GGGMatrix& d3phi_mat) { - RealType sinkr, coskr; - ValueType phi_of_r; - for (int iat = first, i = 0; iat < last; iat++, i++) { - ValueVector p(phi[i], OrbitalSetSize); - GradVector dp(dphi[i], OrbitalSetSize); - HessVector hess(d2phi_mat[i], OrbitalSetSize); - GGGVector ggg(d3phi_mat[i], OrbitalSetSize); - - const PosType& r = P.activeR(iat); - for (int ik = mink; ik < maxk; ik++) { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); - const ValueType compi(0, 1); - phi_of_r = ValueType(coskr, sinkr); - p[ik] = phi_of_r; - dp[ik] = compi * phi_of_r * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) { - hess[ik](la, la) = - -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) { - hess[ik](la, lb) = - -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[ik](lb, la) = hess[ik](la, lb); - } - } - for (int la = 0; la < OHMMS_DIM; la++) { - ggg[ik][la] = compi * (kvecs[ik])[la] * hess[ik]; - } + RealType sinkr, coskr; + ValueType phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) + { + ValueVector p(phi[i], OrbitalSetSize); + GradVector dp(dphi[i], OrbitalSetSize); + HessVector hess(d2phi_mat[i], OrbitalSetSize); + GGGVector ggg(d3phi_mat[i], OrbitalSetSize); + + const PosType& r = P.activeR(iat); + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), 
&sinkr, &coskr); + const ValueType compi(0, 1); + phi_of_r = ValueType(coskr, sinkr); + p[ik] = phi_of_r; + dp[ik] = compi * phi_of_r * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) + { + hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) + { + hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[ik](lb, la) = hess[ik](la, lb); } + } + for (int la = 0; la < OHMMS_DIM; la++) + { + ggg[ik][la] = compi * (kvecs[ik])[la] * hess[ik]; + } } + } } // generic implementation -template +template FreeOrbitalT::~FreeOrbitalT() +{} + +template +void FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + ValueMatrix& d2phi) { -} - -template -void -FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, int first, - int last, ValueMatrix& phi, GradMatrix& dphi, ValueMatrix& d2phi) -{ - for (int iat = first, i = 0; iat < last; iat++, i++) { - ValueVector p(phi[i], this->OrbitalSetSize); - GradVector dp(dphi[i], this->OrbitalSetSize); - ValueVector d2p(d2phi[i], this->OrbitalSetSize); - evaluateVGL(P, iat, p, dp, d2p); - } + for (int iat = first, i = 0; iat < last; iat++, i++) + { + ValueVector p(phi[i], this->OrbitalSetSize); + GradVector dp(dphi[i], this->OrbitalSetSize); + ValueVector d2p(d2phi[i], this->OrbitalSetSize); + evaluateVGL(P, iat, p, dp, d2p); + } } // Explicit template specialization -template <> -FreeOrbitalT::FreeOrbitalT( - const std::string& my_name, const std::vector& kpts_cart) : - SPOSetT(my_name), - kvecs(kpts_cart), - mink(1), // treat k=0 as special case - maxk(kpts_cart.size()), - k2neg(maxk) +template<> +FreeOrbitalT::FreeOrbitalT(const std::string& my_name, const std::vector& kpts_cart) + : SPOSetT(my_name), + kvecs(kpts_cart), + mink(1), // treat k=0 as special case + maxk(kpts_cart.size()), + k2neg(maxk) { - this->OrbitalSetSize = - 2 * maxk - 1; // k=0 has no (cos, sin) split, SPOSet member - 
for (int ik = 0; ik < maxk; ik++) - k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); + this->OrbitalSetSize = 2 * maxk - 1; // k=0 has no (cos, sin) split, SPOSet member + for (int ik = 0; ik < maxk; ik++) + k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); } -template <> -FreeOrbitalT::FreeOrbitalT( - const std::string& my_name, const std::vector& kpts_cart) : - SPOSetT(my_name), - kvecs(kpts_cart), - mink(1), // treat k=0 as special case - maxk(kpts_cart.size()), - k2neg(maxk) +template<> +FreeOrbitalT::FreeOrbitalT(const std::string& my_name, const std::vector& kpts_cart) + : SPOSetT(my_name), + kvecs(kpts_cart), + mink(1), // treat k=0 as special case + maxk(kpts_cart.size()), + k2neg(maxk) { - this->OrbitalSetSize = - 2 * maxk - 1; // k=0 has no (cos, sin) split, SPOSet member - for (int ik = 0; ik < maxk; ik++) - k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); + this->OrbitalSetSize = 2 * maxk - 1; // k=0 has no (cos, sin) split, SPOSet member + for (int ik = 0; ik < maxk; ik++) + k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); } -template <> -FreeOrbitalT>::FreeOrbitalT( - const std::string& my_name, const std::vector& kpts_cart) : - SPOSetT>(my_name), - kvecs(kpts_cart), - mink(0), // treat k=0 as special case - maxk(kpts_cart.size()), - k2neg(maxk) +template<> +FreeOrbitalT>::FreeOrbitalT(const std::string& my_name, const std::vector& kpts_cart) + : SPOSetT>(my_name), + kvecs(kpts_cart), + mink(0), // treat k=0 as special case + maxk(kpts_cart.size()), + k2neg(maxk) { - this->OrbitalSetSize = maxk; // SPOSet member - for (int ik = 0; ik < maxk; ik++) - k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); + this->OrbitalSetSize = maxk; // SPOSet member + for (int ik = 0; ik < maxk; ik++) + k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); } -template <> -FreeOrbitalT>::FreeOrbitalT( - const std::string& my_name, const std::vector& kpts_cart) : - SPOSetT>(my_name), - kvecs(kpts_cart), - mink(0), // treat k=0 as special case - maxk(kpts_cart.size()), - k2neg(maxk) +template<> +FreeOrbitalT>::FreeOrbitalT(const 
std::string& my_name, const std::vector& kpts_cart) + : SPOSetT>(my_name), + kvecs(kpts_cart), + mink(0), // treat k=0 as special case + maxk(kpts_cart.size()), + k2neg(maxk) { - this->OrbitalSetSize = maxk; // SPOSet member - for (int ik = 0; ik < maxk; ik++) - k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); + this->OrbitalSetSize = maxk; // SPOSet member + for (int ik = 0; ik < maxk; ik++) + k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); } -template -void -FreeOrbitalT::report(const std::string& pad) const +template +void FreeOrbitalT::report(const std::string& pad) const { - app_log() << pad << "FreeOrbital report" << std::endl; - for (int ik = 0; ik < kvecs.size(); ik++) { - app_log() << pad << ik << " " << kvecs[ik] << std::endl; - } - app_log() << pad << "end FreeOrbital report" << std::endl; - app_log().flush(); + app_log() << pad << "FreeOrbital report" << std::endl; + for (int ik = 0; ik < kvecs.size(); ik++) + { + app_log() << pad << ik << " " << kvecs[ik] << std::endl; + } + app_log() << pad << "end FreeOrbital report" << std::endl; + app_log().flush(); } template class FreeOrbitalT; diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h index 18e8899cca3..de79c713f35 100644 --- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h +++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h @@ -27,74 +27,66 @@ namespace qmcplusplus { -template +template class FreeOrbitalT : public SPOSetT { public: - using ValueVector = typename SPOSetT::ValueVector; - using GradVector = typename SPOSetT::GradVector; - using HessVector = typename SPOSetT::HessVector; - using ValueMatrix = typename SPOSetT::ValueMatrix; - using GradMatrix = typename SPOSetT::GradMatrix; - using HessMatrix = typename SPOSetT::HessMatrix; - using GGGMatrix = typename SPOSetT::GGGMatrix; - using RealType = typename SPOSetT::RealType; - using PosType = typename SPOSetT::PosType; - using ValueType = typename SPOSetT::ValueType; + using ValueVector = typename 
SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; + using HessVector = typename SPOSetT::HessVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + using HessMatrix = typename SPOSetT::HessMatrix; + using GGGMatrix = typename SPOSetT::GGGMatrix; + using RealType = typename SPOSetT::RealType; + using PosType = typename SPOSetT::PosType; + using ValueType = typename SPOSetT::ValueType; - FreeOrbitalT( - const std::string& my_name, const std::vector& kpts_cart); - ~FreeOrbitalT(); + FreeOrbitalT(const std::string& my_name, const std::vector& kpts_cart); + ~FreeOrbitalT(); - inline std::string - getClassName() const final - { - return "FreeOrbital"; - } + inline std::string getClassName() const final { return "FreeOrbital"; } - // phi[i][j] is phi_j(r_i), i.e. electron i in orbital j - // i \in [first, last) - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& phi, GradMatrix& dphi, ValueMatrix& d2phi) final; + // phi[i][j] is phi_j(r_i), i.e. 
electron i in orbital j + // i \in [first, last) + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + ValueMatrix& d2phi) final; - // plug r_i into all orbitals - void - evaluateVGL(const ParticleSetT& P, int i, ValueVector& pvec, - GradVector& dpvec, ValueVector& d2pvec) final; - void - evaluateValue(const ParticleSetT& P, int iat, ValueVector& pvec) final; + // plug r_i into all orbitals + void evaluateVGL(const ParticleSetT& P, int i, ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec) final; + void evaluateValue(const ParticleSetT& P, int iat, ValueVector& pvec) final; - // hessian matrix is needed by backflow - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat) final; + // hessian matrix is needed by backflow + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat) final; - // derivative of hessian is needed to optimize backflow - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat, - GGGMatrix& d3phi_mat) override; + // derivative of hessian is needed to optimize backflow + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat, + GGGMatrix& d3phi_mat) override; - void - report(const std::string& pad) const override; - // ---- begin required overrides - std::unique_ptr> - makeClone() const final - { - return std::make_unique>(*this); - } - void - setOrbitalSetSize(int norbs) final - { - throw std::runtime_error("not implemented"); - } - // required overrides end ---- + void report(const std::string& pad) const override; + // ---- begin required overrides + std::unique_ptr> makeClone() const final { return std::make_unique>(*this); } + void setOrbitalSetSize(int 
norbs) final { throw std::runtime_error("not implemented"); } + // required overrides end ---- private: - const std::vector kvecs; // kvecs vectors - const int mink; // minimum k index - const int maxk; // maximum number of kvecs vectors - std::vector k2neg; // minus kvecs^2 + const std::vector kvecs; // kvecs vectors + const int mink; // minimum k index + const int maxk; // maximum number of kvecs vectors + std::vector k2neg; // minus kvecs^2 }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp index 7c309d5b873..92822f75a18 100644 --- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp +++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp @@ -22,190 +22,188 @@ namespace qmcplusplus { -template -SHOSetBuilderT::SHOSetBuilderT(ParticleSetT& P, Communicate* comm) : - SPOSetBuilderT("SHO", comm), - Ps(P) +template +SHOSetBuilderT::SHOSetBuilderT(ParticleSetT& P, Communicate* comm) : SPOSetBuilderT("SHO", comm), Ps(P) { - this->ClassName = "SHOSetBuilderT"; - this->legacy = false; - app_log() << "Constructing SHOSetBuilderT" << std::endl; - reset(); + this->ClassName = "SHOSetBuilderT"; + this->legacy = false; + app_log() << "Constructing SHOSetBuilderT" << std::endl; + reset(); } -template +template SHOSetBuilderT::~SHOSetBuilderT() = default; -template -void -SHOSetBuilderT::reset() +template +void SHOSetBuilderT::reset() { - nstates = 0; - mass = -1.0; - energy = -1.0; - length = -1.0; - center = 0.0; + nstates = 0; + mass = -1.0; + energy = -1.0; + length = -1.0; + center = 0.0; } -template -std::unique_ptr> -SHOSetBuilderT::createSPOSetFromXML(xmlNodePtr cur) +template +std::unique_ptr> SHOSetBuilderT::createSPOSetFromXML(xmlNodePtr cur) { - APP_ABORT("SHOSetBuilderT::createSPOSetFromXML SHOSetBuilder should not " - "use legacy interface"); + APP_ABORT("SHOSetBuilderT::createSPOSetFromXML SHOSetBuilder should not " + "use legacy 
interface"); - app_log() << "SHOSetBuilderT::createSHOSet(xml) " << std::endl; + app_log() << "SHOSetBuilderT::createSHOSet(xml) " << std::endl; - SPOSetInputInfo input(cur); + SPOSetInputInfo input(cur); - return createSPOSet(cur, input); + return createSPOSet(cur, input); } -template -std::unique_ptr> -SHOSetBuilderT::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) +template +std::unique_ptr> SHOSetBuilderT::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) { - app_log() << "SHOSetBuilderT::createSHOSet(indices) " << std::endl; - reset(); - - // read parameters - std::string spo_name = "sho"; - OhmmsAttributeSet attrib; - attrib.add(spo_name, "name"); - attrib.add(spo_name, "id"); - attrib.add(mass, "mass"); - attrib.add(energy, "energy"); - attrib.add(energy, "frequency"); - attrib.add(length, "length"); - attrib.add(center, "center"); - attrib.add(nstates, "size"); - attrib.put(cur); - - if (energy < 0.0) - energy = 1.0; - if (mass < 0.0 && length < 0.0) - length = 1.0; - if (mass < 0.0) - mass = 1.0 / (energy * length * length); - else if (length < 0.0) - length = 1.0 / std::sqrt(mass * energy); - - // initialize states and/or adjust basis - int smax = -1; - if (input.has_index_info) - smax = std::max(smax, input.max_index()); - if (input.has_energy_info) { - smax = std::max(smax, (int)std::ceil(input.max_energy() / energy)); - } - if (smax < 0) - APP_ABORT("SHOSetBuilderT::Initialize\n invalid basis size"); - update_basis_states(smax); - - // create sho state request - indices_t& indices = input.get_indices(this->states); - std::vector sho_states; - for (int i = 0; i < indices.size(); ++i) - sho_states.push_back(basis_states[indices[i]]); - - // make the sposet - auto sho = - std::make_unique>(spo_name, length, center, sho_states); - - sho->report(" "); - return sho; + app_log() << "SHOSetBuilderT::createSHOSet(indices) " << std::endl; + reset(); + + // read parameters + std::string spo_name = "sho"; + OhmmsAttributeSet attrib; + attrib.add(spo_name, 
"name"); + attrib.add(spo_name, "id"); + attrib.add(mass, "mass"); + attrib.add(energy, "energy"); + attrib.add(energy, "frequency"); + attrib.add(length, "length"); + attrib.add(center, "center"); + attrib.add(nstates, "size"); + attrib.put(cur); + + if (energy < 0.0) + energy = 1.0; + if (mass < 0.0 && length < 0.0) + length = 1.0; + if (mass < 0.0) + mass = 1.0 / (energy * length * length); + else if (length < 0.0) + length = 1.0 / std::sqrt(mass * energy); + + // initialize states and/or adjust basis + int smax = -1; + if (input.has_index_info) + smax = std::max(smax, input.max_index()); + if (input.has_energy_info) + { + smax = std::max(smax, (int)std::ceil(input.max_energy() / energy)); + } + if (smax < 0) + APP_ABORT("SHOSetBuilderT::Initialize\n invalid basis size"); + update_basis_states(smax); + + // create sho state request + indices_t& indices = input.get_indices(this->states); + std::vector sho_states; + for (int i = 0; i < indices.size(); ++i) + sho_states.push_back(basis_states[indices[i]]); + + // make the sposet + auto sho = std::make_unique>(spo_name, length, center, sho_states); + + sho->report(" "); + return sho; } -template -void -SHOSetBuilderT::update_basis_states(int smax) +template +void SHOSetBuilderT::update_basis_states(int smax) { - int states_required = smax - basis_states.size() + 1; - if (states_required > 0) { - RealType N = smax + 1; - if (QMCTraits::DIM == 1) - nmax = smax; - else if (QMCTraits::DIM == 2) - nmax = std::ceil(.5 * std::sqrt(8. * N + 1.) - 1.5); - else if (QMCTraits::DIM == 3) { - RealType f = std::exp(1.0 / 3.0 * - std::log(81. * N + 3. * std::sqrt(729. * N * N - 3.))); - nmax = std::ceil(f / 3. + 1. / f - 2.); - } + int states_required = smax - basis_states.size() + 1; + if (states_required > 0) + { + RealType N = smax + 1; + if (QMCTraits::DIM == 1) + nmax = smax; + else if (QMCTraits::DIM == 2) + nmax = std::ceil(.5 * std::sqrt(8. * N + 1.) 
- 1.5); + else if (QMCTraits::DIM == 3) + { + RealType f = std::exp(1.0 / 3.0 * std::log(81. * N + 3. * std::sqrt(729. * N * N - 3.))); + nmax = std::ceil(f / 3. + 1. / f - 2.); + } + else + APP_ABORT("SHOSetBuilderT::update_basis_states dimensions other " + "than 1, 2, or 3 are not supported"); + int ndim = nmax + 1; + ind_dims[QMCTraits::DIM - 1] = 1; + for (int d = QMCTraits::DIM - 2; d > -1; --d) + ind_dims[d] = ind_dims[d + 1] * ndim; + int s = 0; + int ntot = pow(ndim, QMCTraits::DIM); + TinyVector qnumber; + for (int m = 0; m < ntot; ++m) + { + int n = 0; // principal quantum number + int nrem = m; + for (int d = 0; d < QMCTraits::DIM; ++d) + { + int i = nrem / ind_dims[d]; + nrem -= i * ind_dims[d]; + qnumber[d] = i; + n += i; + } + if (n <= nmax) + { + SHOState* st; + if (s < basis_states.size()) + st = basis_states[s]; else - APP_ABORT("SHOSetBuilderT::update_basis_states dimensions other " - "than 1, 2, or 3 are not supported"); - int ndim = nmax + 1; - ind_dims[QMCTraits::DIM - 1] = 1; - for (int d = QMCTraits::DIM - 2; d > -1; --d) - ind_dims[d] = ind_dims[d + 1] * ndim; - int s = 0; - int ntot = pow(ndim, QMCTraits::DIM); - TinyVector qnumber; - for (int m = 0; m < ntot; ++m) { - int n = 0; // principal quantum number - int nrem = m; - for (int d = 0; d < QMCTraits::DIM; ++d) { - int i = nrem / ind_dims[d]; - nrem -= i * ind_dims[d]; - qnumber[d] = i; - n += i; - } - if (n <= nmax) { - SHOState* st; - if (s < basis_states.size()) - st = basis_states[s]; - else { - st = new SHOState(); - basis_states.add(st); - } - RealType e = energy * (n + .5 * QMCTraits::DIM); - st->set(qnumber, e); - s++; - } + { + st = new SHOState(); + basis_states.add(st); } - basis_states.energy_sort(1e-6, true); + RealType e = energy * (n + .5 * QMCTraits::DIM); + st->set(qnumber, e); + s++; + } } - - // reset energy scale even if no states need to be added - for (int i = 0; i < basis_states.size(); ++i) { - SHOState& state = *basis_states[i]; - const TinyVector& qnumber = 
state.quantum_number; - int n = 0; - for (int d = 0; d < QMCTraits::DIM; ++d) - n += qnumber[d]; - state.energy = energy * (n + .5 * QMCTraits::DIM); - } - - // somewhat redundant, but necessary - this->clear_states(0); - this->states[0]->finish(basis_states.states); - - if (basis_states.size() <= smax) - APP_ABORT("SHOSetBuilderT::update_basis_states failed to make enough " - "states"); + basis_states.energy_sort(1e-6, true); + } + + // reset energy scale even if no states need to be added + for (int i = 0; i < basis_states.size(); ++i) + { + SHOState& state = *basis_states[i]; + const TinyVector& qnumber = state.quantum_number; + int n = 0; + for (int d = 0; d < QMCTraits::DIM; ++d) + n += qnumber[d]; + state.energy = energy * (n + .5 * QMCTraits::DIM); + } + + // somewhat redundant, but necessary + this->clear_states(0); + this->states[0]->finish(basis_states.states); + + if (basis_states.size() <= smax) + APP_ABORT("SHOSetBuilderT::update_basis_states failed to make enough " + "states"); } -template -void -SHOSetBuilderT::report(const std::string& pad) +template +void SHOSetBuilderT::report(const std::string& pad) { - app_log() << pad << "SHOSetBuilderT report" << std::endl; - app_log() << pad << " dimension = " << QMCTraits::DIM << std::endl; - app_log() << pad << " mass = " << mass << std::endl; - app_log() << pad << " frequency = " << energy << std::endl; - app_log() << pad << " energy = " << energy << std::endl; - app_log() << pad << " length = " << length << std::endl; - app_log() << pad << " center = " << center << std::endl; - app_log() << pad << " nstates = " << nstates << std::endl; - app_log() << pad << " nmax = " << nmax << std::endl; - app_log() << pad << " ind_dims = " << ind_dims << std::endl; - app_log() << pad << " # basis states = " << basis_states.size() - << std::endl; - app_log() << pad << " basis_states" << std::endl; - for (int s = 0; s < basis_states.size(); ++s) - basis_states[s]->report(pad + " " + int2string(s) + " "); - app_log() << 
pad << "end SHOSetBuilderT report" << std::endl; - app_log().flush(); + app_log() << pad << "SHOSetBuilderT report" << std::endl; + app_log() << pad << " dimension = " << QMCTraits::DIM << std::endl; + app_log() << pad << " mass = " << mass << std::endl; + app_log() << pad << " frequency = " << energy << std::endl; + app_log() << pad << " energy = " << energy << std::endl; + app_log() << pad << " length = " << length << std::endl; + app_log() << pad << " center = " << center << std::endl; + app_log() << pad << " nstates = " << nstates << std::endl; + app_log() << pad << " nmax = " << nmax << std::endl; + app_log() << pad << " ind_dims = " << ind_dims << std::endl; + app_log() << pad << " # basis states = " << basis_states.size() << std::endl; + app_log() << pad << " basis_states" << std::endl; + for (int s = 0; s < basis_states.size(); ++s) + basis_states[s]->report(pad + " " + int2string(s) + " "); + app_log() << pad << "end SHOSetBuilderT report" << std::endl; + app_log().flush(); } #ifndef QMC_COMPLEX diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h index 96237ab55ee..0c1b4ea48f3 100644 --- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h +++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h @@ -22,48 +22,43 @@ namespace qmcplusplus { -template +template class SHOSetBuilderT : public SPOSetBuilderT { public: - using RealType = typename SPOSetT::RealType; - using PosType = typename SPOSetT::PosType; - using indices_t = typename SPOSetBuilderT::indices_t; + using RealType = typename SPOSetT::RealType; + using PosType = typename SPOSetT::PosType; + using indices_t = typename SPOSetBuilderT::indices_t; - ParticleSetT& Ps; + ParticleSetT& Ps; - RealType length; - RealType mass; - RealType energy; - PosType center; + RealType length; + RealType mass; + RealType energy; + PosType center; - int nstates; - int nmax; - TinyVector ind_dims; + int nstates; + int nmax; + 
TinyVector ind_dims; - SPOSetInfoSimple basis_states; + SPOSetInfoSimple basis_states; - // construction/destruction - SHOSetBuilderT(ParticleSetT& P, Communicate* comm); + // construction/destruction + SHOSetBuilderT(ParticleSetT& P, Communicate* comm); - ~SHOSetBuilderT() override; + ~SHOSetBuilderT() override; - // reset parameters - void - reset(); + // reset parameters + void reset(); - // SPOSetBuilder interface - std::unique_ptr> - createSPOSetFromXML(xmlNodePtr cur) override; + // SPOSetBuilder interface + std::unique_ptr> createSPOSetFromXML(xmlNodePtr cur) override; - std::unique_ptr> - createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) override; + std::unique_ptr> createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) override; - // local functions - void - update_basis_states(int smax); - void - report(const std::string& pad = ""); + // local functions + void update_basis_states(int smax); + void report(const std::string& pad = ""); }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp index 1286b07393f..cf7063b0cd4 100644 --- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp +++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp @@ -19,537 +19,553 @@ namespace qmcplusplus { -template -SHOSetT::SHOSetT(const std::string& my_name, RealType l, PosType c, - const std::vector& sho_states) : - SPOSetT(my_name), - length(l), - center(c) +template +SHOSetT::SHOSetT(const std::string& my_name, RealType l, PosType c, const std::vector& sho_states) + : SPOSetT(my_name), length(l), center(c) { - state_info.resize(sho_states.size()); - for (int s = 0; s < sho_states.size(); ++s) - state_info[s] = *sho_states[s]; - initialize(); + state_info.resize(sho_states.size()); + for (int s = 0; s < sho_states.size(); ++s) + state_info[s] = *sho_states[s]; + initialize(); } -template -void -SHOSetT::initialize() +template +void SHOSetT::initialize() { - using std::sqrt; + 
using std::sqrt; - this->OrbitalSetSize = state_info.size(); + this->OrbitalSetSize = state_info.size(); - qn_max = -1; - for (int s = 0; s < state_info.size(); ++s) - for (int d = 0; d < QMCTraits::DIM; ++d) - qn_max[d] = std::max(qn_max[d], state_info[s].quantum_number[d]); - qn_max += 1; - - nmax = -1; + qn_max = -1; + for (int s = 0; s < state_info.size(); ++s) for (int d = 0; d < QMCTraits::DIM; ++d) - nmax = std::max(nmax, qn_max[d]); - - prefactors.resize(nmax); - hermite.resize(QMCTraits::DIM, nmax); - bvalues.resize(QMCTraits::DIM, nmax); - - if (nmax > 0) { - prefactors[0] = 1.0 / (sqrt(sqrt(M_PI) * length)); - for (int n = 1; n < nmax; ++n) - prefactors[n] = prefactors[n - 1] / sqrt(2. * n); - } + qn_max[d] = std::max(qn_max[d], state_info[s].quantum_number[d]); + qn_max += 1; + + nmax = -1; + for (int d = 0; d < QMCTraits::DIM; ++d) + nmax = std::max(nmax, qn_max[d]); + + prefactors.resize(nmax); + hermite.resize(QMCTraits::DIM, nmax); + bvalues.resize(QMCTraits::DIM, nmax); + + if (nmax > 0) + { + prefactors[0] = 1.0 / (sqrt(sqrt(M_PI) * length)); + for (int n = 1; n < nmax; ++n) + prefactors[n] = prefactors[n - 1] / sqrt(2. 
* n); + } } -template +template SHOSetT::~SHOSetT() = default; -template -std::unique_ptr> -SHOSetT::makeClone() const +template +std::unique_ptr> SHOSetT::makeClone() const { - return std::make_unique>(*this); + return std::make_unique>(*this); } -template -void -SHOSetT::report(const std::string& pad) const +template +void SHOSetT::report(const std::string& pad) const { - app_log() << pad << "SHOSet report" << std::endl; - app_log() << pad << " length = " << length << std::endl; - app_log() << pad << " center = " << center << std::endl; - app_log() << pad << " nmax = " << nmax << std::endl; - app_log() << pad << " qn_max = " << qn_max << std::endl; - app_log() << pad << " # states = " << state_info.size() << std::endl; - app_log() << pad << " states" << std::endl; - for (int s = 0; s < state_info.size(); ++s) - state_info[s].sho_report(pad + " " + int2string(s) + " "); - app_log() << pad << "end SHOSet report" << std::endl; - app_log().flush(); + app_log() << pad << "SHOSet report" << std::endl; + app_log() << pad << " length = " << length << std::endl; + app_log() << pad << " center = " << center << std::endl; + app_log() << pad << " nmax = " << nmax << std::endl; + app_log() << pad << " qn_max = " << qn_max << std::endl; + app_log() << pad << " # states = " << state_info.size() << std::endl; + app_log() << pad << " states" << std::endl; + for (int s = 0; s < state_info.size(); ++s) + state_info[s].sho_report(pad + " " + int2string(s) + " "); + app_log() << pad << "end SHOSet report" << std::endl; + app_log().flush(); } -template -void -SHOSetT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) +template +void SHOSetT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) { - const PosType& r(P.activeR(iat)); - ValueVector p(&psi[0], this->size()); - evaluate_v(r, p); + const PosType& r(P.activeR(iat)); + ValueVector p(&psi[0], this->size()); + evaluate_v(r, p); } -template -void -SHOSetT::evaluateVGL(const ParticleSetT& P, int iat, 
ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi) +template +void SHOSetT::evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { - const PosType& r(P.activeR(iat)); - ValueVector p(&psi[0], this->size()); - GradVector dp(&dpsi[0], this->size()); - ValueVector d2p(&d2psi[0], this->size()); - evaluate_vgl(r, p, dp, d2p); + const PosType& r(P.activeR(iat)); + ValueVector p(&psi[0], this->size()); + GradVector dp(&dpsi[0], this->size()); + ValueVector d2p(&d2psi[0], this->size()); + evaluate_vgl(r, p, dp, d2p); } -template -void -SHOSetT::evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) +template +void SHOSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) { - for (int iat = first, i = 0; iat < last; ++iat, ++i) { - ValueVector p(logdet[i], this->size()); - GradVector dp(dlogdet[i], this->size()); - ValueVector d2p(d2logdet[i], this->size()); - evaluate_vgl(P.R[iat], p, dp, d2p); - } + for (int iat = first, i = 0; iat < last; ++iat, ++i) + { + ValueVector p(logdet[i], this->size()); + GradVector dp(dlogdet[i], this->size()); + ValueVector d2p(d2logdet[i], this->size()); + evaluate_vgl(P.R[iat], p, dp, d2p); + } } -template -void -SHOSetT::evaluate_v(PosType r, ValueVector& psi) +template +void SHOSetT::evaluate_v(PosType r, ValueVector& psi) { - PosType x = (r - center) / length; - evaluate_hermite(x); - evaluate_d0(x, psi); + PosType x = (r - center) / length; + evaluate_hermite(x); + evaluate_d0(x, psi); } -template -void -SHOSetT::evaluate_vgl( - PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +template +void SHOSetT::evaluate_vgl(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { - PosType x = (r - center) / length; - evaluate_hermite(x); - evaluate_d0(x, psi); - evaluate_d1(x, psi, dpsi); - 
evaluate_d2(x, psi, d2psi); + PosType x = (r - center) / length; + evaluate_hermite(x); + evaluate_d0(x, psi); + evaluate_d1(x, psi, dpsi); + evaluate_d2(x, psi, d2psi); } -template -void -SHOSetT::evaluate_hermite(const PosType& xpos) +template +void SHOSetT::evaluate_hermite(const PosType& xpos) { - for (int d = 0; d < QMCTraits::DIM; ++d) { - int nh = qn_max[d]; - if (nh > 0) { - RealType x = xpos[d]; - hermite(d, 0) = 1.0; - RealType Hnm2 = 0.0; - RealType Hnm1 = 1.0; - for (int n = 1; n < nh; ++n) { - RealType Hn = 2 * (x * Hnm1 - (n - 1) * Hnm2); - hermite(d, n) = Hn; - Hnm2 = Hnm1; - Hnm1 = Hn; - } - } + for (int d = 0; d < QMCTraits::DIM; ++d) + { + int nh = qn_max[d]; + if (nh > 0) + { + RealType x = xpos[d]; + hermite(d, 0) = 1.0; + RealType Hnm2 = 0.0; + RealType Hnm1 = 1.0; + for (int n = 1; n < nh; ++n) + { + RealType Hn = 2 * (x * Hnm1 - (n - 1) * Hnm2); + hermite(d, n) = Hn; + Hnm2 = Hnm1; + Hnm1 = Hn; + } } + } } -template -void -SHOSetT::evaluate_d0(const PosType& xpos, ValueVector& psi) +template +void SHOSetT::evaluate_d0(const PosType& xpos, ValueVector& psi) { - using std::exp; - for (int d = 0; d < QMCTraits::DIM; ++d) { - RealType x = xpos[d]; - RealType g = exp(-.5 * x * x); - for (int n = 0; n < qn_max[d]; ++n) { - bvalues(d, n) = prefactors[n] * g * hermite(d, n); - } - } - for (int s = 0; s < state_info.size(); ++s) { - const SHOState& state = state_info[s]; - RealType phi = 1.0; - for (int d = 0; d < QMCTraits::DIM; ++d) - phi *= bvalues(d, state.quantum_number[d]); - psi[s] = phi; + using std::exp; + for (int d = 0; d < QMCTraits::DIM; ++d) + { + RealType x = xpos[d]; + RealType g = exp(-.5 * x * x); + for (int n = 0; n < qn_max[d]; ++n) + { + bvalues(d, n) = prefactors[n] * g * hermite(d, n); } + } + for (int s = 0; s < state_info.size(); ++s) + { + const SHOState& state = state_info[s]; + RealType phi = 1.0; + for (int d = 0; d < QMCTraits::DIM; ++d) + phi *= bvalues(d, state.quantum_number[d]); + psi[s] = phi; + } } -template -void 
-SHOSetT::evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi) +template +void SHOSetT::evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi) { - RealType ol = 1.0 / length; - for (int d = 0; d < QMCTraits::DIM; ++d) { - RealType x = xpos[d]; - RealType Hnm1 = 0.0; - for (int n = 0; n < qn_max[d]; ++n) { - RealType Hn = hermite(d, n); - bvalues(d, n) = (-x + 2 * n * Hnm1 / Hn) * ol; - Hnm1 = Hn; - } - } - for (int s = 0; s < state_info.size(); ++s) { - const SHOState& state = state_info[s]; - TinyVector dphi; - for (int d = 0; d < QMCTraits::DIM; ++d) - dphi[d] = bvalues(d, state.quantum_number[d]); - dphi *= psi[s]; - dpsi[s] = dphi; + RealType ol = 1.0 / length; + for (int d = 0; d < QMCTraits::DIM; ++d) + { + RealType x = xpos[d]; + RealType Hnm1 = 0.0; + for (int n = 0; n < qn_max[d]; ++n) + { + RealType Hn = hermite(d, n); + bvalues(d, n) = (-x + 2 * n * Hnm1 / Hn) * ol; + Hnm1 = Hn; } + } + for (int s = 0; s < state_info.size(); ++s) + { + const SHOState& state = state_info[s]; + TinyVector dphi; + for (int d = 0; d < QMCTraits::DIM; ++d) + dphi[d] = bvalues(d, state.quantum_number[d]); + dphi *= psi[s]; + dpsi[s] = dphi; + } } -template -void -SHOSetT::evaluate_d2( - const PosType& xpos, ValueVector& psi, ValueVector& d2psi) +template +void SHOSetT::evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& d2psi) { - RealType ol2 = 1.0 / (length * length); - for (int d = 0; d < QMCTraits::DIM; ++d) { - RealType x = xpos[d]; - RealType x2 = x * x; - for (int n = 0; n < qn_max[d]; ++n) { - bvalues(d, n) = (-1.0 + x2 - 2 * n) * ol2; - } - } - for (int s = 0; s < state_info.size(); ++s) { - const SHOState& state = state_info[s]; - T d2phi = 0.0; - for (int d = 0; d < QMCTraits::DIM; ++d) - d2phi += bvalues(d, state.quantum_number[d]); - d2phi *= psi[s]; - d2psi[s] = d2phi; + RealType ol2 = 1.0 / (length * length); + for (int d = 0; d < QMCTraits::DIM; ++d) + { + RealType x = xpos[d]; + RealType x2 = x * x; + for (int n = 0; n 
< qn_max[d]; ++n) + { + bvalues(d, n) = (-1.0 + x2 - 2 * n) * ol2; } + } + for (int s = 0; s < state_info.size(); ++s) + { + const SHOState& state = state_info[s]; + T d2phi = 0.0; + for (int d = 0; d < QMCTraits::DIM; ++d) + d2phi += bvalues(d, state.quantum_number[d]); + d2phi *= psi[s]; + d2psi[s] = d2phi; + } } -template -void -SHOSetT::evaluate_check( - PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +template +void SHOSetT::evaluate_check(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { - using std::exp; - using std::sqrt; - - evaluate_vgl(r, psi, dpsi, d2psi); - - const int N = 6; - RealType H[N], dH[N], d2H[N], pre[N]; - RealType p[N], dp[N], d2p[N]; - - pre[0] = 1.0 / (sqrt(sqrt(M_PI) * length)); - for (int n = 1; n < N; ++n) - pre[n] = pre[n - 1] / sqrt(2. * n); - - for (int d = 0; d < QMCTraits::DIM; ++d) { - RealType x = (r[d] - center[d]) / length; - RealType x2 = x * x, x3 = x * x * x, x4 = x * x * x * x, - x5 = x * x * x * x * x; - H[0] = 1; - dH[0] = 0; - d2H[0] = 0; - H[1] = 2 * x; - dH[1] = 2; - d2H[1] = 0; - H[2] = 4 * x2 - 2; - dH[2] = 8 * x; - d2H[2] = 8; - H[3] = 8 * x3 - 12 * x; - dH[3] = 24 * x2 - 12; - d2H[3] = 48 * x; - H[4] = 16 * x4 - 48 * x2 + 12; - dH[4] = 64 * x3 - 96 * x; - d2H[4] = 192 * x2 - 96; - H[5] = 32 * x5 - 160 * x3 + 120 * x; - dH[5] = 160 * x4 - 480 * x2 + 120; - d2H[5] = 640 * x3 - 960 * x; - RealType g = exp(-x2 / 2); - for (int n = 0; n < N; ++n) { - p[n] = pre[n] * g * H[n]; - dp[n] = pre[n] * g * (-x * H[n] + dH[n]); - d2p[n] = pre[n] * g * ((x2 - 1) * H[n] - 2 * x * dH[n] + d2H[n]); - } - app_log() << "eval check dim = " << d << " x = " << x << std::endl; - app_log() << " hermite check" << std::endl; - for (int n = 0; n < qn_max[d]; ++n) { - app_log() << " " << n << " " << H[n] << std::endl; - app_log() << " " << n << " " << hermite(d, n) << std::endl; - } - app_log() << " phi d0 check" << std::endl; - for (int n = 0; n < qn_max[d]; ++n) { - app_log() << " " << n << " " << p[n] 
<< std::endl; - app_log() << " " << n << " " << d0_values(d, n) << std::endl; - } - app_log() << " phi d1 check" << std::endl; - for (int n = 0; n < qn_max[d]; ++n) { - app_log() << " " << n << " " << dp[n] / p[n] << std::endl; - app_log() << " " << n << " " << d1_values(d, n) << std::endl; - } - app_log() << " phi d2 check" << std::endl; - for (int n = 0; n < qn_max[d]; ++n) { - app_log() << " " << n << " " << d2p[n] / p[n] << std::endl; - app_log() << " " << n << " " << d2_values(d, n) << std::endl; - } + using std::exp; + using std::sqrt; + + evaluate_vgl(r, psi, dpsi, d2psi); + + const int N = 6; + RealType H[N], dH[N], d2H[N], pre[N]; + RealType p[N], dp[N], d2p[N]; + + pre[0] = 1.0 / (sqrt(sqrt(M_PI) * length)); + for (int n = 1; n < N; ++n) + pre[n] = pre[n - 1] / sqrt(2. * n); + + for (int d = 0; d < QMCTraits::DIM; ++d) + { + RealType x = (r[d] - center[d]) / length; + RealType x2 = x * x, x3 = x * x * x, x4 = x * x * x * x, x5 = x * x * x * x * x; + H[0] = 1; + dH[0] = 0; + d2H[0] = 0; + H[1] = 2 * x; + dH[1] = 2; + d2H[1] = 0; + H[2] = 4 * x2 - 2; + dH[2] = 8 * x; + d2H[2] = 8; + H[3] = 8 * x3 - 12 * x; + dH[3] = 24 * x2 - 12; + d2H[3] = 48 * x; + H[4] = 16 * x4 - 48 * x2 + 12; + dH[4] = 64 * x3 - 96 * x; + d2H[4] = 192 * x2 - 96; + H[5] = 32 * x5 - 160 * x3 + 120 * x; + dH[5] = 160 * x4 - 480 * x2 + 120; + d2H[5] = 640 * x3 - 960 * x; + RealType g = exp(-x2 / 2); + for (int n = 0; n < N; ++n) + { + p[n] = pre[n] * g * H[n]; + dp[n] = pre[n] * g * (-x * H[n] + dH[n]); + d2p[n] = pre[n] * g * ((x2 - 1) * H[n] - 2 * x * dH[n] + d2H[n]); + } + app_log() << "eval check dim = " << d << " x = " << x << std::endl; + app_log() << " hermite check" << std::endl; + for (int n = 0; n < qn_max[d]; ++n) + { + app_log() << " " << n << " " << H[n] << std::endl; + app_log() << " " << n << " " << hermite(d, n) << std::endl; } + app_log() << " phi d0 check" << std::endl; + for (int n = 0; n < qn_max[d]; ++n) + { + app_log() << " " << n << " " << p[n] << std::endl; + 
app_log() << " " << n << " " << d0_values(d, n) << std::endl; + } + app_log() << " phi d1 check" << std::endl; + for (int n = 0; n < qn_max[d]; ++n) + { + app_log() << " " << n << " " << dp[n] / p[n] << std::endl; + app_log() << " " << n << " " << d1_values(d, n) << std::endl; + } + app_log() << " phi d2 check" << std::endl; + for (int n = 0; n < qn_max[d]; ++n) + { + app_log() << " " << n << " " << d2p[n] / p[n] << std::endl; + app_log() << " " << n << " " << d2_values(d, n) << std::endl; + } + } } -template -void -SHOSetT::test_derivatives() +template +void SHOSetT::test_derivatives() { - int n = 3; - PosType c = 5.123; - PosType L = 1.0; - PosType drg = L / n; - PosType dr = L / 1000; - int nphi = state_info.size(); - - PosType o2dr, odr2; - - ValueVector vpsi, vpsitmp; - GradVector vdpsi, vdpsin; - ValueVector vd2psi, vd2psin; - - vpsi.resize(nphi); - vdpsi.resize(nphi); - vd2psi.resize(nphi); - - vpsitmp.resize(nphi); - vdpsin.resize(nphi); - vd2psin.resize(nphi); - - ValueVector psi(&vpsi[0], this->size()); - GradVector dpsi(&vdpsi[0], this->size()); - ValueVector d2psi(&vd2psi[0], this->size()); - - ValueVector psitmp(&vpsitmp[0], this->size()); - GradVector dpsin(&vdpsin[0], this->size()); - ValueVector d2psin(&vd2psin[0], this->size()); - - app_log() << " loading dr" << std::endl; - - RealType odr2sum = 0.0; - for (int d = 0; d < QMCTraits::DIM; ++d) { - RealType odr = 1.0 / dr[d]; - o2dr[d] = .5 * odr; - odr2[d] = odr * odr; - odr2sum += odr2[d]; - } + int n = 3; + PosType c = 5.123; + PosType L = 1.0; + PosType drg = L / n; + PosType dr = L / 1000; + int nphi = state_info.size(); + + PosType o2dr, odr2; + + ValueVector vpsi, vpsitmp; + GradVector vdpsi, vdpsin; + ValueVector vd2psi, vd2psin; + + vpsi.resize(nphi); + vdpsi.resize(nphi); + vd2psi.resize(nphi); + + vpsitmp.resize(nphi); + vdpsin.resize(nphi); + vd2psin.resize(nphi); + + ValueVector psi(&vpsi[0], this->size()); + GradVector dpsi(&vdpsi[0], this->size()); + ValueVector d2psi(&vd2psi[0], 
this->size()); + + ValueVector psitmp(&vpsitmp[0], this->size()); + GradVector dpsin(&vdpsin[0], this->size()); + ValueVector d2psin(&vd2psin[0], this->size()); + + app_log() << " loading dr" << std::endl; + + RealType odr2sum = 0.0; + for (int d = 0; d < QMCTraits::DIM; ++d) + { + RealType odr = 1.0 / dr[d]; + o2dr[d] = .5 * odr; + odr2[d] = odr * odr; + odr2sum += odr2[d]; + } + + app_log() << "SHOSet::test_derivatives" << std::endl; + + const SimulationCellT simulation_cell; + ParticleSetT Ps(simulation_cell); + + int p = 0; + PosType r, rtmp; + for (int i = 0; i < n; ++i) + { + r[0] = c[0] + i * drg[0]; + for (int j = 0; j < n; ++j) + { + r[1] = c[1] + j * drg[1]; + for (int k = 0; k < n; ++k) + { + r[2] = c[2] + k * drg[2]; + + evaluate_vgl(r, psi, dpsi, d2psi); + + for (int m = 0; m < nphi; ++m) + d2psin[m] = -2 * odr2sum * psi[m]; + for (int d = 0; d < QMCTraits::DIM; ++d) + { + rtmp = r; + rtmp[d] += dr[d]; + evaluate_v(rtmp, psitmp); + for (int m = 0; m < nphi; ++m) + { + T phi = psitmp[m]; + dpsin[m][d] = phi * o2dr[d]; + d2psin[m] += phi * odr2[d]; + } + rtmp = r; + rtmp[d] -= dr[d]; + evaluate_v(rtmp, psitmp); + for (int m = 0; m < nphi; ++m) + { + T phi = psitmp[m]; + dpsin[m][d] -= phi * o2dr[d]; + d2psin[m] += phi * odr2[d]; + } + } - app_log() << "SHOSet::test_derivatives" << std::endl; - - const SimulationCellT simulation_cell; - ParticleSetT Ps(simulation_cell); - - int p = 0; - PosType r, rtmp; - for (int i = 0; i < n; ++i) { - r[0] = c[0] + i * drg[0]; - for (int j = 0; j < n; ++j) { - r[1] = c[1] + j * drg[1]; - for (int k = 0; k < n; ++k) { - r[2] = c[2] + k * drg[2]; - - evaluate_vgl(r, psi, dpsi, d2psi); - - for (int m = 0; m < nphi; ++m) - d2psin[m] = -2 * odr2sum * psi[m]; - for (int d = 0; d < QMCTraits::DIM; ++d) { - rtmp = r; - rtmp[d] += dr[d]; - evaluate_v(rtmp, psitmp); - for (int m = 0; m < nphi; ++m) { - T phi = psitmp[m]; - dpsin[m][d] = phi * o2dr[d]; - d2psin[m] += phi * odr2[d]; - } - rtmp = r; - rtmp[d] -= dr[d]; - 
evaluate_v(rtmp, psitmp); - for (int m = 0; m < nphi; ++m) { - T phi = psitmp[m]; - dpsin[m][d] -= phi * o2dr[d]; - d2psin[m] += phi * odr2[d]; - } - } - - RealType dphi_diff = 0.0; - RealType d2phi_diff = 0.0; - for (int m = 0; m < nphi; ++m) - for (int d = 0; d < QMCTraits::DIM; ++d) - dphi_diff = std::max(dphi_diff, - std::abs(dpsi[m][d] - dpsin[m][d]) / - std::abs(dpsin[m][d])); - for (int m = 0; m < nphi; ++m) - d2phi_diff = std::max(d2phi_diff, - std::abs(d2psi[m] - d2psin[m]) / std::abs(d2psin[m])); - app_log() << " " << p << " " << dphi_diff << " " << d2phi_diff - << std::endl; - app_log() << " derivatives" << std::endl; - for (int m = 0; m < nphi; ++m) { - std::string qn = ""; - for (int d = 0; d < QMCTraits::DIM; ++d) - qn += int2string(state_info[m].quantum_number[d]) + " "; - app_log() << " " << qn; - for (int d = 0; d < QMCTraits::DIM; ++d) - app_log() << real(dpsi[m][d]) << " "; - app_log() << std::endl; - app_log() << " " << qn; - for (int d = 0; d < QMCTraits::DIM; ++d) - app_log() << real(dpsin[m][d]) << " "; - app_log() << std::endl; - } - app_log() << " laplacians" << std::endl; - PosType x = r / length; - for (int m = 0; m < nphi; ++m) { - std::string qn = ""; - for (int d = 0; d < QMCTraits::DIM; ++d) - qn += int2string(state_info[m].quantum_number[d]) + " "; - app_log() - << " " << qn << real(d2psi[m] / psi[m]) << std::endl; - app_log() << " " << qn << real(d2psin[m] / psi[m]) - << std::endl; - } - p++; - } + RealType dphi_diff = 0.0; + RealType d2phi_diff = 0.0; + for (int m = 0; m < nphi; ++m) + for (int d = 0; d < QMCTraits::DIM; ++d) + dphi_diff = std::max(dphi_diff, std::abs(dpsi[m][d] - dpsin[m][d]) / std::abs(dpsin[m][d])); + for (int m = 0; m < nphi; ++m) + d2phi_diff = std::max(d2phi_diff, std::abs(d2psi[m] - d2psin[m]) / std::abs(d2psin[m])); + app_log() << " " << p << " " << dphi_diff << " " << d2phi_diff << std::endl; + app_log() << " derivatives" << std::endl; + for (int m = 0; m < nphi; ++m) + { + std::string qn = ""; + for (int 
d = 0; d < QMCTraits::DIM; ++d) + qn += int2string(state_info[m].quantum_number[d]) + " "; + app_log() << " " << qn; + for (int d = 0; d < QMCTraits::DIM; ++d) + app_log() << real(dpsi[m][d]) << " "; + app_log() << std::endl; + app_log() << " " << qn; + for (int d = 0; d < QMCTraits::DIM; ++d) + app_log() << real(dpsin[m][d]) << " "; + app_log() << std::endl; } + app_log() << " laplacians" << std::endl; + PosType x = r / length; + for (int m = 0; m < nphi; ++m) + { + std::string qn = ""; + for (int d = 0; d < QMCTraits::DIM; ++d) + qn += int2string(state_info[m].quantum_number[d]) + " "; + app_log() << " " << qn << real(d2psi[m] / psi[m]) << std::endl; + app_log() << " " << qn << real(d2psin[m] / psi[m]) << std::endl; + } + p++; + } } + } - app_log() << "end SHOSet::test_derivatives" << std::endl; + app_log() << "end SHOSet::test_derivatives" << std::endl; } -template -void -SHOSetT::test_overlap() +template +void SHOSetT::test_overlap() { - app_log() << "SHOSet::test_overlap" << std::endl; + app_log() << "SHOSet::test_overlap" << std::endl; - // linear - int d = 0; + // linear + int d = 0; - app_log() << " length = " << length << std::endl; - app_log() << " prefactors" << std::endl; - for (int n = 0; n < qn_max[d]; ++n) - app_log() << " " << n << " " << prefactors[n] << std::endl; + app_log() << " length = " << length << std::endl; + app_log() << " prefactors" << std::endl; + for (int n = 0; n < qn_max[d]; ++n) + app_log() << " " << n << " " << prefactors[n] << std::endl; + + app_log() << " 1d overlap" << std::endl; - app_log() << " 1d overlap" << std::endl; + ValueVector vpsi; + vpsi.resize(this->size()); + ValueVector psi(&vpsi[0], this->size()); - ValueVector vpsi; - vpsi.resize(this->size()); - ValueVector psi(&vpsi[0], this->size()); + double xmax = 4.0; + double dx = .1; + double dr = length * dx; - double xmax = 4.0; - double dx = .1; - double dr = length * dx; + int nphi = qn_max[d]; + Array omat; + omat.resize(nphi, nphi); + for (int i = 0; i < nphi; ++i) 
+ for (int j = 0; j < nphi; ++j) + omat(i, j) = 0.0; + + PosType xp = 0.0; + for (double x = -xmax; x < xmax; x += dx) + { + xp[d] = x; + evaluate_hermite(xp); + evaluate_d0(xp, psi); - int nphi = qn_max[d]; - Array omat; - omat.resize(nphi, nphi); for (int i = 0; i < nphi; ++i) - for (int j = 0; j < nphi; ++j) - omat(i, j) = 0.0; + for (int j = 0; j < nphi; ++j) + omat(i, j) += bvalues(d, i) * bvalues(d, j) * dr; + } - PosType xp = 0.0; - for (double x = -xmax; x < xmax; x += dx) { - xp[d] = x; + for (int i = 0; i < nphi; ++i) + { + app_log() << std::endl; + for (int j = 0; j < nphi; ++j) + app_log() << omat(i, j) << " "; + } + app_log() << std::endl; + + // volumetric + app_log() << " 3d overlap" << std::endl; + double dV = dr * dr * dr; + nphi = this->size(); + omat.resize(nphi, nphi); + for (int i = 0; i < nphi; ++i) + for (int j = 0; j < nphi; ++j) + omat(i, j) = 0.0; + for (double x = -xmax; x < xmax; x += dx) + for (double y = -xmax; y < xmax; y += dx) + for (double z = -xmax; z < xmax; z += dx) + { + xp[0] = x; + xp[1] = y; + xp[2] = z; evaluate_hermite(xp); evaluate_d0(xp, psi); for (int i = 0; i < nphi; ++i) - for (int j = 0; j < nphi; ++j) - omat(i, j) += bvalues(d, i) * bvalues(d, j) * dr; - } - - for (int i = 0; i < nphi; ++i) { - app_log() << std::endl; - for (int j = 0; j < nphi; ++j) - app_log() << omat(i, j) << " "; - } - app_log() << std::endl; - - // volumetric - app_log() << " 3d overlap" << std::endl; - double dV = dr * dr * dr; - nphi = this->size(); - omat.resize(nphi, nphi); - for (int i = 0; i < nphi; ++i) - for (int j = 0; j < nphi; ++j) - omat(i, j) = 0.0; - for (double x = -xmax; x < xmax; x += dx) - for (double y = -xmax; y < xmax; y += dx) - for (double z = -xmax; z < xmax; z += dx) { - xp[0] = x; - xp[1] = y; - xp[2] = z; - evaluate_hermite(xp); - evaluate_d0(xp, psi); - - for (int i = 0; i < nphi; ++i) - for (int j = 0; j < nphi; ++j) - omat(i, j) += std::abs(psi[i] * psi[j]) * dV; - } - for (int i = 0; i < nphi; ++i) { - app_log() 
<< std::endl; - for (int j = 0; j < nphi; ++j) - app_log() << omat(i, j) << " "; - } + for (int j = 0; j < nphi; ++j) + omat(i, j) += std::abs(psi[i] * psi[j]) * dV; + } + for (int i = 0; i < nphi; ++i) + { app_log() << std::endl; + for (int j = 0; j < nphi; ++j) + app_log() << omat(i, j) << " "; + } + app_log() << std::endl; - app_log() << "end SHOSet::test_overlap" << std::endl; + app_log() << "end SHOSet::test_overlap" << std::endl; } -template -void -SHOSetT::evaluateThirdDeriv(const ParticleSetT& P, int first, int last, - GGGMatrix& grad_grad_grad_logdet) +template +void SHOSetT::evaluateThirdDeriv(const ParticleSetT& P, int first, int last, GGGMatrix& grad_grad_grad_logdet) { - not_implemented("evaluateThirdDeriv(P,first,last,dddlogdet)"); + not_implemented("evaluateThirdDeriv(P,first,last,dddlogdet)"); } -template -void -SHOSetT::evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet) +template +void SHOSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet) { - not_implemented( - "evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet)"); + not_implemented("evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet)"); } -template -void -SHOSetT::evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) +template +void SHOSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet, + GGGMatrix& grad_grad_grad_logdet) { - not_implemented( - "evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet,dddlogdet)"); + not_implemented("evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet,dddlogdet)"); } -template -void -SHOSetT::evaluateGradSource(const ParticleSetT& P, int 
first, int last, - const ParticleSetT& source, int iat_src, GradMatrix& gradphi) +template +void SHOSetT::evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& gradphi) { - not_implemented("evaluateGradSource(P,first,last,source,iat,dphi)"); + not_implemented("evaluateGradSource(P,first,last,source,iat,dphi)"); } -template -void -SHOSetT::evaluateGradSource(const ParticleSetT& P, int first, int last, - const ParticleSetT& source, int iat_src, GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi) +template +void SHOSetT::evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& grad_phi, + HessMatrix& grad_grad_phi, + GradMatrix& grad_lapl_phi) { - not_implemented( - "evaluateGradSource(P,first,last,source,iat,dphi,ddphi,dd2phi)"); + not_implemented("evaluateGradSource(P,first,last,source,iat,dphi,ddphi,dd2phi)"); } // Class concrete types from ValueType diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h index d8e89e9e0ec..ab3ab284e11 100644 --- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h +++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h @@ -23,153 +23,133 @@ namespace qmcplusplus { struct SHOState : public SPOInfo { - TinyVector quantum_number; - - SHOState() - { - quantum_number = -1; - energy = 0.0; - } - - ~SHOState() override - { - } - - inline void - set(TinyVector qn, RealType e) - { - quantum_number = qn; - energy = e; - } - - inline void - sho_report(const std::string& pad = "") const - { - app_log() << pad << "qn=" << quantum_number << " e=" << energy - << std::endl; - } + TinyVector quantum_number; + + SHOState() + { + quantum_number = -1; + energy = 0.0; + } + + ~SHOState() override {} + + inline void set(TinyVector qn, RealType e) + { + quantum_number = qn; + energy = e; + } + + inline void sho_report(const 
std::string& pad = "") const + { + app_log() << pad << "qn=" << quantum_number << " e=" << energy << std::endl; + } }; -template +template class SHOSetT : public SPOSetT { public: - using GradVector = typename SPOSetT::GradVector; - using ValueVector = typename SPOSetT::ValueVector; - using ValueMatrix = typename SPOSetT::ValueMatrix; - using GradMatrix = typename SPOSetT::GradMatrix; - using value_type = typename ValueMatrix::value_type; - using grad_type = typename GradMatrix::value_type; - using RealType = typename SPOSetT::RealType; - using PosType = TinyVector; - using HessType = typename OrbitalSetTraits::HessType; - using HessMatrix = typename OrbitalSetTraits::HessMatrix; - using GGGType = TinyVector; - using GGGVector = Vector; - using GGGMatrix = Matrix; - - RealType length; - PosType center; - - int nmax; - TinyVector qn_max; - std::vector state_info; - std::vector prefactors; - Array hermite; - Array bvalues; - Array d0_values; - Array d1_values; - Array d2_values; - - // construction/destruction - SHOSetT(const std::string& my_name, RealType l, PosType c, - const std::vector& sho_states); - - ~SHOSetT() override; - - std::string - getClassName() const override - { - return "SHOSet"; - } - - void - initialize(); - - // SPOSet interface methods - std::unique_ptr> - makeClone() const override; - - void - evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override; - - void - evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi) override; - - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - - // local functions - void - evaluate_v(PosType r, ValueVector& psi); - void - evaluate_vgl( - PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - void - evaluate_hermite(const PosType& xpos); - void - evaluate_d0(const PosType& xpos, ValueVector& psi); - void - evaluate_d1(const PosType& 
xpos, ValueVector& psi, GradVector& dpsi); - void - evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& d2psi); - void - report(const std::string& pad = "") const override; - void - test_derivatives(); - void - test_overlap(); - void - evaluate_check( - PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - // empty methods - /// number of orbitals is determined only by initial request - inline void - setOrbitalSetSize(int norbs) override - { - } - - /// unimplemented functions call this to abort - inline void - not_implemented(const std::string& method) - { - APP_ABORT("SHOSet::" + method + " has not been implemented."); - } - - // methods to be implemented in the future (possibly) - void - evaluateThirdDeriv(const ParticleSetT& P, int first, int last, - GGGMatrix& dddlogdet) override; - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, - HessMatrix& ddlogdet) override; - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& ddlogdet, - GGGMatrix& dddlogdet) override; - void - evaluateGradSource(const ParticleSetT& P, int first, int last, - const ParticleSetT& source, int iat_src, - GradMatrix& gradphi) override; - void - evaluateGradSource(const ParticleSetT& P, int first, int last, - const ParticleSetT& source, int iat_src, GradMatrix& dphi, - HessMatrix& ddphi, GradMatrix& dlapl_phi) override; + using GradVector = typename SPOSetT::GradVector; + using ValueVector = typename SPOSetT::ValueVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + using value_type = typename ValueMatrix::value_type; + using grad_type = typename GradMatrix::value_type; + using RealType = typename SPOSetT::RealType; + using PosType = TinyVector; + using HessType = typename OrbitalSetTraits::HessType; + using HessMatrix = typename OrbitalSetTraits::HessMatrix; + 
using GGGType = TinyVector; + using GGGVector = Vector; + using GGGMatrix = Matrix; + + RealType length; + PosType center; + + int nmax; + TinyVector qn_max; + std::vector state_info; + std::vector prefactors; + Array hermite; + Array bvalues; + Array d0_values; + Array d1_values; + Array d2_values; + + // construction/destruction + SHOSetT(const std::string& my_name, RealType l, PosType c, const std::vector& sho_states); + + ~SHOSetT() override; + + std::string getClassName() const override { return "SHOSet"; } + + void initialize(); + + // SPOSet interface methods + std::unique_ptr> makeClone() const override; + + void evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override; + + void evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) override; + + // local functions + void evaluate_v(PosType r, ValueVector& psi); + void evaluate_vgl(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); + void evaluate_hermite(const PosType& xpos); + void evaluate_d0(const PosType& xpos, ValueVector& psi); + void evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi); + void evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& d2psi); + void report(const std::string& pad = "") const override; + void test_derivatives(); + void test_overlap(); + void evaluate_check(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); + + // empty methods + /// number of orbitals is determined only by initial request + inline void setOrbitalSetSize(int norbs) override {} + + /// unimplemented functions call this to abort + inline void not_implemented(const std::string& method) + { + APP_ABORT("SHOSet::" + method + " has not been implemented."); + } + + // methods to be implemented in the future (possibly) + void 
evaluateThirdDeriv(const ParticleSetT& P, int first, int last, GGGMatrix& dddlogdet) override; + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& ddlogdet) override; + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& ddlogdet, + GGGMatrix& dddlogdet) override; + void evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& gradphi) override; + void evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& dphi, + HessMatrix& ddphi, + GradMatrix& dlapl_phi) override; }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp index 022d6db4a50..578c38a4ca6 100644 --- a/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp +++ b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp @@ -30,894 +30,877 @@ namespace qmcplusplus { -template -AOBasisBuilderT::AOBasisBuilderT( - const std::string& eName, Communicate* comm) : - MPIObjectBase(comm), - addsignforM(false), - expandlm(GAUSSIAN_EXPAND), - Morder("gaussian"), - sph("default"), - basisType("Numerical"), - elementType(eName), - Normalized("yes") +template +AOBasisBuilderT::AOBasisBuilderT(const std::string& eName, Communicate* comm) + : MPIObjectBase(comm), + addsignforM(false), + expandlm(GAUSSIAN_EXPAND), + Morder("gaussian"), + sph("default"), + basisType("Numerical"), + elementType(eName), + Normalized("yes") { - // mmorales: for "Cartesian Gaussian", m is an integer that maps - // the component to Gamess notation, see - // Numerics/CartesianTensor.h - nlms_id["n"] = q_n; - nlms_id["l"] = q_l; - nlms_id["m"] = q_m; - nlms_id["s"] = q_s; + // mmorales: for "Cartesian Gaussian", m is an integer that maps + // the component to Gamess notation, see + // 
Numerics/CartesianTensor.h + nlms_id["n"] = q_n; + nlms_id["l"] = q_l; + nlms_id["m"] = q_m; + nlms_id["s"] = q_s; } -template -bool -AOBasisBuilderT::put(xmlNodePtr cur) +template +bool AOBasisBuilderT::put(xmlNodePtr cur) { - ReportEngine PRE("AtomicBasisBuilder", "put(xmlNodePtr)"); - // Register valid attributes attributes - OhmmsAttributeSet aAttrib; - aAttrib.add(basisType, "type"); - aAttrib.add(sph, "angular"); - aAttrib.add(addsignforM, "expM"); - aAttrib.add(Morder, "expandYlm"); - aAttrib.add(Normalized, "normalized"); - aAttrib.put(cur); - PRE.echo(cur); - if (sph == "spherical") - addsignforM = 1; // include (-1)^m - - if (Morder == "gaussian") - expandlm = GAUSSIAN_EXPAND; - else if (Morder == "natural") - expandlm = NATURAL_EXPAND; - else if (Morder == "no") - expandlm = DONOT_EXPAND; - else if (Morder == "pyscf") { - expandlm = MOD_NATURAL_EXPAND; - addsignforM = 1; - if (sph != "spherical") { - myComm->barrier_and_abort( - " Error: expandYlm='pyscf' only compatible with " - "angular='spherical'. Aborting.\n"); - } - } - - if (sph == "cartesian" || Morder == "Gamess") { - expandlm = CARTESIAN_EXPAND; - addsignforM = 0; - } - - if (Morder == "Dirac") { - expandlm = DIRAC_CARTESIAN_EXPAND; - addsignforM = 0; - if (sph != "cartesian") - myComm->barrier_and_abort( - " Error: expandYlm='Dirac' only compatible with " - "angular='cartesian'. 
Aborting\n"); + ReportEngine PRE("AtomicBasisBuilder", "put(xmlNodePtr)"); + // Register valid attributes attributes + OhmmsAttributeSet aAttrib; + aAttrib.add(basisType, "type"); + aAttrib.add(sph, "angular"); + aAttrib.add(addsignforM, "expM"); + aAttrib.add(Morder, "expandYlm"); + aAttrib.add(Normalized, "normalized"); + aAttrib.put(cur); + PRE.echo(cur); + if (sph == "spherical") + addsignforM = 1; // include (-1)^m + + if (Morder == "gaussian") + expandlm = GAUSSIAN_EXPAND; + else if (Morder == "natural") + expandlm = NATURAL_EXPAND; + else if (Morder == "no") + expandlm = DONOT_EXPAND; + else if (Morder == "pyscf") + { + expandlm = MOD_NATURAL_EXPAND; + addsignforM = 1; + if (sph != "spherical") + { + myComm->barrier_and_abort(" Error: expandYlm='pyscf' only compatible with " + "angular='spherical'. Aborting.\n"); } - - // Numerical basis is a special case - if (basisType == "Numerical") - myComm->barrier_and_abort( - "Purely numerical atomic orbitals are not supported any longer."); - - return true; + } + + if (sph == "cartesian" || Morder == "Gamess") + { + expandlm = CARTESIAN_EXPAND; + addsignforM = 0; + } + + if (Morder == "Dirac") + { + expandlm = DIRAC_CARTESIAN_EXPAND; + addsignforM = 0; + if (sph != "cartesian") + myComm->barrier_and_abort(" Error: expandYlm='Dirac' only compatible with " + "angular='cartesian'. 
Aborting\n"); + } + + // Numerical basis is a special case + if (basisType == "Numerical") + myComm->barrier_and_abort("Purely numerical atomic orbitals are not supported any longer."); + + return true; } -template -bool -AOBasisBuilderT::putH5(hdf_archive& hin) +template +bool AOBasisBuilderT::putH5(hdf_archive& hin) { - ReportEngine PRE("AtomicBasisBuilder", "putH5(hin)"); - std::string CenterID, basisName; - - if (myComm->rank() == 0) { - hin.read(sph, "angular"); - hin.read(CenterID, "elementType"); - hin.read(Normalized, "normalized"); - hin.read(Morder, "expandYlm"); - hin.read(basisName, "name"); + ReportEngine PRE("AtomicBasisBuilder", "putH5(hin)"); + std::string CenterID, basisName; + + if (myComm->rank() == 0) + { + hin.read(sph, "angular"); + hin.read(CenterID, "elementType"); + hin.read(Normalized, "normalized"); + hin.read(Morder, "expandYlm"); + hin.read(basisName, "name"); + } + + myComm->bcast(sph); + myComm->bcast(Morder); + myComm->bcast(CenterID); + myComm->bcast(Normalized); + myComm->bcast(basisName); + myComm->bcast(basisType); + myComm->bcast(addsignforM); + + if (sph == "spherical") + addsignforM = 1; // include (-1)^m + + if (Morder == "gaussian") + expandlm = GAUSSIAN_EXPAND; + else if (Morder == "natural") + expandlm = NATURAL_EXPAND; + else if (Morder == "no") + expandlm = DONOT_EXPAND; + else if (Morder == "pyscf") + { + expandlm = MOD_NATURAL_EXPAND; + addsignforM = 1; + if (sph != "spherical") + { + myComm->barrier_and_abort(" Error: expandYlm='pyscf' only compatible with " + "angular='spherical'. 
Aborting.\n"); } - - myComm->bcast(sph); - myComm->bcast(Morder); - myComm->bcast(CenterID); - myComm->bcast(Normalized); - myComm->bcast(basisName); - myComm->bcast(basisType); - myComm->bcast(addsignforM); - - if (sph == "spherical") - addsignforM = 1; // include (-1)^m - - if (Morder == "gaussian") - expandlm = GAUSSIAN_EXPAND; - else if (Morder == "natural") - expandlm = NATURAL_EXPAND; - else if (Morder == "no") - expandlm = DONOT_EXPAND; - else if (Morder == "pyscf") { - expandlm = MOD_NATURAL_EXPAND; - addsignforM = 1; - if (sph != "spherical") { - myComm->barrier_and_abort( - " Error: expandYlm='pyscf' only compatible with " - "angular='spherical'. Aborting.\n"); - } - } - - if (sph == "cartesian" || Morder == "Gamess") { - expandlm = CARTESIAN_EXPAND; - addsignforM = 0; - } - - if (Morder == "Dirac") { - expandlm = DIRAC_CARTESIAN_EXPAND; - addsignforM = 0; - if (sph != "cartesian") - myComm->barrier_and_abort( - " Error: expandYlm='Dirac' only compatible with " - "angular='cartesian'. Aborting\n"); - } - app_log() << R"(" << std::endl; - - return true; + } + + if (sph == "cartesian" || Morder == "Gamess") + { + expandlm = CARTESIAN_EXPAND; + addsignforM = 0; + } + + if (Morder == "Dirac") + { + expandlm = DIRAC_CARTESIAN_EXPAND; + addsignforM = 0; + if (sph != "cartesian") + myComm->barrier_and_abort(" Error: expandYlm='Dirac' only compatible with " + "angular='cartesian'. 
Aborting\n"); + } + app_log() << R"(" << std::endl; + + return true; } -template -std::unique_ptr -AOBasisBuilderT::createAOSet(xmlNodePtr cur) +template +std::unique_ptr AOBasisBuilderT::createAOSet(xmlNodePtr cur) { - ReportEngine PRE("AtomicBasisBuilder", "createAOSet(xmlNodePtr)"); - app_log() << " AO BasisSet for " << elementType << "\n"; - - if (expandlm != CARTESIAN_EXPAND) { - if (addsignforM) - app_log() << " Spherical Harmonics contain (-1)^m factor" - << std::endl; - else - app_log() << " Spherical Harmonics DO NOT contain (-1)^m factor" - << std::endl; + ReportEngine PRE("AtomicBasisBuilder", "createAOSet(xmlNodePtr)"); + app_log() << " AO BasisSet for " << elementType << "\n"; + + if (expandlm != CARTESIAN_EXPAND) + { + if (addsignforM) + app_log() << " Spherical Harmonics contain (-1)^m factor" << std::endl; + else + app_log() << " Spherical Harmonics DO NOT contain (-1)^m factor" << std::endl; + } + + switch (expandlm) + { + case (GAUSSIAN_EXPAND): + app_log() << " Angular momentum m expanded according to Gaussian" << std::endl; + break; + case (NATURAL_EXPAND): + app_log() << " Angular momentum m expanded as -l, ... ,l" << std::endl; + break; + case (MOD_NATURAL_EXPAND): + app_log() << " Angular momentum m expanded as -l, ... ,l, with the " + "exception of L=1 (1,-1,0)" + << std::endl; + break; + case (CARTESIAN_EXPAND): + app_log() << " Angular momentum expanded in cartesian functions x^lx " + "y^ly z^lz according to Gamess" + << std::endl; + break; + case (DIRAC_CARTESIAN_EXPAND): + app_log() << " Angular momentum expanded in cartesian functions in " + "DIRAC ordering" + << std::endl; + break; + default: + app_log() << " Angular momentum m is explicitly given." 
<< std::endl; + } + + QuantumNumberType nlms; + std::string rnl; + int Lmax(0); // maxmimum angular momentum of this center + int num(0); // the number of localized basis functions of this center + // process the basic property: maximun angular momentum, the number of basis + // functions to be added + std::vector radGroup; + xmlNodePtr cur1 = cur->xmlChildrenNode; + xmlNodePtr gptr = 0; + while (cur1 != NULL) + { + std::string cname1((const char*)(cur1->name)); + if (cname1 == "basisGroup") + { + radGroup.push_back(cur1); + const int l = std::stoi(getXMLAttributeValue(cur1, "l")); + Lmax = std::max(Lmax, l); + // expect that only Rnl is given + if (expandlm == CARTESIAN_EXPAND || expandlm == DIRAC_CARTESIAN_EXPAND) + num += (l + 1) * (l + 2) / 2; + else if (expandlm) + num += 2 * l + 1; + else + num++; } - - switch (expandlm) { - case (GAUSSIAN_EXPAND): - app_log() << " Angular momentum m expanded according to Gaussian" - << std::endl; - break; - case (NATURAL_EXPAND): - app_log() << " Angular momentum m expanded as -l, ... ,l" - << std::endl; - break; - case (MOD_NATURAL_EXPAND): - app_log() << " Angular momentum m expanded as -l, ... ,l, with the " - "exception of L=1 (1,-1,0)" - << std::endl; - break; - case (CARTESIAN_EXPAND): - app_log() << " Angular momentum expanded in cartesian functions x^lx " - "y^ly z^lz according to Gamess" - << std::endl; - break; - case (DIRAC_CARTESIAN_EXPAND): - app_log() << " Angular momentum expanded in cartesian functions in " - "DIRAC ordering" - << std::endl; - break; - default: - app_log() << " Angular momentum m is explicitly given." 
<< std::endl; + else if (cname1 == "grid") + { + gptr = cur1; } - - QuantumNumberType nlms; - std::string rnl; - int Lmax(0); // maxmimum angular momentum of this center - int num(0); // the number of localized basis functions of this center - // process the basic property: maximun angular momentum, the number of basis - // functions to be added - std::vector radGroup; - xmlNodePtr cur1 = cur->xmlChildrenNode; - xmlNodePtr gptr = 0; - while (cur1 != NULL) { - std::string cname1((const char*)(cur1->name)); - if (cname1 == "basisGroup") { - radGroup.push_back(cur1); - const int l = std::stoi(getXMLAttributeValue(cur1, "l")); - Lmax = std::max(Lmax, l); - // expect that only Rnl is given - if (expandlm == CARTESIAN_EXPAND || - expandlm == DIRAC_CARTESIAN_EXPAND) - num += (l + 1) * (l + 2) / 2; - else if (expandlm) - num += 2 * l + 1; - else - num++; - } - else if (cname1 == "grid") { - gptr = cur1; + cur1 = cur1->next; + } + + // create a new set of atomic orbitals sharing a center with (Lmax, num) + // if(addsignforM) the basis function has (-1)^m sqrt(2)Re(Ylm) + auto aos = std::make_unique(Lmax, addsignforM); + aos->LM.resize(num); + aos->NL.resize(num); + + // Now, add distinct Radial Orbitals and (l,m) channels + RadialOrbitalSetBuilder radFuncBuilder(myComm, *aos); + radFuncBuilder.Normalized = (Normalized == "yes"); + radFuncBuilder.addGrid(gptr, basisType); // assign a radial grid for the new center + std::vector::iterator it(radGroup.begin()); + std::vector::iterator it_end(radGroup.end()); + std::vector all_nl; + while (it != it_end) + { + cur1 = (*it); + xmlAttrPtr att = cur1->properties; + while (att != NULL) + { + std::string aname((const char*)(att->name)); + if (aname == "rid" || aname == "id") + // accept id/rid + { + rnl = (const char*)(att->children->content); + } + else + { + std::map::iterator iit = nlms_id.find(aname); + if (iit != nlms_id.end()) + // valid for n,l,m,s + { + nlms[(*iit).second] = atoi((const char*)(att->children->content)); } - 
cur1 = cur1->next; + } + att = att->next; } - - // create a new set of atomic orbitals sharing a center with (Lmax, num) - // if(addsignforM) the basis function has (-1)^m sqrt(2)Re(Ylm) - auto aos = std::make_unique(Lmax, addsignforM); - aos->LM.resize(num); - aos->NL.resize(num); - - // Now, add distinct Radial Orbitals and (l,m) channels - RadialOrbitalSetBuilder radFuncBuilder(myComm, *aos); - radFuncBuilder.Normalized = (Normalized == "yes"); - radFuncBuilder.addGrid( - gptr, basisType); // assign a radial grid for the new center - std::vector::iterator it(radGroup.begin()); - std::vector::iterator it_end(radGroup.end()); - std::vector all_nl; - while (it != it_end) { - cur1 = (*it); - xmlAttrPtr att = cur1->properties; - while (att != NULL) { - std::string aname((const char*)(att->name)); - if (aname == "rid" || aname == "id") - // accept id/rid - { - rnl = (const char*)(att->children->content); - } - else { - std::map::iterator iit = nlms_id.find(aname); - if (iit != nlms_id.end()) - // valid for n,l,m,s - { - nlms[(*iit).second] = - atoi((const char*)(att->children->content)); - } - } - att = att->next; - } - // add Ylm channels - app_log() << " R(n,l,m,s) " << nlms[0] << " " << nlms[1] << " " - << nlms[2] << " " << nlms[3] << std::endl; - std::map::iterator rnl_it = RnlID.find(rnl); - if (rnl_it == RnlID.end()) { - int nl = aos->RnlID.size(); - if (radFuncBuilder.addRadialOrbital(cur1, basisType, nlms)) - RnlID[rnl] = nl; - all_nl.push_back(nl); - } - else { - all_nl.push_back((*rnl_it).second); - } - ++it; + // add Ylm channels + app_log() << " R(n,l,m,s) " << nlms[0] << " " << nlms[1] << " " << nlms[2] << " " << nlms[3] << std::endl; + std::map::iterator rnl_it = RnlID.find(rnl); + if (rnl_it == RnlID.end()) + { + int nl = aos->RnlID.size(); + if (radFuncBuilder.addRadialOrbital(cur1, basisType, nlms)) + RnlID[rnl] = nl; + all_nl.push_back(nl); } - - if (expandYlm(aos.get(), all_nl, expandlm) != num) - myComm->barrier_and_abort( - "expandYlm doesn't 
match the number of basis."); - radFuncBuilder.finalize(); - // aos->Rmax can be set small - // aos->setRmax(0); - aos->setBasisSetSize(-1); - app_log() << " Maximum Angular Momentum = " << aos->Ylm.lmax() - << std::endl - << " Number of Radial functors = " << aos->RnlID.size() - << std::endl - << " Basis size = " << aos->getBasisSetSize() - << "\n\n"; - return aos; + else + { + all_nl.push_back((*rnl_it).second); + } + ++it; + } + + if (expandYlm(aos.get(), all_nl, expandlm) != num) + myComm->barrier_and_abort("expandYlm doesn't match the number of basis."); + radFuncBuilder.finalize(); + // aos->Rmax can be set small + // aos->setRmax(0); + aos->setBasisSetSize(-1); + app_log() << " Maximum Angular Momentum = " << aos->Ylm.lmax() << std::endl + << " Number of Radial functors = " << aos->RnlID.size() << std::endl + << " Basis size = " << aos->getBasisSetSize() << "\n\n"; + return aos; } -template -std::unique_ptr -AOBasisBuilderT::createAOSetH5(hdf_archive& hin) +template +std::unique_ptr AOBasisBuilderT::createAOSetH5(hdf_archive& hin) { - ReportEngine PRE("AOBasisBuilderT:", "createAOSetH5(std::string)"); - app_log() << " AO BasisSet for " << elementType << "\n"; - - if (expandlm != CARTESIAN_EXPAND) { - if (addsignforM) - app_log() << " Spherical Harmonics contain (-1)^m factor" - << std::endl; - else - app_log() << " Spherical Harmonics DO NOT contain (-1)^m factor" - << std::endl; + ReportEngine PRE("AOBasisBuilderT:", "createAOSetH5(std::string)"); + app_log() << " AO BasisSet for " << elementType << "\n"; + + if (expandlm != CARTESIAN_EXPAND) + { + if (addsignforM) + app_log() << " Spherical Harmonics contain (-1)^m factor" << std::endl; + else + app_log() << " Spherical Harmonics DO NOT contain (-1)^m factor" << std::endl; + } + + switch (expandlm) + { + case (GAUSSIAN_EXPAND): + app_log() << " Angular momentum m expanded according to Gaussian" << std::endl; + break; + case (NATURAL_EXPAND): + app_log() << " Angular momentum m expanded as -l, ... 
,l" << std::endl; + break; + case (MOD_NATURAL_EXPAND): + app_log() << " Angular momentum m expanded as -l, ... ,l, with the " + "exception of L=1 (1,-1,0)" + << std::endl; + break; + case (CARTESIAN_EXPAND): + app_log() << " Angular momentum expanded in cartesian functions x^lx " + "y^ly z^lz according to Gamess" + << std::endl; + break; + case (DIRAC_CARTESIAN_EXPAND): + app_log() << " Angular momentum expanded in cartesian functions in " + "DIRAC ordering" + << std::endl; + break; + default: + app_log() << " Angular momentum m is explicitly given." << std::endl; + } + + QuantumNumberType nlms; + std::string rnl; + int Lmax(0); // maxmimum angular momentum of this center + int num(0); // the number of localized basis functions of this center + + int numbasisgroups(0); + if (myComm->rank() == 0) + { + if (!hin.readEntry(numbasisgroups, "NbBasisGroups")) + PRE.error("Could not read NbBasisGroups in H5; Probably Corrupt H5 file", true); + } + myComm->bcast(numbasisgroups); + + for (int i = 0; i < numbasisgroups; i++) + { + std::string basisGroupID = "basisGroup" + std::to_string(i); + int l(0); + if (myComm->rank() == 0) + { + hin.push(basisGroupID); + hin.read(l, "l"); + hin.pop(); } - - switch (expandlm) { - case (GAUSSIAN_EXPAND): - app_log() << " Angular momentum m expanded according to Gaussian" - << std::endl; - break; - case (NATURAL_EXPAND): - app_log() << " Angular momentum m expanded as -l, ... ,l" - << std::endl; - break; - case (MOD_NATURAL_EXPAND): - app_log() << " Angular momentum m expanded as -l, ... ,l, with the " - "exception of L=1 (1,-1,0)" - << std::endl; - break; - case (CARTESIAN_EXPAND): - app_log() << " Angular momentum expanded in cartesian functions x^lx " - "y^ly z^lz according to Gamess" - << std::endl; - break; - case (DIRAC_CARTESIAN_EXPAND): - app_log() << " Angular momentum expanded in cartesian functions in " - "DIRAC ordering" - << std::endl; - break; - default: - app_log() << " Angular momentum m is explicitly given." 
<< std::endl; - } - - QuantumNumberType nlms; - std::string rnl; - int Lmax(0); // maxmimum angular momentum of this center - int num(0); // the number of localized basis functions of this center - - int numbasisgroups(0); - if (myComm->rank() == 0) { - if (!hin.readEntry(numbasisgroups, "NbBasisGroups")) - PRE.error( - "Could not read NbBasisGroups in H5; Probably Corrupt H5 file", - true); + myComm->bcast(l); + + Lmax = std::max(Lmax, l); + // expect that only Rnl is given + if (expandlm == CARTESIAN_EXPAND || expandlm == DIRAC_CARTESIAN_EXPAND) + num += (l + 1) * (l + 2) / 2; + else if (expandlm) + num += 2 * l + 1; + else + num++; + } + + // create a new set of atomic orbitals sharing a center with (Lmax, num) + // if(addsignforM) the basis function has (-1)^m sqrt(2)Re(Ylm) + auto aos = std::make_unique(Lmax, addsignforM); + aos->LM.resize(num); + aos->NL.resize(num); + + // Now, add distinct Radial Orbitals and (l,m) channels + RadialOrbitalSetBuilder radFuncBuilder(myComm, *aos); + radFuncBuilder.Normalized = (Normalized == "yes"); + radFuncBuilder.addGridH5(hin); // assign a radial grid for the new center + std::vector all_nl; + for (int i = 0; i < numbasisgroups; i++) + { + std::string basisGroupID = "basisGroup" + std::to_string(i); + if (myComm->rank() == 0) + { + hin.push(basisGroupID); + hin.read(rnl, "rid"); + hin.read(nlms[0], "n"); + hin.read(nlms[1], "l"); } - myComm->bcast(numbasisgroups); - - for (int i = 0; i < numbasisgroups; i++) { - std::string basisGroupID = "basisGroup" + std::to_string(i); - int l(0); - if (myComm->rank() == 0) { - hin.push(basisGroupID); - hin.read(l, "l"); - hin.pop(); - } - myComm->bcast(l); - - Lmax = std::max(Lmax, l); - // expect that only Rnl is given - if (expandlm == CARTESIAN_EXPAND || expandlm == DIRAC_CARTESIAN_EXPAND) - num += (l + 1) * (l + 2) / 2; - else if (expandlm) - num += 2 * l + 1; - else - num++; + myComm->bcast(rnl); + myComm->bcast(nlms[0]); + myComm->bcast(nlms[1]); + + // add Ylm channels + 
app_log() << " R(n,l,m,s) " << nlms[0] << " " << nlms[1] << " " << nlms[2] << " " << nlms[3] << std::endl; + std::map::iterator rnl_it = RnlID.find(rnl); + if (rnl_it == RnlID.end()) + { + int nl = aos->RnlID.size(); + if (radFuncBuilder.addRadialOrbitalH5(hin, basisType, nlms)) + RnlID[rnl] = nl; + all_nl.push_back(nl); } - - // create a new set of atomic orbitals sharing a center with (Lmax, num) - // if(addsignforM) the basis function has (-1)^m sqrt(2)Re(Ylm) - auto aos = std::make_unique(Lmax, addsignforM); - aos->LM.resize(num); - aos->NL.resize(num); - - // Now, add distinct Radial Orbitals and (l,m) channels - RadialOrbitalSetBuilder radFuncBuilder(myComm, *aos); - radFuncBuilder.Normalized = (Normalized == "yes"); - radFuncBuilder.addGridH5(hin); // assign a radial grid for the new center - std::vector all_nl; - for (int i = 0; i < numbasisgroups; i++) { - std::string basisGroupID = "basisGroup" + std::to_string(i); - if (myComm->rank() == 0) { - hin.push(basisGroupID); - hin.read(rnl, "rid"); - hin.read(nlms[0], "n"); - hin.read(nlms[1], "l"); - } - myComm->bcast(rnl); - myComm->bcast(nlms[0]); - myComm->bcast(nlms[1]); - - // add Ylm channels - app_log() << " R(n,l,m,s) " << nlms[0] << " " << nlms[1] << " " - << nlms[2] << " " << nlms[3] << std::endl; - std::map::iterator rnl_it = RnlID.find(rnl); - if (rnl_it == RnlID.end()) { - int nl = aos->RnlID.size(); - if (radFuncBuilder.addRadialOrbitalH5(hin, basisType, nlms)) - RnlID[rnl] = nl; - all_nl.push_back(nl); - } - else { - all_nl.push_back((*rnl_it).second); - } - - if (myComm->rank() == 0) - hin.pop(); + else + { + all_nl.push_back((*rnl_it).second); } - if (expandYlm(aos.get(), all_nl, expandlm) != num) - myComm->barrier_and_abort( - "expandYlm doesn't match the number of basis."); - radFuncBuilder.finalize(); - // aos->Rmax can be set small - // aos->setRmax(0); - aos->setBasisSetSize(-1); - app_log() << " Maximum Angular Momentum = " << aos->Ylm.lmax() - << std::endl - << " Number of Radial 
functors = " << aos->RnlID.size() - << std::endl - << " Basis size = " << aos->getBasisSetSize() - << "\n\n"; - return aos; + if (myComm->rank() == 0) + hin.pop(); + } + + if (expandYlm(aos.get(), all_nl, expandlm) != num) + myComm->barrier_and_abort("expandYlm doesn't match the number of basis."); + radFuncBuilder.finalize(); + // aos->Rmax can be set small + // aos->setRmax(0); + aos->setBasisSetSize(-1); + app_log() << " Maximum Angular Momentum = " << aos->Ylm.lmax() << std::endl + << " Number of Radial functors = " << aos->RnlID.size() << std::endl + << " Basis size = " << aos->getBasisSetSize() << "\n\n"; + return aos; } -template -int -AOBasisBuilderT::expandYlm( - COT* aos, std::vector& all_nl, int expandlm) +template +int AOBasisBuilderT::expandYlm(COT* aos, std::vector& all_nl, int expandlm) { - int num = 0; - if (expandlm == GAUSSIAN_EXPAND) { - app_log() << "Expanding Ylm according to Gaussian98" << std::endl; - for (int nl = 0; nl < aos->RnlID.size(); nl++) { - int l = aos->RnlID[nl][q_l]; - app_log() << "Adding " << 2 * l + 1 - << " spherical orbitals for l= " << l << std::endl; - switch (l) { - case (0): - aos->LM[num] = aos->Ylm.index(0, 0); - aos->NL[num] = nl; - num++; - break; - case (1): // px(1),py(-1),pz(0) - aos->LM[num] = aos->Ylm.index(1, 1); - aos->NL[num] = nl; - num++; - aos->LM[num] = aos->Ylm.index(1, -1); - aos->NL[num] = nl; - num++; - aos->LM[num] = aos->Ylm.index(1, 0); - aos->NL[num] = nl; - num++; - break; - default: // 0,1,-1,2,-2,...,l,-l - aos->LM[num] = aos->Ylm.index(l, 0); - aos->NL[num] = nl; - num++; - for (int tm = 1; tm <= l; tm++) { - aos->LM[num] = aos->Ylm.index(l, tm); - aos->NL[num] = nl; - num++; - aos->LM[num] = aos->Ylm.index(l, -tm); - aos->NL[num] = nl; - num++; - } - break; - } + int num = 0; + if (expandlm == GAUSSIAN_EXPAND) + { + app_log() << "Expanding Ylm according to Gaussian98" << std::endl; + for (int nl = 0; nl < aos->RnlID.size(); nl++) + { + int l = aos->RnlID[nl][q_l]; + app_log() << "Adding " << 
2 * l + 1 << " spherical orbitals for l= " << l << std::endl; + switch (l) + { + case (0): + aos->LM[num] = aos->Ylm.index(0, 0); + aos->NL[num] = nl; + num++; + break; + case (1): // px(1),py(-1),pz(0) + aos->LM[num] = aos->Ylm.index(1, 1); + aos->NL[num] = nl; + num++; + aos->LM[num] = aos->Ylm.index(1, -1); + aos->NL[num] = nl; + num++; + aos->LM[num] = aos->Ylm.index(1, 0); + aos->NL[num] = nl; + num++; + break; + default: // 0,1,-1,2,-2,...,l,-l + aos->LM[num] = aos->Ylm.index(l, 0); + aos->NL[num] = nl; + num++; + for (int tm = 1; tm <= l; tm++) + { + aos->LM[num] = aos->Ylm.index(l, tm); + aos->NL[num] = nl; + num++; + aos->LM[num] = aos->Ylm.index(l, -tm); + aos->NL[num] = nl; + num++; } + break; + } } - else if (expandlm == MOD_NATURAL_EXPAND) { - app_log() - << "Expanding Ylm as L=1 as (1,-1,0) and L>1 as -l,-l+1,...,l-1,l" - << std::endl; - for (int nl = 0; nl < aos->RnlID.size(); nl++) { - int l = aos->RnlID[nl][q_l]; - app_log() << " Adding " << 2 * l + 1 << " spherical orbitals" - << std::endl; - if (l == 1) { - // px(1),py(-1),pz(0) - aos->LM[num] = aos->Ylm.index(1, 1); - aos->NL[num] = nl; - num++; - aos->LM[num] = aos->Ylm.index(1, -1); - aos->NL[num] = nl; - num++; - aos->LM[num] = aos->Ylm.index(1, 0); - aos->NL[num] = nl; - num++; - } - else { - for (int tm = -l; tm <= l; tm++, num++) { - aos->LM[num] = aos->Ylm.index(l, tm); - aos->NL[num] = nl; - } - } + } + else if (expandlm == MOD_NATURAL_EXPAND) + { + app_log() << "Expanding Ylm as L=1 as (1,-1,0) and L>1 as -l,-l+1,...,l-1,l" << std::endl; + for (int nl = 0; nl < aos->RnlID.size(); nl++) + { + int l = aos->RnlID[nl][q_l]; + app_log() << " Adding " << 2 * l + 1 << " spherical orbitals" << std::endl; + if (l == 1) + { + // px(1),py(-1),pz(0) + aos->LM[num] = aos->Ylm.index(1, 1); + aos->NL[num] = nl; + num++; + aos->LM[num] = aos->Ylm.index(1, -1); + aos->NL[num] = nl; + num++; + aos->LM[num] = aos->Ylm.index(1, 0); + aos->NL[num] = nl; + num++; + } + else + { + for (int tm = -l; tm <= l; 
tm++, num++) + { + aos->LM[num] = aos->Ylm.index(l, tm); + aos->NL[num] = nl; } + } } - else if (expandlm == NATURAL_EXPAND) { - app_log() << "Expanding Ylm as -l,-l+1,...,l-1,l" << std::endl; - for (int nl = 0; nl < aos->RnlID.size(); nl++) { - int l = aos->RnlID[nl][q_l]; - app_log() << " Adding " << 2 * l + 1 << " spherical orbitals" - << std::endl; - for (int tm = -l; tm <= l; tm++, num++) { - aos->LM[num] = aos->Ylm.index(l, tm); - aos->NL[num] = nl; - } - } + } + else if (expandlm == NATURAL_EXPAND) + { + app_log() << "Expanding Ylm as -l,-l+1,...,l-1,l" << std::endl; + for (int nl = 0; nl < aos->RnlID.size(); nl++) + { + int l = aos->RnlID[nl][q_l]; + app_log() << " Adding " << 2 * l + 1 << " spherical orbitals" << std::endl; + for (int tm = -l; tm <= l; tm++, num++) + { + aos->LM[num] = aos->Ylm.index(l, tm); + aos->NL[num] = nl; + } } - else if (expandlm == CARTESIAN_EXPAND) { - app_log() << "Expanding Ylm (angular function) according to Gamess " - "using cartesian gaussians" - << std::endl; - for (int nl = 0; nl < aos->RnlID.size(); nl++) { - int l = aos->RnlID[nl][q_l]; - app_log() << "Adding " << (l + 1) * (l + 2) / 2 - << " cartesian gaussian orbitals for l= " << l - << std::endl; - int nbefore = 0; - for (int i = 0; i < l; i++) - nbefore += (i + 1) * (i + 2) / 2; - for (int i = 0; i < (l + 1) * (l + 2) / 2; i++) { - aos->LM[num] = nbefore + i; - aos->NL[num] = nl; - num++; - } - } + } + else if (expandlm == CARTESIAN_EXPAND) + { + app_log() << "Expanding Ylm (angular function) according to Gamess " + "using cartesian gaussians" + << std::endl; + for (int nl = 0; nl < aos->RnlID.size(); nl++) + { + int l = aos->RnlID[nl][q_l]; + app_log() << "Adding " << (l + 1) * (l + 2) / 2 << " cartesian gaussian orbitals for l= " << l << std::endl; + int nbefore = 0; + for (int i = 0; i < l; i++) + nbefore += (i + 1) * (i + 2) / 2; + for (int i = 0; i < (l + 1) * (l + 2) / 2; i++) + { + aos->LM[num] = nbefore + i; + aos->NL[num] = nl; + num++; + } } - else if 
(expandlm == DIRAC_CARTESIAN_EXPAND) { - app_log() << "Expanding Ylm (angular function) according to DIRAC " - "using cartesian gaussians" - << std::endl; - for (int nl = 0; nl < aos->RnlID.size(); nl++) { - int l = aos->RnlID[nl][q_l]; - app_log() << "Adding " << (l + 1) * (l + 2) / 2 - << " cartesian gaussian orbitals for l= " << l - << std::endl; - int nbefore = 0; - for (int i = 0; i < l; i++) - nbefore += (i + 1) * (i + 2) / 2; - switch (l) { - case (0): - aos->LM[num] = nbefore + 0; - aos->NL[num] = nl; - num++; - break; - case (1): - aos->LM[num] = nbefore + 0; - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 1; - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 2; - aos->NL[num] = nl; - num++; - break; - case (2): - aos->LM[num] = nbefore + 0; // xx - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 3; // xy - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 4; // xz - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 1; // yy - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 5; // yz - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 2; // zz - aos->NL[num] = nl; - num++; - break; - case (3): - aos->LM[num] = nbefore + 0; // xxx - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 3; // xxy - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 4; // xxz - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 5; // xyy - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 9; // xyz - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 7; // xzz - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 1; // yyy - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 6; // yyz - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 8; // yzz - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 2; // zzz - aos->NL[num] = nl; - num++; - break; - case (4): - aos->LM[num] = nbefore + 0; // 400 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 3; // 310 - aos->NL[num] = nl; - num++; - 
aos->LM[num] = nbefore + 4; // 301 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 9; // 220 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 12; // 211 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 10; // 202 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 5; // 130 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 13; // 121 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 14; // 112 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 7; // 103 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 1; // 040 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 6; // 031 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 11; // 022 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 8; // 013 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 2; // 004 - aos->NL[num] = nl; - num++; - break; - case (5): - aos->LM[num] = nbefore + 0; // 500 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 3; // 410 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 4; // 401 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 9; // 320 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 15; // 311 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 10; // 302 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 11; // 230 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 18; // 221 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 19; // 212 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 13; // 203 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 5; // 140 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 16; // 131 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 20; // 122 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 17; // 113 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 7; // 104 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 1; // 050 - aos->NL[num] = nl; - num++; - aos->LM[num] = 
nbefore + 6; // 041 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 12; // 032 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 14; // 023 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 8; // 014 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 2; // 005 - aos->NL[num] = nl; - num++; - break; - case (6): - aos->LM[num] = nbefore + 0; // 600 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 3; // 510 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 4; // 501 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 9; // 420 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 15; // 411 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 10; // 402 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 18; // 330 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 21; // 321 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 22; // 312 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 19; // 303 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 11; // 240 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 23; // 231 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 27; // 222 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 25; // 213 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 13; // 204 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 5; // 150 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 16; // 141 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 24; // 132 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 26; // 123 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 17; // 114 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 7; // 105 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 1; // 060 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 6; // 051 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 12; // 042 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 20; // 
033 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 14; // 024 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 8; // 015 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 2; // 006 - aos->NL[num] = nl; - num++; - break; - default: - myComm->barrier_and_abort( - "Cartesian Tensor only defined up to Lmax=6. Aborting\n"); - break; - } - } + } + else if (expandlm == DIRAC_CARTESIAN_EXPAND) + { + app_log() << "Expanding Ylm (angular function) according to DIRAC " + "using cartesian gaussians" + << std::endl; + for (int nl = 0; nl < aos->RnlID.size(); nl++) + { + int l = aos->RnlID[nl][q_l]; + app_log() << "Adding " << (l + 1) * (l + 2) / 2 << " cartesian gaussian orbitals for l= " << l << std::endl; + int nbefore = 0; + for (int i = 0; i < l; i++) + nbefore += (i + 1) * (i + 2) / 2; + switch (l) + { + case (0): + aos->LM[num] = nbefore + 0; + aos->NL[num] = nl; + num++; + break; + case (1): + aos->LM[num] = nbefore + 0; + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 1; + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 2; + aos->NL[num] = nl; + num++; + break; + case (2): + aos->LM[num] = nbefore + 0; // xx + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 3; // xy + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 4; // xz + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 1; // yy + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 5; // yz + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 2; // zz + aos->NL[num] = nl; + num++; + break; + case (3): + aos->LM[num] = nbefore + 0; // xxx + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 3; // xxy + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 4; // xxz + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 5; // xyy + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 9; // xyz + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 7; // xzz + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 1; // yyy + aos->NL[num] = 
nl; + num++; + aos->LM[num] = nbefore + 6; // yyz + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 8; // yzz + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 2; // zzz + aos->NL[num] = nl; + num++; + break; + case (4): + aos->LM[num] = nbefore + 0; // 400 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 3; // 310 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 4; // 301 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 9; // 220 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 12; // 211 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 10; // 202 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 5; // 130 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 13; // 121 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 14; // 112 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 7; // 103 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 1; // 040 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 6; // 031 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 11; // 022 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 8; // 013 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 2; // 004 + aos->NL[num] = nl; + num++; + break; + case (5): + aos->LM[num] = nbefore + 0; // 500 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 3; // 410 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 4; // 401 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 9; // 320 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 15; // 311 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 10; // 302 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 11; // 230 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 18; // 221 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 19; // 212 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 13; // 203 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 5; // 140 + aos->NL[num] = nl; 
+ num++; + aos->LM[num] = nbefore + 16; // 131 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 20; // 122 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 17; // 113 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 7; // 104 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 1; // 050 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 6; // 041 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 12; // 032 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 14; // 023 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 8; // 014 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 2; // 005 + aos->NL[num] = nl; + num++; + break; + case (6): + aos->LM[num] = nbefore + 0; // 600 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 3; // 510 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 4; // 501 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 9; // 420 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 15; // 411 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 10; // 402 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 18; // 330 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 21; // 321 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 22; // 312 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 19; // 303 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 11; // 240 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 23; // 231 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 27; // 222 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 25; // 213 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 13; // 204 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 5; // 150 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 16; // 141 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 24; // 132 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 26; // 123 + aos->NL[num] = nl; + num++; + 
aos->LM[num] = nbefore + 17; // 114 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 7; // 105 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 1; // 060 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 6; // 051 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 12; // 042 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 20; // 033 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 14; // 024 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 8; // 015 + aos->NL[num] = nl; + num++; + aos->LM[num] = nbefore + 2; // 006 + aos->NL[num] = nl; + num++; + break; + default: + myComm->barrier_and_abort("Cartesian Tensor only defined up to Lmax=6. Aborting\n"); + break; + } } - else { - for (int ind = 0; ind < all_nl.size(); ind++) { - int nl = all_nl[ind]; - int l = aos->RnlID[nl][q_l]; - int m = aos->RnlID[nl][q_m]; - // assign the index for real Spherical Harmonic with (l,m) - aos->LM[num] = aos->Ylm.index(l, m); - // assign the index for radial orbital with (n,l) - aos->NL[num] = nl; - // increment number of basis functions - num++; - } + } + else + { + for (int ind = 0; ind < all_nl.size(); ind++) + { + int nl = all_nl[ind]; + int l = aos->RnlID[nl][q_l]; + int m = aos->RnlID[nl][q_m]; + // assign the index for real Spherical Harmonic with (l,m) + aos->LM[num] = aos->Ylm.index(l, m); + // assign the index for radial orbital with (n,l) + aos->NL[num] = nl; + // increment number of basis functions + num++; } - return num; + } + return num; } -template class AOBasisBuilderT, - SoaCartesianTensor, double>>; -template class AOBasisBuilderT, - SoaCartesianTensor, std::complex>>; -template class AOBasisBuilderT, - SoaCartesianTensor, float>>; -template class AOBasisBuilderT, - SoaCartesianTensor, std::complex>>; - -template class AOBasisBuilderT, - SoaSphericalTensor, double>>; -template class AOBasisBuilderT, - SoaSphericalTensor, std::complex>>; -template class AOBasisBuilderT, - SoaSphericalTensor, float>>; -template class 
AOBasisBuilderT, - SoaSphericalTensor, std::complex>>; +template class AOBasisBuilderT, SoaCartesianTensor, double>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT, SoaCartesianTensor, std::complex>>; +template class AOBasisBuilderT, SoaCartesianTensor, float>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT, SoaCartesianTensor, std::complex>>; + +template class AOBasisBuilderT, SoaSphericalTensor, double>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT, SoaSphericalTensor, std::complex>>; +template class AOBasisBuilderT, SoaSphericalTensor, float>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT, SoaSphericalTensor, std::complex>>; template class AOBasisBuilderT< - SoaAtomicBasisSetT>, - SoaCartesianTensor, double>>; + SoaAtomicBasisSetT>, SoaCartesianTensor, double>>; template class AOBasisBuilderT< - SoaAtomicBasisSetT>, - SoaCartesianTensor, std::complex>>; + SoaAtomicBasisSetT>, SoaCartesianTensor, std::complex>>; template class AOBasisBuilderT< - SoaAtomicBasisSetT>, - SoaCartesianTensor, float>>; + SoaAtomicBasisSetT>, SoaCartesianTensor, float>>; template class AOBasisBuilderT< - SoaAtomicBasisSetT>, - SoaCartesianTensor, std::complex>>; + SoaAtomicBasisSetT>, SoaCartesianTensor, std::complex>>; template class AOBasisBuilderT< - SoaAtomicBasisSetT>, - SoaSphericalTensor, double>>; + SoaAtomicBasisSetT>, SoaSphericalTensor, double>>; template class AOBasisBuilderT< - SoaAtomicBasisSetT>, - SoaSphericalTensor, std::complex>>; + SoaAtomicBasisSetT>, SoaSphericalTensor, std::complex>>; template class AOBasisBuilderT< - SoaAtomicBasisSetT>, - SoaSphericalTensor, float>>; + SoaAtomicBasisSetT>, SoaSphericalTensor, float>>; template class AOBasisBuilderT< - SoaAtomicBasisSetT>, - SoaSphericalTensor, std::complex>>; + SoaAtomicBasisSetT>, SoaSphericalTensor, std::complex>>; template class AOBasisBuilderT< - SoaAtomicBasisSetT>, - SoaCartesianTensor, double>>; + SoaAtomicBasisSetT>, SoaCartesianTensor, double>>; template class 
AOBasisBuilderT< - SoaAtomicBasisSetT>, - SoaCartesianTensor, std::complex>>; -template class AOBasisBuilderT>, SoaCartesianTensor, float>>; + SoaAtomicBasisSetT>, SoaCartesianTensor, std::complex>>; template class AOBasisBuilderT< - SoaAtomicBasisSetT>, - SoaCartesianTensor, std::complex>>; + SoaAtomicBasisSetT>, SoaCartesianTensor, float>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, SoaCartesianTensor, std::complex>>; template class AOBasisBuilderT< - SoaAtomicBasisSetT>, - SoaSphericalTensor, double>>; + SoaAtomicBasisSetT>, SoaSphericalTensor, double>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, SoaSphericalTensor, std::complex>>; template class AOBasisBuilderT< - SoaAtomicBasisSetT>, - SoaSphericalTensor, std::complex>>; -template class AOBasisBuilderT>, SoaSphericalTensor, float>>; + SoaAtomicBasisSetT>, SoaSphericalTensor, float>>; template class AOBasisBuilderT< - SoaAtomicBasisSetT>, - SoaSphericalTensor, std::complex>>; + SoaAtomicBasisSetT>, SoaSphericalTensor, std::complex>>; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp index 8ae6df22620..d265cb9959b 100644 --- a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp +++ b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp @@ -27,772 +27,794 @@ namespace qmcplusplus { -template -void -CuspCorrectionConstructionT::splitPhiEta(int center, - const std::vector& corrCenter, LCAOrbitalSetT& Phi, - LCAOrbitalSetT& Eta) +template +void CuspCorrectionConstructionT::splitPhiEta(int center, + const std::vector& corrCenter, + LCAOrbitalSetT& Phi, + LCAOrbitalSetT& Eta) { - std::vector is_s_orbital(Phi.myBasisSet->BasisSetSize, false); - std::vector correct_this_center(corrCenter.size(), false); - correct_this_center[center] = corrCenter[center]; - - Phi.myBasisSet->queryOrbitalsForSType(correct_this_center, is_s_orbital); - - int nOrbs = Phi.getOrbitalSetSize(); - int 
bss = Phi.getBasisSetSize(); - - for (int i = 0; i < bss; i++) { - if (is_s_orbital[i]) { - auto& cref(*(Eta.C)); - for (int k = 0; k < nOrbs; k++) - cref(k, i) = 0.0; // Eta->C(k,i) = 0.0; - } - else { - auto& cref(*(Phi.C)); - for (int k = 0; k < nOrbs; k++) - cref(k, i) = 0.0; // Phi->C(k,i) = 0.0; - } + std::vector is_s_orbital(Phi.myBasisSet->BasisSetSize, false); + std::vector correct_this_center(corrCenter.size(), false); + correct_this_center[center] = corrCenter[center]; + + Phi.myBasisSet->queryOrbitalsForSType(correct_this_center, is_s_orbital); + + int nOrbs = Phi.getOrbitalSetSize(); + int bss = Phi.getBasisSetSize(); + + for (int i = 0; i < bss; i++) + { + if (is_s_orbital[i]) + { + auto& cref(*(Eta.C)); + for (int k = 0; k < nOrbs; k++) + cref(k, i) = 0.0; // Eta->C(k,i) = 0.0; + } + else + { + auto& cref(*(Phi.C)); + for (int k = 0; k < nOrbs; k++) + cref(k, i) = 0.0; // Phi->C(k,i) = 0.0; } + } } -template -void -CuspCorrectionConstructionT::removeSTypeOrbitals( - const std::vector& corrCenter, LCAOrbitalSetT& Phi) +template +void CuspCorrectionConstructionT::removeSTypeOrbitals(const std::vector& corrCenter, LCAOrbitalSetT& Phi) { - std::vector is_s_orbital(Phi.myBasisSet->BasisSetSize, false); + std::vector is_s_orbital(Phi.myBasisSet->BasisSetSize, false); - Phi.myBasisSet->queryOrbitalsForSType(corrCenter, is_s_orbital); + Phi.myBasisSet->queryOrbitalsForSType(corrCenter, is_s_orbital); - int nOrbs = Phi.getOrbitalSetSize(); - int bss = Phi.getBasisSetSize(); + int nOrbs = Phi.getOrbitalSetSize(); + int bss = Phi.getBasisSetSize(); - for (int i = 0; i < bss; i++) { - if (is_s_orbital[i]) { - auto& cref(*(Phi.C)); - for (int k = 0; k < nOrbs; k++) - cref(k, i) = 0.0; - } + for (int i = 0; i < bss; i++) + { + if (is_s_orbital[i]) + { + auto& cref(*(Phi.C)); + for (int k = 0; k < nOrbs; k++) + cref(k, i) = 0.0; } + } } // Will be the corrected value for r < rc and the original wavefunction for r > // rc -template -void 
-CuspCorrectionConstructionT::computeRadialPhiBar(ParticleSetT* targetP, - ParticleSetT* sourceP, int curOrb_, int curCenter_, SPOSetT* Phi, - Vector& xgrid, Vector& rad_orb, - const CuspCorrectionParametersT& data) +template +void CuspCorrectionConstructionT::computeRadialPhiBar(ParticleSetT* targetP, + ParticleSetT* sourceP, + int curOrb_, + int curCenter_, + SPOSetT* Phi, + Vector& xgrid, + Vector& rad_orb, + const CuspCorrectionParametersT& data) { - OneMolecularOrbitalT phiMO(targetP, sourceP, Phi); - phiMO.changeOrbital(curCenter_, curOrb_); - CuspCorrectionT cusp(data); - - for (int i = 0; i < xgrid.size(); i++) { - rad_orb[i] = phiBar(cusp, xgrid[i], phiMO); - } + OneMolecularOrbitalT phiMO(targetP, sourceP, Phi); + phiMO.changeOrbital(curCenter_, curOrb_); + CuspCorrectionT cusp(data); + + for (int i = 0; i < xgrid.size(); i++) + { + rad_orb[i] = phiBar(cusp, xgrid[i], phiMO); + } } // Get the ideal local energy at one point // Eq. 17 in the paper. Coefficients are taken from the paper. 
-template -typename CuspCorrectionConstructionT::RealType -CuspCorrectionConstructionT::getOneIdealLocalEnergy( - RealType r, RealType Z, RealType beta0) +template +typename CuspCorrectionConstructionT::RealType CuspCorrectionConstructionT::getOneIdealLocalEnergy(RealType r, + RealType Z, + RealType beta0) { - RealType beta[7] = { - 3.25819, -15.0126, 33.7308, -42.8705, 31.2276, -12.1316, 1.94692}; - RealType idealEL = beta0; - RealType r1 = r * r; - for (int i = 0; i < 7; i++) { - idealEL += beta[i] * r1; - r1 *= r; - } - return idealEL * Z * Z; + RealType beta[7] = {3.25819, -15.0126, 33.7308, -42.8705, 31.2276, -12.1316, 1.94692}; + RealType idealEL = beta0; + RealType r1 = r * r; + for (int i = 0; i < 7; i++) + { + idealEL += beta[i] * r1; + r1 *= r; + } + return idealEL * Z * Z; } // Get the ideal local energy for a vector of positions -template -void -CuspCorrectionConstructionT::getIdealLocalEnergy(const ValueVector& pos, - RealType Z, RealType Rc, RealType ELorigAtRc, ValueVector& ELideal) +template +void CuspCorrectionConstructionT::getIdealLocalEnergy(const ValueVector& pos, + RealType Z, + RealType Rc, + RealType ELorigAtRc, + ValueVector& ELideal) { - // assert(pos.size() == ELideal.size() - RealType beta0 = 0.0; - RealType tmp = getOneIdealLocalEnergy(Rc, Z, beta0); - beta0 = (ELorigAtRc - tmp) / (Z * Z); - for (int i = 0; i < pos.size(); i++) { - ELideal[i] = getOneIdealLocalEnergy(pos[i], Z, beta0); - } + // assert(pos.size() == ELideal.size() + RealType beta0 = 0.0; + RealType tmp = getOneIdealLocalEnergy(Rc, Z, beta0); + beta0 = (ELorigAtRc - tmp) / (Z * Z); + for (int i = 0; i < pos.size(); i++) + { + ELideal[i] = getOneIdealLocalEnergy(pos[i], Z, beta0); + } } // Evaluate constraints. Equations 9-13 in the paper. 
-template -void -CuspCorrectionConstructionT::evalX(RealType valRc, GradType gradRc, - ValueType lapRc, RealType Rc, RealType Z, RealType C, RealType valAtZero, - RealType eta0, TinyVector& X) +template +void CuspCorrectionConstructionT::evalX(RealType valRc, + GradType gradRc, + ValueType lapRc, + RealType Rc, + RealType Z, + RealType C, + RealType valAtZero, + RealType eta0, + TinyVector& X) { - X[0] = std::log(std::abs(valRc - C)); - X[1] = gradRc[0] / (valRc - C); - X[2] = (lapRc - 2.0 * gradRc[0] / Rc) / (valRc - C); - X[3] = -Z * (valAtZero + eta0) / (valAtZero - C); - X[4] = std::log(std::abs(valAtZero - C)); + X[0] = std::log(std::abs(valRc - C)); + X[1] = gradRc[0] / (valRc - C); + X[2] = (lapRc - 2.0 * gradRc[0] / Rc) / (valRc - C); + X[3] = -Z * (valAtZero + eta0) / (valAtZero - C); + X[4] = std::log(std::abs(valAtZero - C)); } // Compute polynomial coefficients from constraints. Eq. 14 in the paper. -template -void -CuspCorrectionConstructionT::X2alpha(const TinyVector& X, - RealType Rc, TinyVector& alpha) +template +void CuspCorrectionConstructionT::X2alpha(const TinyVector& X, + RealType Rc, + TinyVector& alpha) { - RealType RcInv = 1.0 / Rc, RcInv2 = RcInv * RcInv; - alpha[0] = X[4]; - alpha[1] = X[3]; - alpha[2] = 6.0 * X[0] * RcInv2 - 3.0 * X[1] * RcInv + X[2] * 0.5 - - 3.0 * X[3] * RcInv - 6.0 * X[4] * RcInv2 - 0.5 * X[1] * X[1]; - alpha[3] = -8.0 * X[0] * RcInv2 * RcInv + 5.0 * X[1] * RcInv2 - - X[2] * RcInv + 3.0 * X[3] * RcInv2 + 8.0 * X[4] * RcInv2 * RcInv + - X[1] * X[1] * RcInv; - alpha[4] = 3.0 * X[0] * RcInv2 * RcInv2 - 2.0 * X[1] * RcInv2 * RcInv + - 0.5 * X[2] * RcInv2 - X[3] * RcInv2 * RcInv - - 3.0 * X[4] * RcInv2 * RcInv2 - 0.5 * X[1] * X[1] * RcInv2; + RealType RcInv = 1.0 / Rc, RcInv2 = RcInv * RcInv; + alpha[0] = X[4]; + alpha[1] = X[3]; + alpha[2] = 6.0 * X[0] * RcInv2 - 3.0 * X[1] * RcInv + X[2] * 0.5 - 3.0 * X[3] * RcInv - 6.0 * X[4] * RcInv2 - + 0.5 * X[1] * X[1]; + alpha[3] = -8.0 * X[0] * RcInv2 * RcInv + 5.0 * X[1] * RcInv2 
- X[2] * RcInv + 3.0 * X[3] * RcInv2 + + 8.0 * X[4] * RcInv2 * RcInv + X[1] * X[1] * RcInv; + alpha[4] = 3.0 * X[0] * RcInv2 * RcInv2 - 2.0 * X[1] * RcInv2 * RcInv + 0.5 * X[2] * RcInv2 - X[3] * RcInv2 * RcInv - + 3.0 * X[4] * RcInv2 * RcInv2 - 0.5 * X[1] * X[1] * RcInv2; } // Eq. 16 in the paper. -template -typename CuspCorrectionConstructionT::RealType -CuspCorrectionConstructionT::getZeff( - RealType Z, RealType etaAtZero, RealType phiBarAtZero) +template +typename CuspCorrectionConstructionT::RealType CuspCorrectionConstructionT::getZeff(RealType Z, + RealType etaAtZero, + RealType phiBarAtZero) { - return Z * (1.0 + etaAtZero / phiBarAtZero); + return Z * (1.0 + etaAtZero / phiBarAtZero); } -template -typename CuspCorrectionConstructionT::RealType -CuspCorrectionConstructionT::phiBar( - const CuspCorrectionT& cusp, RealType r, OneMolecularOrbitalT& phiMO) +template +typename CuspCorrectionConstructionT::RealType CuspCorrectionConstructionT::phiBar(const CuspCorrectionT& cusp, + RealType r, + OneMolecularOrbitalT& phiMO) { - if (r <= cusp.cparam.Rc) - return cusp.cparam.C + cusp.Rr(r); - else - return phiMO.phi(r); + if (r <= cusp.cparam.Rc) + return cusp.cparam.C + cusp.Rr(r); + else + return phiMO.phi(r); } // Compute the effective one-electron local energy at a vector of points. // Eq. 15 in the paper for r < Rc. Normal local energy for R > Rc. 
-template -void -CuspCorrectionConstructionT::getCurrentLocalEnergy(const ValueVector& pos, - RealType Zeff, RealType Rc, RealType originalELatRc, - CuspCorrectionT& cusp, OneMolecularOrbitalT& phiMO, - ValueVector& ELcurr) +template +void CuspCorrectionConstructionT::getCurrentLocalEnergy(const ValueVector& pos, + RealType Zeff, + RealType Rc, + RealType originalELatRc, + CuspCorrectionT& cusp, + OneMolecularOrbitalT& phiMO, + ValueVector& ELcurr) { - // assert(pos.size() == ELcurr.size()); - ValueType val; - GradType grad; - ValueType lap; - phiMO.phi_vgl(Rc, val, grad, lap); - RealType dE = originalELatRc - (-0.5 * lap / val - Zeff / Rc); - for (int i = 0; i < pos.size(); i++) { - RealType r = pos[i]; - // prevent NaN's if phiBar is zero - RealType offset = 1e-12; - if (r <= Rc) { - RealType dp = cusp.dpr(r); - ELcurr[i] = -0.5 * cusp.Rr(r) * - (2.0 * dp / r + cusp.d2pr(r) + dp * dp) / - (offset + phiBar(cusp, r, phiMO)) - - Zeff / r + dE; - } - else { - phiMO.phi_vgl(pos[i], val, grad, lap); - ELcurr[i] = -0.5 * lap / val - Zeff / r + dE; - } + // assert(pos.size() == ELcurr.size()); + ValueType val; + GradType grad; + ValueType lap; + phiMO.phi_vgl(Rc, val, grad, lap); + RealType dE = originalELatRc - (-0.5 * lap / val - Zeff / Rc); + for (int i = 0; i < pos.size(); i++) + { + RealType r = pos[i]; + // prevent NaN's if phiBar is zero + RealType offset = 1e-12; + if (r <= Rc) + { + RealType dp = cusp.dpr(r); + ELcurr[i] = -0.5 * cusp.Rr(r) * (2.0 * dp / r + cusp.d2pr(r) + dp * dp) / (offset + phiBar(cusp, r, phiMO)) - + Zeff / r + dE; } + else + { + phiMO.phi_vgl(pos[i], val, grad, lap); + ELcurr[i] = -0.5 * lap / val - Zeff / r + dE; + } + } } // Return value is local energy at Rc -template -typename CuspCorrectionConstructionT::RealType -CuspCorrectionConstructionT::getOriginalLocalEnergy(const ValueVector& pos, - RealType Zeff, RealType Rc, OneMolecularOrbitalT& phiMO, +template +typename CuspCorrectionConstructionT::RealType 
CuspCorrectionConstructionT::getOriginalLocalEnergy( + const ValueVector& pos, + RealType Zeff, + RealType Rc, + OneMolecularOrbitalT& phiMO, ValueVector& ELorig) { - // assert(pos.size() == ELorig.size()); - - ValueType val; - GradType grad; - ValueType lap; - for (int i = 0; i < pos.size(); i++) { - RealType r = pos[i]; - phiMO.phi_vgl(r, val, grad, lap); - ELorig[i] = -0.5 * lap / val - Zeff / r; - } - - phiMO.phi_vgl(Rc, val, grad, lap); - return -0.5 * lap / val - Zeff / Rc; + // assert(pos.size() == ELorig.size()); + + ValueType val; + GradType grad; + ValueType lap; + for (int i = 0; i < pos.size(); i++) + { + RealType r = pos[i]; + phiMO.phi_vgl(r, val, grad, lap); + ELorig[i] = -0.5 * lap / val - Zeff / r; + } + + phiMO.phi_vgl(Rc, val, grad, lap); + return -0.5 * lap / val - Zeff / Rc; } // Sum of squares difference between the current local energy and the ideal // local energy. // This is the objective function to minimize. -template -typename CuspCorrectionConstructionT::RealType -CuspCorrectionConstructionT::getELchi2( - const ValueVector& ELcurr, const ValueVector& ELideal) +template +typename CuspCorrectionConstructionT::RealType CuspCorrectionConstructionT::getELchi2(const ValueVector& ELcurr, + const ValueVector& ELideal) { - assert(ELcurr.size() == ELideal.size()); - - RealType chi2 = 0.0; - for (int i = 0; i < ELcurr.size(); i++) { - RealType diff = ELcurr[i] - ELideal[i]; - chi2 += diff * diff; - } - return chi2; + assert(ELcurr.size() == ELideal.size()); + + RealType chi2 = 0.0; + for (int i = 0; i < ELcurr.size(); i++) + { + RealType diff = ELcurr[i] - ELideal[i]; + chi2 += diff * diff; + } + return chi2; } // Compute the chi squared distance given a value for phi at zero. 
-template -typename CuspCorrectionConstructionT::RealType -CuspCorrectionConstructionT::evaluateForPhi0Body(RealType phi0, - ValueVector& pos, ValueVector& ELcurr, ValueVector& ELideal, - CuspCorrectionT& cusp, OneMolecularOrbitalT& phiMO, - ValGradLap phiAtRc, RealType etaAtZero, RealType ELorigAtRc, RealType Z) +template +typename CuspCorrectionConstructionT::RealType CuspCorrectionConstructionT::evaluateForPhi0Body( + RealType phi0, + ValueVector& pos, + ValueVector& ELcurr, + ValueVector& ELideal, + CuspCorrectionT& cusp, + OneMolecularOrbitalT& phiMO, + ValGradLap phiAtRc, + RealType etaAtZero, + RealType ELorigAtRc, + RealType Z) { - cusp.cparam.sg = phi0 > 0.0 ? 1.0 : -1.0; - cusp.cparam.C = (phiAtRc.val * phi0 < 0.0) ? 1.5 * phiAtRc.val : 0.0; - TinyVector X; - evalX(phiAtRc.val, phiAtRc.grad, phiAtRc.lap, cusp.cparam.Rc, Z, - cusp.cparam.C, phi0, etaAtZero, X); - X2alpha(X, cusp.cparam.Rc, cusp.cparam.alpha); - RealType Zeff = getZeff(Z, etaAtZero, phiBar(cusp, 0.0, phiMO)); - getCurrentLocalEnergy( - pos, Zeff, cusp.cparam.Rc, ELorigAtRc, cusp, phiMO, ELcurr); - RealType chi2 = getELchi2(ELcurr, ELideal); - return chi2; + cusp.cparam.sg = phi0 > 0.0 ? 1.0 : -1.0; + cusp.cparam.C = (phiAtRc.val * phi0 < 0.0) ? 1.5 * phiAtRc.val : 0.0; + TinyVector X; + evalX(phiAtRc.val, phiAtRc.grad, phiAtRc.lap, cusp.cparam.Rc, Z, cusp.cparam.C, phi0, etaAtZero, X); + X2alpha(X, cusp.cparam.Rc, cusp.cparam.alpha); + RealType Zeff = getZeff(Z, etaAtZero, phiBar(cusp, 0.0, phiMO)); + getCurrentLocalEnergy(pos, Zeff, cusp.cparam.Rc, ELorigAtRc, cusp, phiMO, ELcurr); + RealType chi2 = getELchi2(ELcurr, ELideal); + return chi2; } // Optimize free parameter (value of phi at zero) to minimize distance to ideal // local energy. 
Output is return value and parameter values are in cusp.cparam -template -typename CuspCorrectionConstructionT::RealType -CuspCorrectionConstructionT::minimizeForPhiAtZero(CuspCorrectionT& cusp, - OneMolecularOrbitalT& phiMO, RealType Z, RealType eta0, ValueVector& pos, - ValueVector& ELcurr, ValueVector& ELideal, RealType start_phi0) +template +typename CuspCorrectionConstructionT::RealType CuspCorrectionConstructionT::minimizeForPhiAtZero( + CuspCorrectionT& cusp, + OneMolecularOrbitalT& phiMO, + RealType Z, + RealType eta0, + ValueVector& pos, + ValueVector& ELcurr, + ValueVector& ELideal, + RealType start_phi0) { - ValGradLap vglAtRc; - ValueVector tmp_pos(0); - ValueVector ELorig(0); - RealType Zeff = getZeff(Z, eta0, phiBar(cusp, 0.0, phiMO)); - - RealType ELorigAtRc = - getOriginalLocalEnergy(tmp_pos, Zeff, cusp.cparam.Rc, phiMO, ELorig); - getIdealLocalEnergy(pos, Z, cusp.cparam.Rc, ELorigAtRc, ELideal); - phiMO.phi_vgl(cusp.cparam.Rc, vglAtRc.val, vglAtRc.grad, vglAtRc.lap); - - Bracket_min_t bracket(start_phi0, 0.0, 0.0, false); - try { - bracket = bracket_minimum( - [&](RealType x) -> RealType { - return evaluateForPhi0Body(x, pos, ELcurr, ELideal, cusp, phiMO, - vglAtRc, eta0, ELorigAtRc, Z); - }, - start_phi0); - } - catch (const std::runtime_error& e) { - APP_ABORT("Bracketing minimum failed for finding phi0. 
\n"); - } - - auto min_res = find_minimum( + ValGradLap vglAtRc; + ValueVector tmp_pos(0); + ValueVector ELorig(0); + RealType Zeff = getZeff(Z, eta0, phiBar(cusp, 0.0, phiMO)); + + RealType ELorigAtRc = getOriginalLocalEnergy(tmp_pos, Zeff, cusp.cparam.Rc, phiMO, ELorig); + getIdealLocalEnergy(pos, Z, cusp.cparam.Rc, ELorigAtRc, ELideal); + phiMO.phi_vgl(cusp.cparam.Rc, vglAtRc.val, vglAtRc.grad, vglAtRc.lap); + + Bracket_min_t bracket(start_phi0, 0.0, 0.0, false); + try + { + bracket = bracket_minimum( [&](RealType x) -> RealType { - return evaluateForPhi0Body(x, pos, ELcurr, ELideal, cusp, phiMO, - vglAtRc, eta0, ELorigAtRc, Z); + return evaluateForPhi0Body(x, pos, ELcurr, ELideal, cusp, phiMO, vglAtRc, eta0, ELorigAtRc, Z); }, - bracket); - - start_phi0 = min_res.first; - - return min_res.second; + start_phi0); + } + catch (const std::runtime_error& e) + { + APP_ABORT("Bracketing minimum failed for finding phi0. \n"); + } + + auto min_res = find_minimum( + [&](RealType x) -> RealType { + return evaluateForPhi0Body(x, pos, ELcurr, ELideal, cusp, phiMO, vglAtRc, eta0, ELorigAtRc, Z); + }, + bracket); + + start_phi0 = min_res.first; + + return min_res.second; } // Optimize the cutoff radius. There is an inner loop optimizing for phi0 for // each value of Rc. 
Elcurr and ELideal are expected to have the correct size on // input (same size as pos) Output is parameter values in cusp.cparam -template -void -CuspCorrectionConstructionT::minimizeForRc(CuspCorrectionT& cusp, - OneMolecularOrbitalT& phiMO, RealType Z, RealType Rc_init, - RealType Rc_max, RealType eta0, ValueVector& pos, ValueVector& ELcurr, - ValueVector& ELideal) +template +void CuspCorrectionConstructionT::minimizeForRc(CuspCorrectionT& cusp, + OneMolecularOrbitalT& phiMO, + RealType Z, + RealType Rc_init, + RealType Rc_max, + RealType eta0, + ValueVector& pos, + ValueVector& ELcurr, + ValueVector& ELideal) { - Bracket_min_t bracket(Rc_init, 0.0, 0.0, false); - RealType start_phi0 = phiMO.phi(0.0); - try { - bracket = bracket_minimum( - [&](RealType x) -> RealType { - cusp.cparam.Rc = x; - return minimizeForPhiAtZero( - cusp, phiMO, Z, eta0, pos, ELcurr, ELideal, start_phi0); - }, - Rc_init, Rc_max); - } - catch (const std::runtime_error& e) { - APP_ABORT("Bracketing minimum failed for finding rc. \n"); - } - - if (bracket.success) { - auto min_res = find_minimum( - [&](RealType x) -> RealType { - cusp.cparam.Rc = x; - return minimizeForPhiAtZero( - cusp, phiMO, Z, eta0, pos, ELcurr, ELideal, start_phi0); - }, - bracket); - } - else { - cusp.cparam.Rc = bracket.a; - minimizeForPhiAtZero( - cusp, phiMO, Z, eta0, pos, ELcurr, ELideal, start_phi0); - } + Bracket_min_t bracket(Rc_init, 0.0, 0.0, false); + RealType start_phi0 = phiMO.phi(0.0); + try + { + bracket = bracket_minimum( + [&](RealType x) -> RealType { + cusp.cparam.Rc = x; + return minimizeForPhiAtZero(cusp, phiMO, Z, eta0, pos, ELcurr, ELideal, start_phi0); + }, + Rc_init, Rc_max); + } + catch (const std::runtime_error& e) + { + APP_ABORT("Bracketing minimum failed for finding rc. 
\n"); + } + + if (bracket.success) + { + auto min_res = find_minimum( + [&](RealType x) -> RealType { + cusp.cparam.Rc = x; + return minimizeForPhiAtZero(cusp, phiMO, Z, eta0, pos, ELcurr, ELideal, start_phi0); + }, + bracket); + } + else + { + cusp.cparam.Rc = bracket.a; + minimizeForPhiAtZero(cusp, phiMO, Z, eta0, pos, ELcurr, ELideal, start_phi0); + } } // Modifies orbital set lcwc -template -void -CuspCorrectionConstructionT::applyCuspCorrection( - const Matrix>& info, - ParticleSetT& targetPtcl, ParticleSetT& sourcePtcl, - LCAOrbitalSetT& lcao, SoaCuspCorrectionT& cusp, const std::string& id) +template +void CuspCorrectionConstructionT::applyCuspCorrection(const Matrix>& info, + ParticleSetT& targetPtcl, + ParticleSetT& sourcePtcl, + LCAOrbitalSetT& lcao, + SoaCuspCorrectionT& cusp, + const std::string& id) { - const int num_centers = info.rows(); - const int orbital_set_size = info.cols(); - using RealType = typename SPOSetT::RealType; - - NewTimer& cuspApplyTimer = createGlobalTimer( - "CuspCorrectionConstruction::applyCuspCorrection", timer_level_medium); - - ScopedTimer cuspApplyTimerWrapper(cuspApplyTimer); - - LCAOrbitalSetT phi("phi", - std::unique_ptr::basis_type>( - lcao.myBasisSet->makeClone())); - phi.setOrbitalSetSize(lcao.getOrbitalSetSize()); - - LCAOrbitalSetT eta("eta", - std::unique_ptr::basis_type>( - lcao.myBasisSet->makeClone())); - eta.setOrbitalSetSize(lcao.getOrbitalSetSize()); - - std::vector corrCenter(num_centers, "true"); - - // What's this grid's lifespan? Why on the heap? 
- auto radial_grid = std::make_unique>(); - radial_grid->set(0.000001, 100.0, 1001); - - Vector xgrid; - Vector rad_orb; - xgrid.resize(radial_grid->size()); - rad_orb.resize(radial_grid->size()); - for (int ig = 0; ig < radial_grid->size(); ig++) { - xgrid[ig] = radial_grid->r(ig); - } - - for (int ic = 0; ic < num_centers; ic++) { - *eta.C = *lcao.C; - *phi.C = *lcao.C; - - splitPhiEta(ic, corrCenter, phi, eta); - - // loop over MO index - cot must be an array (of len MO size) - // the loop is inside cot - in the multiqunitic - auto cot = std::make_unique>(); - cot->initializeRadialSet(*radial_grid, orbital_set_size); - // How is this useful? - // cot->ID.resize(orbital_set_size); - // for (int mo_idx = 0; mo_idx < orbital_set_size; mo_idx++) { - // cot->ID[mo_idx] = mo_idx; - // } - - for (int mo_idx = 0; mo_idx < orbital_set_size; mo_idx++) { - computeRadialPhiBar(&targetPtcl, &sourcePtcl, mo_idx, ic, &phi, - xgrid, rad_orb, info(ic, mo_idx)); - RealType yprime_i = (rad_orb[1] - rad_orb[0]) / - (radial_grid->r(1) - radial_grid->r(0)); - OneDimQuinticSpline radial_spline( - radial_grid->makeClone(), rad_orb); - radial_spline.spline(0, yprime_i, rad_orb.size() - 1, 0.0); - cot->addSpline(mo_idx, radial_spline); - - if (outputManager.isDebugActive()) { - // For testing against AoS output - // Output phiBar to soaOrbs.downdet.C0.MO0 - int nElms = 500; - RealType dx = info(ic, mo_idx).Rc * 1.2 / nElms; - Vector pos; - Vector output_orb; - pos.resize(nElms); - output_orb.resize(nElms); - for (int i = 0; i < nElms; i++) { - pos[i] = (i + 1.0) * dx; - } - computeRadialPhiBar(&targetPtcl, &sourcePtcl, mo_idx, ic, &phi, - pos, output_orb, info(ic, mo_idx)); - std::string filename = "soaOrbs." 
+ id + ".C" + - std::to_string(ic) + ".MO" + std::to_string(mo_idx); - std::cout << "Writing to " << filename << std::endl; - std::ofstream out(filename.c_str()); - out << "# r phiBar(r)" << std::endl; - for (int i = 0; i < nElms; i++) { - out << pos[i] << " " << output_orb[i] << std::endl; - } - out.close(); - } + const int num_centers = info.rows(); + const int orbital_set_size = info.cols(); + using RealType = typename SPOSetT::RealType; + + NewTimer& cuspApplyTimer = createGlobalTimer("CuspCorrectionConstruction::applyCuspCorrection", timer_level_medium); + + ScopedTimer cuspApplyTimerWrapper(cuspApplyTimer); + + LCAOrbitalSetT phi("phi", std::unique_ptr::basis_type>(lcao.myBasisSet->makeClone())); + phi.setOrbitalSetSize(lcao.getOrbitalSetSize()); + + LCAOrbitalSetT eta("eta", std::unique_ptr::basis_type>(lcao.myBasisSet->makeClone())); + eta.setOrbitalSetSize(lcao.getOrbitalSetSize()); + + std::vector corrCenter(num_centers, "true"); + + // What's this grid's lifespan? Why on the heap? + auto radial_grid = std::make_unique>(); + radial_grid->set(0.000001, 100.0, 1001); + + Vector xgrid; + Vector rad_orb; + xgrid.resize(radial_grid->size()); + rad_orb.resize(radial_grid->size()); + for (int ig = 0; ig < radial_grid->size(); ig++) + { + xgrid[ig] = radial_grid->r(ig); + } + + for (int ic = 0; ic < num_centers; ic++) + { + *eta.C = *lcao.C; + *phi.C = *lcao.C; + + splitPhiEta(ic, corrCenter, phi, eta); + + // loop over MO index - cot must be an array (of len MO size) + // the loop is inside cot - in the multiqunitic + auto cot = std::make_unique>(); + cot->initializeRadialSet(*radial_grid, orbital_set_size); + // How is this useful? 
+ // cot->ID.resize(orbital_set_size); + // for (int mo_idx = 0; mo_idx < orbital_set_size; mo_idx++) { + // cot->ID[mo_idx] = mo_idx; + // } + + for (int mo_idx = 0; mo_idx < orbital_set_size; mo_idx++) + { + computeRadialPhiBar(&targetPtcl, &sourcePtcl, mo_idx, ic, &phi, xgrid, rad_orb, info(ic, mo_idx)); + RealType yprime_i = (rad_orb[1] - rad_orb[0]) / (radial_grid->r(1) - radial_grid->r(0)); + OneDimQuinticSpline radial_spline(radial_grid->makeClone(), rad_orb); + radial_spline.spline(0, yprime_i, rad_orb.size() - 1, 0.0); + cot->addSpline(mo_idx, radial_spline); + + if (outputManager.isDebugActive()) + { + // For testing against AoS output + // Output phiBar to soaOrbs.downdet.C0.MO0 + int nElms = 500; + RealType dx = info(ic, mo_idx).Rc * 1.2 / nElms; + Vector pos; + Vector output_orb; + pos.resize(nElms); + output_orb.resize(nElms); + for (int i = 0; i < nElms; i++) + { + pos[i] = (i + 1.0) * dx; + } + computeRadialPhiBar(&targetPtcl, &sourcePtcl, mo_idx, ic, &phi, pos, output_orb, info(ic, mo_idx)); + std::string filename = "soaOrbs." 
+ id + ".C" + std::to_string(ic) + ".MO" + std::to_string(mo_idx); + std::cout << "Writing to " << filename << std::endl; + std::ofstream out(filename.c_str()); + out << "# r phiBar(r)" << std::endl; + for (int i = 0; i < nElms; i++) + { + out << pos[i] << " " << output_orb[i] << std::endl; } - cusp.add(ic, std::move(cot)); + out.close(); + } } - removeSTypeOrbitals(corrCenter, lcao); + cusp.add(ic, std::move(cot)); + } + removeSTypeOrbitals(corrCenter, lcao); } -template -void -CuspCorrectionConstructionT::generateCuspInfo( - Matrix>& info, - const ParticleSetT& targetPtcl, const ParticleSetT& sourcePtcl, - const LCAOrbitalSetT& lcao, const std::string& id, Communicate& Comm) +template +void CuspCorrectionConstructionT::generateCuspInfo(Matrix>& info, + const ParticleSetT& targetPtcl, + const ParticleSetT& sourcePtcl, + const LCAOrbitalSetT& lcao, + const std::string& id, + Communicate& Comm) { - const int num_centers = info.rows(); - const int orbital_set_size = info.cols(); - using RealType = typename SPOSetT::RealType; - using ValueVector = typename SPOSetT::ValueVector; + const int num_centers = info.rows(); + const int orbital_set_size = info.cols(); + using RealType = typename SPOSetT::RealType; + using ValueVector = typename SPOSetT::ValueVector; - NewTimer& cuspCreateTimer = createGlobalTimer( - "CuspCorrectionConstruction::createCuspParameters", timer_level_medium); - NewTimer& splitPhiEtaTimer = createGlobalTimer( - "CuspCorrectionConstruction::splitPhiEta", timer_level_fine); - NewTimer& computeTimer = createGlobalTimer( - "CuspCorrectionConstruction::computeCorrection", timer_level_fine); + NewTimer& cuspCreateTimer = createGlobalTimer("CuspCorrectionConstruction::createCuspParameters", timer_level_medium); + NewTimer& splitPhiEtaTimer = createGlobalTimer("CuspCorrectionConstruction::splitPhiEta", timer_level_fine); + NewTimer& computeTimer = createGlobalTimer("CuspCorrectionConstruction::computeCorrection", timer_level_fine); - ScopedTimer 
createCuspTimerWrapper(cuspCreateTimer); + ScopedTimer createCuspTimerWrapper(cuspCreateTimer); - LCAOrbitalSetT phi("phi", - std::unique_ptr::basis_type>( - lcao.myBasisSet->makeClone())); - phi.setOrbitalSetSize(lcao.getOrbitalSetSize()); + LCAOrbitalSetT phi("phi", std::unique_ptr::basis_type>(lcao.myBasisSet->makeClone())); + phi.setOrbitalSetSize(lcao.getOrbitalSetSize()); - LCAOrbitalSetT eta("eta", - std::unique_ptr::basis_type>( - lcao.myBasisSet->makeClone())); - eta.setOrbitalSetSize(lcao.getOrbitalSetSize()); + LCAOrbitalSetT eta("eta", std::unique_ptr::basis_type>(lcao.myBasisSet->makeClone())); + eta.setOrbitalSetSize(lcao.getOrbitalSetSize()); - std::vector corrCenter(num_centers, "true"); + std::vector corrCenter(num_centers, "true"); - using GridType = OneDimGridBase; - int npts = 500; + using GridType = OneDimGridBase; + int npts = 500; - // Parallelize correction of MO's across MPI ranks - std::vector offset; - FairDivideLow(orbital_set_size, Comm.size(), offset); + // Parallelize correction of MO's across MPI ranks + std::vector offset; + FairDivideLow(orbital_set_size, Comm.size(), offset); - int start_mo = offset[Comm.rank()]; - int end_mo = offset[Comm.rank() + 1]; - app_log() - << " Number of molecular orbitals to compute correction on this rank: " - << end_mo - start_mo << std::endl; + int start_mo = offset[Comm.rank()]; + int end_mo = offset[Comm.rank() + 1]; + app_log() << " Number of molecular orbitals to compute correction on this rank: " << end_mo - start_mo << std::endl; // Specify dynamic scheduling explicitly for load balancing. Each iteration // should take enough time that scheduling overhead is not an issue. 
#pragma omp parallel for schedule(dynamic) collapse(2) - for (int center_idx = 0; center_idx < num_centers; center_idx++) { - for (int mo_idx = start_mo; mo_idx < end_mo; mo_idx++) { - ParticleSetT localTargetPtcl(targetPtcl); - ParticleSetT localSourcePtcl(sourcePtcl); + for (int center_idx = 0; center_idx < num_centers; center_idx++) + { + for (int mo_idx = start_mo; mo_idx < end_mo; mo_idx++) + { + ParticleSetT localTargetPtcl(targetPtcl); + ParticleSetT localSourcePtcl(sourcePtcl); - LCAOrbitalSetT local_phi("local_phi", - std::unique_ptr::basis_type>( - phi.myBasisSet->makeClone())); - local_phi.setOrbitalSetSize(phi.getOrbitalSetSize()); + LCAOrbitalSetT local_phi("local_phi", + std::unique_ptr::basis_type>(phi.myBasisSet->makeClone())); + local_phi.setOrbitalSetSize(phi.getOrbitalSetSize()); - LCAOrbitalSetT local_eta("local_eta", - std::unique_ptr::basis_type>( - eta.myBasisSet->makeClone())); - local_eta.setOrbitalSetSize(eta.getOrbitalSetSize()); + LCAOrbitalSetT local_eta("local_eta", + std::unique_ptr::basis_type>(eta.myBasisSet->makeClone())); + local_eta.setOrbitalSetSize(eta.getOrbitalSetSize()); #pragma omp critical - app_log() << " Working on MO: " << mo_idx - << " Center: " << center_idx << std::endl; - - { - ScopedTimer local_timer(splitPhiEtaTimer); - - *local_eta.C = *lcao.C; - *local_phi.C = *lcao.C; - splitPhiEta(center_idx, corrCenter, local_phi, local_eta); - } - - bool corrO = false; - auto& cref(*(local_phi.C)); - for (int ip = 0; ip < cref.cols(); ip++) { - if (std::abs(cref(mo_idx, ip)) > 0) { - corrO = true; - break; - } - } - - if (corrO) { - OneMolecularOrbitalT etaMO( - &localTargetPtcl, &localSourcePtcl, &local_eta); - etaMO.changeOrbital(center_idx, mo_idx); - - OneMolecularOrbitalT phiMO( - &localTargetPtcl, &localSourcePtcl, &local_phi); - phiMO.changeOrbital(center_idx, mo_idx); - - SpeciesSet& tspecies(localSourcePtcl.getSpeciesSet()); - int iz = tspecies.addAttribute("charge"); - RealType Z = tspecies(iz, 
localSourcePtcl.GroupID[center_idx]); - - RealType Rc_max = 0.2; - RealType rc = 0.1; - - RealType dx = rc * 1.2 / npts; - ValueVector pos(npts); - ValueVector ELideal(npts); - ValueVector ELcurr(npts); - for (int i = 0; i < npts; i++) { - pos[i] = (i + 1.0) * dx; - } - - RealType eta0 = etaMO.phi(0.0); - ValueVector ELorig(npts); - CuspCorrectionT cusp(info(center_idx, mo_idx)); - { - ScopedTimer local_timer(computeTimer); - minimizeForRc( - cusp, phiMO, Z, rc, Rc_max, eta0, pos, ELcurr, ELideal); - } - // Update shared object. Each iteration accesses a different - // element and this is an array (no bookkeeping data to update), - // so no synchronization is necessary. - info(center_idx, mo_idx) = cusp.cparam; - } + app_log() << " Working on MO: " << mo_idx << " Center: " << center_idx << std::endl; + + { + ScopedTimer local_timer(splitPhiEtaTimer); + + *local_eta.C = *lcao.C; + *local_phi.C = *lcao.C; + splitPhiEta(center_idx, corrCenter, local_phi, local_eta); + } + + bool corrO = false; + auto& cref(*(local_phi.C)); + for (int ip = 0; ip < cref.cols(); ip++) + { + if (std::abs(cref(mo_idx, ip)) > 0) + { + corrO = true; + break; + } + } + + if (corrO) + { + OneMolecularOrbitalT etaMO(&localTargetPtcl, &localSourcePtcl, &local_eta); + etaMO.changeOrbital(center_idx, mo_idx); + + OneMolecularOrbitalT phiMO(&localTargetPtcl, &localSourcePtcl, &local_phi); + phiMO.changeOrbital(center_idx, mo_idx); + + SpeciesSet& tspecies(localSourcePtcl.getSpeciesSet()); + int iz = tspecies.addAttribute("charge"); + RealType Z = tspecies(iz, localSourcePtcl.GroupID[center_idx]); + + RealType Rc_max = 0.2; + RealType rc = 0.1; + + RealType dx = rc * 1.2 / npts; + ValueVector pos(npts); + ValueVector ELideal(npts); + ValueVector ELcurr(npts); + for (int i = 0; i < npts; i++) + { + pos[i] = (i + 1.0) * dx; } - } - for (int root = 0; root < Comm.size(); root++) { - int start_mo = offset[root]; - int end_mo = offset[root + 1]; - for (int mo_idx = start_mo; mo_idx < end_mo; mo_idx++) { 
- for (int center_idx = 0; center_idx < num_centers; center_idx++) { - broadcastCuspInfo(info(center_idx, mo_idx), Comm, root); - } + RealType eta0 = etaMO.phi(0.0); + ValueVector ELorig(npts); + CuspCorrectionT cusp(info(center_idx, mo_idx)); + { + ScopedTimer local_timer(computeTimer); + minimizeForRc(cusp, phiMO, Z, rc, Rc_max, eta0, pos, ELcurr, ELideal); } + // Update shared object. Each iteration accesses a different + // element and this is an array (no bookkeeping data to update), + // so no synchronization is necessary. + info(center_idx, mo_idx) = cusp.cparam; + } + } + } + + for (int root = 0; root < Comm.size(); root++) + { + int start_mo = offset[root]; + int end_mo = offset[root + 1]; + for (int mo_idx = start_mo; mo_idx < end_mo; mo_idx++) + { + for (int center_idx = 0; center_idx < num_centers; center_idx++) + { + broadcastCuspInfo(info(center_idx, mo_idx), Comm, root); + } } + } } -template -void -CuspCorrectionConstructionT::broadcastCuspInfo( - CuspCorrectionParametersT& param, Communicate& Comm, int root) +template +void CuspCorrectionConstructionT::broadcastCuspInfo(CuspCorrectionParametersT& param, Communicate& Comm, int root) { #ifdef HAVE_MPI - std::vector buffer(9); - buffer[0] = param.Rc; - buffer[1] = param.C; - buffer[2] = param.sg; - buffer[3] = param.alpha[0]; - buffer[4] = param.alpha[1]; - buffer[5] = param.alpha[2]; - buffer[6] = param.alpha[3]; - buffer[7] = param.alpha[4]; - buffer[8] = param.redo; - - Comm.comm.broadcast(buffer.begin(), buffer.end(), root); - - param.Rc = buffer[0]; - param.C = buffer[1]; - param.sg = buffer[2]; - param.alpha[0] = buffer[3]; - param.alpha[1] = buffer[4]; - param.alpha[2] = buffer[5]; - param.alpha[3] = buffer[6]; - param.alpha[4] = buffer[7]; - param.redo = buffer[8] == 0.0 ? 
0 : 1; + std::vector buffer(9); + buffer[0] = param.Rc; + buffer[1] = param.C; + buffer[2] = param.sg; + buffer[3] = param.alpha[0]; + buffer[4] = param.alpha[1]; + buffer[5] = param.alpha[2]; + buffer[6] = param.alpha[3]; + buffer[7] = param.alpha[4]; + buffer[8] = param.redo; + + Comm.comm.broadcast(buffer.begin(), buffer.end(), root); + + param.Rc = buffer[0]; + param.C = buffer[1]; + param.sg = buffer[2]; + param.alpha[0] = buffer[3]; + param.alpha[1] = buffer[4]; + param.alpha[2] = buffer[5]; + param.alpha[3] = buffer[6]; + param.alpha[4] = buffer[7]; + param.redo = buffer[8] == 0.0 ? 0 : 1; #endif } -template -bool -CuspCorrectionConstructionT::readCuspInfo(const std::string& cuspInfoFile, - const std::string& objectName, int OrbitalSetSize, - Matrix>& info) +template +bool CuspCorrectionConstructionT::readCuspInfo(const std::string& cuspInfoFile, + const std::string& objectName, + int OrbitalSetSize, + Matrix>& info) { - bool success = true; - int ncenter = info.rows(); - app_log() << "Reading cusp info from : " << cuspInfoFile << std::endl; - Libxml2Document adoc; - if (!adoc.parse(cuspInfoFile)) { - app_log() << "Could not find precomputed cusp data for spo set: " - << objectName << std::endl; - app_log() << "Recalculating data.\n"; - return false; + bool success = true; + int ncenter = info.rows(); + app_log() << "Reading cusp info from : " << cuspInfoFile << std::endl; + Libxml2Document adoc; + if (!adoc.parse(cuspInfoFile)) + { + app_log() << "Could not find precomputed cusp data for spo set: " << objectName << std::endl; + app_log() << "Recalculating data.\n"; + return false; + } + xmlNodePtr head = adoc.getRoot(); + head = head->children; + xmlNodePtr cur = NULL, ctr; + while (head != NULL) + { + std::string cname(getNodeName(head)); + if (cname == "sposet") + { + std::string name; + OhmmsAttributeSet spoAttrib; + spoAttrib.add(name, "name"); + spoAttrib.put(head); + if (name == objectName) + { + cur = head; + break; + } } - xmlNodePtr head = 
adoc.getRoot(); - head = head->children; - xmlNodePtr cur = NULL, ctr; - while (head != NULL) { - std::string cname(getNodeName(head)); - if (cname == "sposet") { - std::string name; - OhmmsAttributeSet spoAttrib; - spoAttrib.add(name, "name"); - spoAttrib.put(head); - if (name == objectName) { - cur = head; - break; - } + head = head->next; + } + if (cur == NULL) + { + app_log() << "Could not find precomputed cusp data for spo set: " << objectName << std::endl; + app_log() << "Recalculating data.\n"; + return false; + } + else + { + app_log() << "Found precomputed cusp data for spo set: " << objectName << std::endl; + } + cur = cur->children; + while (cur != NULL) + { + std::string cname(getNodeName(cur)); + if (cname == "center") + { + int num = -1; + OhmmsAttributeSet Attrib; + Attrib.add(num, "num"); + Attrib.put(cur); + if (num < 0 || num >= ncenter) + { + APP_ABORT("Error with cusp info xml block. incorrect center " + "number. \n"); + } + ctr = cur->children; + while (ctr != NULL) + { + std::string cname(getNodeName(ctr)); + if (cname == "orbital") + { + int orb = -1; + OhmmsAttributeSet orbAttrib; + RealType a1(0.0), a2, a3, a4, a5, a6, a7, a8, a9; + orbAttrib.add(orb, "num"); + orbAttrib.add(a1, "redo"); + orbAttrib.add(a2, "C"); + orbAttrib.add(a3, "sg"); + orbAttrib.add(a4, "rc"); + orbAttrib.add(a5, "a1"); + orbAttrib.add(a6, "a2"); + orbAttrib.add(a7, "a3"); + orbAttrib.add(a8, "a4"); + orbAttrib.add(a9, "a5"); + orbAttrib.put(ctr); + if (orb < OrbitalSetSize) + { + info(num, orb).redo = a1; + info(num, orb).C = a2; + info(num, orb).sg = a3; + info(num, orb).Rc = a4; + info(num, orb).alpha[0] = a5; + info(num, orb).alpha[1] = a6; + info(num, orb).alpha[2] = a7; + info(num, orb).alpha[3] = a8; + info(num, orb).alpha[4] = a9; + } } - head = head->next; - } - if (cur == NULL) { - app_log() << "Could not find precomputed cusp data for spo set: " - << objectName << std::endl; - app_log() << "Recalculating data.\n"; - return false; - } - else { - app_log() << 
"Found precomputed cusp data for spo set: " << objectName - << std::endl; + ctr = ctr->next; + } } - cur = cur->children; - while (cur != NULL) { - std::string cname(getNodeName(cur)); - if (cname == "center") { - int num = -1; - OhmmsAttributeSet Attrib; - Attrib.add(num, "num"); - Attrib.put(cur); - if (num < 0 || num >= ncenter) { - APP_ABORT("Error with cusp info xml block. incorrect center " - "number. \n"); - } - ctr = cur->children; - while (ctr != NULL) { - std::string cname(getNodeName(ctr)); - if (cname == "orbital") { - int orb = -1; - OhmmsAttributeSet orbAttrib; - RealType a1(0.0), a2, a3, a4, a5, a6, a7, a8, a9; - orbAttrib.add(orb, "num"); - orbAttrib.add(a1, "redo"); - orbAttrib.add(a2, "C"); - orbAttrib.add(a3, "sg"); - orbAttrib.add(a4, "rc"); - orbAttrib.add(a5, "a1"); - orbAttrib.add(a6, "a2"); - orbAttrib.add(a7, "a3"); - orbAttrib.add(a8, "a4"); - orbAttrib.add(a9, "a5"); - orbAttrib.put(ctr); - if (orb < OrbitalSetSize) { - info(num, orb).redo = a1; - info(num, orb).C = a2; - info(num, orb).sg = a3; - info(num, orb).Rc = a4; - info(num, orb).alpha[0] = a5; - info(num, orb).alpha[1] = a6; - info(num, orb).alpha[2] = a7; - info(num, orb).alpha[3] = a8; - info(num, orb).alpha[4] = a9; - } - } - ctr = ctr->next; - } - } - cur = cur->next; - } - return success; + cur = cur->next; + } + return success; } -template -void -CuspCorrectionConstructionT::saveCusp(const std::string& filename, - const Matrix>& info, const std::string& id) +template +void CuspCorrectionConstructionT::saveCusp(const std::string& filename, + const Matrix>& info, + const std::string& id) { - const int num_centers = info.rows(); - const int orbital_set_size = info.cols(); - xmlDocPtr doc = xmlNewDoc((const xmlChar*)"1.0"); - xmlNodePtr cuspRoot = xmlNewNode(NULL, BAD_CAST "qmcsystem"); - xmlNodePtr spo = xmlNewNode(NULL, (const xmlChar*)"sposet"); - xmlNewProp(spo, (const xmlChar*)"name", (const xmlChar*)id.c_str()); - xmlAddChild(cuspRoot, spo); - xmlDocSetRootElement(doc, 
cuspRoot); - - for (int center_idx = 0; center_idx < num_centers; center_idx++) { - xmlNodePtr ctr = xmlNewNode(NULL, (const xmlChar*)"center"); - std::ostringstream num; - num << center_idx; - xmlNewProp( - ctr, (const xmlChar*)"num", (const xmlChar*)num.str().c_str()); - - for (int mo_idx = 0; mo_idx < orbital_set_size; mo_idx++) { - std::ostringstream num0, C, sg, rc, a1, a2, a3, a4, a5; - xmlNodePtr orb = xmlNewNode(NULL, (const xmlChar*)"orbital"); - num0 << mo_idx; - xmlNewProp( - orb, (const xmlChar*)"num", (const xmlChar*)num0.str().c_str()); - - C.setf(std::ios::scientific, std::ios::floatfield); - C.precision(14); - C << info(center_idx, mo_idx).C; - sg.setf(std::ios::scientific, std::ios::floatfield); - sg.precision(14); - sg << info(center_idx, mo_idx).sg; - rc.setf(std::ios::scientific, std::ios::floatfield); - rc.precision(14); - rc << info(center_idx, mo_idx).Rc; - a1.setf(std::ios::scientific, std::ios::floatfield); - a1.precision(14); - a1 << info(center_idx, mo_idx).alpha[0]; - a2.setf(std::ios::scientific, std::ios::floatfield); - a2.precision(14); - a2 << info(center_idx, mo_idx).alpha[1]; - a3.setf(std::ios::scientific, std::ios::floatfield); - a3.precision(14); - a3 << info(center_idx, mo_idx).alpha[2]; - a4.setf(std::ios::scientific, std::ios::floatfield); - a4.precision(14); - a4 << info(center_idx, mo_idx).alpha[3]; - a5.setf(std::ios::scientific, std::ios::floatfield); - a5.precision(14); - a5 << info(center_idx, mo_idx).alpha[4]; - xmlNewProp( - orb, (const xmlChar*)"C", (const xmlChar*)C.str().c_str()); - xmlNewProp( - orb, (const xmlChar*)"sg", (const xmlChar*)sg.str().c_str()); - xmlNewProp( - orb, (const xmlChar*)"rc", (const xmlChar*)rc.str().c_str()); - xmlNewProp( - orb, (const xmlChar*)"a1", (const xmlChar*)a1.str().c_str()); - xmlNewProp( - orb, (const xmlChar*)"a2", (const xmlChar*)a2.str().c_str()); - xmlNewProp( - orb, (const xmlChar*)"a3", (const xmlChar*)a3.str().c_str()); - xmlNewProp( - orb, (const xmlChar*)"a4", (const 
xmlChar*)a4.str().c_str()); - xmlNewProp( - orb, (const xmlChar*)"a5", (const xmlChar*)a5.str().c_str()); - xmlAddChild(ctr, orb); - } - xmlAddChild(spo, ctr); + const int num_centers = info.rows(); + const int orbital_set_size = info.cols(); + xmlDocPtr doc = xmlNewDoc((const xmlChar*)"1.0"); + xmlNodePtr cuspRoot = xmlNewNode(NULL, BAD_CAST "qmcsystem"); + xmlNodePtr spo = xmlNewNode(NULL, (const xmlChar*)"sposet"); + xmlNewProp(spo, (const xmlChar*)"name", (const xmlChar*)id.c_str()); + xmlAddChild(cuspRoot, spo); + xmlDocSetRootElement(doc, cuspRoot); + + for (int center_idx = 0; center_idx < num_centers; center_idx++) + { + xmlNodePtr ctr = xmlNewNode(NULL, (const xmlChar*)"center"); + std::ostringstream num; + num << center_idx; + xmlNewProp(ctr, (const xmlChar*)"num", (const xmlChar*)num.str().c_str()); + + for (int mo_idx = 0; mo_idx < orbital_set_size; mo_idx++) + { + std::ostringstream num0, C, sg, rc, a1, a2, a3, a4, a5; + xmlNodePtr orb = xmlNewNode(NULL, (const xmlChar*)"orbital"); + num0 << mo_idx; + xmlNewProp(orb, (const xmlChar*)"num", (const xmlChar*)num0.str().c_str()); + + C.setf(std::ios::scientific, std::ios::floatfield); + C.precision(14); + C << info(center_idx, mo_idx).C; + sg.setf(std::ios::scientific, std::ios::floatfield); + sg.precision(14); + sg << info(center_idx, mo_idx).sg; + rc.setf(std::ios::scientific, std::ios::floatfield); + rc.precision(14); + rc << info(center_idx, mo_idx).Rc; + a1.setf(std::ios::scientific, std::ios::floatfield); + a1.precision(14); + a1 << info(center_idx, mo_idx).alpha[0]; + a2.setf(std::ios::scientific, std::ios::floatfield); + a2.precision(14); + a2 << info(center_idx, mo_idx).alpha[1]; + a3.setf(std::ios::scientific, std::ios::floatfield); + a3.precision(14); + a3 << info(center_idx, mo_idx).alpha[2]; + a4.setf(std::ios::scientific, std::ios::floatfield); + a4.precision(14); + a4 << info(center_idx, mo_idx).alpha[3]; + a5.setf(std::ios::scientific, std::ios::floatfield); + a5.precision(14); + a5 << 
info(center_idx, mo_idx).alpha[4]; + xmlNewProp(orb, (const xmlChar*)"C", (const xmlChar*)C.str().c_str()); + xmlNewProp(orb, (const xmlChar*)"sg", (const xmlChar*)sg.str().c_str()); + xmlNewProp(orb, (const xmlChar*)"rc", (const xmlChar*)rc.str().c_str()); + xmlNewProp(orb, (const xmlChar*)"a1", (const xmlChar*)a1.str().c_str()); + xmlNewProp(orb, (const xmlChar*)"a2", (const xmlChar*)a2.str().c_str()); + xmlNewProp(orb, (const xmlChar*)"a3", (const xmlChar*)a3.str().c_str()); + xmlNewProp(orb, (const xmlChar*)"a4", (const xmlChar*)a4.str().c_str()); + xmlNewProp(orb, (const xmlChar*)"a5", (const xmlChar*)a5.str().c_str()); + xmlAddChild(ctr, orb); } + xmlAddChild(spo, ctr); + } - app_log() << "Saving resulting cusp Info xml block to: " << filename - << std::endl; - xmlSaveFormatFile(filename.c_str(), doc, 1); - xmlFreeDoc(doc); + app_log() << "Saving resulting cusp Info xml block to: " << filename << std::endl; + xmlSaveFormatFile(filename.c_str(), doc, 1); + xmlFreeDoc(doc); } template class CuspCorrectionConstructionT; diff --git a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h index 497898bfe8e..cee0f559959 100644 --- a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h +++ b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h @@ -22,132 +22,127 @@ class Communicate; namespace qmcplusplus { -template +template class ParticleSetT; -template +template class OneMolecularOrbitalT { public: - using RealType = typename OrbitalSetTraits::RealType; - using ValueType = typename OrbitalSetTraits::ValueType; - using GradType = typename OrbitalSetTraits::GradType; - using ValueVector = typename OrbitalSetTraits::ValueVector; - using GradVector = typename OrbitalSetTraits::GradVector; - using SPOSetPtr = SPOSetT*; - - ValueType - phi(RealType r) - { - TinyVector dr = 0; - dr[0] = r; - - targetPtcl->R[0] = sourcePtcl->R[curCenter]; - targetPtcl->makeMove(0, dr); - Psi1->evaluateValue(*targetPtcl, 0, 
val1); - - return val1[curOrb]; - } - - void - phi_vgl(RealType r, RealType& val, GradType& grad, RealType& lap) - { - TinyVector dr = 0; - dr[0] = r; - - targetPtcl->R[0] = sourcePtcl->R[curCenter]; - targetPtcl->makeMove(0, dr); - Psi1->evaluateVGL(*targetPtcl, 0, val1, grad1, lap1); - - val = val1[curOrb]; - grad = grad1[curOrb]; - lap = lap1[curOrb]; - } - - OneMolecularOrbitalT( - ParticleSetT* targetP, ParticleSetT* sourceP, SPOSetPtr Phi) : - targetPtcl(targetP), - sourcePtcl(sourceP), - curOrb(0), - curCenter(0) - { - Psi1 = Phi; - int norb = Psi1->getOrbitalSetSize(); - val1.resize(norb); - grad1.resize(norb); - lap1.resize(norb); - } - - void - changeOrbital(int centerIdx, int orbIdx) - { - curCenter = centerIdx; - curOrb = orbIdx; - } + using RealType = typename OrbitalSetTraits::RealType; + using ValueType = typename OrbitalSetTraits::ValueType; + using GradType = typename OrbitalSetTraits::GradType; + using ValueVector = typename OrbitalSetTraits::ValueVector; + using GradVector = typename OrbitalSetTraits::GradVector; + using SPOSetPtr = SPOSetT*; + + ValueType phi(RealType r) + { + TinyVector dr = 0; + dr[0] = r; + + targetPtcl->R[0] = sourcePtcl->R[curCenter]; + targetPtcl->makeMove(0, dr); + Psi1->evaluateValue(*targetPtcl, 0, val1); + + return val1[curOrb]; + } + + void phi_vgl(RealType r, RealType& val, GradType& grad, RealType& lap) + { + TinyVector dr = 0; + dr[0] = r; + + targetPtcl->R[0] = sourcePtcl->R[curCenter]; + targetPtcl->makeMove(0, dr); + Psi1->evaluateVGL(*targetPtcl, 0, val1, grad1, lap1); + + val = val1[curOrb]; + grad = grad1[curOrb]; + lap = lap1[curOrb]; + } + + OneMolecularOrbitalT(ParticleSetT* targetP, ParticleSetT* sourceP, SPOSetPtr Phi) + : targetPtcl(targetP), sourcePtcl(sourceP), curOrb(0), curCenter(0) + { + Psi1 = Phi; + int norb = Psi1->getOrbitalSetSize(); + val1.resize(norb); + grad1.resize(norb); + lap1.resize(norb); + } + + void changeOrbital(int centerIdx, int orbIdx) + { + curCenter = centerIdx; + curOrb = 
orbIdx; + } private: - /// Temporary storage for real wavefunction values - ValueVector val1; - GradVector grad1; - ValueVector lap1; + /// Temporary storage for real wavefunction values + ValueVector val1; + GradVector grad1; + ValueVector lap1; - /// target ParticleSet - ParticleSetT* targetPtcl; - /// source ParticleSet - ParticleSetT* sourcePtcl; + /// target ParticleSet + ParticleSetT* targetPtcl; + /// source ParticleSet + ParticleSetT* sourcePtcl; - /// Index of orbital - int curOrb; + /// Index of orbital + int curOrb; - /// Index of atomic center - int curCenter; + /// Index of atomic center + int curCenter; - SPOSetPtr Psi1; + SPOSetPtr Psi1; }; -template +template class CuspCorrectionConstructionT { public: - using RealType = typename OrbitalSetTraits::RealType; - using ValueType = typename OrbitalSetTraits::ValueType; - using ValueVector = typename OrbitalSetTraits::ValueVector; - using GradType = typename OrbitalSetTraits::GradType; - using GradVector = typename OrbitalSetTraits::GradVector; - - struct ValGradLap - { - ValueType val; - GradType grad; - ValueType lap; - }; - - /// Divide molecular orbital into atomic S-orbitals on this center (phi), - /// and everything else (eta). - static void - splitPhiEta(int center, const std::vector& corrCenter, - LCAOrbitalSetT& phi, LCAOrbitalSetT& eta); - - /// Remove S atomic orbitals from all molecular orbitals on all centers. 
- static void - removeSTypeOrbitals( - const std::vector& corrCenter, LCAOrbitalSetT& Phi); - - /// Compute the radial part of the corrected wavefunction - static void - computeRadialPhiBar(ParticleSetT* targetP, ParticleSetT* sourceP, - int curOrb_, int curCenter_, SPOSetT* Phi, Vector& xgrid, - Vector& rad_orb, const CuspCorrectionParametersT& data); - - /** Ideal local energy at one point + using RealType = typename OrbitalSetTraits::RealType; + using ValueType = typename OrbitalSetTraits::ValueType; + using ValueVector = typename OrbitalSetTraits::ValueVector; + using GradType = typename OrbitalSetTraits::GradType; + using GradVector = typename OrbitalSetTraits::GradVector; + + struct ValGradLap + { + ValueType val; + GradType grad; + ValueType lap; + }; + + /// Divide molecular orbital into atomic S-orbitals on this center (phi), + /// and everything else (eta). + static void splitPhiEta(int center, + const std::vector& corrCenter, + LCAOrbitalSetT& phi, + LCAOrbitalSetT& eta); + + /// Remove S atomic orbitals from all molecular orbitals on all centers. 
+ static void removeSTypeOrbitals(const std::vector& corrCenter, LCAOrbitalSetT& Phi); + + /// Compute the radial part of the corrected wavefunction + static void computeRadialPhiBar(ParticleSetT* targetP, + ParticleSetT* sourceP, + int curOrb_, + int curCenter_, + SPOSetT* Phi, + Vector& xgrid, + Vector& rad_orb, + const CuspCorrectionParametersT& data); + + /** Ideal local energy at one point * @param r input radial distance * @param Z nuclear charge * @param beta0 adjustable parameter to make energy continuous at Rc */ - static RealType - getOneIdealLocalEnergy(RealType r, RealType Z, RealType beta0); + static RealType getOneIdealLocalEnergy(RealType r, RealType Z, RealType beta0); - /** Ideal local energy at a vector of points + /** Ideal local energy at a vector of points * @param pos input vector of radial distances * @param Z nuclear charge * @param Rc cutoff radius where the correction meets the actual orbital @@ -155,11 +150,13 @@ class CuspCorrectionConstructionT * continuous at Rc * @param ELideal - output the ideal local energy at pos values */ - static void - getIdealLocalEnergy(const ValueVector& pos, RealType Z, RealType Rc, - RealType ELorigAtRc, ValueVector& ELideal); + static void getIdealLocalEnergy(const ValueVector& pos, + RealType Z, + RealType Rc, + RealType ELorigAtRc, + ValueVector& ELideal); - /** Evaluate various orbital quantities that enter as constraints on the + /** Evaluate various orbital quantities that enter as constraints on the * correction * @param valRc orbital value at Rc * @param gradRc orbital gradient at Rc @@ -171,33 +168,33 @@ class CuspCorrectionConstructionT * @param eta0 value of non-corrected pieces of the orbital at zero * @param X output */ - static void - evalX(RealType valRc, GradType gradRc, ValueType lapRc, RealType Rc, - RealType Z, RealType C, RealType valAtZero, RealType eta0, - TinyVector& X); - - /** Convert constraints to polynomial parameters + static void evalX(RealType valRc, + GradType gradRc, + 
ValueType lapRc, + RealType Rc, + RealType Z, + RealType C, + RealType valAtZero, + RealType eta0, + TinyVector& X); + + /** Convert constraints to polynomial parameters * @param X input from evalX * @param Rc cutoff radius * @param alpha output the polynomial parameters for the correction */ - static void - X2alpha(const TinyVector& X, RealType Rc, - TinyVector& alpha); + static void X2alpha(const TinyVector& X, RealType Rc, TinyVector& alpha); - /** Effective nuclear charge to keep effective local energy finite at zero + /** Effective nuclear charge to keep effective local energy finite at zero * @param Z nuclear charge * @param etaAtZero value of non-S orbitals at this center * @param phiBarAtZero value of corrected orbital at zero */ - static RealType - getZeff(RealType Z, RealType etaAtZero, RealType phiBarAtZero); + static RealType getZeff(RealType Z, RealType etaAtZero, RealType phiBarAtZero); - static RealType - phiBar(const CuspCorrectionT& cusp, RealType r, - OneMolecularOrbitalT& phiMO); + static RealType phiBar(const CuspCorrectionT& cusp, RealType r, OneMolecularOrbitalT& phiMO); - /** Compute effective local energy at vector of points + /** Compute effective local energy at vector of points * @param pos input vector of radial distances * @param Zeff effective charge from getZeff * @param Rc cutoff radius @@ -207,12 +204,15 @@ class CuspCorrectionConstructionT * @param phiMO uncorrected orbital (S-orbitals on this center only) * @param ELcurr output local energy at each distance in pos */ - static void - getCurrentLocalEnergy(const ValueVector& pos, RealType Zeff, RealType Rc, - RealType originalELatRc, CuspCorrectionT& cusp, - OneMolecularOrbitalT& phiMO, ValueVector& ELcurr); - - /** Local energy from uncorrected orbital + static void getCurrentLocalEnergy(const ValueVector& pos, + RealType Zeff, + RealType Rc, + RealType originalELatRc, + CuspCorrectionT& cusp, + OneMolecularOrbitalT& phiMO, + ValueVector& ELcurr); + + /** Local energy from 
uncorrected orbital * @param pos input vector of radial distances * @param Zeff nuclear charge * @param Rc cutoff radius @@ -223,19 +223,20 @@ class CuspCorrectionConstructionT * subsequent computations. The routine can be called with an empty vector * of positions to get just this value. */ - static RealType - getOriginalLocalEnergy(const ValueVector& pos, RealType Zeff, RealType Rc, - OneMolecularOrbitalT& phiMO, ValueVector& Elorig); + static RealType getOriginalLocalEnergy(const ValueVector& pos, + RealType Zeff, + RealType Rc, + OneMolecularOrbitalT& phiMO, + ValueVector& Elorig); - /** Sum of squares difference between the current and ideal local energies + /** Sum of squares difference between the current and ideal local energies * This is the objective function to be minimized. * @param Elcurr current local energy * @param Elideal ideal local energy */ - static RealType - getELchi2(const ValueVector& ELcurr, const ValueVector& ELideal); + static RealType getELchi2(const ValueVector& ELcurr, const ValueVector& ELideal); - /** Minimize chi2 with respect to phi at zero for a fixed Rc + /** Minimize chi2 with respect to phi at zero for a fixed Rc * @param cusp correction parameters * @param phiMO uncorrected orbital (S-orbitals on this center only) * @param Z nuclear charge @@ -246,13 +247,16 @@ class CuspCorrectionConstructionT * @param Elcurr storage for current local energy * @param Elideal storage for ideal local energy */ - static RealType - minimizeForPhiAtZero(CuspCorrectionT& cusp, - OneMolecularOrbitalT& phiMO, RealType Z, RealType eta0, - ValueVector& pos, ValueVector& ELcurr, ValueVector& ELideal, - RealType start_phi0); - - /** Minimize chi2 with respect to Rc and phi at zero. + static RealType minimizeForPhiAtZero(CuspCorrectionT& cusp, + OneMolecularOrbitalT& phiMO, + RealType Z, + RealType eta0, + ValueVector& pos, + ValueVector& ELcurr, + ValueVector& ELideal, + RealType start_phi0); + + /** Minimize chi2 with respect to Rc and phi at zero. 
* @param cusp correction parameters * @param phiMO uncorrected orbital (S-orbitals on this center only) * @param Z nuclear charge @@ -267,46 +271,56 @@ class CuspCorrectionConstructionT * * Output is parameter values in cusp.cparam */ - static void - minimizeForRc(CuspCorrectionT& cusp, OneMolecularOrbitalT& phiMO, - RealType Z, RealType Rc_init, RealType Rc_max, RealType eta0, - ValueVector& pos, ValueVector& ELcurr, ValueVector& ELideal); - - // Modifies orbital set lcwc - static void - applyCuspCorrection(const Matrix>& info, - ParticleSetT& targetPtcl, ParticleSetT& sourcePtcl, - LCAOrbitalSetT& lcao, SoaCuspCorrectionT& cusp, - const std::string& id); - - static void - generateCuspInfo(Matrix>& info, - const ParticleSetT& targetPtcl, const ParticleSetT& sourcePtcl, - const LCAOrbitalSetT& lcao, const std::string& id, - Communicate& Comm); - - /// Broadcast cusp correction parameters - static void - broadcastCuspInfo( - CuspCorrectionParametersT& param, Communicate& Comm, int root); - - /// Read cusp correction parameters from XML file - static bool - readCuspInfo(const std::string& cuspInfoFile, const std::string& objectName, - int OrbitalSetSize, Matrix>& info); - - /// save cusp correction info to a file. 
- static void - saveCusp(const std::string& filename, - const Matrix>& info, - const std::string& id); + static void minimizeForRc(CuspCorrectionT& cusp, + OneMolecularOrbitalT& phiMO, + RealType Z, + RealType Rc_init, + RealType Rc_max, + RealType eta0, + ValueVector& pos, + ValueVector& ELcurr, + ValueVector& ELideal); + + // Modifies orbital set lcwc + static void applyCuspCorrection(const Matrix>& info, + ParticleSetT& targetPtcl, + ParticleSetT& sourcePtcl, + LCAOrbitalSetT& lcao, + SoaCuspCorrectionT& cusp, + const std::string& id); + + static void generateCuspInfo(Matrix>& info, + const ParticleSetT& targetPtcl, + const ParticleSetT& sourcePtcl, + const LCAOrbitalSetT& lcao, + const std::string& id, + Communicate& Comm); + + /// Broadcast cusp correction parameters + static void broadcastCuspInfo(CuspCorrectionParametersT& param, Communicate& Comm, int root); + + /// Read cusp correction parameters from XML file + static bool readCuspInfo(const std::string& cuspInfoFile, + const std::string& objectName, + int OrbitalSetSize, + Matrix>& info); + + /// save cusp correction info to a file. 
+ static void saveCusp(const std::string& filename, + const Matrix>& info, + const std::string& id); private: - static RealType - evaluateForPhi0Body(RealType phi0, ValueVector& pos, ValueVector& ELcurr, - ValueVector& ELideal, CuspCorrectionT& cusp, - OneMolecularOrbitalT& phiMO, ValGradLap phiAtRc, RealType etaAtZero, - RealType ELorigAtRc, RealType Z); + static RealType evaluateForPhi0Body(RealType phi0, + ValueVector& pos, + ValueVector& ELcurr, + ValueVector& ELideal, + CuspCorrectionT& cusp, + OneMolecularOrbitalT& phiMO, + ValGradLap phiAtRc, + RealType etaAtZero, + RealType ELorigAtRc, + RealType Z); }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/CuspCorrectionT.h b/src/QMCWaveFunctions/LCAO/CuspCorrectionT.h index 18fa1ed5315..cb8eab7130c 100644 --- a/src/QMCWaveFunctions/LCAO/CuspCorrectionT.h +++ b/src/QMCWaveFunctions/LCAO/CuspCorrectionT.h @@ -40,74 +40,61 @@ namespace qmcplusplus * parameters in those equations. */ -template +template struct CuspCorrectionParametersT { - using ValueType = typename OrbitalSetTraits::ValueType; - using RealType = typename OrbitalSetTraits::RealType; + using ValueType = typename OrbitalSetTraits::ValueType; + using RealType = typename OrbitalSetTraits::RealType; - /// The cutoff radius - RealType Rc; + /// The cutoff radius + RealType Rc; - /// A shift to keep correction to a single sign - RealType C; + /// A shift to keep correction to a single sign + RealType C; - /// The sign of the wavefunction at the nucleus - RealType sg; + /// The sign of the wavefunction at the nucleus + RealType sg; - /// The coefficients of the polynomial \f$p(r)\f$ in Eq 8 - TinyVector alpha; + /// The coefficients of the polynomial \f$p(r)\f$ in Eq 8 + TinyVector alpha; - /// Flag to indicate the correction should be recalculated - int redo; + /// Flag to indicate the correction should be recalculated + int redo; - CuspCorrectionParametersT() : Rc(0.0), C(0.0), sg(1.0), alpha(0.0), redo(0) - { - } + 
CuspCorrectionParametersT() : Rc(0.0), C(0.0), sg(1.0), alpha(0.0), redo(0) {} }; /// Formulas for applying the cusp correction -template +template class CuspCorrectionT { - using RealType = typename OrbitalSetTraits::RealType; + using RealType = typename OrbitalSetTraits::RealType; public: - inline RealType - Rr(RealType r) const - { - return cparam.sg * std::exp(pr(r)); - } - - inline RealType - pr(RealType r) const - { - auto& alpha = cparam.alpha; - return alpha[0] + alpha[1] * r + alpha[2] * r * r + - alpha[3] * r * r * r + alpha[4] * r * r * r * r; - } - - inline RealType - dpr(RealType r) const - { - auto& alpha = cparam.alpha; - return alpha[1] + 2.0 * alpha[2] * r + 3.0 * alpha[3] * r * r + - 4.0 * alpha[4] * r * r * r; - } - - inline RealType - d2pr(RealType r) const - { - auto& alpha = cparam.alpha; - return 2.0 * alpha[2] + 6.0 * alpha[3] * r + 12.0 * alpha[4] * r * r; - } - - CuspCorrectionT(const CuspCorrectionParametersT& param) : cparam(param) - { - } - - CuspCorrectionParametersT cparam; + inline RealType Rr(RealType r) const { return cparam.sg * std::exp(pr(r)); } + + inline RealType pr(RealType r) const + { + auto& alpha = cparam.alpha; + return alpha[0] + alpha[1] * r + alpha[2] * r * r + alpha[3] * r * r * r + alpha[4] * r * r * r * r; + } + + inline RealType dpr(RealType r) const + { + auto& alpha = cparam.alpha; + return alpha[1] + 2.0 * alpha[2] * r + 3.0 * alpha[3] * r * r + 4.0 * alpha[4] * r * r * r; + } + + inline RealType d2pr(RealType r) const + { + auto& alpha = cparam.alpha; + return 2.0 * alpha[2] + 6.0 * alpha[3] * r + 12.0 * alpha[4] * r * r; + } + + CuspCorrectionT(const CuspCorrectionParametersT& param) : cparam(param) {} + + CuspCorrectionParametersT cparam; }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp index 14de5a549cf..f1a8565d10c 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp +++ 
b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp @@ -21,190 +21,193 @@ namespace qmcplusplus { -template +template LCAOSpinorBuilderT::LCAOSpinorBuilderT(ParticleSetT& els, - ParticleSetT& ions, Communicate* comm, xmlNodePtr cur) : - LCAOrbitalBuilderT(els, ions, comm, cur) + ParticleSetT& ions, + Communicate* comm, + xmlNodePtr cur) + : LCAOrbitalBuilderT(els, ions, comm, cur) { - this->ClassName = "LCAOSpinorBuilder"; + this->ClassName = "LCAOSpinorBuilder"; - if (this->h5_path == "") - this->myComm->barrier_and_abort( - "LCAOSpinorBuilder only works with href"); + if (this->h5_path == "") + this->myComm->barrier_and_abort("LCAOSpinorBuilder only works with href"); } -template -std::unique_ptr> -LCAOSpinorBuilderT::createSPOSetFromXML(xmlNodePtr cur) +template +std::unique_ptr> LCAOSpinorBuilderT::createSPOSetFromXML(xmlNodePtr cur) { - ReportEngine PRE(this->ClassName, "createSPO(xmlNodePtr)"); - std::string spo_name(""), optimize("no"); - std::string basisset_name("LCAOBSet"); - OhmmsAttributeSet spoAttrib; - spoAttrib.add(spo_name, "name"); - spoAttrib.add(optimize, "optimize"); - spoAttrib.add(basisset_name, "basisset"); - spoAttrib.put(cur); - - BasisSet_t* myBasisSet = nullptr; - if (this->basisset_map_.find(basisset_name) == this->basisset_map_.end()) - this->myComm->barrier_and_abort( - "basisset \"" + basisset_name + "\" cannot be found\n"); - else - myBasisSet = this->basisset_map_[basisset_name].get(); - - if (optimize == "yes") - app_log() << " SPOSet " << spo_name << " is optimizable\n"; - - auto upspo = std::make_unique>( - spo_name + "_up", std::unique_ptr(myBasisSet->makeClone())); - auto dnspo = std::make_unique>( - spo_name + "_dn", std::unique_ptr(myBasisSet->makeClone())); - - loadMO(*upspo, *dnspo, cur); - - // create spinor and register up/dn - auto spinor_set = std::make_unique>(spo_name); - spinor_set->set_spos(std::move(upspo), std::move(dnspo)); - return spinor_set; + ReportEngine PRE(this->ClassName, "createSPO(xmlNodePtr)"); + 
std::string spo_name(""), optimize("no"); + std::string basisset_name("LCAOBSet"); + OhmmsAttributeSet spoAttrib; + spoAttrib.add(spo_name, "name"); + spoAttrib.add(optimize, "optimize"); + spoAttrib.add(basisset_name, "basisset"); + spoAttrib.put(cur); + + BasisSet_t* myBasisSet = nullptr; + if (this->basisset_map_.find(basisset_name) == this->basisset_map_.end()) + this->myComm->barrier_and_abort("basisset \"" + basisset_name + "\" cannot be found\n"); + else + myBasisSet = this->basisset_map_[basisset_name].get(); + + if (optimize == "yes") + app_log() << " SPOSet " << spo_name << " is optimizable\n"; + + auto upspo = + std::make_unique>(spo_name + "_up", std::unique_ptr(myBasisSet->makeClone())); + auto dnspo = + std::make_unique>(spo_name + "_dn", std::unique_ptr(myBasisSet->makeClone())); + + loadMO(*upspo, *dnspo, cur); + + // create spinor and register up/dn + auto spinor_set = std::make_unique>(spo_name); + spinor_set->set_spos(std::move(upspo), std::move(dnspo)); + return spinor_set; } -template -bool -LCAOSpinorBuilderT::loadMO( - LCAOrbitalSetT& up, LCAOrbitalSetT& dn, xmlNodePtr cur) +template +bool LCAOSpinorBuilderT::loadMO(LCAOrbitalSetT& up, LCAOrbitalSetT& dn, xmlNodePtr cur) { - bool PBC = false; - int norb = up.getBasisSetSize(); - std::string debugc("no"); - OhmmsAttributeSet aAttrib; - aAttrib.add(norb, "size"); - aAttrib.add(debugc, "debug"); - aAttrib.put(cur); - - up.setOrbitalSetSize(norb); - dn.setOrbitalSetSize(norb); - - xmlNodePtr occ_ptr = nullptr; - cur = cur->xmlChildrenNode; - while (cur != nullptr) { - std::string cname((const char*)(cur->name)); - if (cname == "occupation") { - occ_ptr = cur; - } - cur = cur->next; + bool PBC = false; + int norb = up.getBasisSetSize(); + std::string debugc("no"); + OhmmsAttributeSet aAttrib; + aAttrib.add(norb, "size"); + aAttrib.add(debugc, "debug"); + aAttrib.put(cur); + + up.setOrbitalSetSize(norb); + dn.setOrbitalSetSize(norb); + + xmlNodePtr occ_ptr = nullptr; + cur = cur->xmlChildrenNode; + 
while (cur != nullptr) + { + std::string cname((const char*)(cur->name)); + if (cname == "occupation") + { + occ_ptr = cur; } - - hdf_archive hin(this->myComm); - if (this->myComm->rank() == 0) { - if (!hin.open(this->h5_path, H5F_ACC_RDONLY)) - this->myComm->barrier_and_abort("LCAOSpinorBuilder::loadMO missing " - "or incorrect path to H5 file."); - hin.push("PBC"); - PBC = false; - hin.read(PBC, "PBC"); - hin.close(); - } - this->myComm->bcast(PBC); - if (PBC) - this->myComm->barrier_and_abort( - "LCAOSpinorBuilder::loadMO lcao spinors not implemented in PBC"); - - bool success = putFromH5(up, dn, occ_ptr); - - if (debugc == "yes") { - app_log() << "UP: Single-particle orbital coefficients dims=" - << up.C->rows() << " x " << up.C->cols() << std::endl; - app_log() << *up.C << std::endl; - app_log() << "DN: Single-particle orbital coefficients dims=" - << dn.C->rows() << " x " << dn.C->cols() << std::endl; - app_log() << *dn.C << std::endl; - } - return success; + cur = cur->next; + } + + hdf_archive hin(this->myComm); + if (this->myComm->rank() == 0) + { + if (!hin.open(this->h5_path, H5F_ACC_RDONLY)) + this->myComm->barrier_and_abort("LCAOSpinorBuilder::loadMO missing " + "or incorrect path to H5 file."); + hin.push("PBC"); + PBC = false; + hin.read(PBC, "PBC"); + hin.close(); + } + this->myComm->bcast(PBC); + if (PBC) + this->myComm->barrier_and_abort("LCAOSpinorBuilder::loadMO lcao spinors not implemented in PBC"); + + bool success = putFromH5(up, dn, occ_ptr); + + if (debugc == "yes") + { + app_log() << "UP: Single-particle orbital coefficients dims=" << up.C->rows() << " x " << up.C->cols() + << std::endl; + app_log() << *up.C << std::endl; + app_log() << "DN: Single-particle orbital coefficients dims=" << dn.C->rows() << " x " << dn.C->cols() + << std::endl; + app_log() << *dn.C << std::endl; + } + return success; } -template -bool -LCAOSpinorBuilderT::putFromH5( - LCAOrbitalSetT& up, LCAOrbitalSetT& dn, xmlNodePtr occ_ptr) +template +bool 
LCAOSpinorBuilderT::putFromH5(LCAOrbitalSetT& up, LCAOrbitalSetT& dn, xmlNodePtr occ_ptr) { - if (up.getBasisSetSize() == 0 || dn.getBasisSetSize() == 0) { - this->myComm->barrier_and_abort( - "LCASpinorBuilder::loadMO detected ZERO BasisSetSize"); - return false; + if (up.getBasisSetSize() == 0 || dn.getBasisSetSize() == 0) + { + this->myComm->barrier_and_abort("LCASpinorBuilder::loadMO detected ZERO BasisSetSize"); + return false; + } + + bool success = true; + hdf_archive hin(this->myComm); + if (this->myComm->rank() == 0) + { + if (!hin.open(this->h5_path, H5F_ACC_RDONLY)) + this->myComm->barrier_and_abort("LCAOSpinorBuilder::putFromH5 missing or " + "incorrect path to H5 file"); + + Matrix upReal; + Matrix upImag; + std::string setname = "/Super_Twist/eigenset_0"; + this->readRealMatrixFromH5(hin, setname, upReal); + setname += "_imag"; + this->readRealMatrixFromH5(hin, setname, upImag); + + + assert(upReal.rows() == upImag.rows()); + assert(upReal.cols() == upImag.cols()); + + Matrix upTemp(upReal.rows(), upReal.cols()); + for (int i = 0; i < upTemp.rows(); i++) + { + for (int j = 0; j < upTemp.cols(); j++) + { + upTemp[i][j] = ValueType{upReal[i][j], upImag[i][j]}; + } } - bool success = true; - hdf_archive hin(this->myComm); - if (this->myComm->rank() == 0) { - if (!hin.open(this->h5_path, H5F_ACC_RDONLY)) - this->myComm->barrier_and_abort("LCAOSpinorBuilder::putFromH5 missing or " - "incorrect path to H5 file"); + Matrix dnReal; + Matrix dnImag; + setname = "/Super_Twist/eigenset_1"; + this->readRealMatrixFromH5(hin, setname, dnReal); + setname += "_imag"; + this->readRealMatrixFromH5(hin, setname, dnImag); - Matrix upReal; - Matrix upImag; - std::string setname = "/Super_Twist/eigenset_0"; - this->readRealMatrixFromH5(hin, setname, upReal); - setname += "_imag"; - this->readRealMatrixFromH5(hin, setname, upImag); + assert(dnReal.rows() == dnImag.rows()); + assert(dnReal.cols() == dnImag.cols()); + Matrix dnTemp(dnReal.rows(), dnReal.cols()); + for (int i 
= 0; i < dnTemp.rows(); i++) + { + for (int j = 0; j < dnTemp.cols(); j++) + { + dnTemp[i][j] = ValueType(dnReal[i][j], dnImag[i][j]); + } + } + + assert(upReal.rows() == dnReal.rows()); + assert(upReal.cols() == dnReal.cols()); - assert(upReal.rows() == upImag.rows()); - assert(upReal.cols() == upImag.cols()); + this->Occ.resize(upReal.rows()); + success = this->putOccupation(up, occ_ptr); - Matrix upTemp(upReal.rows(), upReal.cols()); - for (int i = 0; i < upTemp.rows(); i++) + int norbs = up.getOrbitalSetSize(); + + int n = 0, i = 0; + while (i < norbs) + { + if (this->Occ[n] > 0.0) { - for (int j = 0; j < upTemp.cols(); j++) - { - upTemp[i][j] = ValueType{upReal[i][j], upImag[i][j]}; - } - } - - Matrix dnReal; - Matrix dnImag; - setname = "/Super_Twist/eigenset_1"; - this->readRealMatrixFromH5(hin, setname, dnReal); - setname += "_imag"; - this->readRealMatrixFromH5(hin, setname, dnImag); - - assert(dnReal.rows() == dnImag.rows()); - assert(dnReal.cols() == dnImag.cols()); - - Matrix dnTemp(dnReal.rows(), dnReal.cols()); - for (int i = 0; i < dnTemp.rows(); i++) { - for (int j = 0; j < dnTemp.cols(); j++) { - dnTemp[i][j] = ValueType(dnReal[i][j], dnImag[i][j]); - } - } - - assert(upReal.rows() == dnReal.rows()); - assert(upReal.cols() == dnReal.cols()); - - this->Occ.resize(upReal.rows()); - success = this->putOccupation(up, occ_ptr); - - int norbs = up.getOrbitalSetSize(); - - int n = 0, i = 0; - while (i < norbs) { - if (this->Occ[n] > 0.0) { - std::copy(upTemp[n], upTemp[n + 1], (*up.C)[i]); - std::copy(dnTemp[n], dnTemp[n + 1], (*dn.C)[i]); - i++; - } - n++; - } - - hin.close(); + std::copy(upTemp[n], upTemp[n + 1], (*up.C)[i]); + std::copy(dnTemp[n], dnTemp[n + 1], (*dn.C)[i]); + i++; + } + n++; } + hin.close(); + } + #ifdef HAVE_MPI - this->myComm->comm.broadcast_n(up.C->data(), up.C->size()); - this->myComm->comm.broadcast_n(dn.C->data(), dn.C->size()); + this->myComm->comm.broadcast_n(up.C->data(), up.C->size()); + 
this->myComm->comm.broadcast_n(dn.C->data(), dn.C->size()); #endif - return success; + return success; } #ifdef QMC_COMPLEX diff --git a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h index e23014f44dc..9d17cbc8d6a 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h +++ b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h @@ -24,42 +24,39 @@ namespace qmcplusplus * read up and down channel from HDF5 and construct SpinorSet * */ -template +template class LCAOSpinorBuilderT : public LCAOrbitalBuilderT { public: - using BasisSet_t = typename LCAOrbitalBuilderT::BasisSet_t; - using RealType = typename LCAOrbitalBuilderT::RealType; - using ValueType = typename LCAOrbitalBuilderT::ValueType; + using BasisSet_t = typename LCAOrbitalBuilderT::BasisSet_t; + using RealType = typename LCAOrbitalBuilderT::RealType; + using ValueType = typename LCAOrbitalBuilderT::ValueType; - /** constructor + /** constructor * \param els reference to the electrons * \param ions reference to the ions * * Derives from LCAOrbitalBuilder, but will require an h5_path to be set */ - LCAOSpinorBuilderT(ParticleSetT& els, ParticleSetT& ions, - Communicate* comm, xmlNodePtr cur); + LCAOSpinorBuilderT(ParticleSetT& els, ParticleSetT& ions, Communicate* comm, xmlNodePtr cur); - /** creates and returns SpinorSet + /** creates and returns SpinorSet * * Creates an up and down LCAOrbitalSet * calls LCAOSpinorBuilder::loadMO to build up and down from the H5 file * registers up and down into a SpinorSet and returns */ - std::unique_ptr> - createSPOSetFromXML(xmlNodePtr cur) override; + std::unique_ptr> createSPOSetFromXML(xmlNodePtr cur) override; private: - /** load the up and down MO sets + /** load the up and down MO sets * * checks to make sure not PBC and initialize the Occ vector. 
* call putFromH5 to parse the up and down MO coefficients */ - bool - loadMO(LCAOrbitalSetT& up, LCAOrbitalSetT& dn, xmlNodePtr cur); + bool loadMO(LCAOrbitalSetT& up, LCAOrbitalSetT& dn, xmlNodePtr cur); - /** parse h5 file for spinor info + /** parse h5 file for spinor info * * assumes the h5 file has KPTS_0/eigenset_0(_imag) for the real/imag part * of up component of spinor assumes the h5 file as KPTS_0/eigenset_1(_imag) @@ -67,8 +64,7 @@ class LCAOSpinorBuilderT : public LCAOrbitalBuilderT * coefficient matricies and broadcast after this, we have up/dn * LCAOrbitalSet that can be registered to the SpinorSet */ - bool - putFromH5(LCAOrbitalSetT& up, LCAOrbitalSetT& dn, xmlNodePtr); + bool putFromH5(LCAOrbitalSetT& up, LCAOrbitalSetT& dn, xmlNodePtr); }; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp index 5abad9e9500..9c694c0c26e 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp @@ -48,1139 +48,1137 @@ namespace qmcplusplus * SH {0=cartesian, 1=spherical} * If too confusing, inroduce enumeration. 
*/ -template +template struct ao_traits -{ -}; +{}; /** specialization for numerical-cartesian AO */ -template +template struct ao_traits { - using radial_type = MultiQuinticSpline1D; - using angular_type = SoaCartesianTensor; - using ao_type = SoaAtomicBasisSetT; - using basis_type = SoaLocalizedBasisSetT; + using radial_type = MultiQuinticSpline1D; + using angular_type = SoaCartesianTensor; + using ao_type = SoaAtomicBasisSetT; + using basis_type = SoaLocalizedBasisSetT; }; /** specialization for numerical-spherical AO */ -template +template struct ao_traits { - using radial_type = MultiQuinticSpline1D; - using angular_type = SoaSphericalTensor; - using ao_type = SoaAtomicBasisSetT; - using basis_type = SoaLocalizedBasisSetT; + using radial_type = MultiQuinticSpline1D; + using angular_type = SoaSphericalTensor; + using ao_type = SoaAtomicBasisSetT; + using basis_type = SoaLocalizedBasisSetT; }; /** specialization for GTO-cartesian AO */ -template +template struct ao_traits { - using radial_type = MultiFunctorAdapter>; - using angular_type = SoaCartesianTensor; - using ao_type = SoaAtomicBasisSetT; - using basis_type = SoaLocalizedBasisSetT; + using radial_type = MultiFunctorAdapter>; + using angular_type = SoaCartesianTensor; + using ao_type = SoaAtomicBasisSetT; + using basis_type = SoaLocalizedBasisSetT; }; /** specialization for GTO-cartesian AO */ -template +template struct ao_traits { - using radial_type = MultiFunctorAdapter>; - using angular_type = SoaSphericalTensor; - using ao_type = SoaAtomicBasisSetT; - using basis_type = SoaLocalizedBasisSetT; + using radial_type = MultiFunctorAdapter>; + using angular_type = SoaSphericalTensor; + using ao_type = SoaAtomicBasisSetT; + using basis_type = SoaLocalizedBasisSetT; }; /** specialization for STO-spherical AO */ -template +template struct ao_traits { - using radial_type = MultiFunctorAdapter>; - using angular_type = SoaSphericalTensor; - using ao_type = SoaAtomicBasisSetT; - using basis_type = 
SoaLocalizedBasisSetT; + using radial_type = MultiFunctorAdapter>; + using angular_type = SoaSphericalTensor; + using ao_type = SoaAtomicBasisSetT; + using basis_type = SoaLocalizedBasisSetT; }; -inline bool -is_same(const xmlChar* a, const char* b) -{ - return !strcmp((const char*)a, b); -} +inline bool is_same(const xmlChar* a, const char* b) { return !strcmp((const char*)a, b); } -template +template LCAOrbitalBuilderT::LCAOrbitalBuilderT(ParticleSetT& els, - ParticleSetT& ions, Communicate* comm, xmlNodePtr cur) : - SPOSetBuilderT("LCAO", comm), - targetPtcl(els), - sourcePtcl(ions), - h5_path(""), - SuperTwist(0.0), - doCuspCorrection(false) + ParticleSetT& ions, + Communicate* comm, + xmlNodePtr cur) + : SPOSetBuilderT("LCAO", comm), + targetPtcl(els), + sourcePtcl(ions), + h5_path(""), + SuperTwist(0.0), + doCuspCorrection(false) { - this->ClassName = "LCAOrbitalBuilder"; - ReportEngine PRE(this->ClassName, "createBasisSet"); - - std::string cuspC("no"); // cusp correction - OhmmsAttributeSet aAttrib; - aAttrib.add(cuspC, "cuspCorrection"); - aAttrib.add(h5_path, "href"); - aAttrib.add(PBCImages, "PBCimages"); - aAttrib.add(SuperTwist, "twist"); - aAttrib.put(cur); - - if (cuspC == "yes") - doCuspCorrection = true; - // Evaluate the Phase factor. Equals 1 for OBC. - EvalPeriodicImagePhaseFactors(SuperTwist, PeriodicImagePhaseFactors); - - // no need to wait but load the basis set - processChildren( - cur, [&](const std::string& cname, const xmlNodePtr element) { - if (cname == "basisset") { - std::string basisset_name_input( - getXMLAttributeValue(element, "name")); - std::string basisset_name(basisset_name_input.empty() ? - "LCAOBSet" : - basisset_name_input); - if (basisset_map_.find(basisset_name) != basisset_map_.end()) { - std::ostringstream err_msg; - err_msg << "Cannot create basisset " << basisset_name - << " which already exists." 
<< std::endl; - throw std::runtime_error(err_msg.str()); - } - if (h5_path != "") - basisset_map_[basisset_name] = loadBasisSetFromH5(element); - else - basisset_map_[basisset_name] = - loadBasisSetFromXML(element, cur); - } - }); - - // deprecated h5 basis set handling when basisset element is missing - if (basisset_map_.size() == 0 && h5_path != "") { - app_warning() - << "!!!!!!! Deprecated input style: missing basisset element. " - << "LCAO needs an explicit basisset XML element. " - << "Fallback on loading an implicit one." << std::endl; - basisset_map_["LCAOBSet"] = loadBasisSetFromH5(cur); + this->ClassName = "LCAOrbitalBuilder"; + ReportEngine PRE(this->ClassName, "createBasisSet"); + + std::string cuspC("no"); // cusp correction + OhmmsAttributeSet aAttrib; + aAttrib.add(cuspC, "cuspCorrection"); + aAttrib.add(h5_path, "href"); + aAttrib.add(PBCImages, "PBCimages"); + aAttrib.add(SuperTwist, "twist"); + aAttrib.put(cur); + + if (cuspC == "yes") + doCuspCorrection = true; + // Evaluate the Phase factor. Equals 1 for OBC. + EvalPeriodicImagePhaseFactors(SuperTwist, PeriodicImagePhaseFactors); + + // no need to wait but load the basis set + processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { + if (cname == "basisset") + { + std::string basisset_name_input(getXMLAttributeValue(element, "name")); + std::string basisset_name(basisset_name_input.empty() ? "LCAOBSet" : basisset_name_input); + if (basisset_map_.find(basisset_name) != basisset_map_.end()) + { + std::ostringstream err_msg; + err_msg << "Cannot create basisset " << basisset_name << " which already exists." 
<< std::endl; + throw std::runtime_error(err_msg.str()); + } + if (h5_path != "") + basisset_map_[basisset_name] = loadBasisSetFromH5(element); + else + basisset_map_[basisset_name] = loadBasisSetFromXML(element, cur); } - - if (basisset_map_.size() == 0) - throw std::runtime_error("No basisset found in the XML input!"); + }); + + // deprecated h5 basis set handling when basisset element is missing + if (basisset_map_.size() == 0 && h5_path != "") + { + app_warning() << "!!!!!!! Deprecated input style: missing basisset element. " + << "LCAO needs an explicit basisset XML element. " + << "Fallback on loading an implicit one." << std::endl; + basisset_map_["LCAOBSet"] = loadBasisSetFromH5(cur); + } + + if (basisset_map_.size() == 0) + throw std::runtime_error("No basisset found in the XML input!"); } -template +template LCAOrbitalBuilderT::~LCAOrbitalBuilderT() { - // properly cleanup + // properly cleanup } -template -int -LCAOrbitalBuilderT::determineRadialOrbType(xmlNodePtr cur) const +template +int LCAOrbitalBuilderT::determineRadialOrbType(xmlNodePtr cur) const { - std::string keyOpt; - std::string transformOpt; - OhmmsAttributeSet aAttrib; - aAttrib.add(keyOpt, "keyword"); - aAttrib.add(keyOpt, "key"); - aAttrib.add(transformOpt, "transform"); - aAttrib.put(cur); - - int radialOrbType = -1; - if (transformOpt == "yes" || keyOpt == "NMO") - radialOrbType = 0; - else { - if (keyOpt == "GTO") - radialOrbType = 1; - if (keyOpt == "STO") - radialOrbType = 2; - } - return radialOrbType; + std::string keyOpt; + std::string transformOpt; + OhmmsAttributeSet aAttrib; + aAttrib.add(keyOpt, "keyword"); + aAttrib.add(keyOpt, "key"); + aAttrib.add(transformOpt, "transform"); + aAttrib.put(cur); + + int radialOrbType = -1; + if (transformOpt == "yes" || keyOpt == "NMO") + radialOrbType = 0; + else + { + if (keyOpt == "GTO") + radialOrbType = 1; + if (keyOpt == "STO") + radialOrbType = 2; + } + return radialOrbType; } -template -std::unique_ptr::BasisSet_t> 
-LCAOrbitalBuilderT::loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent) +template +std::unique_ptr::BasisSet_t> LCAOrbitalBuilderT::loadBasisSetFromXML( + xmlNodePtr cur, + xmlNodePtr parent) { - ReportEngine PRE(this->ClassName, "loadBasisSetFromXML(xmlNodePtr)"); - int ylm = -1; + ReportEngine PRE(this->ClassName, "loadBasisSetFromXML(xmlNodePtr)"); + int ylm = -1; + { + xmlNodePtr cur1 = cur->xmlChildrenNode; + while (cur1 != NULL && ylm < 0) { - xmlNodePtr cur1 = cur->xmlChildrenNode; - while (cur1 != NULL && ylm < 0) { - if (is_same(cur1->name, "atomicBasisSet")) { - std::string sph; - OhmmsAttributeSet att; - att.add(sph, "angular"); - att.put(cur1); - ylm = (sph == "cartesian") ? 0 : 1; - } - cur1 = cur1->next; - } - } - - if (ylm < 0) - PRE.error("Missing angular attribute of atomicBasisSet.", true); - - int radialOrbType = determineRadialOrbType(cur); - if (radialOrbType < 0) { - app_warning() << "Radial orbital type cannot be determined based on " - "the attributes of basisset line. " - << "Trying the parent element." << std::endl; - radialOrbType = determineRadialOrbType(parent); - } - - if (radialOrbType < 0) - PRE.error("Unknown radial function for LCAO orbitals. 
Specify " - "keyword=\"NMO/GTO/STO\" .", - true); - - BasisSet_t* myBasisSet = nullptr; - /** process atomicBasisSet per ion species */ - switch (radialOrbType) { - case (0): // numerical - app_log() << " LCAO: SoaAtomicBasisSetT" - << std::endl; - if (ylm) - myBasisSet = createBasisSet<0, 1>(cur); - else - myBasisSet = createBasisSet<0, 0>(cur); - break; - case (1): // gto - app_log() << " LCAO: SoaAtomicBasisSetT" - << std::endl; - if (ylm) - myBasisSet = createBasisSet<1, 1>(cur); - else - myBasisSet = createBasisSet<1, 0>(cur); - break; - case (2): // sto - app_log() << " LCAO: SoaAtomicBasisSetT" - << std::endl; - myBasisSet = createBasisSet<2, 1>(cur); - break; - default: - PRE.error("Cannot construct SoaAtomicBasisSetT.", true); - break; + if (is_same(cur1->name, "atomicBasisSet")) + { + std::string sph; + OhmmsAttributeSet att; + att.add(sph, "angular"); + att.put(cur1); + ylm = (sph == "cartesian") ? 0 : 1; + } + cur1 = cur1->next; } - - return std::unique_ptr(myBasisSet); + } + + if (ylm < 0) + PRE.error("Missing angular attribute of atomicBasisSet.", true); + + int radialOrbType = determineRadialOrbType(cur); + if (radialOrbType < 0) + { + app_warning() << "Radial orbital type cannot be determined based on " + "the attributes of basisset line. " + << "Trying the parent element." << std::endl; + radialOrbType = determineRadialOrbType(parent); + } + + if (radialOrbType < 0) + PRE.error("Unknown radial function for LCAO orbitals. 
Specify " + "keyword=\"NMO/GTO/STO\" .", + true); + + BasisSet_t* myBasisSet = nullptr; + /** process atomicBasisSet per ion species */ + switch (radialOrbType) + { + case (0): // numerical + app_log() << " LCAO: SoaAtomicBasisSetT" << std::endl; + if (ylm) + myBasisSet = createBasisSet<0, 1>(cur); + else + myBasisSet = createBasisSet<0, 0>(cur); + break; + case (1): // gto + app_log() << " LCAO: SoaAtomicBasisSetT" << std::endl; + if (ylm) + myBasisSet = createBasisSet<1, 1>(cur); + else + myBasisSet = createBasisSet<1, 0>(cur); + break; + case (2): // sto + app_log() << " LCAO: SoaAtomicBasisSetT" << std::endl; + myBasisSet = createBasisSet<2, 1>(cur); + break; + default: + PRE.error("Cannot construct SoaAtomicBasisSetT.", true); + break; + } + + return std::unique_ptr(myBasisSet); } -template -std::unique_ptr::BasisSet_t> -LCAOrbitalBuilderT::loadBasisSetFromH5(xmlNodePtr parent) +template +std::unique_ptr::BasisSet_t> LCAOrbitalBuilderT::loadBasisSetFromH5(xmlNodePtr parent) { - ReportEngine PRE(this->ClassName, "loadBasisSetFromH5()"); + ReportEngine PRE(this->ClassName, "loadBasisSetFromH5()"); - hdf_archive hin(this->myComm); - int ylm = -1; - if (this->myComm->rank() == 0) { - if (!hin.open(h5_path, H5F_ACC_RDONLY)) - PRE.error("Could not open H5 file", true); + hdf_archive hin(this->myComm); + int ylm = -1; + if (this->myComm->rank() == 0) + { + if (!hin.open(h5_path, H5F_ACC_RDONLY)) + PRE.error("Could not open H5 file", true); - hin.push("basisset", false); + hin.push("basisset", false); - std::string sph; - std::string ElemID0 = "atomicBasisSet0"; + std::string sph; + std::string ElemID0 = "atomicBasisSet0"; - hin.push(ElemID0.c_str(), false); + hin.push(ElemID0.c_str(), false); - if (!hin.readEntry(sph, "angular")) - PRE.error("Could not find name of basisset group in H5; Probably " - "Corrupt H5 file", + if (!hin.readEntry(sph, "angular")) + PRE.error("Could not find name of basisset group in H5; Probably " + "Corrupt H5 file", true); - ylm = (sph == 
"cartesian") ? 0 : 1; - hin.close(); - } - - this->myComm->bcast(ylm); - if (ylm < 0) - PRE.error("Missing angular attribute of atomicBasisSet.", true); - - int radialOrbType = determineRadialOrbType(parent); - if (radialOrbType < 0) - PRE.error("Unknown radial function for LCAO orbitals. Specify " - "keyword=\"NMO/GTO/STO\" .", - true); - - BasisSet_t* myBasisSet = nullptr; - /** process atomicBasisSet per ion species */ - switch (radialOrbType) { - case (0): // numerical - app_log() << " LCAO: SoaAtomicBasisSetT" - << std::endl; - if (ylm) - myBasisSet = createBasisSetH5<0, 1>(); - else - myBasisSet = createBasisSetH5<0, 0>(); - break; - case (1): // gto - app_log() << " LCAO: SoaAtomicBasisSetT" - << std::endl; - if (ylm) - myBasisSet = createBasisSetH5<1, 1>(); - else - myBasisSet = createBasisSetH5<1, 0>(); - break; - case (2): // sto - app_log() << " LCAO: SoaAtomicBasisSetT" - << std::endl; - myBasisSet = createBasisSetH5<2, 1>(); - break; - default: - PRE.error("Cannot construct SoaAtomicBasisSetT.", true); - break; - } - return std::unique_ptr(myBasisSet); + ylm = (sph == "cartesian") ? 0 : 1; + hin.close(); + } + + this->myComm->bcast(ylm); + if (ylm < 0) + PRE.error("Missing angular attribute of atomicBasisSet.", true); + + int radialOrbType = determineRadialOrbType(parent); + if (radialOrbType < 0) + PRE.error("Unknown radial function for LCAO orbitals. 
Specify " + "keyword=\"NMO/GTO/STO\" .", + true); + + BasisSet_t* myBasisSet = nullptr; + /** process atomicBasisSet per ion species */ + switch (radialOrbType) + { + case (0): // numerical + app_log() << " LCAO: SoaAtomicBasisSetT" << std::endl; + if (ylm) + myBasisSet = createBasisSetH5<0, 1>(); + else + myBasisSet = createBasisSetH5<0, 0>(); + break; + case (1): // gto + app_log() << " LCAO: SoaAtomicBasisSetT" << std::endl; + if (ylm) + myBasisSet = createBasisSetH5<1, 1>(); + else + myBasisSet = createBasisSetH5<1, 0>(); + break; + case (2): // sto + app_log() << " LCAO: SoaAtomicBasisSetT" << std::endl; + myBasisSet = createBasisSetH5<2, 1>(); + break; + default: + PRE.error("Cannot construct SoaAtomicBasisSetT.", true); + break; + } + return std::unique_ptr(myBasisSet); } -template -template -typename LCAOrbitalBuilderT::BasisSet_t* -LCAOrbitalBuilderT::createBasisSet(xmlNodePtr cur) +template +template +typename LCAOrbitalBuilderT::BasisSet_t* LCAOrbitalBuilderT::createBasisSet(xmlNodePtr cur) { - ReportEngine PRE(this->ClassName, "createBasisSet(xmlNodePtr)"); + ReportEngine PRE(this->ClassName, "createBasisSet(xmlNodePtr)"); + + using ao_type = typename ao_traits::ao_type; + using basis_type = typename ao_traits::basis_type; - using ao_type = typename ao_traits::ao_type; - using basis_type = typename ao_traits::basis_type; + basis_type* mBasisSet = new basis_type(sourcePtcl, targetPtcl); - basis_type* mBasisSet = new basis_type(sourcePtcl, targetPtcl); + // list of built centers + std::vector ao_built_centers; - // list of built centers - std::vector ao_built_centers; + /** process atomicBasisSet per ion species */ + cur = cur->xmlChildrenNode; + while (cur != NULL) // loop over unique ioons + { + std::string cname((const char*)(cur->name)); - /** process atomicBasisSet per ion species */ - cur = cur->xmlChildrenNode; - while (cur != NULL) // loop over unique ioons + if (cname == "atomicBasisSet") { - std::string cname((const char*)(cur->name)); - - if 
(cname == "atomicBasisSet") { - std::string elementType; - std::string sph; - OhmmsAttributeSet att; - att.add(elementType, "elementType"); - att.put(cur); - - if (elementType.empty()) - PRE.error( - "Missing elementType attribute of atomicBasisSet.", true); - - auto it = std::find( - ao_built_centers.begin(), ao_built_centers.end(), elementType); - if (it == ao_built_centers.end()) { - AOBasisBuilderT any(elementType, this->myComm); - any.put(cur); - auto aoBasis = any.createAOSet(cur); - if (aoBasis) { - // add the new atomic basis to the basis set - int activeCenter = - sourcePtcl.getSpeciesSet().findSpecies(elementType); - mBasisSet->add(activeCenter, std::move(aoBasis)); - } - ao_built_centers.push_back(elementType); - } + std::string elementType; + std::string sph; + OhmmsAttributeSet att; + att.add(elementType, "elementType"); + att.put(cur); + + if (elementType.empty()) + PRE.error("Missing elementType attribute of atomicBasisSet.", true); + + auto it = std::find(ao_built_centers.begin(), ao_built_centers.end(), elementType); + if (it == ao_built_centers.end()) + { + AOBasisBuilderT any(elementType, this->myComm); + any.put(cur); + auto aoBasis = any.createAOSet(cur); + if (aoBasis) + { + // add the new atomic basis to the basis set + int activeCenter = sourcePtcl.getSpeciesSet().findSpecies(elementType); + mBasisSet->add(activeCenter, std::move(aoBasis)); } - cur = cur->next; - } // done with basis set - mBasisSet->setBasisSetSize(-1); - mBasisSet->setPBCParams(PBCImages, SuperTwist, PeriodicImagePhaseFactors); - return mBasisSet; + ao_built_centers.push_back(elementType); + } + } + cur = cur->next; + } // done with basis set + mBasisSet->setBasisSetSize(-1); + mBasisSet->setPBCParams(PBCImages, SuperTwist, PeriodicImagePhaseFactors); + return mBasisSet; } -template -template -typename LCAOrbitalBuilderT::BasisSet_t* -LCAOrbitalBuilderT::createBasisSetH5() +template +template +typename LCAOrbitalBuilderT::BasisSet_t* LCAOrbitalBuilderT::createBasisSetH5() 
{ - ReportEngine PRE(this->ClassName, "createBasisSetH5(xmlNodePtr)"); + ReportEngine PRE(this->ClassName, "createBasisSetH5(xmlNodePtr)"); - using ao_type = typename ao_traits::ao_type; - using basis_type = typename ao_traits::basis_type; + using ao_type = typename ao_traits::ao_type; + using basis_type = typename ao_traits::basis_type; - basis_type* mBasisSet = new basis_type(sourcePtcl, targetPtcl); + basis_type* mBasisSet = new basis_type(sourcePtcl, targetPtcl); - // list of built centers - std::vector ao_built_centers; + // list of built centers + std::vector ao_built_centers; - int Nb_Elements(0); - std::string basiset_name; + int Nb_Elements(0); + std::string basiset_name; - /** process atomicBasisSet per ion species */ - app_log() << "Reading BasisSet from HDF5 file:" << h5_path << std::endl; + /** process atomicBasisSet per ion species */ + app_log() << "Reading BasisSet from HDF5 file:" << h5_path << std::endl; - hdf_archive hin(this->myComm); - if (this->myComm->rank() == 0) { - if (!hin.open(h5_path, H5F_ACC_RDONLY)) - PRE.error("Could not open H5 file", true); + hdf_archive hin(this->myComm); + if (this->myComm->rank() == 0) + { + if (!hin.open(h5_path, H5F_ACC_RDONLY)) + PRE.error("Could not open H5 file", true); - hin.push("basisset", false); + hin.push("basisset", false); - hin.read(Nb_Elements, "NbElements"); - } + hin.read(Nb_Elements, "NbElements"); + } - this->myComm->bcast(Nb_Elements); - if (Nb_Elements < 1) - PRE.error("Missing elementType attribute of atomicBasisSet.", true); + this->myComm->bcast(Nb_Elements); + if (Nb_Elements < 1) + PRE.error("Missing elementType attribute of atomicBasisSet.", true); - for (int i = 0; i < Nb_Elements; i++) { - std::string elementType, dataset; - std::stringstream tempElem; - std::string ElemID0 = "atomicBasisSet", ElemType; - tempElem << ElemID0 << i; - ElemType = tempElem.str(); - - if (this->myComm->rank() == 0) { - hin.push(ElemType.c_str(), false); - - if (!hin.readEntry(basiset_name, "name")) - 
PRE.error("Could not find name of basisset group in H5; " - "Probably Corrupt H5 file", - true); - if (!hin.readEntry(elementType, "elementType")) - PRE.error("Could not read elementType in H5; Probably Corrupt " - "H5 file", - true); - } - this->myComm->bcast(basiset_name); - this->myComm->bcast(elementType); - - auto it = std::find( - ao_built_centers.begin(), ao_built_centers.end(), elementType); - if (it == ao_built_centers.end()) { - AOBasisBuilderT any(elementType, this->myComm); - any.putH5(hin); - auto aoBasis = any.createAOSetH5(hin); - if (aoBasis) { - // add the new atomic basis to the basis set - int activeCenter = - sourcePtcl.getSpeciesSet().findSpecies(elementType); - mBasisSet->add(activeCenter, std::move(aoBasis)); - } - ao_built_centers.push_back(elementType); - } + for (int i = 0; i < Nb_Elements; i++) + { + std::string elementType, dataset; + std::stringstream tempElem; + std::string ElemID0 = "atomicBasisSet", ElemType; + tempElem << ElemID0 << i; + ElemType = tempElem.str(); - if (this->myComm->rank() == 0) - hin.pop(); + if (this->myComm->rank() == 0) + { + hin.push(ElemType.c_str(), false); + + if (!hin.readEntry(basiset_name, "name")) + PRE.error("Could not find name of basisset group in H5; " + "Probably Corrupt H5 file", + true); + if (!hin.readEntry(elementType, "elementType")) + PRE.error("Could not read elementType in H5; Probably Corrupt " + "H5 file", + true); } + this->myComm->bcast(basiset_name); + this->myComm->bcast(elementType); - if (this->myComm->rank() == 0) { - hin.pop(); - hin.close(); + auto it = std::find(ao_built_centers.begin(), ao_built_centers.end(), elementType); + if (it == ao_built_centers.end()) + { + AOBasisBuilderT any(elementType, this->myComm); + any.putH5(hin); + auto aoBasis = any.createAOSetH5(hin); + if (aoBasis) + { + // add the new atomic basis to the basis set + int activeCenter = sourcePtcl.getSpeciesSet().findSpecies(elementType); + mBasisSet->add(activeCenter, std::move(aoBasis)); + } + 
ao_built_centers.push_back(elementType); } - mBasisSet->setBasisSetSize(-1); - mBasisSet->setPBCParams(PBCImages, SuperTwist, PeriodicImagePhaseFactors); - return mBasisSet; + + if (this->myComm->rank() == 0) + hin.pop(); + } + + if (this->myComm->rank() == 0) + { + hin.pop(); + hin.close(); + } + mBasisSet->setBasisSetSize(-1); + mBasisSet->setPBCParams(PBCImages, SuperTwist, PeriodicImagePhaseFactors); + return mBasisSet; } #ifndef QMC_COMPLEX -template <> -std::unique_ptr> -LCAOrbitalBuilderT::createWithCuspCorrection(xmlNodePtr cur, - const std::string& spo_name, std::string cusp_file, +template<> +std::unique_ptr> LCAOrbitalBuilderT::createWithCuspCorrection( + xmlNodePtr cur, + const std::string& spo_name, + std::string cusp_file, std::unique_ptr&& myBasisSet) { - app_summary() << " Using cusp correction." << std::endl; - std::unique_ptr> sposet; - { - auto lcwc = std::make_unique>( - spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet)); - loadMO(lcwc->lcao, cur); - lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize()); - sposet = std::move(lcwc); - } - - // Create a temporary particle set to use for cusp initialization. - // The particle coordinates left at the end are unsuitable for further - // computations. The coordinates get set to nuclear positions, which - // leads to zero e-N distance, which causes a NaN in SoaAtomicBasisSet.h - // This problem only appears when the electron positions are specified - // in the input. The random particle placement step executes after this - // part of the code, overwriting the leftover positions from the cusp - // initialization. 
- ParticleSetT tmp_targetPtcl(targetPtcl); - - const int num_centers = sourcePtcl.getTotalNum(); - auto& lcwc = dynamic_cast&>(*sposet); - - const int orbital_set_size = lcwc.getOrbitalSetSize(); - Matrix> info( - num_centers, orbital_set_size); - - // set a default file name if not given - if (cusp_file.empty()) - cusp_file = spo_name + ".cuspInfo.xml"; - - bool file_exists( - this->myComm->rank() == 0 && std::ifstream(cusp_file).good()); - this->myComm->bcast(file_exists); - app_log() << " Cusp correction file " << cusp_file - << (file_exists ? " exits." : " doesn't exist.") << std::endl; - - // validate file if it exists - if (file_exists) { - bool valid = 0; - if (this->myComm->rank() == 0) - valid = CuspCorrectionConstructionT::readCuspInfo( - cusp_file, spo_name, orbital_set_size, info); - this->myComm->bcast(valid); - if (!valid) - this->myComm->barrier_and_abort( - "Invalid cusp correction file " + cusp_file); + app_summary() << " Using cusp correction." << std::endl; + std::unique_ptr> sposet; + { + auto lcwc = + std::make_unique>(spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet)); + loadMO(lcwc->lcao, cur); + lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize()); + sposet = std::move(lcwc); + } + + // Create a temporary particle set to use for cusp initialization. + // The particle coordinates left at the end are unsuitable for further + // computations. The coordinates get set to nuclear positions, which + // leads to zero e-N distance, which causes a NaN in SoaAtomicBasisSet.h + // This problem only appears when the electron positions are specified + // in the input. The random particle placement step executes after this + // part of the code, overwriting the leftover positions from the cusp + // initialization. 
+ ParticleSetT tmp_targetPtcl(targetPtcl); + + const int num_centers = sourcePtcl.getTotalNum(); + auto& lcwc = dynamic_cast&>(*sposet); + + const int orbital_set_size = lcwc.getOrbitalSetSize(); + Matrix> info(num_centers, orbital_set_size); + + // set a default file name if not given + if (cusp_file.empty()) + cusp_file = spo_name + ".cuspInfo.xml"; + + bool file_exists(this->myComm->rank() == 0 && std::ifstream(cusp_file).good()); + this->myComm->bcast(file_exists); + app_log() << " Cusp correction file " << cusp_file << (file_exists ? " exits." : " doesn't exist.") << std::endl; + + // validate file if it exists + if (file_exists) + { + bool valid = 0; + if (this->myComm->rank() == 0) + valid = CuspCorrectionConstructionT::readCuspInfo(cusp_file, spo_name, orbital_set_size, info); + this->myComm->bcast(valid); + if (!valid) + this->myComm->barrier_and_abort("Invalid cusp correction file " + cusp_file); #ifdef HAVE_MPI - for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++) - for (int center_idx = 0; center_idx < num_centers; center_idx++) - CuspCorrectionConstructionT::broadcastCuspInfo( - info(center_idx, orb_idx), *this->myComm, 0); + for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++) + for (int center_idx = 0; center_idx < num_centers; center_idx++) + CuspCorrectionConstructionT::broadcastCuspInfo(info(center_idx, orb_idx), *this->myComm, 0); #endif - } - else { - CuspCorrectionConstructionT::generateCuspInfo(info, - tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name, *this->myComm); - if (this->myComm->rank() == 0) - CuspCorrectionConstructionT::saveCusp( - cusp_file, info, spo_name); - } - - CuspCorrectionConstructionT::applyCuspCorrection( - info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp, spo_name); - - return sposet; + } + else + { + CuspCorrectionConstructionT::generateCuspInfo(info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name, + *this->myComm); + if (this->myComm->rank() == 0) + CuspCorrectionConstructionT::saveCusp(cusp_file, 
info, spo_name); + } + + CuspCorrectionConstructionT::applyCuspCorrection(info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp, + spo_name); + + return sposet; } -template <> -std::unique_ptr> -LCAOrbitalBuilderT::createWithCuspCorrection(xmlNodePtr cur, - const std::string& spo_name, std::string cusp_file, +template<> +std::unique_ptr> LCAOrbitalBuilderT::createWithCuspCorrection( + xmlNodePtr cur, + const std::string& spo_name, + std::string cusp_file, std::unique_ptr&& myBasisSet) { - app_summary() << " Using cusp correction." << std::endl; - std::unique_ptr> sposet; - { - auto lcwc = std::make_unique>( - spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet)); - loadMO(lcwc->lcao, cur); - lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize()); - sposet = std::move(lcwc); - } - - // Create a temporary particle set to use for cusp initialization. - // The particle coordinates left at the end are unsuitable for further - // computations. The coordinates get set to nuclear positions, which - // leads to zero e-N distance, which causes a NaN in SoaAtomicBasisSet.h - // This problem only appears when the electron positions are specified - // in the input. The random particle placement step executes after this - // part of the code, overwriting the leftover positions from the cusp - // initialization. - ParticleSetT tmp_targetPtcl(targetPtcl); - - const int num_centers = sourcePtcl.getTotalNum(); - auto& lcwc = dynamic_cast&>(*sposet); - - const int orbital_set_size = lcwc.getOrbitalSetSize(); - Matrix> info( - num_centers, orbital_set_size); - - // set a default file name if not given - if (cusp_file.empty()) - cusp_file = spo_name + ".cuspInfo.xml"; - - bool file_exists( - this->myComm->rank() == 0 && std::ifstream(cusp_file).good()); - this->myComm->bcast(file_exists); - app_log() << " Cusp correction file " << cusp_file - << (file_exists ? " exits." 
: " doesn't exist.") << std::endl; - - // validate file if it exists - if (file_exists) { - bool valid = 0; - if (this->myComm->rank() == 0) - valid = CuspCorrectionConstructionT::readCuspInfo( - cusp_file, spo_name, orbital_set_size, info); - this->myComm->bcast(valid); - if (!valid) - this->myComm->barrier_and_abort( - "Invalid cusp correction file " + cusp_file); + app_summary() << " Using cusp correction." << std::endl; + std::unique_ptr> sposet; + { + auto lcwc = + std::make_unique>(spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet)); + loadMO(lcwc->lcao, cur); + lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize()); + sposet = std::move(lcwc); + } + + // Create a temporary particle set to use for cusp initialization. + // The particle coordinates left at the end are unsuitable for further + // computations. The coordinates get set to nuclear positions, which + // leads to zero e-N distance, which causes a NaN in SoaAtomicBasisSet.h + // This problem only appears when the electron positions are specified + // in the input. The random particle placement step executes after this + // part of the code, overwriting the leftover positions from the cusp + // initialization. + ParticleSetT tmp_targetPtcl(targetPtcl); + + const int num_centers = sourcePtcl.getTotalNum(); + auto& lcwc = dynamic_cast&>(*sposet); + + const int orbital_set_size = lcwc.getOrbitalSetSize(); + Matrix> info(num_centers, orbital_set_size); + + // set a default file name if not given + if (cusp_file.empty()) + cusp_file = spo_name + ".cuspInfo.xml"; + + bool file_exists(this->myComm->rank() == 0 && std::ifstream(cusp_file).good()); + this->myComm->bcast(file_exists); + app_log() << " Cusp correction file " << cusp_file << (file_exists ? " exits." 
: " doesn't exist.") << std::endl; + + // validate file if it exists + if (file_exists) + { + bool valid = 0; + if (this->myComm->rank() == 0) + valid = CuspCorrectionConstructionT::readCuspInfo(cusp_file, spo_name, orbital_set_size, info); + this->myComm->bcast(valid); + if (!valid) + this->myComm->barrier_and_abort("Invalid cusp correction file " + cusp_file); #ifdef HAVE_MPI - for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++) - for (int center_idx = 0; center_idx < num_centers; center_idx++) - CuspCorrectionConstructionT::broadcastCuspInfo( - info(center_idx, orb_idx), *this->myComm, 0); + for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++) + for (int center_idx = 0; center_idx < num_centers; center_idx++) + CuspCorrectionConstructionT::broadcastCuspInfo(info(center_idx, orb_idx), *this->myComm, 0); #endif - } - else { - CuspCorrectionConstructionT::generateCuspInfo(info, - tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name, *this->myComm); - if (this->myComm->rank() == 0) - CuspCorrectionConstructionT::saveCusp( - cusp_file, info, spo_name); - } - - CuspCorrectionConstructionT::applyCuspCorrection( - info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp, spo_name); - - return sposet; + } + else + { + CuspCorrectionConstructionT::generateCuspInfo(info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name, + *this->myComm); + if (this->myComm->rank() == 0) + CuspCorrectionConstructionT::saveCusp(cusp_file, info, spo_name); + } + + CuspCorrectionConstructionT::applyCuspCorrection(info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp, + spo_name); + + return sposet; } #else -template <> -std::unique_ptr>> -LCAOrbitalBuilderT>::createWithCuspCorrection( - xmlNodePtr, const std::string&, std::string, std::unique_ptr&&) +template<> +std::unique_ptr>> LCAOrbitalBuilderT>::createWithCuspCorrection( + xmlNodePtr, + const std::string&, + std::string, + std::unique_ptr&&) { - this->myComm->barrier_and_abort( - "LCAOrbitalBuilder::createSPOSetFromXML cusp 
correction is not " - "supported on complex LCAO."); - return std::unique_ptr>>{}; + this->myComm->barrier_and_abort("LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not " + "supported on complex LCAO."); + return std::unique_ptr>>{}; } -template <> -std::unique_ptr>> -LCAOrbitalBuilderT>::createWithCuspCorrection( - xmlNodePtr, const std::string&, std::string, std::unique_ptr&&) +template<> +std::unique_ptr>> LCAOrbitalBuilderT>::createWithCuspCorrection( + xmlNodePtr, + const std::string&, + std::string, + std::unique_ptr&&) { - this->myComm->barrier_and_abort( - "LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not " - "supported on complex LCAO."); - return std::unique_ptr>>{}; + this->myComm->barrier_and_abort("LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not " + "supported on complex LCAO."); + return std::unique_ptr>>{}; } #endif -template -std::unique_ptr> -LCAOrbitalBuilderT::createSPOSetFromXML(xmlNodePtr cur) +template +std::unique_ptr> LCAOrbitalBuilderT::createSPOSetFromXML(xmlNodePtr cur) { - ReportEngine PRE(this->ClassName, "createSPO(xmlNodePtr)"); - std::string spo_name(""), cusp_file(""), optimize("no"); - std::string basisset_name("LCAOBSet"); - OhmmsAttributeSet spoAttrib; - spoAttrib.add(spo_name, "name"); - spoAttrib.add(spo_name, "id"); - spoAttrib.add(cusp_file, "cuspInfo"); - spoAttrib.add(basisset_name, "basisset"); - spoAttrib.put(cur); - - std::unique_ptr myBasisSet; - if (basisset_map_.find(basisset_name) == basisset_map_.end()) - this->myComm->barrier_and_abort( - "basisset \"" + basisset_name + "\" cannot be found\n"); - else - myBasisSet.reset(basisset_map_[basisset_name]->makeClone()); - - std::unique_ptr> sposet; - if (doCuspCorrection) { - sposet = createWithCuspCorrection(cur, spo_name, cusp_file, std::move(myBasisSet)); - } - else { - auto lcos = std::make_unique>( - spo_name, std::move(myBasisSet)); - loadMO(*lcos, cur); - sposet = std::move(lcos); - } - - return sposet; + ReportEngine 
PRE(this->ClassName, "createSPO(xmlNodePtr)"); + std::string spo_name(""), cusp_file(""), optimize("no"); + std::string basisset_name("LCAOBSet"); + OhmmsAttributeSet spoAttrib; + spoAttrib.add(spo_name, "name"); + spoAttrib.add(spo_name, "id"); + spoAttrib.add(cusp_file, "cuspInfo"); + spoAttrib.add(basisset_name, "basisset"); + spoAttrib.put(cur); + + std::unique_ptr myBasisSet; + if (basisset_map_.find(basisset_name) == basisset_map_.end()) + this->myComm->barrier_and_abort("basisset \"" + basisset_name + "\" cannot be found\n"); + else + myBasisSet.reset(basisset_map_[basisset_name]->makeClone()); + + std::unique_ptr> sposet; + if (doCuspCorrection) + { + sposet = createWithCuspCorrection(cur, spo_name, cusp_file, std::move(myBasisSet)); + } + else + { + auto lcos = std::make_unique>(spo_name, std::move(myBasisSet)); + loadMO(*lcos, cur); + sposet = std::move(lcos); + } + + return sposet; } /** Parse the xml file for information on the Dirac determinants. *@param cur the current xmlNode */ -template -bool -LCAOrbitalBuilderT::loadMO(LCAOrbitalSetT& spo, xmlNodePtr cur) +template +bool LCAOrbitalBuilderT::loadMO(LCAOrbitalSetT& spo, xmlNodePtr cur) { #undef FunctionName -#define FunctionName \ - printf("Calling FunctionName from %s\n", __FUNCTION__); \ - FunctionNameReal - // Check if HDF5 present - ReportEngine PRE("LCAOrbitalBuilder", "put(xmlNodePtr)"); - - // initialize the number of orbital by the basis set size - int norb = spo.getBasisSetSize(); - std::string debugc("no"); - double orbital_mix_magnitude = 0.0; - bool PBC = false; - OhmmsAttributeSet aAttrib; - aAttrib.add(norb, "orbitals"); - aAttrib.add(norb, "size"); - aAttrib.add(debugc, "debug"); - aAttrib.add(orbital_mix_magnitude, "orbital_mix_magnitude"); - aAttrib.put(cur); - xmlNodePtr occ_ptr = NULL; - xmlNodePtr coeff_ptr = NULL; - cur = cur->xmlChildrenNode; - while (cur != NULL) { - std::string cname((const char*)(cur->name)); - if (cname == "occupation") { - occ_ptr = cur; - } - else if 
(cname.find("coeff") < cname.size() || cname == "parameter" || - cname == "Var") { - coeff_ptr = cur; - } - cur = cur->next; - } - if (coeff_ptr == NULL) { - app_log() << " Using Identity for the LCOrbitalSet " << std::endl; - return true; +#define FunctionName \ + printf("Calling FunctionName from %s\n", __FUNCTION__); \ + FunctionNameReal + // Check if HDF5 present + ReportEngine PRE("LCAOrbitalBuilder", "put(xmlNodePtr)"); + + // initialize the number of orbital by the basis set size + int norb = spo.getBasisSetSize(); + std::string debugc("no"); + double orbital_mix_magnitude = 0.0; + bool PBC = false; + OhmmsAttributeSet aAttrib; + aAttrib.add(norb, "orbitals"); + aAttrib.add(norb, "size"); + aAttrib.add(debugc, "debug"); + aAttrib.add(orbital_mix_magnitude, "orbital_mix_magnitude"); + aAttrib.put(cur); + xmlNodePtr occ_ptr = NULL; + xmlNodePtr coeff_ptr = NULL; + cur = cur->xmlChildrenNode; + while (cur != NULL) + { + std::string cname((const char*)(cur->name)); + if (cname == "occupation") + { + occ_ptr = cur; } - spo.setOrbitalSetSize(norb); - bool success = putOccupation(spo, occ_ptr); - if (h5_path == "") - success = putFromXML(spo, coeff_ptr); - else { - hdf_archive hin(this->myComm); - - if (this->myComm->rank() == 0) { - if (!hin.open(h5_path, H5F_ACC_RDONLY)) - APP_ABORT("LCAOrbitalBuilder::putFromH5 missing or incorrect " - "path to H5 file."); - - try { - hin.push("PBC", false); - PBC = true; - } - catch (const std::exception& e) { - app_debug() << e.what() << std::endl; - PBC = false; - } - - if (PBC) - hin.read(PBC, "PBC"); - - hin.close(); - } - this->myComm->bcast(PBC); - if (PBC) - success = putPBCFromH5(spo, coeff_ptr); - else - success = putFromH5(spo, coeff_ptr); + else if (cname.find("coeff") < cname.size() || cname == "parameter" || cname == "Var") + { + coeff_ptr = cur; } + cur = cur->next; + } + if (coeff_ptr == NULL) + { + app_log() << " Using Identity for the LCOrbitalSet " << std::endl; + return true; + } + 
spo.setOrbitalSetSize(norb); + bool success = putOccupation(spo, occ_ptr); + if (h5_path == "") + success = putFromXML(spo, coeff_ptr); + else + { + hdf_archive hin(this->myComm); - // Ye: used to construct cusp correction - // bool success2 = transformSPOSet(); - if (debugc == "yes") { - app_log() << " Single-particle orbital coefficients dims=" - << spo.C->rows() << " x " << spo.C->cols() << std::endl; - app_log() << *spo.C << std::endl; + if (this->myComm->rank() == 0) + { + if (!hin.open(h5_path, H5F_ACC_RDONLY)) + APP_ABORT("LCAOrbitalBuilder::putFromH5 missing or incorrect " + "path to H5 file."); + + try + { + hin.push("PBC", false); + PBC = true; + } + catch (const std::exception& e) + { + app_debug() << e.what() << std::endl; + PBC = false; + } + + if (PBC) + hin.read(PBC, "PBC"); + + hin.close(); } - - return success; + this->myComm->bcast(PBC); + if (PBC) + success = putPBCFromH5(spo, coeff_ptr); + else + success = putFromH5(spo, coeff_ptr); + } + + // Ye: used to construct cusp correction + // bool success2 = transformSPOSet(); + if (debugc == "yes") + { + app_log() << " Single-particle orbital coefficients dims=" << spo.C->rows() << " x " << spo.C->cols() + << std::endl; + app_log() << *spo.C << std::endl; + } + + return success; } -template -bool -LCAOrbitalBuilderT::putFromXML(LCAOrbitalSetT& spo, xmlNodePtr coeff_ptr) +template +bool LCAOrbitalBuilderT::putFromXML(LCAOrbitalSetT& spo, xmlNodePtr coeff_ptr) { - int norbs = 0; - OhmmsAttributeSet aAttrib; - aAttrib.add(norbs, "size"); - aAttrib.add(norbs, "orbitals"); - aAttrib.put(coeff_ptr); - if (norbs < spo.getOrbitalSetSize()) { - return false; - APP_ABORT("LCAOrbitalBuilder::putFromXML missing or incorrect size"); - } - if (norbs) { - std::vector Ctemp; - int BasisSetSize = spo.getBasisSetSize(); - Ctemp.resize(norbs * BasisSetSize); - putContent(Ctemp, coeff_ptr); - int n = 0, i = 0; - typename std::vector::iterator cit(Ctemp.begin()); - while (i < spo.getOrbitalSetSize()) { - if (Occ[n] > 
std::numeric_limits::epsilon()) { - std::copy(cit, cit + BasisSetSize, (*spo.C)[i]); - i++; - } - n++; - cit += BasisSetSize; - } + int norbs = 0; + OhmmsAttributeSet aAttrib; + aAttrib.add(norbs, "size"); + aAttrib.add(norbs, "orbitals"); + aAttrib.put(coeff_ptr); + if (norbs < spo.getOrbitalSetSize()) + { + return false; + APP_ABORT("LCAOrbitalBuilder::putFromXML missing or incorrect size"); + } + if (norbs) + { + std::vector Ctemp; + int BasisSetSize = spo.getBasisSetSize(); + Ctemp.resize(norbs * BasisSetSize); + putContent(Ctemp, coeff_ptr); + int n = 0, i = 0; + typename std::vector::iterator cit(Ctemp.begin()); + while (i < spo.getOrbitalSetSize()) + { + if (Occ[n] > std::numeric_limits::epsilon()) + { + std::copy(cit, cit + BasisSetSize, (*spo.C)[i]); + i++; + } + n++; + cit += BasisSetSize; } - return true; + } + return true; } /** read data from a hdf5 file * @param norb number of orbitals to be initialized * @param coeff_ptr xmlnode for coefficients */ -template -bool -LCAOrbitalBuilderT::putFromH5(LCAOrbitalSetT& spo, xmlNodePtr coeff_ptr) +template +bool LCAOrbitalBuilderT::putFromH5(LCAOrbitalSetT& spo, xmlNodePtr coeff_ptr) { - int neigs = spo.getBasisSetSize(); - int setVal = -1; - OhmmsAttributeSet aAttrib; - aAttrib.add(setVal, "spindataset"); - aAttrib.add(neigs, "size"); - aAttrib.add(neigs, "orbitals"); - aAttrib.put(coeff_ptr); - hdf_archive hin(this->myComm); - if (this->myComm->rank() == 0) { - if (!hin.open(h5_path, H5F_ACC_RDONLY)) - APP_ABORT("LCAOrbitalBuilder::putFromH5 missing or incorrect path " - "to H5 file."); - - Matrix Ctemp; - std::array name; - - // This is to make sure of Backward compatibility with previous tags. 
- int name_len = std::snprintf( - name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_", setVal); - if (name_len < 0) - throw std::runtime_error("Error generating name"); - std::string setname(name.data(), name_len); - if (!hin.readEntry(Ctemp, setname)) { - name_len = std::snprintf( - name.data(), name.size(), "%s%d", "/KPTS_0/eigenset_", setVal); - if (name_len < 0) - throw std::runtime_error("Error generating name"); - setname = std::string(name.data(), name_len); - hin.read(Ctemp, setname); - } - hin.close(); - - if (Ctemp.cols() != spo.getBasisSetSize()) { - std::ostringstream err_msg; - err_msg << "Basis set size " << spo.getBasisSetSize() - << " mismatched the number of MO coefficients columns " - << Ctemp.cols() << " from h5." << std::endl; - this->myComm->barrier_and_abort(err_msg.str()); - } + int neigs = spo.getBasisSetSize(); + int setVal = -1; + OhmmsAttributeSet aAttrib; + aAttrib.add(setVal, "spindataset"); + aAttrib.add(neigs, "size"); + aAttrib.add(neigs, "orbitals"); + aAttrib.put(coeff_ptr); + hdf_archive hin(this->myComm); + if (this->myComm->rank() == 0) + { + if (!hin.open(h5_path, H5F_ACC_RDONLY)) + APP_ABORT("LCAOrbitalBuilder::putFromH5 missing or incorrect path " + "to H5 file."); + + Matrix Ctemp; + std::array name; - int norbs = spo.getOrbitalSetSize(); - if (Ctemp.rows() < norbs) { - std::ostringstream err_msg; - err_msg << "Need " << norbs - << " orbitals. Insufficient rows of MO coefficients " - << Ctemp.rows() << " from h5." << std::endl; - this->myComm->barrier_and_abort(err_msg.str()); - } + // This is to make sure of Backward compatibility with previous tags. 
+ int name_len = std::snprintf(name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_", setVal); + if (name_len < 0) + throw std::runtime_error("Error generating name"); + std::string setname(name.data(), name_len); + if (!hin.readEntry(Ctemp, setname)) + { + name_len = std::snprintf(name.data(), name.size(), "%s%d", "/KPTS_0/eigenset_", setVal); + if (name_len < 0) + throw std::runtime_error("Error generating name"); + setname = std::string(name.data(), name_len); + hin.read(Ctemp, setname); + } + hin.close(); - int n = 0, i = 0; - while (i < norbs) { - if (Occ[n] > 0.0) { - std::copy(Ctemp[n], Ctemp[n + 1], (*spo.C)[i]); - i++; - } - n++; - } + if (Ctemp.cols() != spo.getBasisSetSize()) + { + std::ostringstream err_msg; + err_msg << "Basis set size " << spo.getBasisSetSize() << " mismatched the number of MO coefficients columns " + << Ctemp.cols() << " from h5." << std::endl; + this->myComm->barrier_and_abort(err_msg.str()); } - this->myComm->bcast(spo.C->data(), spo.C->size()); - return true; + + int norbs = spo.getOrbitalSetSize(); + if (Ctemp.rows() < norbs) + { + std::ostringstream err_msg; + err_msg << "Need " << norbs << " orbitals. Insufficient rows of MO coefficients " << Ctemp.rows() << " from h5." 
+ << std::endl; + this->myComm->barrier_and_abort(err_msg.str()); + } + + int n = 0, i = 0; + while (i < norbs) + { + if (Occ[n] > 0.0) + { + std::copy(Ctemp[n], Ctemp[n + 1], (*spo.C)[i]); + i++; + } + n++; + } + } + this->myComm->bcast(spo.C->data(), spo.C->size()); + return true; } /** read data from a hdf5 file * @param norb number of orbitals to be initialized * @param coeff_ptr xmlnode for coefficients */ -template -bool -LCAOrbitalBuilderT::putPBCFromH5( - LCAOrbitalSetT& spo, xmlNodePtr coeff_ptr) +template +bool LCAOrbitalBuilderT::putPBCFromH5(LCAOrbitalSetT& spo, xmlNodePtr coeff_ptr) { - ReportEngine PRE("LCAOrbitalBuilder", "LCAOrbitalBuilder::putPBCFromH5"); - int norbs = spo.getOrbitalSetSize(); - int neigs = spo.getBasisSetSize(); - int setVal = -1; - bool IsComplex = false; - bool MultiDet = false; - PosType SuperTwist(0.0); - PosType SuperTwistH5(0.0); - OhmmsAttributeSet aAttrib; - aAttrib.add(setVal, "spindataset"); - aAttrib.add(neigs, "size"); - aAttrib.add(neigs, "orbitals"); - aAttrib.put(coeff_ptr); - hdf_archive hin(this->myComm); - - xmlNodePtr curtemp = coeff_ptr; - - std::string xmlTag("determinantset"); - std::string MSDTag("sposet"); - std::string SDTag("determinant"); - std::string EndTag("qmcsystem"); - std::string curname; - - do { - std::stringstream ss; - curtemp = curtemp->parent; - ss << curtemp->name; - ss >> curname; - if (curname == MSDTag) - MultiDet = true; /// Used to know if running an MSD calculation - - /// needed for order of Orbitals. - if (curname == SDTag) - MultiDet = false; - - } while ((xmlTag != curname) && (curname != EndTag)); - if (curname == EndTag) { - APP_ABORT("Could not find in wf file the \"sposet\" or \"determinant\" " - "tags. 
Please verify input or contact developers"); + ReportEngine PRE("LCAOrbitalBuilder", "LCAOrbitalBuilder::putPBCFromH5"); + int norbs = spo.getOrbitalSetSize(); + int neigs = spo.getBasisSetSize(); + int setVal = -1; + bool IsComplex = false; + bool MultiDet = false; + PosType SuperTwist(0.0); + PosType SuperTwistH5(0.0); + OhmmsAttributeSet aAttrib; + aAttrib.add(setVal, "spindataset"); + aAttrib.add(neigs, "size"); + aAttrib.add(neigs, "orbitals"); + aAttrib.put(coeff_ptr); + hdf_archive hin(this->myComm); + + xmlNodePtr curtemp = coeff_ptr; + + std::string xmlTag("determinantset"); + std::string MSDTag("sposet"); + std::string SDTag("determinant"); + std::string EndTag("qmcsystem"); + std::string curname; + + do + { + std::stringstream ss; + curtemp = curtemp->parent; + ss << curtemp->name; + ss >> curname; + if (curname == MSDTag) + MultiDet = true; /// Used to know if running an MSD calculation - + /// needed for order of Orbitals. + if (curname == SDTag) + MultiDet = false; + + } while ((xmlTag != curname) && (curname != EndTag)); + if (curname == EndTag) + { + APP_ABORT("Could not find in wf file the \"sposet\" or \"determinant\" " + "tags. 
Please verify input or contact developers"); + } + + aAttrib.add(SuperTwist, "twist"); + aAttrib.put(curtemp); + + if (this->myComm->rank() == 0) + { + if (!hin.open(h5_path, H5F_ACC_RDONLY)) + APP_ABORT("LCAOrbitalBuilder::putFromH5 missing or incorrect path " + "to H5 file."); + hin.push("parameters"); + hin.read(IsComplex, "IsComplex"); + hin.pop(); + + std::string setname("/Super_Twist/Coord"); + hin.read(SuperTwistH5, setname); + if (std::abs(SuperTwistH5[0] - SuperTwist[0]) >= 1e-6 || std::abs(SuperTwistH5[1] - SuperTwist[1]) >= 1e-6 || + std::abs(SuperTwistH5[2] - SuperTwist[2]) >= 1e-6) + { + app_log() << "Super Twist in XML : " << SuperTwist[0] << " In H5:" << SuperTwistH5[0] << std::endl; + app_log() << " " << SuperTwist[1] << " " << SuperTwistH5[1] << std::endl; + app_log() << " " << SuperTwist[2] << " " << SuperTwistH5[2] << std::endl; + app_log() << "Diff in Coord x :" << std::abs(SuperTwistH5[0] - SuperTwist[0]) << std::endl; + app_log() << " y :" << std::abs(SuperTwistH5[1] - SuperTwist[1]) << std::endl; + app_log() << " z :" << std::abs(SuperTwistH5[2] - SuperTwist[2]) << std::endl; + APP_ABORT("Requested Super Twist in XML and Super Twist in HDF5 do " + "not Match!!! 
Aborting."); } + // SuperTwist=SuperTwistH5; + Matrix Ctemp; + LoadFullCoefsFromH5(hin, setVal, SuperTwist, Ctemp, MultiDet); - aAttrib.add(SuperTwist, "twist"); - aAttrib.put(curtemp); - - if (this->myComm->rank() == 0) { - if (!hin.open(h5_path, H5F_ACC_RDONLY)) - APP_ABORT("LCAOrbitalBuilder::putFromH5 missing or incorrect path " - "to H5 file."); - hin.push("parameters"); - hin.read(IsComplex, "IsComplex"); - hin.pop(); - - std::string setname("/Super_Twist/Coord"); - hin.read(SuperTwistH5, setname); - if (std::abs(SuperTwistH5[0] - SuperTwist[0]) >= 1e-6 || - std::abs(SuperTwistH5[1] - SuperTwist[1]) >= 1e-6 || - std::abs(SuperTwistH5[2] - SuperTwist[2]) >= 1e-6) { - app_log() << "Super Twist in XML : " << SuperTwist[0] - << " In H5:" << SuperTwistH5[0] << std::endl; - app_log() << " " << SuperTwist[1] - << " " << SuperTwistH5[1] << std::endl; - app_log() << " " << SuperTwist[2] - << " " << SuperTwistH5[2] << std::endl; - app_log() << "Diff in Coord x :" - << std::abs(SuperTwistH5[0] - SuperTwist[0]) << std::endl; - app_log() << " y :" - << std::abs(SuperTwistH5[1] - SuperTwist[1]) << std::endl; - app_log() << " z :" - << std::abs(SuperTwistH5[2] - SuperTwist[2]) << std::endl; - APP_ABORT("Requested Super Twist in XML and Super Twist in HDF5 do " - "not Match!!! 
Aborting."); - } - // SuperTwist=SuperTwistH5; - Matrix Ctemp; - LoadFullCoefsFromH5(hin, setVal, SuperTwist, Ctemp, MultiDet); - - int n = 0, i = 0; - while (i < norbs) { - if (Occ[n] > 0.0) { - std::copy(Ctemp[n], Ctemp[n + 1], (*spo.C)[i]); - i++; - } - n++; - } - - hin.close(); + int n = 0, i = 0; + while (i < norbs) + { + if (Occ[n] > 0.0) + { + std::copy(Ctemp[n], Ctemp[n + 1], (*spo.C)[i]); + i++; + } + n++; } + + hin.close(); + } #ifdef HAVE_MPI - this->myComm->comm.broadcast_n(spo.C->data(), spo.C->size()); + this->myComm->comm.broadcast_n(spo.C->data(), spo.C->size()); #endif - return true; + return true; } -template -bool -LCAOrbitalBuilderT::putOccupation(LCAOrbitalSetT& spo, xmlNodePtr occ_ptr) +template +bool LCAOrbitalBuilderT::putOccupation(LCAOrbitalSetT& spo, xmlNodePtr occ_ptr) { - // die?? - if (spo.getBasisSetSize() == 0) { - APP_ABORT( - "LCAOrbitalBuilder::putOccupation detected ZERO BasisSetSize"); - return false; - } - Occ.resize(std::max(spo.getBasisSetSize(), spo.getOrbitalSetSize())); - Occ = 0.0; - for (int i = 0; i < spo.getOrbitalSetSize(); i++) - Occ[i] = 1.0; - std::vector occ_in; - std::string occ_mode("table"); - if (occ_ptr == NULL) { - occ_mode = "ground"; - } - else { - const std::string o(getXMLAttributeValue(occ_ptr, "mode")); - if (!o.empty()) - occ_mode = o; - } - // Do nothing if mode == ground - if (occ_mode == "excited") { - putContent(occ_in, occ_ptr); - for (int k = 0; k < occ_in.size(); k++) { - if (occ_in[k] < 0) // remove this, -1 is to adjust the base - Occ[-occ_in[k] - 1] = 0.0; - else - Occ[occ_in[k] - 1] = 1.0; - } - } - else if (occ_mode == "table") { - putContent(Occ, occ_ptr); + // die?? 
+ if (spo.getBasisSetSize() == 0) + { + APP_ABORT("LCAOrbitalBuilder::putOccupation detected ZERO BasisSetSize"); + return false; + } + Occ.resize(std::max(spo.getBasisSetSize(), spo.getOrbitalSetSize())); + Occ = 0.0; + for (int i = 0; i < spo.getOrbitalSetSize(); i++) + Occ[i] = 1.0; + std::vector occ_in; + std::string occ_mode("table"); + if (occ_ptr == NULL) + { + occ_mode = "ground"; + } + else + { + const std::string o(getXMLAttributeValue(occ_ptr, "mode")); + if (!o.empty()) + occ_mode = o; + } + // Do nothing if mode == ground + if (occ_mode == "excited") + { + putContent(occ_in, occ_ptr); + for (int k = 0; k < occ_in.size(); k++) + { + if (occ_in[k] < 0) // remove this, -1 is to adjust the base + Occ[-occ_in[k] - 1] = 0.0; + else + Occ[occ_in[k] - 1] = 1.0; } - return true; + } + else if (occ_mode == "table") + { + putContent(Occ, occ_ptr); + } + return true; } -template -void -LCAOrbitalBuilderT::readRealMatrixFromH5( - hdf_archive& hin, const std::string& setname, Matrix& Creal) const +template +void LCAOrbitalBuilderT::readRealMatrixFromH5(hdf_archive& hin, + const std::string& setname, + Matrix& Creal) const { - hin.read(Creal, setname); + hin.read(Creal, setname); } -template -void -LCAOrbitalBuilderT::LoadFullCoefsFromH5(hdf_archive& hin, int setVal, - PosType& SuperTwist, Matrix>& Ctemp, bool MultiDet) +template +void LCAOrbitalBuilderT::LoadFullCoefsFromH5(hdf_archive& hin, + int setVal, + PosType& SuperTwist, + Matrix>& Ctemp, + bool MultiDet) { - Matrix Creal; - Matrix Ccmplx; - - std::array name; - int name_len{0}; - /// When running Single Determinant calculations, MO coeff loaded based on - /// occupation and lowest eingenvalue. 
However, for solids with - /// multideterminants, orbitals are order by kpoints; first all MOs for - /// kpoint 1, then 2 etc - /// The multideterminants occupation is specified in the input/HDF5 and - /// theefore as long as there is consistency between the order in which we - /// read the orbitals and the occupation, we are safe. In the case of - /// Multideterminants generated by pyscf and Quantum Package, They are - /// stored in the same order as generated for quantum package and one - /// should use the orbitals labelled eigenset_unsorted. - - if (MultiDet == false) - name_len = std::snprintf( - name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_", setVal); - else - name_len = std::snprintf(name.data(), name.size(), "%s%d", - "/Super_Twist/eigenset_unsorted_", setVal); - if (name_len < 0) - throw std::runtime_error("Error generating name"); - - std::string setname(name.data(), name_len); - readRealMatrixFromH5(hin, setname, Creal); - - bool IsComplex = true; - hin.read(IsComplex, "/parameters/IsComplex"); - if (IsComplex == false) { - Ccmplx.resize(Creal.rows(), Creal.cols()); - Ccmplx = 0.0; - } - else { - setname += "_imag"; - readRealMatrixFromH5(hin, setname, Ccmplx); - } - - Ctemp.resize(Creal.rows(), Creal.cols()); - for (int i = 0; i < Ctemp.rows(); i++) - for (int j = 0; j < Ctemp.cols(); j++) - Ctemp[i][j] = std::complex(Creal[i][j], Ccmplx[i][j]); + Matrix Creal; + Matrix Ccmplx; + + std::array name; + int name_len{0}; + /// When running Single Determinant calculations, MO coeff loaded based on + /// occupation and lowest eingenvalue. However, for solids with + /// multideterminants, orbitals are order by kpoints; first all MOs for + /// kpoint 1, then 2 etc + /// The multideterminants occupation is specified in the input/HDF5 and + /// theefore as long as there is consistency between the order in which we + /// read the orbitals and the occupation, we are safe. 
In the case of + /// Multideterminants generated by pyscf and Quantum Package, They are + /// stored in the same order as generated for quantum package and one + /// should use the orbitals labelled eigenset_unsorted. + + if (MultiDet == false) + name_len = std::snprintf(name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_", setVal); + else + name_len = std::snprintf(name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_unsorted_", setVal); + if (name_len < 0) + throw std::runtime_error("Error generating name"); + + std::string setname(name.data(), name_len); + readRealMatrixFromH5(hin, setname, Creal); + + bool IsComplex = true; + hin.read(IsComplex, "/parameters/IsComplex"); + if (IsComplex == false) + { + Ccmplx.resize(Creal.rows(), Creal.cols()); + Ccmplx = 0.0; + } + else + { + setname += "_imag"; + readRealMatrixFromH5(hin, setname, Ccmplx); + } + + Ctemp.resize(Creal.rows(), Creal.cols()); + for (int i = 0; i < Ctemp.rows(); i++) + for (int j = 0; j < Ctemp.cols(); j++) + Ctemp[i][j] = std::complex(Creal[i][j], Ccmplx[i][j]); } -template -void -LCAOrbitalBuilderT::LoadFullCoefsFromH5(hdf_archive& hin, int setVal, - PosType& SuperTwist, Matrix& Creal, bool MultiDet) +template +void LCAOrbitalBuilderT::LoadFullCoefsFromH5(hdf_archive& hin, + int setVal, + PosType& SuperTwist, + Matrix& Creal, + bool MultiDet) { - bool IsComplex = false; - hin.read(IsComplex, "/parameters/IsComplex"); - if (IsComplex && - (std::abs(SuperTwist[0]) >= 1e-6 || std::abs(SuperTwist[1]) >= 1e-6 || - std::abs(SuperTwist[2]) >= 1e-6)) { - std::string setname( - "This Wavefunction is Complex and you are using the real version " - "of QMCPACK. 
" - "Please re-run this job with the Complex build of QMCPACK."); - APP_ABORT(setname.c_str()); - } - - std::array name; - int name_len{0}; - bool PBC = false; - hin.read(PBC, "/PBC/PBC"); - if (MultiDet && PBC) - name_len = std::snprintf(name.data(), name.size(), "%s%d", - "/Super_Twist/eigenset_unsorted_", setVal); - else - name_len = std::snprintf( - name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_", setVal); - if (name_len < 0) - throw std::runtime_error("Error generating name"); - - readRealMatrixFromH5(hin, std::string(name.data(), name_len), Creal); + bool IsComplex = false; + hin.read(IsComplex, "/parameters/IsComplex"); + if (IsComplex && + (std::abs(SuperTwist[0]) >= 1e-6 || std::abs(SuperTwist[1]) >= 1e-6 || std::abs(SuperTwist[2]) >= 1e-6)) + { + std::string setname("This Wavefunction is Complex and you are using the real version " + "of QMCPACK. " + "Please re-run this job with the Complex build of QMCPACK."); + APP_ABORT(setname.c_str()); + } + + std::array name; + int name_len{0}; + bool PBC = false; + hin.read(PBC, "/PBC/PBC"); + if (MultiDet && PBC) + name_len = std::snprintf(name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_unsorted_", setVal); + else + name_len = std::snprintf(name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_", setVal); + if (name_len < 0) + throw std::runtime_error("Error generating name"); + + readRealMatrixFromH5(hin, std::string(name.data(), name_len), Creal); } /// Periodic Image Phase Factors computation to be determined -template -void -LCAOrbitalBuilderT::EvalPeriodicImagePhaseFactors( - PosType SuperTwist, std::vector& LocPeriodicImagePhaseFactors) +template +void LCAOrbitalBuilderT::EvalPeriodicImagePhaseFactors(PosType SuperTwist, + std::vector& LocPeriodicImagePhaseFactors) { - const int NbImages = - (PBCImages[0] + 1) * (PBCImages[1] + 1) * (PBCImages[2] + 1); - LocPeriodicImagePhaseFactors.resize(NbImages); - for (size_t i = 0; i < NbImages; i++) - LocPeriodicImagePhaseFactors[i] = 1.0; + 
const int NbImages = (PBCImages[0] + 1) * (PBCImages[1] + 1) * (PBCImages[2] + 1); + LocPeriodicImagePhaseFactors.resize(NbImages); + for (size_t i = 0; i < NbImages; i++) + LocPeriodicImagePhaseFactors[i] = 1.0; } -template -void -LCAOrbitalBuilderT::EvalPeriodicImagePhaseFactors(PosType SuperTwist, +template +void LCAOrbitalBuilderT::EvalPeriodicImagePhaseFactors( + PosType SuperTwist, std::vector>& LocPeriodicImagePhaseFactors) { - // Allow computation to continue with no HDF file if the system has open - // boundary conditions. The complex build is usually only used with open BC - // for testing. - bool usesOpenBC = - PBCImages[0] == 0 && PBCImages[1] == 0 && PBCImages[2] == 0; - - /// Exp(ik.g) where i is imaginary, k is the supertwist and g is the - /// translation vector PBCImage. - if (h5_path != "" && !usesOpenBC) { - hdf_archive hin(this->myComm); - if (this->myComm->rank() == 0) { - if (!hin.open(h5_path, H5F_ACC_RDONLY)) - APP_ABORT("Could not open H5 file"); - - hin.push("Cell", false); - - hin.read(Lattice, "LatticeVectors"); - hin.close(); - } - for (int i = 0; i < 3; i++) - for (int j = 0; j < 3; j++) - this->myComm->bcast(Lattice(i, j)); - } - else if (!usesOpenBC) { - APP_ABORT("Attempting to run PBC LCAO with no HDF5 support. Behaviour " - "is unknown. Safer to exit"); - } + // Allow computation to continue with no HDF file if the system has open + // boundary conditions. The complex build is usually only used with open BC + // for testing. + bool usesOpenBC = PBCImages[0] == 0 && PBCImages[1] == 0 && PBCImages[2] == 0; + + /// Exp(ik.g) where i is imaginary, k is the supertwist and g is the + /// translation vector PBCImage. 
+ if (h5_path != "" && !usesOpenBC) + { + hdf_archive hin(this->myComm); + if (this->myComm->rank() == 0) + { + if (!hin.open(h5_path, H5F_ACC_RDONLY)) + APP_ABORT("Could not open H5 file"); - int phase_idx = 0; - int TransX, TransY, TransZ; - RealType phase; + hin.push("Cell", false); - for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X + hin.read(Lattice, "LatticeVectors"); + hin.close(); + } + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + this->myComm->bcast(Lattice(i, j)); + } + else if (!usesOpenBC) + { + APP_ABORT("Attempting to run PBC LCAO with no HDF5 support. Behaviour " + "is unknown. Safer to exit"); + } + + int phase_idx = 0; + int TransX, TransY, TransZ; + RealType phase; + + for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X + { + TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); + for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y { - TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); - for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y - { - TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); - for (int k = 0; k <= PBCImages[2]; k++) // loop Translation over Z - { - TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); - RealType s, c; - PosType Val; - Val[0] = TransX * Lattice(0, 0) + TransY * Lattice(1, 0) + - TransZ * Lattice(2, 0); - Val[1] = TransX * Lattice(0, 1) + TransY * Lattice(1, 1) + - TransZ * Lattice(2, 1); - Val[2] = TransX * Lattice(0, 2) + TransY * Lattice(1, 2) + - TransZ * Lattice(2, 2); - - phase = dot(SuperTwist, Val); - qmcplusplus::sincos(phase, &s, &c); - - LocPeriodicImagePhaseFactors.emplace_back(c, s); - } - } + TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); + for (int k = 0; k <= PBCImages[2]; k++) // loop Translation over Z + { + TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); + RealType s, c; + PosType Val; + Val[0] = TransX * Lattice(0, 0) + TransY * Lattice(1, 0) + TransZ * Lattice(2, 0); + Val[1] = TransX * Lattice(0, 1) + TransY * Lattice(1, 1) + TransZ * Lattice(2, 1); + 
Val[2] = TransX * Lattice(0, 2) + TransY * Lattice(1, 2) + TransZ * Lattice(2, 2); + + phase = dot(SuperTwist, Val); + qmcplusplus::sincos(phase, &s, &c); + + LocPeriodicImagePhaseFactors.emplace_back(c, s); + } } + } } #ifndef QMC_COMPLEX diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h index 5cff3a5612a..a4b309ec395 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h @@ -32,113 +32,97 @@ namespace qmcplusplus * Reimplement MolecularSPOSetBuilder * - support both CartesianTensor and SphericalTensor */ -template +template class LCAOrbitalBuilderT : public SPOSetBuilderT { public: - using BasisSet_t = typename LCAOrbitalSetT::basis_type; - using RealType = typename LCAOrbitalSetT::RealType; - using ValueType = typename LCAOrbitalSetT::ValueType; - using PosType = typename LCAOrbitalSetT::PosType; + using BasisSet_t = typename LCAOrbitalSetT::basis_type; + using RealType = typename LCAOrbitalSetT::RealType; + using ValueType = typename LCAOrbitalSetT::ValueType; + using PosType = typename LCAOrbitalSetT::PosType; - /** constructor + /** constructor * \param els reference to the electrons * \param ions reference to the ions */ - LCAOrbitalBuilderT(ParticleSetT& els, ParticleSetT& ions, - Communicate* comm, xmlNodePtr cur); - ~LCAOrbitalBuilderT() override; - std::unique_ptr> - createSPOSetFromXML(xmlNodePtr cur) override; + LCAOrbitalBuilderT(ParticleSetT& els, ParticleSetT& ions, Communicate* comm, xmlNodePtr cur); + ~LCAOrbitalBuilderT() override; + std::unique_ptr> createSPOSetFromXML(xmlNodePtr cur) override; protected: - /// target ParticleSet - ParticleSetT& targetPtcl; - /// source ParticleSet - ParticleSetT& sourcePtcl; - /// localized basis set map - std::map> basisset_map_; - /// if true, add cusp correction to orbitals - bool cuspCorr; - /// Path to HDF5 Wavefunction - std::string h5_path; - /// Number of periodic Images for Orbital 
evaluation - TinyVector PBCImages; - /// Coordinates Super Twist - PosType SuperTwist; - /// Periodic Image Phase Factors. Correspond to the phase from the - /// PBCImages. Computed only once. - std::vector PeriodicImagePhaseFactors; - /// Store Lattice parameters from HDF5 to use in PeriodicImagePhaseFactors - Tensor Lattice; + /// target ParticleSet + ParticleSetT& targetPtcl; + /// source ParticleSet + ParticleSetT& sourcePtcl; + /// localized basis set map + std::map> basisset_map_; + /// if true, add cusp correction to orbitals + bool cuspCorr; + /// Path to HDF5 Wavefunction + std::string h5_path; + /// Number of periodic Images for Orbital evaluation + TinyVector PBCImages; + /// Coordinates Super Twist + PosType SuperTwist; + /// Periodic Image Phase Factors. Correspond to the phase from the + /// PBCImages. Computed only once. + std::vector PeriodicImagePhaseFactors; + /// Store Lattice parameters from HDF5 to use in PeriodicImagePhaseFactors + Tensor Lattice; - /// Enable cusp correction - bool doCuspCorrection; + /// Enable cusp correction + bool doCuspCorrection; - /** create basis set + /** create basis set * * Use ao_traits to match (ROT)x(SH) combo */ - template - BasisSet_t* - createBasisSet(xmlNodePtr cur); - template - BasisSet_t* - createBasisSetH5(); + template + BasisSet_t* createBasisSet(xmlNodePtr cur); + template + BasisSet_t* createBasisSetH5(); - // The following items were previously in SPOSet - /// occupation number - Vector Occ; - bool - loadMO(LCAOrbitalSetT& spo, xmlNodePtr cur); - bool - putOccupation(LCAOrbitalSetT& spo, xmlNodePtr occ_ptr); - bool - putFromXML(LCAOrbitalSetT& spo, xmlNodePtr coeff_ptr); - bool - putFromH5(LCAOrbitalSetT& spo, xmlNodePtr coeff_ptr); - bool - putPBCFromH5(LCAOrbitalSetT& spo, xmlNodePtr coeff_ptr); - // the dimensions of Ctemp are determined by the dataset on file - void - LoadFullCoefsFromH5(hdf_archive& hin, int setVal, PosType& SuperTwist, - Matrix>& Ctemp, bool MultiDet); - // the dimensions of 
Creal are determined by the dataset on file - void - LoadFullCoefsFromH5(hdf_archive& hin, int setVal, PosType& SuperTwist, - Matrix& Creal, bool Multidet); - void - EvalPeriodicImagePhaseFactors(PosType SuperTwist, - std::vector& LocPeriodicImagePhaseFactors); - void - EvalPeriodicImagePhaseFactors(PosType SuperTwist, - std::vector>& LocPeriodicImagePhaseFactors); - /** read matrix from h5 file + // The following items were previously in SPOSet + /// occupation number + Vector Occ; + bool loadMO(LCAOrbitalSetT& spo, xmlNodePtr cur); + bool putOccupation(LCAOrbitalSetT& spo, xmlNodePtr occ_ptr); + bool putFromXML(LCAOrbitalSetT& spo, xmlNodePtr coeff_ptr); + bool putFromH5(LCAOrbitalSetT& spo, xmlNodePtr coeff_ptr); + bool putPBCFromH5(LCAOrbitalSetT& spo, xmlNodePtr coeff_ptr); + // the dimensions of Ctemp are determined by the dataset on file + void LoadFullCoefsFromH5(hdf_archive& hin, + int setVal, + PosType& SuperTwist, + Matrix>& Ctemp, + bool MultiDet); + // the dimensions of Creal are determined by the dataset on file + void LoadFullCoefsFromH5(hdf_archive& hin, int setVal, PosType& SuperTwist, Matrix& Creal, bool Multidet); + void EvalPeriodicImagePhaseFactors(PosType SuperTwist, std::vector& LocPeriodicImagePhaseFactors); + void EvalPeriodicImagePhaseFactors(PosType SuperTwist, + std::vector>& LocPeriodicImagePhaseFactors); + /** read matrix from h5 file * \param[in] hin: hdf5 arhive to be read from * \param setname: where to read from in hdf5 archive * \param[out] Creal: matrix read from h5 * * added in header to allow use from derived class LCAOSpinorBuilder as well */ - void - readRealMatrixFromH5(hdf_archive& hin, const std::string& setname, - Matrix& Creal) const; + void readRealMatrixFromH5(hdf_archive& hin, const std::string& setname, Matrix& Creal) const; private: - /// enable cusp correction - std::unique_ptr> - createWithCuspCorrection(xmlNodePtr cur, const std::string& spo_name, - std::string cusp_file, std::unique_ptr&& myBasisSet); - /// load 
a basis set from XML input - std::unique_ptr - loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent); - /// load a basis set from h5 file - std::unique_ptr - loadBasisSetFromH5(xmlNodePtr parent); - /// determine radial orbital type based on "keyword" and "transform" - /// attributes - int - determineRadialOrbType(xmlNodePtr cur) const; + /// enable cusp correction + std::unique_ptr> createWithCuspCorrection(xmlNodePtr cur, + const std::string& spo_name, + std::string cusp_file, + std::unique_ptr&& myBasisSet); + /// load a basis set from XML input + std::unique_ptr loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent); + /// load a basis set from h5 file + std::unique_ptr loadBasisSetFromH5(xmlNodePtr parent); + /// determine radial orbital type based on "keyword" and "transform" + /// attributes + int determineRadialOrbType(xmlNodePtr cur) const; }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp index 81f6b64da41..4da67b60332 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp @@ -18,941 +18,972 @@ namespace qmcplusplus { -template +template struct LCAOrbitalSetT::LCAOMultiWalkerMem : public Resource { - LCAOMultiWalkerMem() : Resource("LCAOrbitalSetT") - { - } - LCAOMultiWalkerMem(const LCAOMultiWalkerMem&) : LCAOMultiWalkerMem() - { - } + LCAOMultiWalkerMem() : Resource("LCAOrbitalSetT") {} + LCAOMultiWalkerMem(const LCAOMultiWalkerMem&) : LCAOMultiWalkerMem() {} - std::unique_ptr - makeClone() const override - { - return std::make_unique(*this); - } + std::unique_ptr makeClone() const override { return std::make_unique(*this); } - OffloadMWVGLArray phi_vgl_v; // [5][NW][NumMO] - OffloadMWVGLArray basis_mw; // [5][NW][NumAO] - OffloadMWVArray phi_v; // [NW][NumMO] - OffloadMWVArray basis_v_mw; // [NW][NumMO] + OffloadMWVGLArray phi_vgl_v; // [5][NW][NumMO] + OffloadMWVGLArray basis_mw; // [5][NW][NumAO] + 
OffloadMWVArray phi_v; // [NW][NumMO] + OffloadMWVArray basis_v_mw; // [NW][NumMO] }; -template -LCAOrbitalSetT::LCAOrbitalSetT( - const std::string& my_name, std::unique_ptr&& bs) : - SPOSetT(my_name), - BasisSetSize(bs ? bs->getBasisSetSize() : 0), - Identity(true), - basis_timer_(createGlobalTimer("LCAOrbitalSetT::Basis", timer_level_fine)), - mo_timer_(createGlobalTimer("LCAOrbitalSetT::MO", timer_level_fine)) -{ - if (!bs) - throw std::runtime_error( - "LCAOrbitalSetT cannot take nullptr as its basis set!"); - myBasisSet = std::move(bs); - Temp.resize(BasisSetSize); - Temph.resize(BasisSetSize); - Tempgh.resize(BasisSetSize); - this->OrbitalSetSize = BasisSetSize; - LCAOrbitalSetT::checkObject(); -} - -template -LCAOrbitalSetT::LCAOrbitalSetT(const LCAOrbitalSetT& in) : - SPOSetT(in), - myBasisSet(in.myBasisSet->makeClone()), - C(in.C), - BasisSetSize(in.BasisSetSize), - C_copy(in.C_copy), - Identity(in.Identity), - basis_timer_(in.basis_timer_), - mo_timer_(in.mo_timer_) -{ - Temp.resize(BasisSetSize); - Temph.resize(BasisSetSize); - Tempgh.resize(BasisSetSize); - if (!in.Identity) { - Tempv.resize(this->OrbitalSetSize); - Temphv.resize(this->OrbitalSetSize); - Tempghv.resize(this->OrbitalSetSize); - } - LCAOrbitalSetT::checkObject(); +template +LCAOrbitalSetT::LCAOrbitalSetT(const std::string& my_name, std::unique_ptr&& bs) + : SPOSetT(my_name), + BasisSetSize(bs ? 
bs->getBasisSetSize() : 0), + Identity(true), + basis_timer_(createGlobalTimer("LCAOrbitalSetT::Basis", timer_level_fine)), + mo_timer_(createGlobalTimer("LCAOrbitalSetT::MO", timer_level_fine)) +{ + if (!bs) + throw std::runtime_error("LCAOrbitalSetT cannot take nullptr as its basis set!"); + myBasisSet = std::move(bs); + Temp.resize(BasisSetSize); + Temph.resize(BasisSetSize); + Tempgh.resize(BasisSetSize); + this->OrbitalSetSize = BasisSetSize; + LCAOrbitalSetT::checkObject(); +} + +template +LCAOrbitalSetT::LCAOrbitalSetT(const LCAOrbitalSetT& in) + : SPOSetT(in), + myBasisSet(in.myBasisSet->makeClone()), + C(in.C), + BasisSetSize(in.BasisSetSize), + C_copy(in.C_copy), + Identity(in.Identity), + basis_timer_(in.basis_timer_), + mo_timer_(in.mo_timer_) +{ + Temp.resize(BasisSetSize); + Temph.resize(BasisSetSize); + Tempgh.resize(BasisSetSize); + if (!in.Identity) + { + Tempv.resize(this->OrbitalSetSize); + Temphv.resize(this->OrbitalSetSize); + Tempghv.resize(this->OrbitalSetSize); + } + LCAOrbitalSetT::checkObject(); } -template -void -LCAOrbitalSetT::setOrbitalSetSize(int norbs) +template +void LCAOrbitalSetT::setOrbitalSetSize(int norbs) { - if (C) - throw std::runtime_error("LCAOrbitalSetT::setOrbitalSetSize cannot " - "reset existing MO coefficients"); + if (C) + throw std::runtime_error("LCAOrbitalSetT::setOrbitalSetSize cannot " + "reset existing MO coefficients"); - Identity = false; - this->OrbitalSetSize = norbs; - C = std::make_shared(this->OrbitalSetSize, BasisSetSize); - Tempv.resize(this->OrbitalSetSize); - Temphv.resize(this->OrbitalSetSize); - Tempghv.resize(this->OrbitalSetSize); - LCAOrbitalSetT::checkObject(); + Identity = false; + this->OrbitalSetSize = norbs; + C = std::make_shared(this->OrbitalSetSize, BasisSetSize); + Tempv.resize(this->OrbitalSetSize); + Temphv.resize(this->OrbitalSetSize); + Tempghv.resize(this->OrbitalSetSize); + LCAOrbitalSetT::checkObject(); } -template -void -LCAOrbitalSetT::checkObject() const +template +void 
LCAOrbitalSetT::checkObject() const { - if (Identity) { - if (this->OrbitalSetSize != BasisSetSize) - throw std::runtime_error( - "LCAOrbitalSetT::checkObject OrbitalSetSize and BasisSetSize " - "must be equal if Identity = true!"); - if (C) - throw std::runtime_error("LCAOrbitalSetT::checkObject C should be " - "nullptr if Identity = true!"); - } - else { - if (!C) - throw std::runtime_error("LCAOrbitalSetT::checkObject C should not " - "be nullptr if Identity = false!"); - if (this->OrbitalSetSize != C->rows()) - throw std::runtime_error("LCAOrbitalSetT::checkObject C rows " - "doesn't match OrbitalSetSize."); - if (BasisSetSize != C->cols()) - throw std::runtime_error("LCAOrbitalSetT::checkObject C columns " - "doesn't match BasisSetSize."); - } + if (Identity) + { + if (this->OrbitalSetSize != BasisSetSize) + throw std::runtime_error("LCAOrbitalSetT::checkObject OrbitalSetSize and BasisSetSize " + "must be equal if Identity = true!"); + if (C) + throw std::runtime_error("LCAOrbitalSetT::checkObject C should be " + "nullptr if Identity = true!"); + } + else + { + if (!C) + throw std::runtime_error("LCAOrbitalSetT::checkObject C should not " + "be nullptr if Identity = false!"); + if (this->OrbitalSetSize != C->rows()) + throw std::runtime_error("LCAOrbitalSetT::checkObject C rows " + "doesn't match OrbitalSetSize."); + if (BasisSetSize != C->cols()) + throw std::runtime_error("LCAOrbitalSetT::checkObject C columns " + "doesn't match BasisSetSize."); + } } -template -void -LCAOrbitalSetT::createResource(ResourceCollection& collection) const +template +void LCAOrbitalSetT::createResource(ResourceCollection& collection) const { - myBasisSet->createResource(collection); - auto resource_index = - collection.addResource(std::make_unique()); + myBasisSet->createResource(collection); + auto resource_index = collection.addResource(std::make_unique()); } -template -void -LCAOrbitalSetT::acquireResource(ResourceCollection& collection, - const RefVectorWithLeader>& 
spo_list) const +template +void LCAOrbitalSetT::acquireResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const { - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.template getCastedLeader>(); - spo_leader.myBasisSet->acquireResource(collection, extractBasisRefList(spo_list)); - spo_leader.mw_mem_handle_ = collection.lendResource(); + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + spo_leader.myBasisSet->acquireResource(collection, extractBasisRefList(spo_list)); + spo_leader.mw_mem_handle_ = collection.lendResource(); } -template -void -LCAOrbitalSetT::releaseResource(ResourceCollection& collection, - const RefVectorWithLeader>& spo_list) const +template +void LCAOrbitalSetT::releaseResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const { - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.template getCastedLeader>(); - spo_leader.myBasisSet->releaseResource(collection, extractBasisRefList(spo_list)); - collection.takebackResource(spo_leader.mw_mem_handle_); + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + spo_leader.myBasisSet->releaseResource(collection, extractBasisRefList(spo_list)); + collection.takebackResource(spo_leader.mw_mem_handle_); } -template +template RefVectorWithLeader::basis_type> LCAOrbitalSetT::extractBasisRefList( const RefVectorWithLeader>& spo_list) const { - RefVectorWithLeader basis_list(*spo_list.template getCastedLeader>().myBasisSet); - basis_list.reserve(spo_list.size()); - for (size_t iw = 0; iw < spo_list.size(); iw++) - basis_list.push_back(*spo_list.template getCastedElement>(iw).myBasisSet); - return basis_list; + RefVectorWithLeader basis_list(*spo_list.template getCastedLeader>().myBasisSet); + basis_list.reserve(spo_list.size()); + for (size_t iw = 0; iw < spo_list.size(); iw++) + basis_list.push_back(*spo_list.template 
getCastedElement>(iw).myBasisSet); + return basis_list; } -template -std::unique_ptr> -LCAOrbitalSetT::makeClone() const +template +std::unique_ptr> LCAOrbitalSetT::makeClone() const { - return std::make_unique>(*this); + return std::make_unique>(*this); } -template -void -LCAOrbitalSetT::evaluateValue( - const ParticleSetT& P, int iat, ValueVector& psi) +template +void LCAOrbitalSetT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) { - if (Identity) { // PAY ATTENTION TO COMPLEX - myBasisSet->evaluateV(P, iat, psi.data()); - } - else { - Vector vTemp(Temp.data(0), BasisSetSize); - this->myBasisSet->evaluateV(P, iat, vTemp.data()); - assert(psi.size() <= this->OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); - MatrixOperators::product(C_partial_view, vTemp, psi); - } + if (Identity) + { // PAY ATTENTION TO COMPLEX + myBasisSet->evaluateV(P, iat, psi.data()); + } + else + { + Vector vTemp(Temp.data(0), BasisSetSize); + this->myBasisSet->evaluateV(P, iat, vTemp.data()); + assert(psi.size() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); + MatrixOperators::product(C_partial_view, vTemp, psi); + } } /** Find a better place for other user classes, Matrix should be padded as well */ -template -static void -Product_ABt(const VectorSoaContainer& A, const Matrix& B, - VectorSoaContainer& C) -{ - constexpr char transa = 't'; - constexpr char transb = 'n'; - constexpr T zone(1); - constexpr T zero(0); - BLAS::gemm(transa, transb, B.rows(), D, B.cols(), zone, B.data(), B.cols(), - A.data(), A.capacity(), zero, C.data(), C.capacity()); -} - -template -void -LCAOrbitalSetT::evaluate_vgl_impl(const vgl_type& temp, ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi) const -{ - const size_t output_size = psi.size(); - std::copy_n(temp.data(0), output_size, psi.data()); - const T* restrict gx = temp.data(1); - const T* restrict gy = temp.data(2); - const T* restrict gz = 
temp.data(3); - for (size_t j = 0; j < output_size; j++) { - dpsi[j][0] = gx[j]; - dpsi[j][1] = gy[j]; - dpsi[j][2] = gz[j]; - } - std::copy_n(temp.data(4), output_size, d2psi.data()); -} - -template -void -LCAOrbitalSetT::evaluate_vgh_impl(const vgh_type& temp, ValueVector& psi, - GradVector& dpsi, HessVector& d2psi) const -{ - const size_t output_size = psi.size(); - std::copy_n(temp.data(0), output_size, psi.data()); - const T* restrict gx = temp.data(1); - const T* restrict gy = temp.data(2); - const T* restrict gz = temp.data(3); - const T* restrict hxx = temp.data(4); - const T* restrict hxy = temp.data(5); - const T* restrict hxz = temp.data(6); - const T* restrict hyy = temp.data(7); - const T* restrict hyz = temp.data(8); - const T* restrict hzz = temp.data(9); - - for (size_t j = 0; j < output_size; j++) { - dpsi[j][0] = gx[j]; - dpsi[j][1] = gy[j]; - dpsi[j][2] = gz[j]; - - d2psi[j](0, 0) = hxx[j]; - d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j]; - d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j]; - d2psi[j](1, 1) = hyy[j]; - d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j]; - d2psi[j](2, 2) = hzz[j]; - } -} - -template -void -LCAOrbitalSetT::evaluate_vghgh_impl(const vghgh_type& temp, int i, - ValueMatrix& psi, GradMatrix& dpsi, HessMatrix& d2psi, - GGGMatrix& dghpsi) const -{ - const size_t output_size = psi.cols(); - std::copy_n(temp.data(0), output_size, psi[i]); - const T* restrict gx = temp.data(1); - const T* restrict gy = temp.data(2); - const T* restrict gz = temp.data(3); - const T* restrict hxx = temp.data(4); - const T* restrict hxy = temp.data(5); - const T* restrict hxz = temp.data(6); - const T* restrict hyy = temp.data(7); - const T* restrict hyz = temp.data(8); - const T* restrict hzz = temp.data(9); - const T* restrict gh_xxx = temp.data(10); - const T* restrict gh_xxy = temp.data(11); - const T* restrict gh_xxz = temp.data(12); - const T* restrict gh_xyy = temp.data(13); - const T* restrict gh_xyz = temp.data(14); - const T* restrict gh_xzz = 
temp.data(15); - const T* restrict gh_yyy = temp.data(16); - const T* restrict gh_yyz = temp.data(17); - const T* restrict gh_yzz = temp.data(18); - const T* restrict gh_zzz = temp.data(19); - - for (size_t j = 0; j < output_size; j++) { - dpsi[i][j][0] = gx[j]; - dpsi[i][j][1] = gy[j]; - dpsi[i][j][2] = gz[j]; - - d2psi[i][j](0, 0) = hxx[j]; - d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j]; - d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j]; - d2psi[i][j](1, 1) = hyy[j]; - d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j]; - d2psi[i][j](2, 2) = hzz[j]; - - dghpsi[i][j][0](0, 0) = gh_xxx[j]; // x|xx - dghpsi[i][j][0](0, 1) = gh_xxy[j]; // x|xy - dghpsi[i][j][0](0, 2) = gh_xxz[j]; // x|xz - dghpsi[i][j][0](1, 0) = gh_xxy[j]; // x|yx = xxy - dghpsi[i][j][0](1, 1) = gh_xyy[j]; // x|yy - dghpsi[i][j][0](1, 2) = gh_xyz[j]; // x|yz - dghpsi[i][j][0](2, 0) = gh_xxz[j]; // x|zx = xxz - dghpsi[i][j][0](2, 1) = gh_xyz[j]; // x|zy = xyz - dghpsi[i][j][0](2, 2) = gh_xzz[j]; // x|zz - - dghpsi[i][j][1](0, 0) = gh_xxy[j]; // y|xx = xxy - dghpsi[i][j][1](0, 1) = gh_xyy[j]; // y|xy = xyy - dghpsi[i][j][1](0, 2) = gh_xyz[j]; // y|xz = xyz - dghpsi[i][j][1](1, 0) = gh_xyy[j]; // y|yx = xyy - dghpsi[i][j][1](1, 1) = gh_yyy[j]; // y|yy - dghpsi[i][j][1](1, 2) = gh_yyz[j]; // y|yz - dghpsi[i][j][1](2, 0) = gh_xyz[j]; // y|zx = xyz - dghpsi[i][j][1](2, 1) = gh_yyz[j]; // y|zy = yyz - dghpsi[i][j][1](2, 2) = gh_yzz[j]; // y|zz - - dghpsi[i][j][2](0, 0) = gh_xxz[j]; // z|xx = xxz - dghpsi[i][j][2](0, 1) = gh_xyz[j]; // z|xy = xyz - dghpsi[i][j][2](0, 2) = gh_xzz[j]; // z|xz = xzz - dghpsi[i][j][2](1, 0) = gh_xyz[j]; // z|yx = xyz - dghpsi[i][j][2](1, 1) = gh_yyz[j]; // z|yy = yyz - dghpsi[i][j][2](1, 2) = gh_yzz[j]; // z|yz = yzz - dghpsi[i][j][2](2, 0) = gh_xzz[j]; // z|zx = xzz - dghpsi[i][j][2](2, 1) = gh_yzz[j]; // z|zy = yzz - dghpsi[i][j][2](2, 2) = gh_zzz[j]; // z|zz - } -} - -template -void -LCAOrbitalSetT::evaluate_vghgh_impl(const vghgh_type& temp, ValueVector& psi, - GradVector& 
dpsi, HessVector& d2psi, GGGVector& dghpsi) const -{ - const size_t output_size = psi.size(); - std::copy_n(temp.data(0), output_size, psi.data()); - const T* restrict gx = temp.data(1); - const T* restrict gy = temp.data(2); - const T* restrict gz = temp.data(3); - const T* restrict hxx = temp.data(4); - const T* restrict hxy = temp.data(5); - const T* restrict hxz = temp.data(6); - const T* restrict hyy = temp.data(7); - const T* restrict hyz = temp.data(8); - const T* restrict hzz = temp.data(9); - const T* restrict gh_xxx = temp.data(10); - const T* restrict gh_xxy = temp.data(11); - const T* restrict gh_xxz = temp.data(12); - const T* restrict gh_xyy = temp.data(13); - const T* restrict gh_xyz = temp.data(14); - const T* restrict gh_xzz = temp.data(15); - const T* restrict gh_yyy = temp.data(16); - const T* restrict gh_yyz = temp.data(17); - const T* restrict gh_yzz = temp.data(18); - const T* restrict gh_zzz = temp.data(19); - - for (size_t j = 0; j < output_size; j++) { - dpsi[j][0] = gx[j]; - dpsi[j][1] = gy[j]; - dpsi[j][2] = gz[j]; - - d2psi[j](0, 0) = hxx[j]; - d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j]; - d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j]; - d2psi[j](1, 1) = hyy[j]; - d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j]; - d2psi[j](2, 2) = hzz[j]; - - dghpsi[j][0](0, 0) = gh_xxx[j]; // x|xx - dghpsi[j][0](0, 1) = gh_xxy[j]; // x|xy - dghpsi[j][0](0, 2) = gh_xxz[j]; // x|xz - dghpsi[j][0](1, 0) = gh_xxy[j]; // x|yx = xxy - dghpsi[j][0](1, 1) = gh_xyy[j]; // x|yy - dghpsi[j][0](1, 2) = gh_xyz[j]; // x|yz - dghpsi[j][0](2, 0) = gh_xxz[j]; // x|zx = xxz - dghpsi[j][0](2, 1) = gh_xyz[j]; // x|zy = xyz - dghpsi[j][0](2, 2) = gh_xzz[j]; // x|zz - - dghpsi[j][1](0, 0) = gh_xxy[j]; // y|xx = xxy - dghpsi[j][1](0, 1) = gh_xyy[j]; // y|xy = xyy - dghpsi[j][1](0, 2) = gh_xyz[j]; // y|xz = xyz - dghpsi[j][1](1, 0) = gh_xyy[j]; // y|yx = xyy - dghpsi[j][1](1, 1) = gh_yyy[j]; // y|yy - dghpsi[j][1](1, 2) = gh_yyz[j]; // y|yz - dghpsi[j][1](2, 0) = gh_xyz[j]; // y|zx = xyz - 
dghpsi[j][1](2, 1) = gh_xyy[j]; // y|xy = xyy - dghpsi[j][1](2, 2) = gh_yzz[j]; // y|zz - - dghpsi[j][2](0, 0) = gh_xzz[j]; // z|xx = xzz - dghpsi[j][2](0, 1) = gh_xyz[j]; // z|xy = xyz - dghpsi[j][2](0, 2) = gh_xzz[j]; // z|xz = xzz - dghpsi[j][2](1, 0) = gh_xyz[j]; // z|yx = xyz - dghpsi[j][2](1, 1) = gh_yyz[j]; // z|yy = yyz - dghpsi[j][2](1, 2) = gh_yzz[j]; // z|yz = yzz - dghpsi[j][2](2, 0) = gh_xzz[j]; // z|zx = xzz - dghpsi[j][2](2, 1) = gh_yzz[j]; // z|zy = yzz - dghpsi[j][2](2, 2) = gh_zzz[j]; // z|zz - } -} - -template -void -LCAOrbitalSetT::evaluate_ionderiv_v_row_impl( - const vgl_type& temp, GradVector& dpsi) const -{ - const size_t output_size = dpsi.size(); - const T* restrict gx = temp.data(1); - const T* restrict gy = temp.data(2); - const T* restrict gz = temp.data(3); - - for (size_t j = 0; j < output_size; j++) { - // As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property - // that - // for an atomic center, the ion gradient is the negative of the - // elecron gradient. Hence minus signs for each of these. 
- dpsi[j][0] = -gx[j]; - dpsi[j][1] = -gy[j]; - dpsi[j][2] = -gz[j]; - } -} - -template -void -LCAOrbitalSetT::evaluateVGL(const ParticleSetT& P, int iat, - ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) -{ - // TAKE CARE OF IDENTITY +template +static void Product_ABt(const VectorSoaContainer& A, const Matrix& B, VectorSoaContainer& C) +{ + constexpr char transa = 't'; + constexpr char transb = 'n'; + constexpr T zone(1); + constexpr T zero(0); + BLAS::gemm(transa, transb, B.rows(), D, B.cols(), zone, B.data(), B.cols(), A.data(), A.capacity(), zero, C.data(), + C.capacity()); +} + +template +void LCAOrbitalSetT::evaluate_vgl_impl(const vgl_type& temp, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) const +{ + const size_t output_size = psi.size(); + std::copy_n(temp.data(0), output_size, psi.data()); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + for (size_t j = 0; j < output_size; j++) + { + dpsi[j][0] = gx[j]; + dpsi[j][1] = gy[j]; + dpsi[j][2] = gz[j]; + } + std::copy_n(temp.data(4), output_size, d2psi.data()); +} + +template +void LCAOrbitalSetT::evaluate_vgh_impl(const vgh_type& temp, + ValueVector& psi, + GradVector& dpsi, + HessVector& d2psi) const +{ + const size_t output_size = psi.size(); + std::copy_n(temp.data(0), output_size, psi.data()); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + const T* restrict hxx = temp.data(4); + const T* restrict hxy = temp.data(5); + const T* restrict hxz = temp.data(6); + const T* restrict hyy = temp.data(7); + const T* restrict hyz = temp.data(8); + const T* restrict hzz = temp.data(9); + + for (size_t j = 0; j < output_size; j++) + { + dpsi[j][0] = gx[j]; + dpsi[j][1] = gy[j]; + dpsi[j][2] = gz[j]; + + d2psi[j](0, 0) = hxx[j]; + d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j]; + d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j]; + d2psi[j](1, 1) = hyy[j]; + d2psi[j](2, 
1) = d2psi[j](1, 2) = hyz[j]; + d2psi[j](2, 2) = hzz[j]; + } +} + +template +void LCAOrbitalSetT::evaluate_vghgh_impl(const vghgh_type& temp, + int i, + ValueMatrix& psi, + GradMatrix& dpsi, + HessMatrix& d2psi, + GGGMatrix& dghpsi) const +{ + const size_t output_size = psi.cols(); + std::copy_n(temp.data(0), output_size, psi[i]); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + const T* restrict hxx = temp.data(4); + const T* restrict hxy = temp.data(5); + const T* restrict hxz = temp.data(6); + const T* restrict hyy = temp.data(7); + const T* restrict hyz = temp.data(8); + const T* restrict hzz = temp.data(9); + const T* restrict gh_xxx = temp.data(10); + const T* restrict gh_xxy = temp.data(11); + const T* restrict gh_xxz = temp.data(12); + const T* restrict gh_xyy = temp.data(13); + const T* restrict gh_xyz = temp.data(14); + const T* restrict gh_xzz = temp.data(15); + const T* restrict gh_yyy = temp.data(16); + const T* restrict gh_yyz = temp.data(17); + const T* restrict gh_yzz = temp.data(18); + const T* restrict gh_zzz = temp.data(19); + + for (size_t j = 0; j < output_size; j++) + { + dpsi[i][j][0] = gx[j]; + dpsi[i][j][1] = gy[j]; + dpsi[i][j][2] = gz[j]; + + d2psi[i][j](0, 0) = hxx[j]; + d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j]; + d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j]; + d2psi[i][j](1, 1) = hyy[j]; + d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j]; + d2psi[i][j](2, 2) = hzz[j]; + + dghpsi[i][j][0](0, 0) = gh_xxx[j]; // x|xx + dghpsi[i][j][0](0, 1) = gh_xxy[j]; // x|xy + dghpsi[i][j][0](0, 2) = gh_xxz[j]; // x|xz + dghpsi[i][j][0](1, 0) = gh_xxy[j]; // x|yx = xxy + dghpsi[i][j][0](1, 1) = gh_xyy[j]; // x|yy + dghpsi[i][j][0](1, 2) = gh_xyz[j]; // x|yz + dghpsi[i][j][0](2, 0) = gh_xxz[j]; // x|zx = xxz + dghpsi[i][j][0](2, 1) = gh_xyz[j]; // x|zy = xyz + dghpsi[i][j][0](2, 2) = gh_xzz[j]; // x|zz + + dghpsi[i][j][1](0, 0) = gh_xxy[j]; // y|xx = xxy + dghpsi[i][j][1](0, 1) = 
gh_xyy[j]; // y|xy = xyy + dghpsi[i][j][1](0, 2) = gh_xyz[j]; // y|xz = xyz + dghpsi[i][j][1](1, 0) = gh_xyy[j]; // y|yx = xyy + dghpsi[i][j][1](1, 1) = gh_yyy[j]; // y|yy + dghpsi[i][j][1](1, 2) = gh_yyz[j]; // y|yz + dghpsi[i][j][1](2, 0) = gh_xyz[j]; // y|zx = xyz + dghpsi[i][j][1](2, 1) = gh_yyz[j]; // y|zy = yyz + dghpsi[i][j][1](2, 2) = gh_yzz[j]; // y|zz + + dghpsi[i][j][2](0, 0) = gh_xxz[j]; // z|xx = xxz + dghpsi[i][j][2](0, 1) = gh_xyz[j]; // z|xy = xyz + dghpsi[i][j][2](0, 2) = gh_xzz[j]; // z|xz = xzz + dghpsi[i][j][2](1, 0) = gh_xyz[j]; // z|yx = xyz + dghpsi[i][j][2](1, 1) = gh_yyz[j]; // z|yy = yyz + dghpsi[i][j][2](1, 2) = gh_yzz[j]; // z|yz = yzz + dghpsi[i][j][2](2, 0) = gh_xzz[j]; // z|zx = xzz + dghpsi[i][j][2](2, 1) = gh_yzz[j]; // z|zy = yzz + dghpsi[i][j][2](2, 2) = gh_zzz[j]; // z|zz + } +} + +template +void LCAOrbitalSetT::evaluate_vghgh_impl(const vghgh_type& temp, + ValueVector& psi, + GradVector& dpsi, + HessVector& d2psi, + GGGVector& dghpsi) const +{ + const size_t output_size = psi.size(); + std::copy_n(temp.data(0), output_size, psi.data()); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + const T* restrict hxx = temp.data(4); + const T* restrict hxy = temp.data(5); + const T* restrict hxz = temp.data(6); + const T* restrict hyy = temp.data(7); + const T* restrict hyz = temp.data(8); + const T* restrict hzz = temp.data(9); + const T* restrict gh_xxx = temp.data(10); + const T* restrict gh_xxy = temp.data(11); + const T* restrict gh_xxz = temp.data(12); + const T* restrict gh_xyy = temp.data(13); + const T* restrict gh_xyz = temp.data(14); + const T* restrict gh_xzz = temp.data(15); + const T* restrict gh_yyy = temp.data(16); + const T* restrict gh_yyz = temp.data(17); + const T* restrict gh_yzz = temp.data(18); + const T* restrict gh_zzz = temp.data(19); + + for (size_t j = 0; j < output_size; j++) + { + dpsi[j][0] = gx[j]; + dpsi[j][1] = gy[j]; + dpsi[j][2] = 
gz[j]; + + d2psi[j](0, 0) = hxx[j]; + d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j]; + d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j]; + d2psi[j](1, 1) = hyy[j]; + d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j]; + d2psi[j](2, 2) = hzz[j]; + + dghpsi[j][0](0, 0) = gh_xxx[j]; // x|xx + dghpsi[j][0](0, 1) = gh_xxy[j]; // x|xy + dghpsi[j][0](0, 2) = gh_xxz[j]; // x|xz + dghpsi[j][0](1, 0) = gh_xxy[j]; // x|yx = xxy + dghpsi[j][0](1, 1) = gh_xyy[j]; // x|yy + dghpsi[j][0](1, 2) = gh_xyz[j]; // x|yz + dghpsi[j][0](2, 0) = gh_xxz[j]; // x|zx = xxz + dghpsi[j][0](2, 1) = gh_xyz[j]; // x|zy = xyz + dghpsi[j][0](2, 2) = gh_xzz[j]; // x|zz + + dghpsi[j][1](0, 0) = gh_xxy[j]; // y|xx = xxy + dghpsi[j][1](0, 1) = gh_xyy[j]; // y|xy = xyy + dghpsi[j][1](0, 2) = gh_xyz[j]; // y|xz = xyz + dghpsi[j][1](1, 0) = gh_xyy[j]; // y|yx = xyy + dghpsi[j][1](1, 1) = gh_yyy[j]; // y|yy + dghpsi[j][1](1, 2) = gh_yyz[j]; // y|yz + dghpsi[j][1](2, 0) = gh_xyz[j]; // y|zx = xyz + dghpsi[j][1](2, 1) = gh_yyz[j]; // y|zy = yyz + dghpsi[j][1](2, 2) = gh_yzz[j]; // y|zz + + dghpsi[j][2](0, 0) = gh_xxz[j]; // z|xx = xxz + dghpsi[j][2](0, 1) = gh_xyz[j]; // z|xy = xyz + dghpsi[j][2](0, 2) = gh_xzz[j]; // z|xz = xzz + dghpsi[j][2](1, 0) = gh_xyz[j]; // z|yx = xyz + dghpsi[j][2](1, 1) = gh_yyz[j]; // z|yy = yyz + dghpsi[j][2](1, 2) = gh_yzz[j]; // z|yz = yzz + dghpsi[j][2](2, 0) = gh_xzz[j]; // z|zx = xzz + dghpsi[j][2](2, 1) = gh_yzz[j]; // z|zy = yzz + dghpsi[j][2](2, 2) = gh_zzz[j]; // z|zz + } +} + +template +void LCAOrbitalSetT::evaluate_ionderiv_v_row_impl(const vgl_type& temp, GradVector& dpsi) const +{ + const size_t output_size = dpsi.size(); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + + for (size_t j = 0; j < output_size; j++) + { + // As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property + // that + // for an atomic center, the ion gradient is the negative of the + // electron gradient. Hence minus signs for each of these. 
+ dpsi[j][0] = -gx[j]; + dpsi[j][1] = -gy[j]; + dpsi[j][2] = -gz[j]; + } +} + +template +void LCAOrbitalSetT::evaluateVGL(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) +{ + // TAKE CARE OF IDENTITY + { + ScopedTimer local(basis_timer_); + myBasisSet->evaluateVGL(P, iat, Temp); + } + + if (Identity) + evaluate_vgl_impl(Temp, psi, dpsi, d2psi); + else + { + assert(psi.size() <= this->OrbitalSetSize); { - ScopedTimer local(basis_timer_); - myBasisSet->evaluateVGL(P, iat, Temp); - } - - if (Identity) - evaluate_vgl_impl(Temp, psi, dpsi, d2psi); - else { - assert(psi.size() <= this->OrbitalSetSize); - { - ScopedTimer local(mo_timer_); - ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); - Product_ABt(Temp, C_partial_view, Tempv); - } - evaluate_vgl_impl(Tempv, psi, dpsi, d2psi); - } -} - -template -void -LCAOrbitalSetT::mw_evaluateVGL( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.template getCastedLeader>(); - auto& phi_vgl_v = spo_leader.mw_mem_handle_.getResource().phi_vgl_v; - - phi_vgl_v.resize(QMCTraits::DIM_VGL, spo_list.size(), this->OrbitalSetSize); - mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v); - - const size_t nw = phi_vgl_v.size(1); - - // TODO: make this cleaner? 
- for (int iw = 0; iw < nw; iw++) { - const size_t output_size = psi_v_list[iw].get().size(); - std::copy_n(phi_vgl_v.data_at(0, iw, 0), output_size, - psi_v_list[iw].get().data()); - std::copy_n(phi_vgl_v.data_at(4, iw, 0), output_size, - d2psi_v_list[iw].get().data()); - // grads are [dim, walker, orb] in phi_vgl_v - // [walker][orb, dim] in dpsi_v_list - for (size_t idim = 0; idim < QMCTraits::DIM; idim++) - BLAS::copy(output_size, phi_vgl_v.data_at(idim + 1, iw, 0), 1, - &dpsi_v_list[iw].get().data()[0][idim], QMCTraits::DIM); - } -} - -template -void -LCAOrbitalSetT::mw_evaluateVGLImplGEMM( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - OffloadMWVGLArray& phi_vgl_v) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.template getCastedLeader>(); - auto& basis_mw = spo_leader.mw_mem_handle_.getResource().basis_mw; - basis_mw.resize(QMCTraits::DIM_VGL, spo_list.size(), BasisSetSize); - + ScopedTimer local(mo_timer_); + ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); + Product_ABt(Temp, C_partial_view, Tempv); + } + evaluate_vgl_impl(Tempv, psi, dpsi, d2psi); + } +} + +template +void LCAOrbitalSetT::mw_evaluateVGL(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const +{ + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& phi_vgl_v = spo_leader.mw_mem_handle_.getResource().phi_vgl_v; + + phi_vgl_v.resize(QMCTraits::DIM_VGL, spo_list.size(), this->OrbitalSetSize); + mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v); + + const size_t nw = phi_vgl_v.size(1); + + // TODO: make this cleaner? 
+ for (int iw = 0; iw < nw; iw++) + { + const size_t output_size = psi_v_list[iw].get().size(); + std::copy_n(phi_vgl_v.data_at(0, iw, 0), output_size, psi_v_list[iw].get().data()); + std::copy_n(phi_vgl_v.data_at(4, iw, 0), output_size, d2psi_v_list[iw].get().data()); + // grads are [dim, walker, orb] in phi_vgl_v + // [walker][orb, dim] in dpsi_v_list + for (size_t idim = 0; idim < QMCTraits::DIM; idim++) + BLAS::copy(output_size, phi_vgl_v.data_at(idim + 1, iw, 0), 1, &dpsi_v_list[iw].get().data()[0][idim], + QMCTraits::DIM); + } +} + +template +void LCAOrbitalSetT::mw_evaluateVGLImplGEMM(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + OffloadMWVGLArray& phi_vgl_v) const +{ + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& basis_mw = spo_leader.mw_mem_handle_.getResource().basis_mw; + basis_mw.resize(QMCTraits::DIM_VGL, spo_list.size(), BasisSetSize); + + { + ScopedTimer local(basis_timer_); + myBasisSet->mw_evaluateVGL(P_list, iat, basis_mw); + } + + if (Identity) + { + // output_size can be smaller than BasisSetSize + const size_t output_size = phi_vgl_v.size(2); + const size_t nw = phi_vgl_v.size(1); + + for (size_t idim = 0; idim < QMCTraits::DIM_VGL; idim++) + for (int iw = 0; iw < nw; iw++) + std::copy_n(basis_mw.data_at(idim, iw, 0), output_size, phi_vgl_v.data_at(idim, iw, 0)); + } + else + { + const size_t requested_orb_size = phi_vgl_v.size(2); + assert(requested_orb_size <= this->OrbitalSetSize); { - ScopedTimer local(basis_timer_); - myBasisSet->mw_evaluateVGL(P_list, iat, basis_mw); - } - - if (Identity) { - // output_size can be smaller than BasisSetSize - const size_t output_size = phi_vgl_v.size(2); - const size_t nw = phi_vgl_v.size(1); - - for (size_t idim = 0; idim < QMCTraits::DIM_VGL; idim++) - for (int iw = 0; iw < nw; iw++) - std::copy_n(basis_mw.data_at(idim, iw, 0), output_size, - phi_vgl_v.data_at(idim, iw, 0)); - } - else { - const size_t 
requested_orb_size = phi_vgl_v.size(2); - assert(requested_orb_size <= this->OrbitalSetSize); - { - ScopedTimer local(mo_timer_); - ValueMatrix C_partial_view( - C->data(), requested_orb_size, BasisSetSize); - // TODO: make class for general blas interface in Platforms - // have instance of that class as member of LCAOrbitalSetT, call - // gemm through that - BLAS::gemm('T', 'N', - requested_orb_size, // MOs - spo_list.size() * QMCTraits::DIM_VGL, // walkers * DIM_VGL - BasisSetSize, // AOs - 1, C_partial_view.data(), BasisSetSize, basis_mw.data(), - BasisSetSize, 0, phi_vgl_v.data(), requested_orb_size); - } + ScopedTimer local(mo_timer_); + ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize); + // TODO: make class for general blas interface in Platforms + // have instance of that class as member of LCAOrbitalSetT, call + // gemm through that + BLAS::gemm('T', 'N', + requested_orb_size, // MOs + spo_list.size() * QMCTraits::DIM_VGL, // walkers * DIM_VGL + BasisSetSize, // AOs + 1, C_partial_view.data(), BasisSetSize, basis_mw.data(), BasisSetSize, 0, phi_vgl_v.data(), + requested_orb_size); + } + } +} + +template +void LCAOrbitalSetT::mw_evaluateValue(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list) const +{ + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& phi_v = spo_leader.mw_mem_handle_.getResource().phi_v; + phi_v.resize(spo_list.size(), this->OrbitalSetSize); + mw_evaluateValueImplGEMM(spo_list, P_list, iat, phi_v); + + const size_t output_size = phi_v.size(1); + const size_t nw = phi_v.size(0); + + for (int iw = 0; iw < nw; iw++) + std::copy_n(phi_v.data_at(iw, 0), output_size, psi_v_list[iw].get().data()); +} + +template +void LCAOrbitalSetT::mw_evaluateValueImplGEMM(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + OffloadMWVArray& phi_v) const +{ + assert(this == 
&spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + const size_t nw = spo_list.size(); + auto& basis_v_mw = spo_leader.mw_mem_handle_.getResource().basis_v_mw; + basis_v_mw.resize(nw, BasisSetSize); + + myBasisSet->mw_evaluateValue(P_list, iat, basis_v_mw); + + if (Identity) + { + std::copy_n(basis_v_mw.data_at(0, 0), this->OrbitalSetSize * nw, phi_v.data_at(0, 0)); + } + else + { + const size_t requested_orb_size = phi_v.size(1); + assert(requested_orb_size <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize); + BLAS::gemm('T', 'N', + requested_orb_size, // MOs + spo_list.size(), // walkers + BasisSetSize, // AOs + 1, C_partial_view.data(), BasisSetSize, basis_v_mw.data(), BasisSetSize, 0, phi_v.data(), + requested_orb_size); + } +} + +template +void LCAOrbitalSetT::mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const +{ + const size_t nw = spo_list.size(); + for (size_t iw = 0; iw < nw; iw++) + { + for (size_t iat = 0; iat < vp_list[iw].getTotalNum(); iat++) + { + spo_list[iw].evaluateValue(vp_list[iw], iat, psi_list[iw]); + ratios_list[iw][iat] = simd::dot(psi_list[iw].get().data(), invRow_ptr_list[iw], psi_list[iw].get().size()); } + } } -template -void -LCAOrbitalSetT::mw_evaluateValue( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const RefVector& psi_v_list) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.template getCastedLeader>(); - auto& phi_v = spo_leader.mw_mem_handle_.getResource().phi_v; - phi_v.resize(spo_list.size(), this->OrbitalSetSize); - mw_evaluateValueImplGEMM(spo_list, P_list, iat, phi_v); - - const size_t output_size = phi_v.size(1); - const size_t nw = phi_v.size(0); - - for (int iw = 0; iw < nw; iw++) - std::copy_n( - phi_v.data_at(iw, 0), 
output_size, psi_v_list[iw].get().data()); -} - -template -void -LCAOrbitalSetT::mw_evaluateValueImplGEMM( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - OffloadMWVArray& phi_v) const +template +void LCAOrbitalSetT::evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) { - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.template getCastedLeader>(); - const size_t nw = spo_list.size(); - auto& basis_v_mw = spo_leader.mw_mem_handle_.getResource().basis_v_mw; - basis_v_mw.resize(nw, BasisSetSize); - - myBasisSet->mw_evaluateValue(P_list, iat, basis_v_mw); - - if (Identity) { - std::copy_n(basis_v_mw.data_at(0, 0), this->OrbitalSetSize * nw, - phi_v.data_at(0, 0)); - } - else { - const size_t requested_orb_size = phi_v.size(1); - assert(requested_orb_size <= this->OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize); - BLAS::gemm('T', 'N', - requested_orb_size, // MOs - spo_list.size(), // walkers - BasisSetSize, // AOs - 1, C_partial_view.data(), BasisSetSize, basis_v_mw.data(), - BasisSetSize, 0, phi_v.data(), requested_orb_size); - } -} - -template -void -LCAOrbitalSetT::mw_evaluateDetRatios( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const -{ - const size_t nw = spo_list.size(); - for (size_t iw = 0; iw < nw; iw++) { - for (size_t iat = 0; iat < vp_list[iw].getTotalNum(); iat++) { - spo_list[iw].evaluateValue(vp_list[iw], iat, psi_list[iw]); - ratios_list[iw][iat] = simd::dot(psi_list[iw].get().data(), - invRow_ptr_list[iw], psi_list[iw].get().size()); - } - } -} + Vector vTemp(Temp.data(0), BasisSetSize); + Vector invTemp(Temp.data(1), BasisSetSize); -template -void -LCAOrbitalSetT::evaluateDetRatios(const VirtualParticleSetT& VP, - ValueVector& psi, const ValueVector& 
psiinv, std::vector& ratios) -{ - Vector vTemp(Temp.data(0), BasisSetSize); - Vector invTemp(Temp.data(1), BasisSetSize); + { + ScopedTimer local(mo_timer_); + // when only a subset of orbitals is used, extract limited rows of C. + Matrix C_occupied(C->data(), psiinv.size(), BasisSetSize); + MatrixOperators::product_Atx(C_occupied, psiinv, invTemp); + } + for (size_t j = 0; j < VP.getTotalNum(); j++) + { { - ScopedTimer local(mo_timer_); - // when only a subset of orbitals is used, extract limited rows of C. - Matrix C_occupied(C->data(), psiinv.size(), BasisSetSize); - MatrixOperators::product_Atx(C_occupied, psiinv, invTemp); - } - - for (size_t j = 0; j < VP.getTotalNum(); j++) { - { - ScopedTimer local(basis_timer_); - myBasisSet->evaluateV(VP, j, vTemp.data()); - } - ratios[j] = simd::dot(vTemp.data(), invTemp.data(), BasisSetSize); - } -} - -template -void -LCAOrbitalSetT::mw_evaluateVGLandDetRatioGrads( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const std::vector& invRow_ptr_list, OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, std::vector& grads) const -{ - assert(this == &spo_list.getLeader()); - assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL); - assert(phi_vgl_v.size(1) == spo_list.size()); - - mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v); - // Device data of phi_vgl_v must be up-to-date upon return - phi_vgl_v.updateTo(); - - const size_t nw = spo_list.size(); - const size_t norb_requested = phi_vgl_v.size(2); - for (int iw = 0; iw < nw; iw++) { - ratios[iw] = simd::dot( - invRow_ptr_list[iw], phi_vgl_v.data_at(0, iw, 0), norb_requested); - GradType dphi; - for (size_t idim = 0; idim < QMCTraits::DIM; idim++) - dphi[idim] = - simd::dot(invRow_ptr_list[iw], - phi_vgl_v.data_at(idim + 1, iw, 0), norb_requested) / - ratios[iw]; - grads[iw] = dphi; - } -} - -template -void -LCAOrbitalSetT::evaluateVGH(const ParticleSetT& P, int iat, - ValueVector& psi, GradVector& dpsi, HessVector& dhpsi) -{ - // 
TAKE CARE OF IDENTITY - myBasisSet->evaluateVGH(P, iat, Temph); - if (Identity) - evaluate_vgh_impl(Temph, psi, dpsi, dhpsi); - else { - assert(psi.size() <= this->OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); - Product_ABt(Temph, C_partial_view, Temphv); - evaluate_vgh_impl(Temphv, psi, dpsi, dhpsi); - } -} - -template -void -LCAOrbitalSetT::evaluateVGHGH(const ParticleSetT& P, int iat, - ValueVector& psi, GradVector& dpsi, HessVector& dhpsi, GGGVector& dghpsi) -{ - // APP_ABORT("LCAORbitalSet::evaluate(psi,gpsi,hpsi,ghpsi) not - // implemented\n"); - - // TAKE CARE OF IDENTITY - myBasisSet->evaluateVGHGH(P, iat, Tempgh); - if (Identity) - evaluate_vghgh_impl(Tempgh, psi, dpsi, dhpsi, dghpsi); - else { - assert(psi.size() <= this->OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); - Product_ABt(Tempgh, C_partial_view, Tempghv); - evaluate_vghgh_impl(Tempghv, psi, dpsi, dhpsi, dghpsi); - } + ScopedTimer local(basis_timer_); + myBasisSet->evaluateV(VP, j, vTemp.data()); + } + ratios[j] = simd::dot(vTemp.data(), invTemp.data(), BasisSetSize); + } +} + +template +void LCAOrbitalSetT::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const +{ + assert(this == &spo_list.getLeader()); + assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL); + assert(phi_vgl_v.size(1) == spo_list.size()); + + mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v); + // Device data of phi_vgl_v must be up-to-date upon return + phi_vgl_v.updateTo(); + + const size_t nw = spo_list.size(); + const size_t norb_requested = phi_vgl_v.size(2); + for (int iw = 0; iw < nw; iw++) + { + ratios[iw] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(0, iw, 0), norb_requested); + GradType dphi; + for (size_t idim = 0; idim < QMCTraits::DIM; idim++) + 
dphi[idim] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(idim + 1, iw, 0), norb_requested) / ratios[iw]; + grads[iw] = dphi; + } +} + +template +void LCAOrbitalSetT::evaluateVGH(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& dhpsi) +{ + // TAKE CARE OF IDENTITY + myBasisSet->evaluateVGH(P, iat, Temph); + if (Identity) + evaluate_vgh_impl(Temph, psi, dpsi, dhpsi); + else + { + assert(psi.size() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); + Product_ABt(Temph, C_partial_view, Temphv); + evaluate_vgh_impl(Temphv, psi, dpsi, dhpsi); + } +} + +template +void LCAOrbitalSetT::evaluateVGHGH(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& dhpsi, + GGGVector& dghpsi) +{ + // APP_ABORT("LCAORbitalSet::evaluate(psi,gpsi,hpsi,ghpsi) not + // implemented\n"); + + // TAKE CARE OF IDENTITY + myBasisSet->evaluateVGHGH(P, iat, Tempgh); + if (Identity) + evaluate_vghgh_impl(Tempgh, psi, dpsi, dhpsi, dghpsi); + else + { + assert(psi.size() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); + Product_ABt(Tempgh, C_partial_view, Tempghv); + evaluate_vghgh_impl(Tempghv, psi, dpsi, dhpsi, dghpsi); + } } /* implement using gemm algorithm */ -template -inline void -LCAOrbitalSetT::evaluate_vgl_impl(const vgl_type& temp, int i, - ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) const -{ - const size_t output_size = logdet.cols(); - std::copy_n(temp.data(0), output_size, logdet[i]); - const T* restrict gx = temp.data(1); - const T* restrict gy = temp.data(2); - const T* restrict gz = temp.data(3); - for (size_t j = 0; j < output_size; j++) { - dlogdet[i][j][0] = gx[j]; - dlogdet[i][j][1] = gy[j]; - dlogdet[i][j][2] = gz[j]; - } - std::copy_n(temp.data(4), output_size, d2logdet[i]); -} -template -void -LCAOrbitalSetT::evaluate_vgh_impl(const vgh_type& temp, int i, - ValueMatrix& psi, GradMatrix& dpsi, 
HessMatrix& d2psi) const -{ - const size_t output_size = psi.cols(); - std::copy_n(temp.data(0), output_size, psi[i]); - const T* restrict gx = temp.data(1); - const T* restrict gy = temp.data(2); - const T* restrict gz = temp.data(3); - const T* restrict hxx = temp.data(4); - const T* restrict hxy = temp.data(5); - const T* restrict hxz = temp.data(6); - const T* restrict hyy = temp.data(7); - const T* restrict hyz = temp.data(8); - const T* restrict hzz = temp.data(9); - - for (size_t j = 0; j < output_size; j++) { - dpsi[i][j][0] = gx[j]; - dpsi[i][j][1] = gy[j]; - dpsi[i][j][2] = gz[j]; - - d2psi[i][j](0, 0) = hxx[j]; - d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j]; - d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j]; - d2psi[i][j](1, 1) = hyy[j]; - d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j]; - d2psi[i][j](2, 2) = hzz[j]; - } -} - -template -void -LCAOrbitalSetT::evaluate_ionderiv_v_impl( - const vgl_type& temp, int i, GradMatrix& dpsi) const -{ - const size_t output_size = dpsi.cols(); - const T* restrict gx = temp.data(1); - const T* restrict gy = temp.data(2); - const T* restrict gz = temp.data(3); - - for (size_t j = 0; j < output_size; j++) { - // As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property - // that - // for an atomic center, the ion gradient is the negative of the - // elecron gradient. Hence minus signs for each of these. 
- dpsi[i][j][0] = -gx[j]; - dpsi[i][j][1] = -gy[j]; - dpsi[i][j][2] = -gz[j]; - } -} - -template -void -LCAOrbitalSetT::evaluate_ionderiv_vgl_impl(const vghgh_type& temp, int i, - GradMatrix& dpsi, HessMatrix& dgpsi, GradMatrix& dlpsi) const -{ - const size_t output_size = dpsi.cols(); - const T* restrict gx = temp.data(1); - const T* restrict gy = temp.data(2); - const T* restrict gz = temp.data(3); - const T* restrict hxx = temp.data(4); - const T* restrict hxy = temp.data(5); - const T* restrict hxz = temp.data(6); - const T* restrict hyy = temp.data(7); - const T* restrict hyz = temp.data(8); - const T* restrict hzz = temp.data(9); - const T* restrict gh_xxx = temp.data(10); - const T* restrict gh_xxy = temp.data(11); - const T* restrict gh_xxz = temp.data(12); - const T* restrict gh_xyy = temp.data(13); - const T* restrict gh_xzz = temp.data(15); - const T* restrict gh_yyy = temp.data(16); - const T* restrict gh_yyz = temp.data(17); - const T* restrict gh_yzz = temp.data(18); - const T* restrict gh_zzz = temp.data(19); - - for (size_t j = 0; j < output_size; j++) { - // As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property - // that - // for an atomic center, the ion gradient is the negative of the - // elecron gradient. Hence minus signs for each of these. - dpsi[i][j][0] = -gx[j]; - dpsi[i][j][1] = -gy[j]; - dpsi[i][j][2] = -gz[j]; - - dgpsi[i][j](0, 0) = -hxx[j]; - dgpsi[i][j](0, 1) = dgpsi[i][j](1, 0) = -hxy[j]; - dgpsi[i][j](0, 2) = dgpsi[i][j](2, 0) = -hxz[j]; - dgpsi[i][j](1, 1) = -hyy[j]; - dgpsi[i][j](2, 1) = dgpsi[i][j](1, 2) = -hyz[j]; - dgpsi[i][j](2, 2) = -hzz[j]; - - // Since this returns the ion gradient of the laplacian, we have to - // trace the grad hessian vector. 
- dlpsi[i][j][0] = -(gh_xxx[j] + gh_xyy[j] + gh_xzz[j]); - dlpsi[i][j][1] = -(gh_xxy[j] + gh_yyy[j] + gh_yzz[j]); - dlpsi[i][j][2] = -(gh_xxz[j] + gh_yyz[j] + gh_zzz[j]); - } -} - -template -void -LCAOrbitalSetT::evaluate_notranspose(const ParticleSetT& P, int first, - int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) -{ - if (Identity) { - for (size_t i = 0, iat = first; iat < last; i++, iat++) { - myBasisSet->evaluateVGL(P, iat, Temp); - evaluate_vgl_impl(Temp, i, logdet, dlogdet, d2logdet); - } - } - else { - assert(logdet.cols() <= this->OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); - for (size_t i = 0, iat = first; iat < last; i++, iat++) { - myBasisSet->evaluateVGL(P, iat, Temp); - Product_ABt(Temp, C_partial_view, Tempv); - evaluate_vgl_impl(Tempv, i, logdet, dlogdet, d2logdet); - } - } -} - -template -void -LCAOrbitalSetT::evaluate_notranspose(const ParticleSetT& P, int first, - int last, ValueMatrix& logdet, GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) -{ - if (Identity) { - for (size_t i = 0, iat = first; iat < last; i++, iat++) { - myBasisSet->evaluateVGH(P, iat, Temph); - evaluate_vgh_impl(Temph, i, logdet, dlogdet, grad_grad_logdet); - } - } - else { - assert(logdet.cols() <= this->OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); - for (size_t i = 0, iat = first; iat < last; i++, iat++) { - myBasisSet->evaluateVGH(P, iat, Temph); - Product_ABt(Temph, C_partial_view, Temphv); - evaluate_vgh_impl(Temphv, i, logdet, dlogdet, grad_grad_logdet); - } - } -} - -template -void -LCAOrbitalSetT::evaluate_notranspose(const ParticleSetT& P, int first, - int last, ValueMatrix& logdet, GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, GGGMatrix& grad_grad_grad_logdet) -{ - if (Identity) { - for (size_t i = 0, iat = first; iat < last; i++, iat++) { - myBasisSet->evaluateVGHGH(P, iat, Tempgh); - evaluate_vghgh_impl(Tempgh, i, logdet, dlogdet, 
grad_grad_logdet, - grad_grad_grad_logdet); - } - } - else { - assert(logdet.cols() <= this->OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); - for (size_t i = 0, iat = first; iat < last; i++, iat++) { - myBasisSet->evaluateVGHGH(P, iat, this->Tempgh); - Product_ABt(this->Tempgh, C_partial_view, this->Tempghv); - evaluate_vghgh_impl(this->Tempghv, i, logdet, dlogdet, - grad_grad_logdet, grad_grad_grad_logdet); - } +template +inline void LCAOrbitalSetT::evaluate_vgl_impl(const vgl_type& temp, + int i, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) const +{ + const size_t output_size = logdet.cols(); + std::copy_n(temp.data(0), output_size, logdet[i]); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + for (size_t j = 0; j < output_size; j++) + { + dlogdet[i][j][0] = gx[j]; + dlogdet[i][j][1] = gy[j]; + dlogdet[i][j][2] = gz[j]; + } + std::copy_n(temp.data(4), output_size, d2logdet[i]); +} +template +void LCAOrbitalSetT::evaluate_vgh_impl(const vgh_type& temp, + int i, + ValueMatrix& psi, + GradMatrix& dpsi, + HessMatrix& d2psi) const +{ + const size_t output_size = psi.cols(); + std::copy_n(temp.data(0), output_size, psi[i]); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + const T* restrict hxx = temp.data(4); + const T* restrict hxy = temp.data(5); + const T* restrict hxz = temp.data(6); + const T* restrict hyy = temp.data(7); + const T* restrict hyz = temp.data(8); + const T* restrict hzz = temp.data(9); + + for (size_t j = 0; j < output_size; j++) + { + dpsi[i][j][0] = gx[j]; + dpsi[i][j][1] = gy[j]; + dpsi[i][j][2] = gz[j]; + + d2psi[i][j](0, 0) = hxx[j]; + d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j]; + d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j]; + d2psi[i][j](1, 1) = hyy[j]; + d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j]; + d2psi[i][j](2, 2) = hzz[j]; + } 
+} + +template +void LCAOrbitalSetT::evaluate_ionderiv_v_impl(const vgl_type& temp, int i, GradMatrix& dpsi) const +{ + const size_t output_size = dpsi.cols(); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + + for (size_t j = 0; j < output_size; j++) + { + // As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property + // that + // for an atomic center, the ion gradient is the negative of the + // elecron gradient. Hence minus signs for each of these. + dpsi[i][j][0] = -gx[j]; + dpsi[i][j][1] = -gy[j]; + dpsi[i][j][2] = -gz[j]; + } +} + +template +void LCAOrbitalSetT::evaluate_ionderiv_vgl_impl(const vghgh_type& temp, + int i, + GradMatrix& dpsi, + HessMatrix& dgpsi, + GradMatrix& dlpsi) const +{ + const size_t output_size = dpsi.cols(); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + const T* restrict hxx = temp.data(4); + const T* restrict hxy = temp.data(5); + const T* restrict hxz = temp.data(6); + const T* restrict hyy = temp.data(7); + const T* restrict hyz = temp.data(8); + const T* restrict hzz = temp.data(9); + const T* restrict gh_xxx = temp.data(10); + const T* restrict gh_xxy = temp.data(11); + const T* restrict gh_xxz = temp.data(12); + const T* restrict gh_xyy = temp.data(13); + const T* restrict gh_xzz = temp.data(15); + const T* restrict gh_yyy = temp.data(16); + const T* restrict gh_yyz = temp.data(17); + const T* restrict gh_yzz = temp.data(18); + const T* restrict gh_zzz = temp.data(19); + + for (size_t j = 0; j < output_size; j++) + { + // As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property + // that + // for an atomic center, the ion gradient is the negative of the + // elecron gradient. Hence minus signs for each of these. 
+ dpsi[i][j][0] = -gx[j]; + dpsi[i][j][1] = -gy[j]; + dpsi[i][j][2] = -gz[j]; + + dgpsi[i][j](0, 0) = -hxx[j]; + dgpsi[i][j](0, 1) = dgpsi[i][j](1, 0) = -hxy[j]; + dgpsi[i][j](0, 2) = dgpsi[i][j](2, 0) = -hxz[j]; + dgpsi[i][j](1, 1) = -hyy[j]; + dgpsi[i][j](2, 1) = dgpsi[i][j](1, 2) = -hyz[j]; + dgpsi[i][j](2, 2) = -hzz[j]; + + // Since this returns the ion gradient of the laplacian, we have to + // trace the grad hessian vector. + dlpsi[i][j][0] = -(gh_xxx[j] + gh_xyy[j] + gh_xzz[j]); + dlpsi[i][j][1] = -(gh_xxy[j] + gh_yyy[j] + gh_yzz[j]); + dlpsi[i][j][2] = -(gh_xxz[j] + gh_yyz[j] + gh_zzz[j]); + } +} + +template +void LCAOrbitalSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) +{ + if (Identity) + { + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateVGL(P, iat, Temp); + evaluate_vgl_impl(Temp, i, logdet, dlogdet, d2logdet); + } + } + else + { + assert(logdet.cols() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateVGL(P, iat, Temp); + Product_ABt(Temp, C_partial_view, Tempv); + evaluate_vgl_impl(Tempv, i, logdet, dlogdet, d2logdet); } + } } -template -void -LCAOrbitalSetT::evaluateGradSource(const ParticleSetT& P, int first, - int last, const ParticleSetT& source, int iat_src, GradMatrix& gradphi) +template +void LCAOrbitalSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet) { - if (Identity) { - for (size_t i = 0, iat = first; iat < last; i++, iat++) { - myBasisSet->evaluateGradSourceV( - P, iat, source, iat_src, this->Temp); - evaluate_ionderiv_v_impl(Temp, i, gradphi); - } - } - else { - for (size_t i = 0, iat = first; iat < last; i++, iat++) { - myBasisSet->evaluateGradSourceV( - P, iat, source, iat_src, 
this->Temp); - Product_ABt(this->Temp, *C, this->Tempv); - evaluate_ionderiv_v_impl(this->Tempv, i, gradphi); - } + if (Identity) + { + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateVGH(P, iat, Temph); + evaluate_vgh_impl(Temph, i, logdet, dlogdet, grad_grad_logdet); + } + } + else + { + assert(logdet.cols() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateVGH(P, iat, Temph); + Product_ABt(Temph, C_partial_view, Temphv); + evaluate_vgh_impl(Temphv, i, logdet, dlogdet, grad_grad_logdet); } + } } -template -void -LCAOrbitalSetT::evaluateGradSource(const ParticleSetT& P, int first, - int last, const ParticleSetT& source, int iat_src, GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi) +template +void LCAOrbitalSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet, + GGGMatrix& grad_grad_grad_logdet) { - if (Identity) { - for (size_t i = 0, iat = first; iat < last; i++, iat++) { - myBasisSet->evaluateGradSourceVGL( - P, iat, source, iat_src, this->Tempgh); - evaluate_ionderiv_vgl_impl( - this->Tempgh, i, grad_phi, grad_grad_phi, grad_lapl_phi); - } - } - else { - for (size_t i = 0, iat = first; iat < last; i++, iat++) { - myBasisSet->evaluateGradSourceVGL( - P, iat, source, iat_src, this->Tempgh); - Product_ABt(this->Tempgh, *C, this->Tempghv); - evaluate_ionderiv_vgl_impl( - this->Tempghv, i, grad_phi, grad_grad_phi, grad_lapl_phi); - } + if (Identity) + { + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateVGHGH(P, iat, Tempgh); + evaluate_vghgh_impl(Tempgh, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet); + } + } + else + { + assert(logdet.cols() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), logdet.cols(), 
BasisSetSize); + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateVGHGH(P, iat, this->Tempgh); + Product_ABt(this->Tempgh, C_partial_view, this->Tempghv); + evaluate_vghgh_impl(this->Tempghv, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet); } + } } -template -void -LCAOrbitalSetT::evaluateGradSourceRow(const ParticleSetT& P, int iel, - const ParticleSetT& source, int iat_src, GradVector& gradphi) +template +void LCAOrbitalSetT::evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& gradphi) { - if (Identity) { - myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp); - evaluate_ionderiv_v_row_impl(this->Temp, gradphi); + if (Identity) + { + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateGradSourceV(P, iat, source, iat_src, this->Temp); + evaluate_ionderiv_v_impl(Temp, i, gradphi); } - else { - myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp); - Product_ABt(Temp, *C, this->Tempv); - evaluate_ionderiv_v_row_impl(this->Tempv, gradphi); + } + else + { + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateGradSourceV(P, iat, source, iat_src, this->Temp); + Product_ABt(this->Temp, *C, this->Tempv); + evaluate_ionderiv_v_impl(this->Tempv, i, gradphi); + } + } +} + +template +void LCAOrbitalSetT::evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& grad_phi, + HessMatrix& grad_grad_phi, + GradMatrix& grad_lapl_phi) +{ + if (Identity) + { + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateGradSourceVGL(P, iat, source, iat_src, this->Tempgh); + evaluate_ionderiv_vgl_impl(this->Tempgh, i, grad_phi, grad_grad_phi, grad_lapl_phi); } -} - -template -void -LCAOrbitalSetT::applyRotation( - const ValueMatrix& rot_mat, bool use_stored_copy) -{ - if (!use_stored_copy) 
- *C_copy = *C; - // gemm is out-of-place - BLAS::gemm('N', 'T', BasisSetSize, this->OrbitalSetSize, - this->OrbitalSetSize, RealType(1.0), C_copy->data(), BasisSetSize, - rot_mat.data(), this->OrbitalSetSize, RealType(0.0), C->data(), - BasisSetSize); - - /* debugging code + } + else + { + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateGradSourceVGL(P, iat, source, iat_src, this->Tempgh); + Product_ABt(this->Tempgh, *C, this->Tempghv); + evaluate_ionderiv_vgl_impl(this->Tempghv, i, grad_phi, grad_grad_phi, grad_lapl_phi); + } + } +} + +template +void LCAOrbitalSetT::evaluateGradSourceRow(const ParticleSetT& P, + int iel, + const ParticleSetT& source, + int iat_src, + GradVector& gradphi) +{ + if (Identity) + { + myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp); + evaluate_ionderiv_v_row_impl(this->Temp, gradphi); + } + else + { + myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp); + Product_ABt(Temp, *C, this->Tempv); + evaluate_ionderiv_v_row_impl(this->Tempv, gradphi); + } +} + +template +void LCAOrbitalSetT::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) +{ + if (!use_stored_copy) + *C_copy = *C; + // gemm is out-of-place + BLAS::gemm('N', 'T', BasisSetSize, this->OrbitalSetSize, this->OrbitalSetSize, RealType(1.0), C_copy->data(), + BasisSetSize, rot_mat.data(), this->OrbitalSetSize, RealType(0.0), C->data(), BasisSetSize); + + /* debugging code app_log() << "PRINTING MO COEFFICIENTS AFTER ROTATION " << objectName << std::endl; for (int j = 0; j < OrbitalSetSize; j++) for (int i = 0; i < BasisSetSize; i++) diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h index 29f8c897d22..a356bdd6f52 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h @@ -27,210 +27,190 @@ namespace qmcplusplus * SoA verson of LCOrtbitalSet * Localized basis set is always real */ -template +template 
class LCAOrbitalSetT : public SPOSetT { public: - using basis_type = SoaBasisSetBaseT; - using vgl_type = typename basis_type::vgl_type; - using vgh_type = typename basis_type::vgh_type; - using vghgh_type = typename basis_type::vghgh_type; - - using IndexType = typename SPOSetT::IndexType; - using RealType = typename SPOSetT::RealType; - using ComplexType = typename SPOSetT::ComplexType; - using ValueVector = typename SPOSetT::ValueVector; - using ValueMatrix = typename SPOSetT::ValueMatrix; - using GradVector = typename SPOSetT::GradVector; - using GradMatrix = typename SPOSetT::GradMatrix; - using HessMatrix = typename SPOSetT::HessMatrix; - using PosType = typename SPOSetT::PosType; - using HessVector = typename SPOSetT::HessVector; - using GGGMatrix = typename SPOSetT::GGGMatrix; - using GGGVector = typename SPOSetT::GGGVector; - using GradType = typename SPOSetT::GradType; - using OffloadMWVGLArray = typename basis_type::OffloadMWVGLArray; - using OffloadMWVArray = typename basis_type::OffloadMWVArray; - - /// pointer to the basis set - std::unique_ptr myBasisSet; - /// pointer to matrix containing the coefficients - std::shared_ptr C; - - /** constructor + using basis_type = SoaBasisSetBaseT; + using vgl_type = typename basis_type::vgl_type; + using vgh_type = typename basis_type::vgh_type; + using vghgh_type = typename basis_type::vghgh_type; + + using IndexType = typename SPOSetT::IndexType; + using RealType = typename SPOSetT::RealType; + using ComplexType = typename SPOSetT::ComplexType; + using ValueVector = typename SPOSetT::ValueVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradVector = typename SPOSetT::GradVector; + using GradMatrix = typename SPOSetT::GradMatrix; + using HessMatrix = typename SPOSetT::HessMatrix; + using PosType = typename SPOSetT::PosType; + using HessVector = typename SPOSetT::HessVector; + using GGGMatrix = typename SPOSetT::GGGMatrix; + using GGGVector = typename SPOSetT::GGGVector; + using GradType = 
typename SPOSetT::GradType; + using OffloadMWVGLArray = typename basis_type::OffloadMWVGLArray; + using OffloadMWVArray = typename basis_type::OffloadMWVArray; + + /// pointer to the basis set + std::unique_ptr myBasisSet; + /// pointer to matrix containing the coefficients + std::shared_ptr C; + + /** constructor * @param bs pointer to the BasisSet */ - LCAOrbitalSetT( - const std::string& my_name, std::unique_ptr&& bs); - - LCAOrbitalSetT(const LCAOrbitalSetT& in); - - std::string - getClassName() const final - { - return "LCAOrbitalSetT"; - } - - bool - isRotationSupported() const final - { - return true; - } - - bool - hasIonDerivs() const final - { - return true; - } - - std::unique_ptr> - makeClone() const final; - - void - storeParamsBeforeRotation() final - { - C_copy = std::make_shared(*C); - } - - void - applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) final; - - /** set the OrbitalSetSize and Identity=false and initialize internal + LCAOrbitalSetT(const std::string& my_name, std::unique_ptr&& bs); + + LCAOrbitalSetT(const LCAOrbitalSetT& in); + + std::string getClassName() const final { return "LCAOrbitalSetT"; } + + bool isRotationSupported() const final { return true; } + + bool hasIonDerivs() const final { return true; } + + std::unique_ptr> makeClone() const final; + + void storeParamsBeforeRotation() final { C_copy = std::make_shared(*C); } + + void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) final; + + /** set the OrbitalSetSize and Identity=false and initialize internal * storages */ - void - setOrbitalSetSize(int norbs) final; + void setOrbitalSetSize(int norbs) final; - /** return the size of the basis set + /** return the size of the basis set */ - int - getBasisSetSize() const - { - return (myBasisSet == nullptr) ? 
0 : myBasisSet->getBasisSetSize(); - } - - bool - isIdentity() const - { - return Identity; - }; - - /** check consistency between Identity and C + int getBasisSetSize() const { return (myBasisSet == nullptr) ? 0 : myBasisSet->getBasisSetSize(); } + + bool isIdentity() const { return Identity; }; + + /** check consistency between Identity and C * */ - void - checkObject() const final; - - void - evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) final; - - void - evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi) final; - - void - mw_evaluateValue(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const RefVector& psi_v_list) const final; - - void - mw_evaluateVGL(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const final; - - void - mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const final; - - void - evaluateDetRatios(const VirtualParticleSetT& VP, ValueVector& psi, - const ValueVector& psiinv, std::vector& ratios) final; - - void - mw_evaluateVGLandDetRatioGrads( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, - std::vector& grads) const final; - - void - evaluateVGH(const ParticleSetT& P, int iat, ValueVector& psi, - GradVector& dpsi, HessVector& grad_grad_psi) final; - - void - evaluateVGHGH(const ParticleSetT& P, int iat, ValueVector& psi, - GradVector& dpsi, HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) final; - - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& 
d2logdet) final; - - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) final; - - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) final; - - // NOTE: The data types get complicated here, so here's an overview of the - // data types associated with ionic derivatives, and how to get their - // data. - // - // NOTE: These data structures hold the data for one particular ion, and so - // the ID is implicit. - // It's up to the user to keep track of which ion these derivatives - // refer to. - // - // 1.) GradMatrix grad_phi: Holds the ionic derivatives of each SPO for - // each electron. - // Example: grad_phi[iel][iorb][idim]. iel -- electron index. - // iorb -- orbital index. - // idim -- cartesian index - // of ionic derivative. - // X=0, Y=1, Z=2. - // - // 2.) HessMatrix grad_grad_phi: Holds the ionic derivatives of the - // electron gradient components - // for each SPO and each electron. - // Example: grad_grad_phi[iel][iorb](idim,edim) iel -- - // electron index. - // iorb -- - // orbital index. - // idim -- ionic - // derivative's - // cartesian - // index. - // X=0, Y=1, - // Z=2 - // edim -- - // electron - // derivative's - // cartesian - // index. - // x=0, y=1, - // z=2. - // - // 3.) GradMatrix grad_lapl_phi: Holds the ionic derivatives of the - // electron laplacian for each SPO and each electron. - // Example: grad_lapl_phi[iel][iorb][idim]. iel -- electron - // index. - // iorb -- orbital - // index. idim -- - // cartesian index of - // ionic derivative. - // X=0, Y=1, Z=2. 
- - /** + void checkObject() const final; + + void evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) final; + + void evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final; + + void mw_evaluateValue(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list) const final; + + void mw_evaluateVGL(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const final; + + void mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const final; + + void evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) final; + + void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const final; + + void evaluateVGH(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) final; + + void evaluateVGHGH(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) final; + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) final; + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet) final; + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet, + 
GGGMatrix& grad_grad_grad_logdet) final; + + // NOTE: The data types get complicated here, so here's an overview of the + // data types associated with ionic derivatives, and how to get their + // data. + // + // NOTE: These data structures hold the data for one particular ion, and so + // the ID is implicit. + // It's up to the user to keep track of which ion these derivatives + // refer to. + // + // 1.) GradMatrix grad_phi: Holds the ionic derivatives of each SPO for + // each electron. + // Example: grad_phi[iel][iorb][idim]. iel -- electron index. + // iorb -- orbital index. + // idim -- cartesian index + // of ionic derivative. + // X=0, Y=1, Z=2. + // + // 2.) HessMatrix grad_grad_phi: Holds the ionic derivatives of the + // electron gradient components + // for each SPO and each electron. + // Example: grad_grad_phi[iel][iorb](idim,edim) iel -- + // electron index. + // iorb -- + // orbital index. + // idim -- ionic + // derivative's + // cartesian + // index. + // X=0, Y=1, + // Z=2 + // edim -- + // electron + // derivative's + // cartesian + // index. + // x=0, y=1, + // z=2. + // + // 3.) GradMatrix grad_lapl_phi: Holds the ionic derivatives of the + // electron laplacian for each SPO and each electron. + // Example: grad_lapl_phi[iel][iorb][idim]. iel -- electron + // index. + // iorb -- orbital + // index. idim -- + // cartesian index of + // ionic derivative. + // X=0, Y=1, Z=2. + + /** * \brief Calculate ion derivatives of SPO's. * * @param P Electron particle set. @@ -241,11 +221,14 @@ class LCAOrbitalSetT : public SPOSetT * @param gradphi Container storing ion gradients for all particles and all * orbitals. 
*/ - void - evaluateGradSource(const ParticleSetT& P, int first, int last, - const ParticleSetT& source, int iat_src, GradMatrix& grad_phi) final; - - /** + void evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& grad_phi) final; + + /** * \brief Calculate ion derivatives of SPO's, their gradients, and their * laplacians. * @@ -261,125 +244,129 @@ class LCAOrbitalSetT : public SPOSetT * @param grad_lapl_phi Container storing ion gradients of SPO laplacians * for all particles and all orbitals. */ - void - evaluateGradSource(const ParticleSetT& P, int first, int last, - const ParticleSetT& source, int iat_src, GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi) final; - - void - evaluateGradSourceRow(const ParticleSetT& P, int iel, - const ParticleSetT& source, int iat_src, GradVector& grad_phi) final; - - void - createResource(ResourceCollection& collection) const final; - void - acquireResource(ResourceCollection& collection, - const RefVectorWithLeader>& spo_list) const final; - void - releaseResource(ResourceCollection& collection, - const RefVectorWithLeader>& spo_list) const final; + void evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& grad_phi, + HessMatrix& grad_grad_phi, + GradMatrix& grad_lapl_phi) final; + + void evaluateGradSourceRow(const ParticleSetT& P, + int iel, + const ParticleSetT& source, + int iat_src, + GradVector& grad_phi) final; + + void createResource(ResourceCollection& collection) const final; + void acquireResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const final; + void releaseResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const final; protected: - /// number of Single-particle orbitals - const IndexType BasisSetSize; - /// a copy of the original C before orbital rotation is applied; - 
std::shared_ptr C_copy; - - /// true if C is an identity matrix - bool Identity; - /// Temp(BasisSetSize) : Row index=V,Gx,Gy,Gz,L - vgl_type Temp; - /// Tempv(OrbitalSetSize) Tempv=C*Temp - vgl_type Tempv; - - /// These are temporary VectorSoAContainers to hold value, gradient, and - /// hessian for all basis or SPO functions evaluated at a given point. - /// Nbasis x [1(value)+3(gradient)+6(hessian)] - vgh_type Temph; - /// Norbitals x [1(value)+3(gradient)+6(hessian)] - vgh_type Temphv; - - /// These are temporary VectorSoAContainers to hold value, gradient, - /// hessian, and - /// gradient hessian for all basis or SPO functions evaluated at a given - /// point. - /// Nbasis x [1(value)+3(gradient)+6(hessian)+10(grad_hessian)] - vghgh_type Tempgh; - /// Nbasis x [1(value)+3(gradient)+6(hessian)+10(grad_hessian)] - vghgh_type Tempghv; + /// number of Single-particle orbitals + const IndexType BasisSetSize; + /// a copy of the original C before orbital rotation is applied; + std::shared_ptr C_copy; + + /// true if C is an identity matrix + bool Identity; + /// Temp(BasisSetSize) : Row index=V,Gx,Gy,Gz,L + vgl_type Temp; + /// Tempv(OrbitalSetSize) Tempv=C*Temp + vgl_type Tempv; + + /// These are temporary VectorSoAContainers to hold value, gradient, and + /// hessian for all basis or SPO functions evaluated at a given point. + /// Nbasis x [1(value)+3(gradient)+6(hessian)] + vgh_type Temph; + /// Norbitals x [1(value)+3(gradient)+6(hessian)] + vgh_type Temphv; + + /// These are temporary VectorSoAContainers to hold value, gradient, + /// hessian, and + /// gradient hessian for all basis or SPO functions evaluated at a given + /// point. 
+ /// Nbasis x [1(value)+3(gradient)+6(hessian)+10(grad_hessian)] + vghgh_type Tempgh; + /// Nbasis x [1(value)+3(gradient)+6(hessian)+10(grad_hessian)] + vghgh_type Tempghv; private: - /// helper functions to handle Identity - void - evaluate_vgl_impl(const vgl_type& temp, ValueVector& psi, GradVector& dpsi, - ValueVector& d2psi) const; - - void - evaluate_vgl_impl(const vgl_type& temp, int i, ValueMatrix& logdet, - GradMatrix& dlogdet, ValueMatrix& d2logdet) const; - /// These two functions unpack the data in vgh_type temp object into - /// wavefunction friendly data structures. - - /// This unpacks temp into vectors psi, dpsi, and d2psi. - void - evaluate_vgh_impl(const vgh_type& temp, ValueVector& psi, GradVector& dpsi, - HessVector& d2psi) const; - - /// Unpacks temp into the ith row (or electron index) of logdet, dlogdet, - /// dhlogdet. - void - evaluate_vgh_impl(const vgh_type& temp, int i, ValueMatrix& logdet, - GradMatrix& dlogdet, HessMatrix& dhlogdet) const; - /// Unpacks data in vghgh_type temp object into wavefunction friendly data - /// structures for value, gradient, hessian and gradient hessian. - void - evaluate_vghgh_impl(const vghgh_type& temp, ValueVector& psi, - GradVector& dpsi, HessVector& d2psi, GGGVector& dghpsi) const; - - void - evaluate_vghgh_impl(const vghgh_type& temp, int i, ValueMatrix& logdet, - GradMatrix& dlogdet, HessMatrix& dhlogdet, GGGMatrix& dghlogdet) const; - - /// Unpacks data in vgl object and calculates/places ionic gradient result - /// into dlogdet. - void - evaluate_ionderiv_v_impl( - const vgl_type& temp, int i, GradMatrix& dlogdet) const; - - /// Unpacks data in vgl object and calculates/places ionic gradient of - /// value, - /// electron gradient, and electron laplacian result into dlogdet, - /// dglogdet, and dllogdet respectively. 
- void - evaluate_ionderiv_vgl_impl(const vghgh_type& temp, int i, - GradMatrix& dlogdet, HessMatrix& dglogdet, GradMatrix& dllogdet) const; - - /// Unpacks data in vgl object and calculates/places ionic gradient of a - /// single row (phi_j(r)) into dlogdet. - void - evaluate_ionderiv_v_row_impl( - const vgl_type& temp, GradVector& dlogdet) const; - - void - mw_evaluateVGLImplGEMM(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - OffloadMWVGLArray& phi_vgl_v) const; - - /// packed walker GEMM implementation - void - mw_evaluateValueImplGEMM(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - OffloadMWVArray& phi_v) const; - - /// helper function for extracting a list of basis sets from a list of LCAOrbitalSet - RefVectorWithLeader extractBasisRefList(const RefVectorWithLeader>& spo_list) const; - - struct LCAOMultiWalkerMem; - ResourceHandle mw_mem_handle_; - /// timer for basis set - NewTimer& basis_timer_; - /// timer for MO - NewTimer& mo_timer_; + /// helper functions to handle Identity + void evaluate_vgl_impl(const vgl_type& temp, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) const; + + void evaluate_vgl_impl(const vgl_type& temp, + int i, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) const; + /// These two functions unpack the data in vgh_type temp object into + /// wavefunction friendly data structures. + + /// This unpacks temp into vectors psi, dpsi, and d2psi. + void evaluate_vgh_impl(const vgh_type& temp, ValueVector& psi, GradVector& dpsi, HessVector& d2psi) const; + + /// Unpacks temp into the ith row (or electron index) of logdet, dlogdet, + /// dhlogdet. + void evaluate_vgh_impl(const vgh_type& temp, + int i, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& dhlogdet) const; + /// Unpacks data in vghgh_type temp object into wavefunction friendly data + /// structures for value, gradient, hessian and gradient hessian. 
+ void evaluate_vghgh_impl(const vghgh_type& temp, + ValueVector& psi, + GradVector& dpsi, + HessVector& d2psi, + GGGVector& dghpsi) const; + + void evaluate_vghgh_impl(const vghgh_type& temp, + int i, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& dhlogdet, + GGGMatrix& dghlogdet) const; + + /// Unpacks data in vgl object and calculates/places ionic gradient result + /// into dlogdet. + void evaluate_ionderiv_v_impl(const vgl_type& temp, int i, GradMatrix& dlogdet) const; + + /// Unpacks data in vgl object and calculates/places ionic gradient of + /// value, + /// electron gradient, and electron laplacian result into dlogdet, + /// dglogdet, and dllogdet respectively. + void evaluate_ionderiv_vgl_impl(const vghgh_type& temp, + int i, + GradMatrix& dlogdet, + HessMatrix& dglogdet, + GradMatrix& dllogdet) const; + + /// Unpacks data in vgl object and calculates/places ionic gradient of a + /// single row (phi_j(r)) into dlogdet. + void evaluate_ionderiv_v_row_impl(const vgl_type& temp, GradVector& dlogdet) const; + + void mw_evaluateVGLImplGEMM(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + OffloadMWVGLArray& phi_vgl_v) const; + + /// packed walker GEMM implementation + void mw_evaluateValueImplGEMM(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + OffloadMWVArray& phi_v) const; + + /// helper function for extracting a list of basis sets from a list of LCAOrbitalSet + RefVectorWithLeader extractBasisRefList(const RefVectorWithLeader>& spo_list) const; + + struct LCAOMultiWalkerMem; + ResourceHandle mw_mem_handle_; + /// timer for basis set + NewTimer& basis_timer_; + /// timer for MO + NewTimer& mo_timer_; }; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp index 87b4e719d0b..17bd8c32b3f 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp 
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp @@ -13,60 +13,57 @@ namespace qmcplusplus { -template -LCAOrbitalSetWithCorrectionT::LCAOrbitalSetWithCorrectionT( - const std::string& my_name, ParticleSetT& ions, ParticleSetT& els, - std::unique_ptr&& bs) : - SPOSetT(my_name), - lcao(my_name + "_modified", std::move(bs)), - cusp(ions, els) -{ -} +template +LCAOrbitalSetWithCorrectionT::LCAOrbitalSetWithCorrectionT(const std::string& my_name, + ParticleSetT& ions, + ParticleSetT& els, + std::unique_ptr&& bs) + : SPOSetT(my_name), lcao(my_name + "_modified", std::move(bs)), cusp(ions, els) +{} -template -void -LCAOrbitalSetWithCorrectionT::setOrbitalSetSize(int norbs) +template +void LCAOrbitalSetWithCorrectionT::setOrbitalSetSize(int norbs) { - assert( - lcao.getOrbitalSetSize() == norbs && "norbs doesn't agree with lcao!"); - this->OrbitalSetSize = norbs; - cusp.setOrbitalSetSize(norbs); + assert(lcao.getOrbitalSetSize() == norbs && "norbs doesn't agree with lcao!"); + this->OrbitalSetSize = norbs; + cusp.setOrbitalSetSize(norbs); } -template -std::unique_ptr> -LCAOrbitalSetWithCorrectionT::makeClone() const +template +std::unique_ptr> LCAOrbitalSetWithCorrectionT::makeClone() const { - return std::make_unique>(*this); + return std::make_unique>(*this); } -template -void -LCAOrbitalSetWithCorrectionT::evaluateValue( - const ParticleSetT& P, int iat, ValueVector& psi) +template +void LCAOrbitalSetWithCorrectionT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) { - lcao.evaluateValue(P, iat, psi); - cusp.addV(P, iat, psi); + lcao.evaluateValue(P, iat, psi); + cusp.addV(P, iat, psi); } -template -void -LCAOrbitalSetWithCorrectionT::evaluateVGL(const ParticleSetT& P, int iat, - ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +template +void LCAOrbitalSetWithCorrectionT::evaluateVGL(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) { - lcao.evaluateVGL(P, iat, psi, dpsi, d2psi); - 
cusp.add_vector_vgl(P, iat, psi, dpsi, d2psi); + lcao.evaluateVGL(P, iat, psi, dpsi, d2psi); + cusp.add_vector_vgl(P, iat, psi, dpsi, d2psi); } -template -void -LCAOrbitalSetWithCorrectionT::evaluate_notranspose(const ParticleSetT& P, - int first, int last, ValueMatrix& logdet, GradMatrix& dlogdet, - ValueMatrix& d2logdet) +template +void LCAOrbitalSetWithCorrectionT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) { - lcao.evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet); - for (size_t i = 0, iat = first; iat < last; i++, iat++) - cusp.add_vgl(P, iat, i, logdet, dlogdet, d2logdet); + lcao.evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet); + for (size_t i = 0, iat = first; iat < last; i++, iat++) + cusp.add_vgl(P, iat, i, logdet, dlogdet, d2logdet); } template class LCAOrbitalSetWithCorrectionT; diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h index 8b0003d18fd..c6182a5d666 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h @@ -23,58 +23,52 @@ namespace qmcplusplus * */ -template +template class LCAOrbitalSetWithCorrectionT : public SPOSetT { public: - using basis_type = typename LCAOrbitalSetT::basis_type; - using ValueVector = typename SPOSetT::ValueVector; - using GradVector = typename SPOSetT::GradVector; - using ValueMatrix = typename SPOSetT::ValueMatrix; - using GradMatrix = typename SPOSetT::GradMatrix; - /** constructor + using basis_type = typename LCAOrbitalSetT::basis_type; + using ValueVector = typename SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + /** constructor * @param ions * @param els * @param bs pointer to the BasisSet * @param rl report 
level */ - LCAOrbitalSetWithCorrectionT(const std::string& my_name, - ParticleSetT& ions, ParticleSetT& els, - std::unique_ptr&& bs); + LCAOrbitalSetWithCorrectionT(const std::string& my_name, + ParticleSetT& ions, + ParticleSetT& els, + std::unique_ptr&& bs); - LCAOrbitalSetWithCorrectionT( - const LCAOrbitalSetWithCorrectionT& in) = default; + LCAOrbitalSetWithCorrectionT(const LCAOrbitalSetWithCorrectionT& in) = default; - std::string - getClassName() const final - { - return "LCAOrbitalSetWithCorrectionT"; - } + std::string getClassName() const final { return "LCAOrbitalSetWithCorrectionT"; } - std::unique_ptr> - makeClone() const final; + std::unique_ptr> makeClone() const final; - void - setOrbitalSetSize(int norbs) final; + void setOrbitalSetSize(int norbs) final; - void - evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) final; + void evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) final; - void - evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi) final; + void evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final; - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) final; + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) final; - template - friend class LCAOrbitalBuilderT; + template + friend class LCAOrbitalBuilderT; private: - LCAOrbitalSetT lcao; + LCAOrbitalSetT lcao; - SoaCuspCorrectionT cusp; + SoaCuspCorrectionT cusp; }; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h b/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h index 33aa7070964..0866c165548 100644 --- a/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h +++ b/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h @@ -19,34 +19,31 @@ namespace 
qmcplusplus { -template +template struct CorrectPhaseFunctor { - const TinyVector& superTwist; + const TinyVector& superTwist; - template - T - operator()(PosType Tv) const - { - return 1.0; - } + template + T operator()(PosType Tv) const + { + return 1.0; + } }; -template +template struct CorrectPhaseFunctor> { - const TinyVector& superTwist; - - template - std::complex - operator()(PosType Tv) const - { - T phasearg = superTwist[0] * Tv[0] + superTwist[1] * Tv[1] + - superTwist[2] * Tv[2]; - T s, c; - qmcplusplus::sincos(-phasearg, &s, &c); - return {c, s}; - }; + const TinyVector& superTwist; + + template + std::complex operator()(PosType Tv) const + { + T phasearg = superTwist[0] * Tv[0] + superTwist[1] * Tv[1] + superTwist[2] * Tv[2]; + T s, c; + qmcplusplus::sincos(-phasearg, &s, &c); + return {c, s}; + }; }; /* A basis set for a center type @@ -56,768 +53,694 @@ struct CorrectPhaseFunctor> * * \f$ \phi_{n,l,m}({\bf r})=R_{n,l}(r) Y_{l,m}(\theta) \f$ */ -template +template struct SoaAtomicBasisSetT { - using RadialOrbital_t = ROT; - using RealType = typename ROT::RealType; - using GridType = typename ROT::GridType; - using ValueType = ORBT; - using OffloadArray4D = Array>; - using OffloadArray3D = Array>; - using OffloadMatrix = Matrix>; - using OffloadVector = Vector>; - - /// multi walker shared memory buffer - struct SoaAtomicBSetMultiWalkerMem; - /// multi walker resource handle - ResourceHandle mw_mem_handle_; - /// size of the basis set - int BasisSetSize; - /// Number of Cell images for the evaluation of the orbital with PBC. 
If No - /// PBC, should be 0; - TinyVector PBCImages; - /// Coordinates of SuperTwist - TinyVector SuperTwist; - /// Phase Factor array - std::vector periodic_image_phase_factors; - /// maximum radius of this center - RealType Rmax; - /// spherical harmonics - SH Ylm; - /// radial orbitals - ROT MultiRnl; - /// index of the corresponding real Spherical Harmonic with quantum numbers - /// \f$ (l,m) \f$ - aligned_vector LM; - /**index of the corresponding radial orbital with quantum numbers \f$ (n,l) + using RadialOrbital_t = ROT; + using RealType = typename ROT::RealType; + using GridType = typename ROT::GridType; + using ValueType = ORBT; + using OffloadArray4D = Array>; + using OffloadArray3D = Array>; + using OffloadMatrix = Matrix>; + using OffloadVector = Vector>; + + /// multi walker shared memory buffer + struct SoaAtomicBSetMultiWalkerMem; + /// multi walker resource handle + ResourceHandle mw_mem_handle_; + /// size of the basis set + int BasisSetSize; + /// Number of Cell images for the evaluation of the orbital with PBC. 
If No + /// PBC, should be 0; + TinyVector PBCImages; + /// Coordinates of SuperTwist + TinyVector SuperTwist; + /// Phase Factor array + std::vector periodic_image_phase_factors; + /// maximum radius of this center + RealType Rmax; + /// spherical harmonics + SH Ylm; + /// radial orbitals + ROT MultiRnl; + /// index of the corresponding real Spherical Harmonic with quantum numbers + /// \f$ (l,m) \f$ + aligned_vector LM; + /**index of the corresponding radial orbital with quantum numbers \f$ (n,l) * \f$ */ - aligned_vector NL; - /// container for the quantum-numbers - std::vector RnlID; - /// temporary storage - VectorSoaContainer tempS; - - /// the constructor - explicit SoaAtomicBasisSetT(int lmax, bool addsignforM = false) : - Ylm(lmax, addsignforM) - { - } - - void - checkInVariables(opt_variables_type& active) - { - // for(size_t nl=0; nlcheckInVariables(active); - } - - void - checkOutVariables(const opt_variables_type& active) - { - // for(size_t nl=0; nlcheckOutVariables(active); - } - - void - resetParameters(const opt_variables_type& active) - { - // for(size_t nl=0; nlresetParameters(active); - } - - /** return the number of basis functions + aligned_vector NL; + /// container for the quantum-numbers + std::vector RnlID; + /// temporary storage + VectorSoaContainer tempS; + + /// the constructor + explicit SoaAtomicBasisSetT(int lmax, bool addsignforM = false) : Ylm(lmax, addsignforM) {} + + void checkInVariables(opt_variables_type& active) + { + // for(size_t nl=0; nlcheckInVariables(active); + } + + void checkOutVariables(const opt_variables_type& active) + { + // for(size_t nl=0; nlcheckOutVariables(active); + } + + void resetParameters(const opt_variables_type& active) + { + // for(size_t nl=0; nlresetParameters(active); + } + + /** return the number of basis functions */ - inline int - getBasisSetSize() const - { - //=NL.size(); - return BasisSetSize; - } + inline int getBasisSetSize() const + { + //=NL.size(); + return BasisSetSize; + } - /** Set 
the number of periodic image for the evaluation of the orbitals and + /** Set the number of periodic image for the evaluation of the orbitals and * the phase factor. In the case of Non-PBC, PBCImages=(1,1,1), * SuperTwist(0,0,0) and the PhaseFactor=1. */ - void - setPBCParams(const TinyVector& pbc_images, - const TinyVector supertwist, - const std::vector& PeriodicImagePhaseFactors) - { - PBCImages = pbc_images; - periodic_image_phase_factors = PeriodicImagePhaseFactors; - SuperTwist = supertwist; - } - - /** implement a BasisSetBase virtual function + void setPBCParams(const TinyVector& pbc_images, + const TinyVector supertwist, + const std::vector& PeriodicImagePhaseFactors) + { + PBCImages = pbc_images; + periodic_image_phase_factors = PeriodicImagePhaseFactors; + SuperTwist = supertwist; + } + + /** implement a BasisSetBase virtual function * * Set Rmax and BasisSetSize * @todo Should be able to overwrite Rmax to be much smaller than the * maximum grid */ - inline void - setBasisSetSize(int n) + inline void setBasisSetSize(int n) + { + BasisSetSize = LM.size(); + tempS.resize(std::max(Ylm.size(), RnlID.size())); + } + + /** Set Rmax */ + template + inline void setRmax(RealType rmax) + { + Rmax = (rmax > 0) ? rmax : MultiRnl.rmax(); + } + + /// set the current offset + inline void setCenter(int c, int offset) {} + + /// Sets a boolean vector for S-type orbitals. Used for cusp correction. 
+ void queryOrbitalsForSType(std::vector& s_orbitals) const + { + for (int i = 0; i < BasisSetSize; i++) { - BasisSetSize = LM.size(); - tempS.resize(std::max(Ylm.size(), RnlID.size())); + s_orbitals[i] = (RnlID[NL[i]][1] == 0); } + } - /** Set Rmax */ - template - inline void - setRmax(RealType rmax) + /** evaluate VGL + */ + template + inline void evaluateVGL(const LAT& lattice, + const RealType r, + const PosType& dr, + const size_t offset, + VGL& vgl, + PosType Tv) + { + int TransX, TransY, TransZ; + + PosType dr_new; + RealType r_new; + // RealType psi_new, dpsi_x_new, dpsi_y_new, dpsi_z_new,d2psi_new; + + const ValueType correctphase = CorrectPhaseFunctor{SuperTwist}(Tv); + + constexpr RealType cone(1); + constexpr RealType ctwo(2); + + // one can assert the alignment + RealType* restrict phi = tempS.data(0); + RealType* restrict dphi = tempS.data(1); + RealType* restrict d2phi = tempS.data(2); + + // V,Gx,Gy,Gz,L + auto* restrict psi = vgl.data(0) + offset; + const RealType* restrict ylm_v = Ylm[0]; // value + auto* restrict dpsi_x = vgl.data(1) + offset; + const RealType* restrict ylm_x = Ylm[1]; // gradX + auto* restrict dpsi_y = vgl.data(2) + offset; + const RealType* restrict ylm_y = Ylm[2]; // gradY + auto* restrict dpsi_z = vgl.data(3) + offset; + const RealType* restrict ylm_z = Ylm[3]; // gradZ + auto* restrict d2psi = vgl.data(4) + offset; + const RealType* restrict ylm_l = Ylm[4]; // lap + + for (size_t ib = 0; ib < BasisSetSize; ++ib) { - Rmax = (rmax > 0) ? rmax : MultiRnl.rmax(); + psi[ib] = 0; + dpsi_x[ib] = 0; + dpsi_y[ib] = 0; + dpsi_z[ib] = 0; + d2psi[ib] = 0; } - - /// set the current offset - inline void - setCenter(int c, int offset) + // Phase_idx (iter) needs to be initialized at -1 as it has to be + // incremented first to comply with the if statement (r_new >=Rmax) + int iter = -1; + for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X { + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... 
+ TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); + for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y + { + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); + for (int k = 0; k <= PBCImages[2]; k++) // loop Translation over Z + { + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); + + dr_new[0] = dr[0] + (TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0)); + dr_new[1] = dr[1] + (TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1)); + dr_new[2] = dr[2] + (TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2)); + + r_new = std::sqrt(dot(dr_new, dr_new)); + + iter++; + if (r_new >= Rmax) + continue; + + // SIGN Change!! + const RealType x = -dr_new[0], y = -dr_new[1], z = -dr_new[2]; + Ylm.evaluateVGL(x, y, z); + + MultiRnl.evaluate(r_new, phi, dphi, d2phi); + + const RealType rinv = cone / r_new; + + /// Phase for PBC containing the phase for the nearest image + /// displacement and the correction due to the Distance + /// table. 
+ const ValueType Phase = periodic_image_phase_factors[iter] * correctphase; + + for (size_t ib = 0; ib < BasisSetSize; ++ib) + { + const int nl(NL[ib]); + const int lm(LM[ib]); + const RealType drnloverr = rinv * dphi[nl]; + const RealType ang = ylm_v[lm]; + const RealType gr_x = drnloverr * x; + const RealType gr_y = drnloverr * y; + const RealType gr_z = drnloverr * z; + const RealType ang_x = ylm_x[lm]; + const RealType ang_y = ylm_y[lm]; + const RealType ang_z = ylm_z[lm]; + const RealType vr = phi[nl]; + + psi[ib] += ang * vr * Phase; + dpsi_x[ib] += (ang * gr_x + vr * ang_x) * Phase; + dpsi_y[ib] += (ang * gr_y + vr * ang_y) * Phase; + dpsi_z[ib] += (ang * gr_z + vr * ang_z) * Phase; + d2psi[ib] += (ang * (ctwo * drnloverr + d2phi[nl]) + ctwo * (gr_x * ang_x + gr_y * ang_y + gr_z * ang_z) + + vr * ylm_l[lm]) * + Phase; + } + } + } } - - /// Sets a boolean vector for S-type orbitals. Used for cusp correction. - void - queryOrbitalsForSType(std::vector& s_orbitals) const + } + + template + inline void evaluateVGH(const LAT& lattice, const RealType r, const PosType& dr, const size_t offset, VGH& vgh) + { + int TransX, TransY, TransZ; + + PosType dr_new; + RealType r_new; + + constexpr RealType cone(1); + + // one can assert the alignment + RealType* restrict phi = tempS.data(0); + RealType* restrict dphi = tempS.data(1); + RealType* restrict d2phi = tempS.data(2); + + // V,Gx,Gy,Gz,L + auto* restrict psi = vgh.data(0) + offset; + const RealType* restrict ylm_v = Ylm[0]; // value + auto* restrict dpsi_x = vgh.data(1) + offset; + const RealType* restrict ylm_x = Ylm[1]; // gradX + auto* restrict dpsi_y = vgh.data(2) + offset; + const RealType* restrict ylm_y = Ylm[2]; // gradY + auto* restrict dpsi_z = vgh.data(3) + offset; + const RealType* restrict ylm_z = Ylm[3]; // gradZ + + auto* restrict dhpsi_xx = vgh.data(4) + offset; + const RealType* restrict ylm_xx = Ylm[4]; + auto* restrict dhpsi_xy = vgh.data(5) + offset; + const RealType* restrict ylm_xy = Ylm[5]; + 
auto* restrict dhpsi_xz = vgh.data(6) + offset; + const RealType* restrict ylm_xz = Ylm[6]; + auto* restrict dhpsi_yy = vgh.data(7) + offset; + const RealType* restrict ylm_yy = Ylm[7]; + auto* restrict dhpsi_yz = vgh.data(8) + offset; + const RealType* restrict ylm_yz = Ylm[8]; + auto* restrict dhpsi_zz = vgh.data(9) + offset; + const RealType* restrict ylm_zz = Ylm[9]; + + for (size_t ib = 0; ib < BasisSetSize; ++ib) { - for (int i = 0; i < BasisSetSize; i++) { - s_orbitals[i] = (RnlID[NL[i]][1] == 0); - } + psi[ib] = 0; + dpsi_x[ib] = 0; + dpsi_y[ib] = 0; + dpsi_z[ib] = 0; + dhpsi_xx[ib] = 0; + dhpsi_xy[ib] = 0; + dhpsi_xz[ib] = 0; + dhpsi_yy[ib] = 0; + dhpsi_yz[ib] = 0; + dhpsi_zz[ib] = 0; + // d2psi[ib] = 0; } - /** evaluate VGL - */ - template - inline void - evaluateVGL(const LAT& lattice, const RealType r, const PosType& dr, - const size_t offset, VGL& vgl, PosType Tv) + for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X { - int TransX, TransY, TransZ; - - PosType dr_new; - RealType r_new; - // RealType psi_new, dpsi_x_new, dpsi_y_new, dpsi_z_new,d2psi_new; - - const ValueType correctphase = - CorrectPhaseFunctor{SuperTwist}(Tv); - - constexpr RealType cone(1); - constexpr RealType ctwo(2); - - // one can assert the alignment - RealType* restrict phi = tempS.data(0); - RealType* restrict dphi = tempS.data(1); - RealType* restrict d2phi = tempS.data(2); - - // V,Gx,Gy,Gz,L - auto* restrict psi = vgl.data(0) + offset; - const RealType* restrict ylm_v = Ylm[0]; // value - auto* restrict dpsi_x = vgl.data(1) + offset; - const RealType* restrict ylm_x = Ylm[1]; // gradX - auto* restrict dpsi_y = vgl.data(2) + offset; - const RealType* restrict ylm_y = Ylm[2]; // gradY - auto* restrict dpsi_z = vgl.data(3) + offset; - const RealType* restrict ylm_z = Ylm[3]; // gradZ - auto* restrict d2psi = vgl.data(4) + offset; - const RealType* restrict ylm_l = Ylm[4]; // lap - - for (size_t ib = 0; ib < BasisSetSize; ++ib) { - psi[ib] = 0; - dpsi_x[ib] = 0; - 
dpsi_y[ib] = 0; - dpsi_z[ib] = 0; - d2psi[ib] = 0; - } - // Phase_idx (iter) needs to be initialized at -1 as it has to be - // incremented first to comply with the if statement (r_new >=Rmax) - int iter = -1; - for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); + for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y + { + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); + for (int k = 0; k <= PBCImages[2]; k++) // loop Translation over Z { - // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... - TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); - for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y - { - // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... - TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); - for (int k = 0; k <= PBCImages[2]; - k++) // loop Translation over Z - { - // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... - TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); - - dr_new[0] = dr[0] + - (TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + - TransZ * lattice.R(2, 0)); - dr_new[1] = dr[1] + - (TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + - TransZ * lattice.R(2, 1)); - dr_new[2] = dr[2] + - (TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + - TransZ * lattice.R(2, 2)); - - r_new = std::sqrt(dot(dr_new, dr_new)); - - iter++; - if (r_new >= Rmax) - continue; - - // SIGN Change!! - const RealType x = -dr_new[0], y = -dr_new[1], - z = -dr_new[2]; - Ylm.evaluateVGL(x, y, z); - - MultiRnl.evaluate(r_new, phi, dphi, d2phi); - - const RealType rinv = cone / r_new; - - /// Phase for PBC containing the phase for the nearest image - /// displacement and the correction due to the Distance - /// table. 
- const ValueType Phase = - periodic_image_phase_factors[iter] * correctphase; - - for (size_t ib = 0; ib < BasisSetSize; ++ib) { - const int nl(NL[ib]); - const int lm(LM[ib]); - const RealType drnloverr = rinv * dphi[nl]; - const RealType ang = ylm_v[lm]; - const RealType gr_x = drnloverr * x; - const RealType gr_y = drnloverr * y; - const RealType gr_z = drnloverr * z; - const RealType ang_x = ylm_x[lm]; - const RealType ang_y = ylm_y[lm]; - const RealType ang_z = ylm_z[lm]; - const RealType vr = phi[nl]; - - psi[ib] += ang * vr * Phase; - dpsi_x[ib] += (ang * gr_x + vr * ang_x) * Phase; - dpsi_y[ib] += (ang * gr_y + vr * ang_y) * Phase; - dpsi_z[ib] += (ang * gr_z + vr * ang_z) * Phase; - d2psi[ib] += (ang * (ctwo * drnloverr + d2phi[nl]) + - ctwo * - (gr_x * ang_x + gr_y * ang_y + - gr_z * ang_z) + - vr * ylm_l[lm]) * - Phase; - } - } - } + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); + dr_new[0] = dr[0] + TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0); + dr_new[1] = dr[1] + TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1); + dr_new[2] = dr[2] + TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2); + r_new = std::sqrt(dot(dr_new, dr_new)); + + // const size_t ib_max=NL.size(); + if (r_new >= Rmax) + continue; + + // SIGN Change!! 
+ const RealType x = -dr_new[0], y = -dr_new[1], z = -dr_new[2]; + Ylm.evaluateVGH(x, y, z); + + MultiRnl.evaluate(r_new, phi, dphi, d2phi); + + const RealType rinv = cone / r_new; + + for (size_t ib = 0; ib < BasisSetSize; ++ib) + { + const int nl(NL[ib]); + const int lm(LM[ib]); + const RealType drnloverr = rinv * dphi[nl]; + const RealType ang = ylm_v[lm]; + const RealType gr_x = drnloverr * x; + const RealType gr_y = drnloverr * y; + const RealType gr_z = drnloverr * z; + + // The non-strictly diagonal term in \partial_i + // \partial_j R_{nl} is + // \frac{x_i x_j}{r^2}\left(\frac{\partial^2 + // R_{nl}}{\partial r^2} - \frac{1}{r}\frac{\partial + // R_{nl}}{\partial r}) To save recomputation, I + // evaluate everything except the x_i*x_j term once, + // and store it in gr2_tmp. The full term is obtained + // by x_i*x_j*gr2_tmp. + const RealType gr2_tmp = rinv * rinv * (d2phi[nl] - drnloverr); + const RealType gr_xx = x * x * gr2_tmp + drnloverr; + const RealType gr_xy = x * y * gr2_tmp; + const RealType gr_xz = x * z * gr2_tmp; + const RealType gr_yy = y * y * gr2_tmp + drnloverr; + const RealType gr_yz = y * z * gr2_tmp; + const RealType gr_zz = z * z * gr2_tmp + drnloverr; + + const RealType ang_x = ylm_x[lm]; + const RealType ang_y = ylm_y[lm]; + const RealType ang_z = ylm_z[lm]; + const RealType ang_xx = ylm_xx[lm]; + const RealType ang_xy = ylm_xy[lm]; + const RealType ang_xz = ylm_xz[lm]; + const RealType ang_yy = ylm_yy[lm]; + const RealType ang_yz = ylm_yz[lm]; + const RealType ang_zz = ylm_zz[lm]; + + const RealType vr = phi[nl]; + + psi[ib] += ang * vr; + dpsi_x[ib] += ang * gr_x + vr * ang_x; + dpsi_y[ib] += ang * gr_y + vr * ang_y; + dpsi_z[ib] += ang * gr_z + vr * ang_z; + + // \partial_i \partial_j (R*Y) = Y \partial_i \partial_j + // R + R \partial_i \partial_j Y + // + (\partial_i R) + // (\partial_j Y) + + // (\partial_j R)(\partial_i + // Y) + dhpsi_xx[ib] += gr_xx * ang + ang_xx * vr + 2.0 * gr_x * ang_x; + dhpsi_xy[ib] += gr_xy * ang + 
ang_xy * vr + gr_x * ang_y + gr_y * ang_x; + dhpsi_xz[ib] += gr_xz * ang + ang_xz * vr + gr_x * ang_z + gr_z * ang_x; + dhpsi_yy[ib] += gr_yy * ang + ang_yy * vr + 2.0 * gr_y * ang_y; + dhpsi_yz[ib] += gr_yz * ang + ang_yz * vr + gr_y * ang_z + gr_z * ang_y; + dhpsi_zz[ib] += gr_zz * ang + ang_zz * vr + 2.0 * gr_z * ang_z; + } } + } } - - template - inline void - evaluateVGH(const LAT& lattice, const RealType r, const PosType& dr, - const size_t offset, VGH& vgh) + } + + template + inline void evaluateVGHGH(const LAT& lattice, const RealType r, const PosType& dr, const size_t offset, VGHGH& vghgh) + { + int TransX, TransY, TransZ; + + PosType dr_new; + RealType r_new; + + constexpr RealType cone(1); + + // one can assert the alignment + RealType* restrict phi = tempS.data(0); + RealType* restrict dphi = tempS.data(1); + RealType* restrict d2phi = tempS.data(2); + RealType* restrict d3phi = tempS.data(3); + + // V,Gx,Gy,Gz,L + auto* restrict psi = vghgh.data(0) + offset; + const RealType* restrict ylm_v = Ylm[0]; // value + auto* restrict dpsi_x = vghgh.data(1) + offset; + const RealType* restrict ylm_x = Ylm[1]; // gradX + auto* restrict dpsi_y = vghgh.data(2) + offset; + const RealType* restrict ylm_y = Ylm[2]; // gradY + auto* restrict dpsi_z = vghgh.data(3) + offset; + const RealType* restrict ylm_z = Ylm[3]; // gradZ + + auto* restrict dhpsi_xx = vghgh.data(4) + offset; + const RealType* restrict ylm_xx = Ylm[4]; + auto* restrict dhpsi_xy = vghgh.data(5) + offset; + const RealType* restrict ylm_xy = Ylm[5]; + auto* restrict dhpsi_xz = vghgh.data(6) + offset; + const RealType* restrict ylm_xz = Ylm[6]; + auto* restrict dhpsi_yy = vghgh.data(7) + offset; + const RealType* restrict ylm_yy = Ylm[7]; + auto* restrict dhpsi_yz = vghgh.data(8) + offset; + const RealType* restrict ylm_yz = Ylm[8]; + auto* restrict dhpsi_zz = vghgh.data(9) + offset; + const RealType* restrict ylm_zz = Ylm[9]; + + auto* restrict dghpsi_xxx = vghgh.data(10) + offset; + const RealType* 
restrict ylm_xxx = Ylm[10]; + auto* restrict dghpsi_xxy = vghgh.data(11) + offset; + const RealType* restrict ylm_xxy = Ylm[11]; + auto* restrict dghpsi_xxz = vghgh.data(12) + offset; + const RealType* restrict ylm_xxz = Ylm[12]; + auto* restrict dghpsi_xyy = vghgh.data(13) + offset; + const RealType* restrict ylm_xyy = Ylm[13]; + auto* restrict dghpsi_xyz = vghgh.data(14) + offset; + const RealType* restrict ylm_xyz = Ylm[14]; + auto* restrict dghpsi_xzz = vghgh.data(15) + offset; + const RealType* restrict ylm_xzz = Ylm[15]; + auto* restrict dghpsi_yyy = vghgh.data(16) + offset; + const RealType* restrict ylm_yyy = Ylm[16]; + auto* restrict dghpsi_yyz = vghgh.data(17) + offset; + const RealType* restrict ylm_yyz = Ylm[17]; + auto* restrict dghpsi_yzz = vghgh.data(18) + offset; + const RealType* restrict ylm_yzz = Ylm[18]; + auto* restrict dghpsi_zzz = vghgh.data(19) + offset; + const RealType* restrict ylm_zzz = Ylm[19]; + + for (size_t ib = 0; ib < BasisSetSize; ++ib) { - int TransX, TransY, TransZ; - - PosType dr_new; - RealType r_new; - - constexpr RealType cone(1); - - // one can assert the alignment - RealType* restrict phi = tempS.data(0); - RealType* restrict dphi = tempS.data(1); - RealType* restrict d2phi = tempS.data(2); - - // V,Gx,Gy,Gz,L - auto* restrict psi = vgh.data(0) + offset; - const RealType* restrict ylm_v = Ylm[0]; // value - auto* restrict dpsi_x = vgh.data(1) + offset; - const RealType* restrict ylm_x = Ylm[1]; // gradX - auto* restrict dpsi_y = vgh.data(2) + offset; - const RealType* restrict ylm_y = Ylm[2]; // gradY - auto* restrict dpsi_z = vgh.data(3) + offset; - const RealType* restrict ylm_z = Ylm[3]; // gradZ - - auto* restrict dhpsi_xx = vgh.data(4) + offset; - const RealType* restrict ylm_xx = Ylm[4]; - auto* restrict dhpsi_xy = vgh.data(5) + offset; - const RealType* restrict ylm_xy = Ylm[5]; - auto* restrict dhpsi_xz = vgh.data(6) + offset; - const RealType* restrict ylm_xz = Ylm[6]; - auto* restrict dhpsi_yy = vgh.data(7) + 
offset; - const RealType* restrict ylm_yy = Ylm[7]; - auto* restrict dhpsi_yz = vgh.data(8) + offset; - const RealType* restrict ylm_yz = Ylm[8]; - auto* restrict dhpsi_zz = vgh.data(9) + offset; - const RealType* restrict ylm_zz = Ylm[9]; - - for (size_t ib = 0; ib < BasisSetSize; ++ib) { - psi[ib] = 0; - dpsi_x[ib] = 0; - dpsi_y[ib] = 0; - dpsi_z[ib] = 0; - dhpsi_xx[ib] = 0; - dhpsi_xy[ib] = 0; - dhpsi_xz[ib] = 0; - dhpsi_yy[ib] = 0; - dhpsi_yz[ib] = 0; - dhpsi_zz[ib] = 0; - // d2psi[ib] = 0; - } - - for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X - { - // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... - TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); - for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y - { - // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... - TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); - for (int k = 0; k <= PBCImages[2]; - k++) // loop Translation over Z - { - // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... - TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); - dr_new[0] = dr[0] + TransX * lattice.R(0, 0) + - TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0); - dr_new[1] = dr[1] + TransX * lattice.R(0, 1) + - TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1); - dr_new[2] = dr[2] + TransX * lattice.R(0, 2) + - TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2); - r_new = std::sqrt(dot(dr_new, dr_new)); - - // const size_t ib_max=NL.size(); - if (r_new >= Rmax) - continue; - - // SIGN Change!! 
- const RealType x = -dr_new[0], y = -dr_new[1], - z = -dr_new[2]; - Ylm.evaluateVGH(x, y, z); - - MultiRnl.evaluate(r_new, phi, dphi, d2phi); - - const RealType rinv = cone / r_new; - - for (size_t ib = 0; ib < BasisSetSize; ++ib) { - const int nl(NL[ib]); - const int lm(LM[ib]); - const RealType drnloverr = rinv * dphi[nl]; - const RealType ang = ylm_v[lm]; - const RealType gr_x = drnloverr * x; - const RealType gr_y = drnloverr * y; - const RealType gr_z = drnloverr * z; - - // The non-strictly diagonal term in \partial_i - // \partial_j R_{nl} is - // \frac{x_i x_j}{r^2}\left(\frac{\partial^2 - // R_{nl}}{\partial r^2} - \frac{1}{r}\frac{\partial - // R_{nl}}{\partial r}) To save recomputation, I - // evaluate everything except the x_i*x_j term once, - // and store it in gr2_tmp. The full term is obtained - // by x_i*x_j*gr2_tmp. - const RealType gr2_tmp = - rinv * rinv * (d2phi[nl] - drnloverr); - const RealType gr_xx = x * x * gr2_tmp + drnloverr; - const RealType gr_xy = x * y * gr2_tmp; - const RealType gr_xz = x * z * gr2_tmp; - const RealType gr_yy = y * y * gr2_tmp + drnloverr; - const RealType gr_yz = y * z * gr2_tmp; - const RealType gr_zz = z * z * gr2_tmp + drnloverr; - - const RealType ang_x = ylm_x[lm]; - const RealType ang_y = ylm_y[lm]; - const RealType ang_z = ylm_z[lm]; - const RealType ang_xx = ylm_xx[lm]; - const RealType ang_xy = ylm_xy[lm]; - const RealType ang_xz = ylm_xz[lm]; - const RealType ang_yy = ylm_yy[lm]; - const RealType ang_yz = ylm_yz[lm]; - const RealType ang_zz = ylm_zz[lm]; - - const RealType vr = phi[nl]; - - psi[ib] += ang * vr; - dpsi_x[ib] += ang * gr_x + vr * ang_x; - dpsi_y[ib] += ang * gr_y + vr * ang_y; - dpsi_z[ib] += ang * gr_z + vr * ang_z; - - // \partial_i \partial_j (R*Y) = Y \partial_i \partial_j - // R + R \partial_i \partial_j Y - // + (\partial_i R) - // (\partial_j Y) + - // (\partial_j R)(\partial_i - // Y) - dhpsi_xx[ib] += - gr_xx * ang + ang_xx * vr + 2.0 * gr_x * ang_x; - dhpsi_xy[ib] += gr_xy * ang + 
ang_xy * vr + - gr_x * ang_y + gr_y * ang_x; - dhpsi_xz[ib] += gr_xz * ang + ang_xz * vr + - gr_x * ang_z + gr_z * ang_x; - dhpsi_yy[ib] += - gr_yy * ang + ang_yy * vr + 2.0 * gr_y * ang_y; - dhpsi_yz[ib] += gr_yz * ang + ang_yz * vr + - gr_y * ang_z + gr_z * ang_y; - dhpsi_zz[ib] += - gr_zz * ang + ang_zz * vr + 2.0 * gr_z * ang_z; - } - } - } - } + psi[ib] = 0; + + dpsi_x[ib] = 0; + dpsi_y[ib] = 0; + dpsi_z[ib] = 0; + + dhpsi_xx[ib] = 0; + dhpsi_xy[ib] = 0; + dhpsi_xz[ib] = 0; + dhpsi_yy[ib] = 0; + dhpsi_yz[ib] = 0; + dhpsi_zz[ib] = 0; + + dghpsi_xxx[ib] = 0; + dghpsi_xxy[ib] = 0; + dghpsi_xxz[ib] = 0; + dghpsi_xyy[ib] = 0; + dghpsi_xyz[ib] = 0; + dghpsi_xzz[ib] = 0; + dghpsi_yyy[ib] = 0; + dghpsi_yyz[ib] = 0; + dghpsi_yzz[ib] = 0; + dghpsi_zzz[ib] = 0; } - template - inline void - evaluateVGHGH(const LAT& lattice, const RealType r, const PosType& dr, - const size_t offset, VGHGH& vghgh) + for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X { - int TransX, TransY, TransZ; - - PosType dr_new; - RealType r_new; - - constexpr RealType cone(1); - - // one can assert the alignment - RealType* restrict phi = tempS.data(0); - RealType* restrict dphi = tempS.data(1); - RealType* restrict d2phi = tempS.data(2); - RealType* restrict d3phi = tempS.data(3); - - // V,Gx,Gy,Gz,L - auto* restrict psi = vghgh.data(0) + offset; - const RealType* restrict ylm_v = Ylm[0]; // value - auto* restrict dpsi_x = vghgh.data(1) + offset; - const RealType* restrict ylm_x = Ylm[1]; // gradX - auto* restrict dpsi_y = vghgh.data(2) + offset; - const RealType* restrict ylm_y = Ylm[2]; // gradY - auto* restrict dpsi_z = vghgh.data(3) + offset; - const RealType* restrict ylm_z = Ylm[3]; // gradZ - - auto* restrict dhpsi_xx = vghgh.data(4) + offset; - const RealType* restrict ylm_xx = Ylm[4]; - auto* restrict dhpsi_xy = vghgh.data(5) + offset; - const RealType* restrict ylm_xy = Ylm[5]; - auto* restrict dhpsi_xz = vghgh.data(6) + offset; - const RealType* restrict ylm_xz = Ylm[6]; - 
auto* restrict dhpsi_yy = vghgh.data(7) + offset; - const RealType* restrict ylm_yy = Ylm[7]; - auto* restrict dhpsi_yz = vghgh.data(8) + offset; - const RealType* restrict ylm_yz = Ylm[8]; - auto* restrict dhpsi_zz = vghgh.data(9) + offset; - const RealType* restrict ylm_zz = Ylm[9]; - - auto* restrict dghpsi_xxx = vghgh.data(10) + offset; - const RealType* restrict ylm_xxx = Ylm[10]; - auto* restrict dghpsi_xxy = vghgh.data(11) + offset; - const RealType* restrict ylm_xxy = Ylm[11]; - auto* restrict dghpsi_xxz = vghgh.data(12) + offset; - const RealType* restrict ylm_xxz = Ylm[12]; - auto* restrict dghpsi_xyy = vghgh.data(13) + offset; - const RealType* restrict ylm_xyy = Ylm[13]; - auto* restrict dghpsi_xyz = vghgh.data(14) + offset; - const RealType* restrict ylm_xyz = Ylm[14]; - auto* restrict dghpsi_xzz = vghgh.data(15) + offset; - const RealType* restrict ylm_xzz = Ylm[15]; - auto* restrict dghpsi_yyy = vghgh.data(16) + offset; - const RealType* restrict ylm_yyy = Ylm[16]; - auto* restrict dghpsi_yyz = vghgh.data(17) + offset; - const RealType* restrict ylm_yyz = Ylm[17]; - auto* restrict dghpsi_yzz = vghgh.data(18) + offset; - const RealType* restrict ylm_yzz = Ylm[18]; - auto* restrict dghpsi_zzz = vghgh.data(19) + offset; - const RealType* restrict ylm_zzz = Ylm[19]; - - for (size_t ib = 0; ib < BasisSetSize; ++ib) { - psi[ib] = 0; - - dpsi_x[ib] = 0; - dpsi_y[ib] = 0; - dpsi_z[ib] = 0; - - dhpsi_xx[ib] = 0; - dhpsi_xy[ib] = 0; - dhpsi_xz[ib] = 0; - dhpsi_yy[ib] = 0; - dhpsi_yz[ib] = 0; - dhpsi_zz[ib] = 0; - - dghpsi_xxx[ib] = 0; - dghpsi_xxy[ib] = 0; - dghpsi_xxz[ib] = 0; - dghpsi_xyy[ib] = 0; - dghpsi_xyz[ib] = 0; - dghpsi_xzz[ib] = 0; - dghpsi_yyy[ib] = 0; - dghpsi_yyz[ib] = 0; - dghpsi_yzz[ib] = 0; - dghpsi_zzz[ib] = 0; - } - - for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... 
+ TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); + for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y + { + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); + for (int k = 0; k <= PBCImages[2]; k++) // loop Translation over Z { - // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... - TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); - for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y - { - // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... - TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); - for (int k = 0; k <= PBCImages[2]; - k++) // loop Translation over Z - { - // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... - TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); - dr_new[0] = dr[0] + TransX * lattice.R(0, 0) + - TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0); - dr_new[1] = dr[1] + TransX * lattice.R(0, 1) + - TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1); - dr_new[2] = dr[2] + TransX * lattice.R(0, 2) + - TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2); - r_new = std::sqrt(dot(dr_new, dr_new)); - - // const size_t ib_max=NL.size(); - if (r_new >= Rmax) - continue; - - // SIGN Change!! 
- const RealType x = -dr_new[0], y = -dr_new[1], - z = -dr_new[2]; - Ylm.evaluateVGHGH(x, y, z); - - MultiRnl.evaluate(r_new, phi, dphi, d2phi, d3phi); - - const RealType rinv = cone / r_new; - const RealType xu = x * rinv, yu = y * rinv, zu = z * rinv; - for (size_t ib = 0; ib < BasisSetSize; ++ib) { - const int nl(NL[ib]); - const int lm(LM[ib]); - const RealType drnloverr = rinv * dphi[nl]; - const RealType ang = ylm_v[lm]; - const RealType gr_x = drnloverr * x; - const RealType gr_y = drnloverr * y; - const RealType gr_z = drnloverr * z; - - // The non-strictly diagonal term in \partial_i - // \partial_j R_{nl} is - // \frac{x_i x_j}{r^2}\left(\frac{\partial^2 - // R_{nl}}{\partial r^2} - \frac{1}{r}\frac{\partial - // R_{nl}}{\partial r}) To save recomputation, I - // evaluate everything except the x_i*x_j term once, - // and store it in gr2_tmp. The full term is obtained - // by x_i*x_j*gr2_tmp. This is p(r) in the notes. - const RealType gr2_tmp = rinv * (d2phi[nl] - drnloverr); - - const RealType gr_xx = x * xu * gr2_tmp + drnloverr; - const RealType gr_xy = x * yu * gr2_tmp; - const RealType gr_xz = x * zu * gr2_tmp; - const RealType gr_yy = y * yu * gr2_tmp + drnloverr; - const RealType gr_yz = y * zu * gr2_tmp; - const RealType gr_zz = z * zu * gr2_tmp + drnloverr; - - // This is q(r) in the notes. - const RealType gr3_tmp = d3phi[nl] - 3.0 * gr2_tmp; - - const RealType gr_xxx = - xu * xu * xu * gr3_tmp + gr2_tmp * (3. * xu); - const RealType gr_xxy = - xu * xu * yu * gr3_tmp + gr2_tmp * yu; - const RealType gr_xxz = - xu * xu * zu * gr3_tmp + gr2_tmp * zu; - const RealType gr_xyy = - xu * yu * yu * gr3_tmp + gr2_tmp * xu; - const RealType gr_xyz = xu * yu * zu * gr3_tmp; - const RealType gr_xzz = - xu * zu * zu * gr3_tmp + gr2_tmp * xu; - const RealType gr_yyy = - yu * yu * yu * gr3_tmp + gr2_tmp * (3. 
* yu); - const RealType gr_yyz = - yu * yu * zu * gr3_tmp + gr2_tmp * zu; - const RealType gr_yzz = - yu * zu * zu * gr3_tmp + gr2_tmp * yu; - const RealType gr_zzz = - zu * zu * zu * gr3_tmp + gr2_tmp * (3. * zu); - - // Angular derivatives up to third - const RealType ang_x = ylm_x[lm]; - const RealType ang_y = ylm_y[lm]; - const RealType ang_z = ylm_z[lm]; - - const RealType ang_xx = ylm_xx[lm]; - const RealType ang_xy = ylm_xy[lm]; - const RealType ang_xz = ylm_xz[lm]; - const RealType ang_yy = ylm_yy[lm]; - const RealType ang_yz = ylm_yz[lm]; - const RealType ang_zz = ylm_zz[lm]; - - const RealType ang_xxx = ylm_xxx[lm]; - const RealType ang_xxy = ylm_xxy[lm]; - const RealType ang_xxz = ylm_xxz[lm]; - const RealType ang_xyy = ylm_xyy[lm]; - const RealType ang_xyz = ylm_xyz[lm]; - const RealType ang_xzz = ylm_xzz[lm]; - const RealType ang_yyy = ylm_yyy[lm]; - const RealType ang_yyz = ylm_yyz[lm]; - const RealType ang_yzz = ylm_yzz[lm]; - const RealType ang_zzz = ylm_zzz[lm]; - - const RealType vr = phi[nl]; - - psi[ib] += ang * vr; - dpsi_x[ib] += ang * gr_x + vr * ang_x; - dpsi_y[ib] += ang * gr_y + vr * ang_y; - dpsi_z[ib] += ang * gr_z + vr * ang_z; - - // \partial_i \partial_j (R*Y) = Y \partial_i \partial_j - // R + R \partial_i \partial_j Y - // + (\partial_i R) - // (\partial_j Y) + - // (\partial_j R)(\partial_i - // Y) - dhpsi_xx[ib] += - gr_xx * ang + ang_xx * vr + 2.0 * gr_x * ang_x; - dhpsi_xy[ib] += gr_xy * ang + ang_xy * vr + - gr_x * ang_y + gr_y * ang_x; - dhpsi_xz[ib] += gr_xz * ang + ang_xz * vr + - gr_x * ang_z + gr_z * ang_x; - dhpsi_yy[ib] += - gr_yy * ang + ang_yy * vr + 2.0 * gr_y * ang_y; - dhpsi_yz[ib] += gr_yz * ang + ang_yz * vr + - gr_y * ang_z + gr_z * ang_y; - dhpsi_zz[ib] += - gr_zz * ang + ang_zz * vr + 2.0 * gr_z * ang_z; - - dghpsi_xxx[ib] += gr_xxx * ang + vr * ang_xxx + - 3.0 * gr_xx * ang_x + 3.0 * gr_x * ang_xx; - dghpsi_xxy[ib] += gr_xxy * ang + vr * ang_xxy + - gr_xx * ang_y + ang_xx * gr_y + - 2.0 * gr_xy * ang_x + 2.0 * 
ang_xy * gr_x; - dghpsi_xxz[ib] += gr_xxz * ang + vr * ang_xxz + - gr_xx * ang_z + ang_xx * gr_z + - 2.0 * gr_xz * ang_x + 2.0 * ang_xz * gr_x; - dghpsi_xyy[ib] += gr_xyy * ang + vr * ang_xyy + - gr_yy * ang_x + ang_yy * gr_x + - 2.0 * gr_xy * ang_y + 2.0 * ang_xy * gr_y; - dghpsi_xyz[ib] += gr_xyz * ang + vr * ang_xyz + - gr_xy * ang_z + ang_xy * gr_z + gr_yz * ang_x + - ang_yz * gr_x + gr_xz * ang_y + ang_xz * gr_y; - dghpsi_xzz[ib] += gr_xzz * ang + vr * ang_xzz + - gr_zz * ang_x + ang_zz * gr_x + - 2.0 * gr_xz * ang_z + 2.0 * ang_xz * gr_z; - dghpsi_yyy[ib] += gr_yyy * ang + vr * ang_yyy + - 3.0 * gr_yy * ang_y + 3.0 * gr_y * ang_yy; - dghpsi_yyz[ib] += gr_yyz * ang + vr * ang_yyz + - gr_yy * ang_z + ang_yy * gr_z + - 2.0 * gr_yz * ang_y + 2.0 * ang_yz * gr_y; - dghpsi_yzz[ib] += gr_yzz * ang + vr * ang_yzz + - gr_zz * ang_y + ang_zz * gr_y + - 2.0 * gr_yz * ang_z + 2.0 * ang_yz * gr_z; - dghpsi_zzz[ib] += gr_zzz * ang + vr * ang_zzz + - 3.0 * gr_zz * ang_z + 3.0 * gr_z * ang_zz; - } - } - } + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); + dr_new[0] = dr[0] + TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0); + dr_new[1] = dr[1] + TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1); + dr_new[2] = dr[2] + TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2); + r_new = std::sqrt(dot(dr_new, dr_new)); + + // const size_t ib_max=NL.size(); + if (r_new >= Rmax) + continue; + + // SIGN Change!! 
+ const RealType x = -dr_new[0], y = -dr_new[1], z = -dr_new[2]; + Ylm.evaluateVGHGH(x, y, z); + + MultiRnl.evaluate(r_new, phi, dphi, d2phi, d3phi); + + const RealType rinv = cone / r_new; + const RealType xu = x * rinv, yu = y * rinv, zu = z * rinv; + for (size_t ib = 0; ib < BasisSetSize; ++ib) + { + const int nl(NL[ib]); + const int lm(LM[ib]); + const RealType drnloverr = rinv * dphi[nl]; + const RealType ang = ylm_v[lm]; + const RealType gr_x = drnloverr * x; + const RealType gr_y = drnloverr * y; + const RealType gr_z = drnloverr * z; + + // The non-strictly diagonal term in \partial_i + // \partial_j R_{nl} is + // \frac{x_i x_j}{r^2}\left(\frac{\partial^2 + // R_{nl}}{\partial r^2} - \frac{1}{r}\frac{\partial + // R_{nl}}{\partial r}) To save recomputation, I + // evaluate everything except the x_i*x_j term once, + // and store it in gr2_tmp. The full term is obtained + // by x_i*x_j*gr2_tmp. This is p(r) in the notes. + const RealType gr2_tmp = rinv * (d2phi[nl] - drnloverr); + + const RealType gr_xx = x * xu * gr2_tmp + drnloverr; + const RealType gr_xy = x * yu * gr2_tmp; + const RealType gr_xz = x * zu * gr2_tmp; + const RealType gr_yy = y * yu * gr2_tmp + drnloverr; + const RealType gr_yz = y * zu * gr2_tmp; + const RealType gr_zz = z * zu * gr2_tmp + drnloverr; + + // This is q(r) in the notes. + const RealType gr3_tmp = d3phi[nl] - 3.0 * gr2_tmp; + + const RealType gr_xxx = xu * xu * xu * gr3_tmp + gr2_tmp * (3. * xu); + const RealType gr_xxy = xu * xu * yu * gr3_tmp + gr2_tmp * yu; + const RealType gr_xxz = xu * xu * zu * gr3_tmp + gr2_tmp * zu; + const RealType gr_xyy = xu * yu * yu * gr3_tmp + gr2_tmp * xu; + const RealType gr_xyz = xu * yu * zu * gr3_tmp; + const RealType gr_xzz = xu * zu * zu * gr3_tmp + gr2_tmp * xu; + const RealType gr_yyy = yu * yu * yu * gr3_tmp + gr2_tmp * (3. 
* yu); + const RealType gr_yyz = yu * yu * zu * gr3_tmp + gr2_tmp * zu; + const RealType gr_yzz = yu * zu * zu * gr3_tmp + gr2_tmp * yu; + const RealType gr_zzz = zu * zu * zu * gr3_tmp + gr2_tmp * (3. * zu); + + // Angular derivatives up to third + const RealType ang_x = ylm_x[lm]; + const RealType ang_y = ylm_y[lm]; + const RealType ang_z = ylm_z[lm]; + + const RealType ang_xx = ylm_xx[lm]; + const RealType ang_xy = ylm_xy[lm]; + const RealType ang_xz = ylm_xz[lm]; + const RealType ang_yy = ylm_yy[lm]; + const RealType ang_yz = ylm_yz[lm]; + const RealType ang_zz = ylm_zz[lm]; + + const RealType ang_xxx = ylm_xxx[lm]; + const RealType ang_xxy = ylm_xxy[lm]; + const RealType ang_xxz = ylm_xxz[lm]; + const RealType ang_xyy = ylm_xyy[lm]; + const RealType ang_xyz = ylm_xyz[lm]; + const RealType ang_xzz = ylm_xzz[lm]; + const RealType ang_yyy = ylm_yyy[lm]; + const RealType ang_yyz = ylm_yyz[lm]; + const RealType ang_yzz = ylm_yzz[lm]; + const RealType ang_zzz = ylm_zzz[lm]; + + const RealType vr = phi[nl]; + + psi[ib] += ang * vr; + dpsi_x[ib] += ang * gr_x + vr * ang_x; + dpsi_y[ib] += ang * gr_y + vr * ang_y; + dpsi_z[ib] += ang * gr_z + vr * ang_z; + + // \partial_i \partial_j (R*Y) = Y \partial_i \partial_j + // R + R \partial_i \partial_j Y + // + (\partial_i R) + // (\partial_j Y) + + // (\partial_j R)(\partial_i + // Y) + dhpsi_xx[ib] += gr_xx * ang + ang_xx * vr + 2.0 * gr_x * ang_x; + dhpsi_xy[ib] += gr_xy * ang + ang_xy * vr + gr_x * ang_y + gr_y * ang_x; + dhpsi_xz[ib] += gr_xz * ang + ang_xz * vr + gr_x * ang_z + gr_z * ang_x; + dhpsi_yy[ib] += gr_yy * ang + ang_yy * vr + 2.0 * gr_y * ang_y; + dhpsi_yz[ib] += gr_yz * ang + ang_yz * vr + gr_y * ang_z + gr_z * ang_y; + dhpsi_zz[ib] += gr_zz * ang + ang_zz * vr + 2.0 * gr_z * ang_z; + + dghpsi_xxx[ib] += gr_xxx * ang + vr * ang_xxx + 3.0 * gr_xx * ang_x + 3.0 * gr_x * ang_xx; + dghpsi_xxy[ib] += + gr_xxy * ang + vr * ang_xxy + gr_xx * ang_y + ang_xx * gr_y + 2.0 * gr_xy * ang_x + 2.0 * ang_xy * gr_x; + 
dghpsi_xxz[ib] += + gr_xxz * ang + vr * ang_xxz + gr_xx * ang_z + ang_xx * gr_z + 2.0 * gr_xz * ang_x + 2.0 * ang_xz * gr_x; + dghpsi_xyy[ib] += + gr_xyy * ang + vr * ang_xyy + gr_yy * ang_x + ang_yy * gr_x + 2.0 * gr_xy * ang_y + 2.0 * ang_xy * gr_y; + dghpsi_xyz[ib] += gr_xyz * ang + vr * ang_xyz + gr_xy * ang_z + ang_xy * gr_z + gr_yz * ang_x + + ang_yz * gr_x + gr_xz * ang_y + ang_xz * gr_y; + dghpsi_xzz[ib] += + gr_xzz * ang + vr * ang_xzz + gr_zz * ang_x + ang_zz * gr_x + 2.0 * gr_xz * ang_z + 2.0 * ang_xz * gr_z; + dghpsi_yyy[ib] += gr_yyy * ang + vr * ang_yyy + 3.0 * gr_yy * ang_y + 3.0 * gr_y * ang_yy; + dghpsi_yyz[ib] += + gr_yyz * ang + vr * ang_yyz + gr_yy * ang_z + ang_yy * gr_z + 2.0 * gr_yz * ang_y + 2.0 * ang_yz * gr_y; + dghpsi_yzz[ib] += + gr_yzz * ang + vr * ang_yzz + gr_zz * ang_y + ang_zz * gr_y + 2.0 * gr_yz * ang_z + 2.0 * ang_yz * gr_z; + dghpsi_zzz[ib] += gr_zzz * ang + vr * ang_zzz + 3.0 * gr_zz * ang_z + 3.0 * gr_z * ang_zz; + } } + } } + } - /** evaluate V + /** evaluate V */ - template - inline void - evaluateV(const LAT& lattice, const RealType r, const PosType& dr, - VT* restrict psi, PosType Tv) - { - int TransX, TransY, TransZ; + template + inline void evaluateV(const LAT& lattice, const RealType r, const PosType& dr, VT* restrict psi, PosType Tv) + { + int TransX, TransY, TransZ; - PosType dr_new; - RealType r_new; + PosType dr_new; + RealType r_new; - const ValueType correctphase = - CorrectPhaseFunctor{SuperTwist}(Tv); + const ValueType correctphase = CorrectPhaseFunctor{SuperTwist}(Tv); - RealType* restrict ylm_v = tempS.data(0); - RealType* restrict phi_r = tempS.data(1); + RealType* restrict ylm_v = tempS.data(0); + RealType* restrict phi_r = tempS.data(1); - for (size_t ib = 0; ib < BasisSetSize; ++ib) - psi[ib] = 0; - // Phase_idx (iter) needs to be initialized at -1 as it has to be - // incremented first to comply with the if statement (r_new >=Rmax) - int iter = -1; - for (int i = 0; i <= PBCImages[0]; i++) // loop 
Translation over X + for (size_t ib = 0; ib < BasisSetSize; ++ib) + psi[ib] = 0; + // Phase_idx (iter) needs to be initialized at -1 as it has to be + // incremented first to comply with the if statement (r_new >=Rmax) + int iter = -1; + for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X + { + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); + for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y + { + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); + for (int k = 0; k <= PBCImages[2]; k++) // loop Translation over Z { - // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... - TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); - for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y - { - // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... - TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); - for (int k = 0; k <= PBCImages[2]; - k++) // loop Translation over Z - { - // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... - TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); - - dr_new[0] = dr[0] + - (TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + - TransZ * lattice.R(2, 0)); - dr_new[1] = dr[1] + - (TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + - TransZ * lattice.R(2, 1)); - dr_new[2] = dr[2] + - (TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + - TransZ * lattice.R(2, 2)); - - r_new = std::sqrt(dot(dr_new, dr_new)); - iter++; - if (r_new >= Rmax) - continue; - - Ylm.evaluateV(-dr_new[0], -dr_new[1], -dr_new[2], ylm_v); - MultiRnl.evaluate(r_new, phi_r); - /// Phase for PBC containing the phase for the nearest image - /// displacement and the correction due to the Distance - /// table. 
- const ValueType Phase = - periodic_image_phase_factors[iter] * correctphase; - for (size_t ib = 0; ib < BasisSetSize; ++ib) - psi[ib] += ylm_v[LM[ib]] * phi_r[NL[ib]] * Phase; - } - } + // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); + + dr_new[0] = dr[0] + (TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0)); + dr_new[1] = dr[1] + (TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1)); + dr_new[2] = dr[2] + (TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2)); + + r_new = std::sqrt(dot(dr_new, dr_new)); + iter++; + if (r_new >= Rmax) + continue; + + Ylm.evaluateV(-dr_new[0], -dr_new[1], -dr_new[2], ylm_v); + MultiRnl.evaluate(r_new, phi_r); + /// Phase for PBC containing the phase for the nearest image + /// displacement and the correction due to the Distance + /// table. + const ValueType Phase = periodic_image_phase_factors[iter] * correctphase; + for (size_t ib = 0; ib < BasisSetSize; ++ib) + psi[ib] += ylm_v[LM[ib]] * phi_r[NL[ib]] * Phase; } + } } - - void createResource(ResourceCollection& collection) const - { - collection.addResource(std::make_unique()); - } - - void acquireResource(ResourceCollection& collection, - const RefVectorWithLeader& atom_basis_list) const - { - assert(this == &atom_basis_list.getLeader()); - atom_basis_list.template getCastedLeader().mw_mem_handle_ = - collection.lendResource(); - } - - void releaseResource(ResourceCollection& collection, - const RefVectorWithLeader& atom_basis_list) const + } + + void createResource(ResourceCollection& collection) const + { + collection.addResource(std::make_unique()); + } + + void acquireResource(ResourceCollection& collection, + const RefVectorWithLeader& atom_basis_list) const + { + assert(this == &atom_basis_list.getLeader()); + atom_basis_list.template getCastedLeader().mw_mem_handle_ = + collection.lendResource(); + } + + void 
releaseResource(ResourceCollection& collection, + const RefVectorWithLeader& atom_basis_list) const + { + assert(this == &atom_basis_list.getLeader()); + collection.takebackResource(atom_basis_list.template getCastedLeader().mw_mem_handle_); + } + + struct SoaAtomicBSetMultiWalkerMem : public Resource + { + SoaAtomicBSetMultiWalkerMem() : Resource("SoaAtomicBasisSet") {} + + SoaAtomicBSetMultiWalkerMem(const SoaAtomicBSetMultiWalkerMem&) : SoaAtomicBSetMultiWalkerMem() {} + + std::unique_ptr makeClone() const override { - assert(this == &atom_basis_list.getLeader()); - collection.takebackResource(atom_basis_list.template getCastedLeader().mw_mem_handle_); + return std::make_unique(*this); } - struct SoaAtomicBSetMultiWalkerMem : public Resource - { - SoaAtomicBSetMultiWalkerMem() : Resource("SoaAtomicBasisSet") {} - - SoaAtomicBSetMultiWalkerMem(const SoaAtomicBSetMultiWalkerMem&) : SoaAtomicBSetMultiWalkerMem() {} - - std::unique_ptr makeClone() const override - { - return std::make_unique(*this); - } - - OffloadArray4D ylm_vgl; // [5][Nelec][PBC][NYlm] - OffloadArray4D rnl_vgl; // [5][Nelec][PBC][NRnl] - OffloadArray3D ylm_v; // [Nelec][PBC][NYlm] - OffloadArray3D rnl_v; // [Nelec][PBC][NRnl] - OffloadMatrix dr_pbc; // [PBC][xyz] translation vector for each image - OffloadArray3D dr; // [Nelec][PBC][xyz] ion->elec displacement for each image - OffloadMatrix r; // [Nelec][PBC] ion->elec distance for each image - OffloadVector correctphase; // [Nelec] overall phase - }; + OffloadArray4D ylm_vgl; // [5][Nelec][PBC][NYlm] + OffloadArray4D rnl_vgl; // [5][Nelec][PBC][NRnl] + OffloadArray3D ylm_v; // [Nelec][PBC][NYlm] + OffloadArray3D rnl_v; // [Nelec][PBC][NRnl] + OffloadMatrix dr_pbc; // [PBC][xyz] translation vector for each image + OffloadArray3D dr; // [Nelec][PBC][xyz] ion->elec displacement for each image + OffloadMatrix r; // [Nelec][PBC] ion->elec distance for each image + OffloadVector correctphase; // [Nelec] overall phase + }; }; } // namespace qmcplusplus 
diff --git a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp index 85c17ef568b..c17fdddda80 100644 --- a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp +++ b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp @@ -18,164 +18,153 @@ namespace qmcplusplus { -template -SoaCuspCorrectionT::SoaCuspCorrectionT( - ParticleSetT& ions, ParticleSetT& els) : - myTableIndex(els.addTable(ions)) +template +SoaCuspCorrectionT::SoaCuspCorrectionT(ParticleSetT& ions, ParticleSetT& els) + : myTableIndex(els.addTable(ions)) { - NumCenters = ions.getTotalNum(); - NumTargets = els.getTotalNum(); - LOBasisSet.resize(NumCenters); + NumCenters = ions.getTotalNum(); + NumTargets = els.getTotalNum(); + LOBasisSet.resize(NumCenters); } -template -SoaCuspCorrectionT::SoaCuspCorrectionT( - const SoaCuspCorrectionT& a) = default; +template +SoaCuspCorrectionT::SoaCuspCorrectionT(const SoaCuspCorrectionT& a) = default; -template -void -SoaCuspCorrectionT::setOrbitalSetSize(int norbs) +template +void SoaCuspCorrectionT::setOrbitalSetSize(int norbs) { - MaxOrbSize = norbs; - myVGL.resize(5, MaxOrbSize); + MaxOrbSize = norbs; + myVGL.resize(5, MaxOrbSize); } -template -inline void -SoaCuspCorrectionT::evaluateVGL( - const ParticleSetT& P, int iat, VGLVector& vgl) +template +inline void SoaCuspCorrectionT::evaluateVGL(const ParticleSetT& P, int iat, VGLVector& vgl) { - assert(MaxOrbSize >= vgl.size()); - myVGL = 0.0; - - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : - d_table.getDistRow(iat); - const auto& displ = (P.getActivePtcl() == iat) ? 
d_table.getTempDispls() : - d_table.getDisplRow(iat); - for (int c = 0; c < NumCenters; c++) - if (LOBasisSet[c]) - LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], - myVGL[2], myVGL[3], myVGL[4]); - - { - const auto v_in = myVGL[0]; - const auto gx_in = myVGL[1]; - const auto gy_in = myVGL[2]; - const auto gz_in = myVGL[3]; - const auto l_in = myVGL[4]; - auto v_out = vgl.data(0); - auto gx_out = vgl.data(1); - auto gy_out = vgl.data(2); - auto gz_out = vgl.data(3); - auto l_out = vgl.data(4); - for (size_t i = 0; i < vgl.size(); ++i) { - v_out[i] += v_in[i]; - gx_out[i] += gx_in[i]; - gy_out[i] += gy_in[i]; - gz_out[i] += gz_in[i]; - l_out[i] += l_in[i]; - } - } -} - -template -void -SoaCuspCorrectionT::evaluate_vgl(const ParticleSetT& P, int iat, - ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) -{ - assert(MaxOrbSize >= psi.size()); - myVGL = 0.0; - - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : - d_table.getDistRow(iat); - const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : - d_table.getDisplRow(iat); - for (int c = 0; c < NumCenters; c++) - if (LOBasisSet[c]) - LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], - myVGL[2], myVGL[3], myVGL[4]); - - const auto v_in = myVGL[0]; + assert(MaxOrbSize >= vgl.size()); + myVGL = 0.0; + + const auto& d_table = P.getDistTableAB(myTableIndex); + const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); + const auto& displ = (P.getActivePtcl() == iat) ? 
d_table.getTempDispls() : d_table.getDisplRow(iat); + for (int c = 0; c < NumCenters; c++) + if (LOBasisSet[c]) + LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], myVGL[2], myVGL[3], myVGL[4]); + + { + const auto v_in = myVGL[0]; const auto gx_in = myVGL[1]; const auto gy_in = myVGL[2]; const auto gz_in = myVGL[3]; - const auto l_in = myVGL[4]; - for (size_t i = 0; i < psi.size(); ++i) { - psi[i] += v_in[i]; - dpsi[i][0] += gx_in[i]; - dpsi[i][1] += gy_in[i]; - dpsi[i][2] += gz_in[i]; - d2psi[i] += l_in[i]; + const auto l_in = myVGL[4]; + auto v_out = vgl.data(0); + auto gx_out = vgl.data(1); + auto gy_out = vgl.data(2); + auto gz_out = vgl.data(3); + auto l_out = vgl.data(4); + for (size_t i = 0; i < vgl.size(); ++i) + { + v_out[i] += v_in[i]; + gx_out[i] += gx_in[i]; + gy_out[i] += gy_in[i]; + gz_out[i] += gz_in[i]; + l_out[i] += l_in[i]; } + } } -template -void -SoaCuspCorrectionT::evaluate_vgl(const ParticleSetT& P, int iat, int idx, - ValueMatrix& psi, GradMatrix& dpsi, ValueMatrix& d2psi) +template +void SoaCuspCorrectionT::evaluate_vgl(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) { - assert(MaxOrbSize >= psi.cols()); - myVGL = 0.0; - - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : - d_table.getDistRow(iat); - const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : - d_table.getDisplRow(iat); - for (int c = 0; c < NumCenters; c++) - if (LOBasisSet[c]) - LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], - myVGL[2], myVGL[3], myVGL[4]); + assert(MaxOrbSize >= psi.size()); + myVGL = 0.0; + + const auto& d_table = P.getDistTableAB(myTableIndex); + const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); + const auto& displ = (P.getActivePtcl() == iat) ? 
d_table.getTempDispls() : d_table.getDisplRow(iat); + for (int c = 0; c < NumCenters; c++) + if (LOBasisSet[c]) + LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], myVGL[2], myVGL[3], myVGL[4]); + + const auto v_in = myVGL[0]; + const auto gx_in = myVGL[1]; + const auto gy_in = myVGL[2]; + const auto gz_in = myVGL[3]; + const auto l_in = myVGL[4]; + for (size_t i = 0; i < psi.size(); ++i) + { + psi[i] += v_in[i]; + dpsi[i][0] += gx_in[i]; + dpsi[i][1] += gy_in[i]; + dpsi[i][2] += gz_in[i]; + d2psi[i] += l_in[i]; + } +} - const auto v_in = myVGL[0]; - const auto gx_in = myVGL[1]; - const auto gy_in = myVGL[2]; - const auto gz_in = myVGL[3]; - const auto l_in = myVGL[4]; - for (size_t i = 0; i < psi.cols(); ++i) { - psi[idx][i] += v_in[i]; - dpsi[idx][i][0] += gx_in[i]; - dpsi[idx][i][1] += gy_in[i]; - dpsi[idx][i][2] += gz_in[i]; - d2psi[idx][i] += l_in[i]; - } +template +void SoaCuspCorrectionT::evaluate_vgl(const ParticleSetT& P, + int iat, + int idx, + ValueMatrix& psi, + GradMatrix& dpsi, + ValueMatrix& d2psi) +{ + assert(MaxOrbSize >= psi.cols()); + myVGL = 0.0; + + const auto& d_table = P.getDistTableAB(myTableIndex); + const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); + const auto& displ = (P.getActivePtcl() == iat) ? 
d_table.getTempDispls() : d_table.getDisplRow(iat); + for (int c = 0; c < NumCenters; c++) + if (LOBasisSet[c]) + LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], myVGL[2], myVGL[3], myVGL[4]); + + const auto v_in = myVGL[0]; + const auto gx_in = myVGL[1]; + const auto gy_in = myVGL[2]; + const auto gz_in = myVGL[3]; + const auto l_in = myVGL[4]; + for (size_t i = 0; i < psi.cols(); ++i) + { + psi[idx][i] += v_in[i]; + dpsi[idx][i][0] += gx_in[i]; + dpsi[idx][i][1] += gy_in[i]; + dpsi[idx][i][2] += gz_in[i]; + d2psi[idx][i] += l_in[i]; + } } -template -void -SoaCuspCorrectionT::evaluateV( - const ParticleSetT& P, int iat, ValueVector& psi) +template +void SoaCuspCorrectionT::evaluateV(const ParticleSetT& P, int iat, ValueVector& psi) { - assert(MaxOrbSize >= psi.size()); - T* tmp_vals = myVGL[0]; + assert(MaxOrbSize >= psi.size()); + T* tmp_vals = myVGL[0]; - std::fill_n(tmp_vals, myVGL.size(), 0.0); + std::fill_n(tmp_vals, myVGL.size(), 0.0); - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : - d_table.getDistRow(iat); + const auto& d_table = P.getDistTableAB(myTableIndex); + const auto& dist = (P.getActivePtcl() == iat) ? 
d_table.getTempDists() : d_table.getDistRow(iat); - // THIS IS SERIAL, only way to avoid this is to use myVGL - for (int c = 0; c < NumCenters; c++) - if (LOBasisSet[c]) - LOBasisSet[c]->evaluate(dist[c], tmp_vals); + // THIS IS SERIAL, only way to avoid this is to use myVGL + for (int c = 0; c < NumCenters; c++) + if (LOBasisSet[c]) + LOBasisSet[c]->evaluate(dist[c], tmp_vals); - { // collect - const auto v_in = myVGL[0]; - for (size_t i = 0; i < psi.size(); ++i) - psi[i] += v_in[i]; - } + { // collect + const auto v_in = myVGL[0]; + for (size_t i = 0; i < psi.size(); ++i) + psi[i] += v_in[i]; + } } -template -void -SoaCuspCorrectionT::add(int icenter, std::unique_ptr aos) +template +void SoaCuspCorrectionT::add(int icenter, std::unique_ptr aos) { - assert(MaxOrbSize == aos->getNumOrbs() && - "All the centers should support the same number of orbitals!"); - LOBasisSet[icenter].reset(aos.release()); + assert(MaxOrbSize == aos->getNumOrbs() && "All the centers should support the same number of orbitals!"); + LOBasisSet[icenter].reset(aos.release()); } template class SoaCuspCorrectionT; diff --git a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h index 0edf61af87e..9d11d883978 100644 --- a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h +++ b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h @@ -18,7 +18,7 @@ namespace qmcplusplus { -template +template class CuspCorrectionAtomicBasis; /** A localized basis set derived from BasisSetBase @@ -28,110 +28,90 @@ class CuspCorrectionAtomicBasis; * The template parameter COT denotes Centered-Orbital-Type which provides * a set of localized orbitals associated with a center. 
*/ -template +template class SoaCuspCorrectionT { - using RealType = typename SPOSetT::RealType; - using VGLVector = VectorSoaContainer; - using ValueMatrix = typename SPOSetT::ValueMatrix; - using GradMatrix = typename SPOSetT::GradMatrix; - using GradVector = typename SPOSetT::GradVector; - using ValueVector = typename SPOSetT::ValueVector; - using PosType = typename SPOSetT::PosType; - - /// number of centers, e.g., ions - size_t NumCenters; - /// number of quantum particles - size_t NumTargets; - /// number of quantum particles - const int myTableIndex; - /** Maximal number of supported MOs + using RealType = typename SPOSetT::RealType; + using VGLVector = VectorSoaContainer; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + using GradVector = typename SPOSetT::GradVector; + using ValueVector = typename SPOSetT::ValueVector; + using PosType = typename SPOSetT::PosType; + + /// number of centers, e.g., ions + size_t NumCenters; + /// number of quantum particles + size_t NumTargets; + /// number of quantum particles + const int myTableIndex; + /** Maximal number of supported MOs * this is not the AO basis because cusp correction is applied on the MO * directly. 
*/ - int MaxOrbSize = 0; + int MaxOrbSize = 0; - /// COMPLEX WON'T WORK - using COT = CuspCorrectionAtomicBasis; + /// COMPLEX WON'T WORK + using COT = CuspCorrectionAtomicBasis; - /** container of the unique pointers to the Atomic Orbitals + /** container of the unique pointers to the Atomic Orbitals * * size of LOBasisSet = number of centers (atoms) * should use unique_ptr once COT is fixed for better performance */ - std::vector> LOBasisSet; + std::vector> LOBasisSet; - Matrix myVGL; + Matrix myVGL; public: - /** constructor + /** constructor * @param ions ionic system * @param els electronic system */ - SoaCuspCorrectionT(ParticleSetT& ions, ParticleSetT& els); + SoaCuspCorrectionT(ParticleSetT& ions, ParticleSetT& els); - /** copy constructor */ - SoaCuspCorrectionT(const SoaCuspCorrectionT& a); + /** copy constructor */ + SoaCuspCorrectionT(const SoaCuspCorrectionT& a); - /** set the number of orbitals this cusp correction may serve. call this + /** set the number of orbitals this cusp correction may serve. call this * before adding any correction centers. 
*/ - void - setOrbitalSetSize(int norbs); + void setOrbitalSetSize(int norbs); - /** compute VGL + /** compute VGL * @param P quantum particleset * @param iat active particle * @param vgl Matrix(5,BasisSetSize) * @param trialMove if true, use getTempDists()/getTempDispls() */ - void - evaluateVGL(const ParticleSetT& P, int iat, VGLVector& vgl); + void evaluateVGL(const ParticleSetT& P, int iat, VGLVector& vgl); - void - evaluate_vgl(const ParticleSetT& P, int iat, ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi); + void evaluate_vgl(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - void - evaluate_vgl(const ParticleSetT& P, int iat, int idx, ValueMatrix& psi, - GradMatrix& dpsi, ValueMatrix& d2psi); + void evaluate_vgl(const ParticleSetT& P, int iat, int idx, ValueMatrix& psi, GradMatrix& dpsi, ValueMatrix& d2psi); - /** compute values for the iat-paricle move + /** compute values for the iat-paricle move * * Always uses getTempDists() and getTempDispls() */ - void - evaluateV(const ParticleSetT& P, int iat, ValueVector& psi); + void evaluateV(const ParticleSetT& P, int iat, ValueVector& psi); - /** add a new set of Centered Atomic Orbitals + /** add a new set of Centered Atomic Orbitals * @param icenter the index of the center * @param aos a set of Centered Atomic Orbitals */ - void - add(int icenter, std::unique_ptr aos); - - void - addVGL(const ParticleSetT& P, int iat, VGLVector& vgl) - { - evaluateVGL(P, iat, vgl); - } - void - addV(const ParticleSetT& P, int iat, ValueVector& psi) - { - evaluateV(P, iat, psi); - } - void - add_vgl(const ParticleSetT& P, int iat, int idx, ValueMatrix& vals, - GradMatrix& dpsi, ValueMatrix& d2psi) - { - evaluate_vgl(P, iat, idx, vals, dpsi, d2psi); - } - void - add_vector_vgl(const ParticleSetT& P, int iat, ValueVector& vals, - GradVector& dpsi, ValueVector& d2psi) - { - evaluate_vgl(P, iat, vals, dpsi, d2psi); - } + void add(int icenter, std::unique_ptr aos); + + void 
addVGL(const ParticleSetT& P, int iat, VGLVector& vgl) { evaluateVGL(P, iat, vgl); } + void addV(const ParticleSetT& P, int iat, ValueVector& psi) { evaluateV(P, iat, psi); } + void add_vgl(const ParticleSetT& P, int iat, int idx, ValueMatrix& vals, GradMatrix& dpsi, ValueMatrix& d2psi) + { + evaluate_vgl(P, iat, idx, vals, dpsi, d2psi); + } + void add_vector_vgl(const ParticleSetT& P, int iat, ValueVector& vals, GradVector& dpsi, ValueVector& d2psi) + { + evaluate_vgl(P, iat, vals, dpsi, d2psi); + } }; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp index 8b8ab7c66c4..cd844d7bf6b 100644 --- a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp +++ b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp @@ -74,466 +74,414 @@ RefVectorWithLeader SoaLocalizedBasisSetT::extractOneSpeciesBasi return one_species_basis_list; } -template -SoaLocalizedBasisSetT::SoaLocalizedBasisSetT( - ParticleSetT& ions, ParticleSetT& els) : - ions_(ions), - myTableIndex(els.addTable(ions, - DTModes::NEED_FULL_TABLE_ANYTIME | - DTModes::NEED_VP_FULL_TABLE_ON_HOST)), - SuperTwist(0.0) +template +SoaLocalizedBasisSetT::SoaLocalizedBasisSetT(ParticleSetT& ions, ParticleSetT& els) + : ions_(ions), + myTableIndex(els.addTable(ions, DTModes::NEED_FULL_TABLE_ANYTIME | DTModes::NEED_VP_FULL_TABLE_ON_HOST)), + SuperTwist(0.0) { - NumCenters = ions.getTotalNum(); - NumTargets = els.getTotalNum(); - LOBasisSet.resize(ions.getSpeciesSet().getTotalNum()); - BasisOffset.resize(NumCenters + 1); - BasisSetSize = 0; + NumCenters = ions.getTotalNum(); + NumTargets = els.getTotalNum(); + LOBasisSet.resize(ions.getSpeciesSet().getTotalNum()); + BasisOffset.resize(NumCenters + 1); + BasisSetSize = 0; } -template -SoaLocalizedBasisSetT::SoaLocalizedBasisSetT( - const SoaLocalizedBasisSetT& a) : - SoaBasisSetBaseT(a), - NumCenters(a.NumCenters), - NumTargets(a.NumTargets), - ions_(a.ions_), - 
myTableIndex(a.myTableIndex), - SuperTwist(a.SuperTwist), - BasisOffset(a.BasisOffset) +template +SoaLocalizedBasisSetT::SoaLocalizedBasisSetT(const SoaLocalizedBasisSetT& a) + : SoaBasisSetBaseT(a), + NumCenters(a.NumCenters), + NumTargets(a.NumTargets), + ions_(a.ions_), + myTableIndex(a.myTableIndex), + SuperTwist(a.SuperTwist), + BasisOffset(a.BasisOffset) { - LOBasisSet.reserve(a.LOBasisSet.size()); - for (auto& elem : a.LOBasisSet) - LOBasisSet.push_back(std::make_unique(*elem)); + LOBasisSet.reserve(a.LOBasisSet.size()); + for (auto& elem : a.LOBasisSet) + LOBasisSet.push_back(std::make_unique(*elem)); } -template -void -SoaLocalizedBasisSetT::setPBCParams( - const TinyVector& PBCImages, const TinyVector Sup_Twist, - const std::vector& phase_factor) +template +void SoaLocalizedBasisSetT::setPBCParams(const TinyVector& PBCImages, + const TinyVector Sup_Twist, + const std::vector& phase_factor) { - for (int i = 0; i < LOBasisSet.size(); ++i) - LOBasisSet[i]->setPBCParams(PBCImages, Sup_Twist, phase_factor); + for (int i = 0; i < LOBasisSet.size(); ++i) + LOBasisSet[i]->setPBCParams(PBCImages, Sup_Twist, phase_factor); - SuperTwist = Sup_Twist; + SuperTwist = Sup_Twist; } -template -void -SoaLocalizedBasisSetT::setBasisSetSize(int nbs) +template +void SoaLocalizedBasisSetT::setBasisSetSize(int nbs) { - const auto& IonID(ions_.GroupID); - if (BasisSetSize > 0 && nbs == BasisSetSize) - return; - - if (auto& mapping = ions_.get_map_storage_to_input(); mapping.empty()) { - // evaluate the total basis dimension and offset for each center - BasisOffset[0] = 0; - for (int c = 0; c < NumCenters; c++) - BasisOffset[c + 1] = - BasisOffset[c] + LOBasisSet[IonID[c]]->getBasisSetSize(); - BasisSetSize = BasisOffset[NumCenters]; - } - else { - // when particles are reordered due to grouping, AOs need to restore the - // input order to match MOs. 
- std::vector map_input_to_storage(mapping.size()); - for (int c = 0; c < NumCenters; c++) - map_input_to_storage[mapping[c]] = c; - - std::vector basis_offset_input_order(BasisOffset.size(), 0); - for (int c = 0; c < NumCenters; c++) - basis_offset_input_order[c + 1] = basis_offset_input_order[c] + - LOBasisSet[IonID[map_input_to_storage[c]]]->getBasisSetSize(); - - for (int c = 0; c < NumCenters; c++) - BasisOffset[c] = basis_offset_input_order[mapping[c]]; - - BasisSetSize = basis_offset_input_order[NumCenters]; - } + const auto& IonID(ions_.GroupID); + if (BasisSetSize > 0 && nbs == BasisSetSize) + return; + + if (auto& mapping = ions_.get_map_storage_to_input(); mapping.empty()) + { + // evaluate the total basis dimension and offset for each center + BasisOffset[0] = 0; + for (int c = 0; c < NumCenters; c++) + BasisOffset[c + 1] = BasisOffset[c] + LOBasisSet[IonID[c]]->getBasisSetSize(); + BasisSetSize = BasisOffset[NumCenters]; + } + else + { + // when particles are reordered due to grouping, AOs need to restore the + // input order to match MOs. 
+ std::vector map_input_to_storage(mapping.size()); + for (int c = 0; c < NumCenters; c++) + map_input_to_storage[mapping[c]] = c; + + std::vector basis_offset_input_order(BasisOffset.size(), 0); + for (int c = 0; c < NumCenters; c++) + basis_offset_input_order[c + 1] = + basis_offset_input_order[c] + LOBasisSet[IonID[map_input_to_storage[c]]]->getBasisSetSize(); + + for (int c = 0; c < NumCenters; c++) + BasisOffset[c] = basis_offset_input_order[mapping[c]]; + + BasisSetSize = basis_offset_input_order[NumCenters]; + } } -template -void -SoaLocalizedBasisSetT::queryOrbitalsForSType( - const std::vector& corrCenter, std::vector& is_s_orbital) const +template +void SoaLocalizedBasisSetT::queryOrbitalsForSType(const std::vector& corrCenter, + std::vector& is_s_orbital) const { - const auto& IonID(ions_.GroupID); - for (int c = 0; c < NumCenters; c++) { - int idx = BasisOffset[c]; - int bss = LOBasisSet[IonID[c]]->BasisSetSize; - std::vector local_is_s_orbital(bss); - LOBasisSet[IonID[c]]->queryOrbitalsForSType(local_is_s_orbital); - for (int k = 0; k < bss; k++) { - if (corrCenter[c]) { - is_s_orbital[idx++] = local_is_s_orbital[k]; - } - else { - is_s_orbital[idx++] = false; - } - } + const auto& IonID(ions_.GroupID); + for (int c = 0; c < NumCenters; c++) + { + int idx = BasisOffset[c]; + int bss = LOBasisSet[IonID[c]]->BasisSetSize; + std::vector local_is_s_orbital(bss); + LOBasisSet[IonID[c]]->queryOrbitalsForSType(local_is_s_orbital); + for (int k = 0; k < bss; k++) + { + if (corrCenter[c]) + { + is_s_orbital[idx++] = local_is_s_orbital[k]; + } + else + { + is_s_orbital[idx++] = false; + } } + } +} + +template +void SoaLocalizedBasisSetT::evaluateVGL(const ParticleSetT& P, int iat, vgl_type& vgl) +{ + const auto& IonID(ions_.GroupID); + const auto& coordR = P.activeR(iat); + const auto& d_table = P.getDistTableAB(myTableIndex); + const auto& dist = (P.getActivePtcl() == iat) ? 
d_table.getTempDists() : d_table.getDistRow(iat); + const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat); + + PosType Tv; + for (int c = 0; c < NumCenters; c++) + { + Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0]; + Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1]; + Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2]; + LOBasisSet[IonID[c]]->evaluateVGL(P.getLattice(), dist[c], displ[c], BasisOffset[c], vgl, Tv); + } } -template -void -SoaLocalizedBasisSetT::evaluateVGL( - const ParticleSetT& P, int iat, vgl_type& vgl) +template +void SoaLocalizedBasisSetT::mw_evaluateVGL(const RefVectorWithLeader>& P_list, + int iat, + OffloadMWVGLArray& vgl_v) { + for (size_t iw = 0; iw < P_list.size(); iw++) + { const auto& IonID(ions_.GroupID); - const auto& coordR = P.activeR(iat); - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : - d_table.getDistRow(iat); - const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : - d_table.getDisplRow(iat); + const auto& coordR = P_list[iw].activeR(iat); + const auto& d_table = P_list[iw].getDistTableAB(myTableIndex); + const auto& dist = (P_list[iw].getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); + const auto& displ = (P_list[iw].getActivePtcl() == iat) ? 
d_table.getTempDispls() : d_table.getDisplRow(iat); PosType Tv; - for (int c = 0; c < NumCenters; c++) { - Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0]; - Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1]; - Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2]; - LOBasisSet[IonID[c]]->evaluateVGL( - P.getLattice(), dist[c], displ[c], BasisOffset[c], vgl, Tv); - } -} -template -void -SoaLocalizedBasisSetT::mw_evaluateVGL( - const RefVectorWithLeader>& P_list, int iat, - OffloadMWVGLArray& vgl_v) -{ - for (size_t iw = 0; iw < P_list.size(); iw++) { - const auto& IonID(ions_.GroupID); - const auto& coordR = P_list[iw].activeR(iat); - const auto& d_table = P_list[iw].getDistTableAB(myTableIndex); - const auto& dist = (P_list[iw].getActivePtcl() == iat) ? - d_table.getTempDists() : - d_table.getDistRow(iat); - const auto& displ = (P_list[iw].getActivePtcl() == iat) ? - d_table.getTempDispls() : - d_table.getDisplRow(iat); - - PosType Tv; - - // number of walkers * BasisSetSize - auto stride = vgl_v.size(1) * BasisSetSize; - assert(BasisSetSize == vgl_v.size(2)); - vgl_type vgl_iw(vgl_v.data_at(0, iw, 0), BasisSetSize, stride); - - for (int c = 0; c < NumCenters; c++) { - Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0]; - Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1]; - Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2]; - LOBasisSet[IonID[c]]->evaluateVGL(P_list[iw].getLattice(), dist[c], - displ[c], BasisOffset[c], vgl_iw, Tv); - } + // number of walkers * BasisSetSize + auto stride = vgl_v.size(1) * BasisSetSize; + assert(BasisSetSize == vgl_v.size(2)); + vgl_type vgl_iw(vgl_v.data_at(0, iw, 0), BasisSetSize, stride); + + for (int c = 0; c < NumCenters; c++) + { + Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0]; + Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1]; + Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2]; + LOBasisSet[IonID[c]]->evaluateVGL(P_list[iw].getLattice(), dist[c], displ[c], BasisOffset[c], vgl_iw, Tv); } + } } -template -void 
-SoaLocalizedBasisSetT::evaluateVGH( - const ParticleSetT& P, int iat, vgh_type& vgh) +template +void SoaLocalizedBasisSetT::evaluateVGH(const ParticleSetT& P, int iat, vgh_type& vgh) { - const auto& IonID(ions_.GroupID); - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : - d_table.getDistRow(iat); - const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : - d_table.getDisplRow(iat); - for (int c = 0; c < NumCenters; c++) { - LOBasisSet[IonID[c]]->evaluateVGH( - P.getLattice(), dist[c], displ[c], BasisOffset[c], vgh); - } + const auto& IonID(ions_.GroupID); + const auto& d_table = P.getDistTableAB(myTableIndex); + const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); + const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat); + for (int c = 0; c < NumCenters; c++) + { + LOBasisSet[IonID[c]]->evaluateVGH(P.getLattice(), dist[c], displ[c], BasisOffset[c], vgh); + } } -template -void -SoaLocalizedBasisSetT::evaluateVGHGH( - const ParticleSetT& P, int iat, vghgh_type& vghgh) +template +void SoaLocalizedBasisSetT::evaluateVGHGH(const ParticleSetT& P, int iat, vghgh_type& vghgh) { - // APP_ABORT("SoaLocalizedBasisSetT::evaluateVGH() not implemented\n"); + // APP_ABORT("SoaLocalizedBasisSetT::evaluateVGH() not implemented\n"); - const auto& IonID(ions_.GroupID); - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : - d_table.getDistRow(iat); - const auto& displ = (P.getActivePtcl() == iat) ? 
d_table.getTempDispls() : - d_table.getDisplRow(iat); - for (int c = 0; c < NumCenters; c++) { - LOBasisSet[IonID[c]]->evaluateVGHGH( - P.getLattice(), dist[c], displ[c], BasisOffset[c], vghgh); - } + const auto& IonID(ions_.GroupID); + const auto& d_table = P.getDistTableAB(myTableIndex); + const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); + const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat); + for (int c = 0; c < NumCenters; c++) + { + LOBasisSet[IonID[c]]->evaluateVGHGH(P.getLattice(), dist[c], displ[c], BasisOffset[c], vghgh); + } } -template -void -SoaLocalizedBasisSetT::evaluateV( - const ParticleSetT& P, int iat, ORBT* restrict vals) +template +void SoaLocalizedBasisSetT::evaluateV(const ParticleSetT& P, int iat, ORBT* restrict vals) { - const auto& IonID(ions_.GroupID); - const auto& coordR = P.activeR(iat); - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : - d_table.getDistRow(iat); - const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : - d_table.getDisplRow(iat); - - PosType Tv; - for (int c = 0; c < NumCenters; c++) { - Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0]; - Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1]; - Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2]; - LOBasisSet[IonID[c]]->evaluateV( - P.getLattice(), dist[c], displ[c], vals + BasisOffset[c], Tv); - } + const auto& IonID(ions_.GroupID); + const auto& coordR = P.activeR(iat); + const auto& d_table = P.getDistTableAB(myTableIndex); + const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); + const auto& displ = (P.getActivePtcl() == iat) ? 
d_table.getTempDispls() : d_table.getDisplRow(iat); + + PosType Tv; + for (int c = 0; c < NumCenters; c++) + { + Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0]; + Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1]; + Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2]; + LOBasisSet[IonID[c]]->evaluateV(P.getLattice(), dist[c], displ[c], vals + BasisOffset[c], Tv); + } } -template -void -SoaLocalizedBasisSetT::mw_evaluateValue( - const RefVectorWithLeader>& P_list, int iat, - OffloadMWVArray& v) +template +void SoaLocalizedBasisSetT::mw_evaluateValue(const RefVectorWithLeader>& P_list, + int iat, + OffloadMWVArray& v) { - for (size_t iw = 0; iw < P_list.size(); iw++) - evaluateV(P_list[iw], iat, v.data_at(iw, 0)); + for (size_t iw = 0; iw < P_list.size(); iw++) + evaluateV(P_list[iw], iat, v.data_at(iw, 0)); } -template -void -SoaLocalizedBasisSetT::evaluateGradSourceV( - const ParticleSetT& P, int iat, const ParticleSetT& ions, - int jion, vgl_type& vgl) +template +void SoaLocalizedBasisSetT::evaluateGradSourceV(const ParticleSetT& P, + int iat, + const ParticleSetT& ions, + int jion, + vgl_type& vgl) { - // We need to zero out the temporary array vgl. - auto* restrict gx = vgl.data(1); - auto* restrict gy = vgl.data(2); - auto* restrict gz = vgl.data(3); - - for (int ib = 0; ib < BasisSetSize; ib++) { - gx[ib] = 0; - gy[ib] = 0; - gz[ib] = 0; - } + // We need to zero out the temporary array vgl. + auto* restrict gx = vgl.data(1); + auto* restrict gy = vgl.data(2); + auto* restrict gz = vgl.data(3); - const auto& IonID(ions_.GroupID); - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : - d_table.getDistRow(iat); - const auto& displ = (P.getActivePtcl() == iat) ? 
d_table.getTempDispls() : - d_table.getDisplRow(iat); + for (int ib = 0; ib < BasisSetSize; ib++) + { + gx[ib] = 0; + gy[ib] = 0; + gz[ib] = 0; + } - PosType Tv; - Tv[0] = Tv[1] = Tv[2] = 0; - // Since LCAO's are written only in terms of (r-R), ionic derivatives only - // exist for the atomic center that we wish to take derivatives of. - // Moreover, we can obtain an ion derivative by multiplying an electron - // derivative by -1.0. Handling this sign is left to LCAOrbitalSet. For - // now, just note this is the electron VGL function. - LOBasisSet[IonID[jion]]->evaluateVGL( - P.getLattice(), dist[jion], displ[jion], BasisOffset[jion], vgl, Tv); + const auto& IonID(ions_.GroupID); + const auto& d_table = P.getDistTableAB(myTableIndex); + const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); + const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat); + + PosType Tv; + Tv[0] = Tv[1] = Tv[2] = 0; + // Since LCAO's are written only in terms of (r-R), ionic derivatives only + // exist for the atomic center that we wish to take derivatives of. + // Moreover, we can obtain an ion derivative by multiplying an electron + // derivative by -1.0. Handling this sign is left to LCAOrbitalSet. For + // now, just note this is the electron VGL function. + LOBasisSet[IonID[jion]]->evaluateVGL(P.getLattice(), dist[jion], displ[jion], BasisOffset[jion], vgl, Tv); } -template -void -SoaLocalizedBasisSetT::evaluateGradSourceVGL( - const ParticleSetT& P, int iat, const ParticleSetT& ions, - int jion, vghgh_type& vghgh) +template +void SoaLocalizedBasisSetT::evaluateGradSourceVGL(const ParticleSetT& P, + int iat, + const ParticleSetT& ions, + int jion, + vghgh_type& vghgh) { - // We need to zero out the temporary array vghgh. 
- auto* restrict gx = vghgh.data(1); - auto* restrict gy = vghgh.data(2); - auto* restrict gz = vghgh.data(3); - - auto* restrict hxx = vghgh.data(4); - auto* restrict hxy = vghgh.data(5); - auto* restrict hxz = vghgh.data(6); - auto* restrict hyy = vghgh.data(7); - auto* restrict hyz = vghgh.data(8); - auto* restrict hzz = vghgh.data(9); - - auto* restrict gxxx = vghgh.data(10); - auto* restrict gxxy = vghgh.data(11); - auto* restrict gxxz = vghgh.data(12); - auto* restrict gxyy = vghgh.data(13); - auto* restrict gxyz = vghgh.data(14); - auto* restrict gxzz = vghgh.data(15); - auto* restrict gyyy = vghgh.data(16); - auto* restrict gyyz = vghgh.data(17); - auto* restrict gyzz = vghgh.data(18); - auto* restrict gzzz = vghgh.data(19); - - for (int ib = 0; ib < BasisSetSize; ib++) { - gx[ib] = 0; - gy[ib] = 0; - gz[ib] = 0; - - hxx[ib] = 0; - hxy[ib] = 0; - hxz[ib] = 0; - hyy[ib] = 0; - hyz[ib] = 0; - hzz[ib] = 0; - - gxxx[ib] = 0; - gxxy[ib] = 0; - gxxz[ib] = 0; - gxyy[ib] = 0; - gxyz[ib] = 0; - gxzz[ib] = 0; - gyyy[ib] = 0; - gyyz[ib] = 0; - gyzz[ib] = 0; - gzzz[ib] = 0; - } + // We need to zero out the temporary array vghgh. 
+ auto* restrict gx = vghgh.data(1); + auto* restrict gy = vghgh.data(2); + auto* restrict gz = vghgh.data(3); + + auto* restrict hxx = vghgh.data(4); + auto* restrict hxy = vghgh.data(5); + auto* restrict hxz = vghgh.data(6); + auto* restrict hyy = vghgh.data(7); + auto* restrict hyz = vghgh.data(8); + auto* restrict hzz = vghgh.data(9); + + auto* restrict gxxx = vghgh.data(10); + auto* restrict gxxy = vghgh.data(11); + auto* restrict gxxz = vghgh.data(12); + auto* restrict gxyy = vghgh.data(13); + auto* restrict gxyz = vghgh.data(14); + auto* restrict gxzz = vghgh.data(15); + auto* restrict gyyy = vghgh.data(16); + auto* restrict gyyz = vghgh.data(17); + auto* restrict gyzz = vghgh.data(18); + auto* restrict gzzz = vghgh.data(19); + + for (int ib = 0; ib < BasisSetSize; ib++) + { + gx[ib] = 0; + gy[ib] = 0; + gz[ib] = 0; + + hxx[ib] = 0; + hxy[ib] = 0; + hxz[ib] = 0; + hyy[ib] = 0; + hyz[ib] = 0; + hzz[ib] = 0; + + gxxx[ib] = 0; + gxxy[ib] = 0; + gxxz[ib] = 0; + gxyy[ib] = 0; + gxyz[ib] = 0; + gxzz[ib] = 0; + gyyy[ib] = 0; + gyyz[ib] = 0; + gyzz[ib] = 0; + gzzz[ib] = 0; + } + + // Since jion is indexed on the source ions not the ions_ the distinction + // between ions_ and ions is extremely important. + const auto& IonID(ions.GroupID); + const auto& d_table = P.getDistTableAB(myTableIndex); + const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); + const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat); - // Since jion is indexed on the source ions not the ions_ the distinction - // between ions_ and ions is extremely important. - const auto& IonID(ions.GroupID); - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : - d_table.getDistRow(iat); - const auto& displ = (P.getActivePtcl() == iat) ? 
d_table.getTempDispls() : - d_table.getDisplRow(iat); - - // Since LCAO's are written only in terms of (r-R), ionic derivatives only - // exist for the atomic center that we wish to take derivatives of. - // Moreover, we can obtain an ion derivative by multiplying an electron - // derivative by -1.0. Handling this sign is left to LCAOrbitalSet. For - // now, just note this is the electron VGL function. - - LOBasisSet[IonID[jion]]->evaluateVGHGH( - P.getLattice(), dist[jion], displ[jion], BasisOffset[jion], vghgh); + // Since LCAO's are written only in terms of (r-R), ionic derivatives only + // exist for the atomic center that we wish to take derivatives of. + // Moreover, we can obtain an ion derivative by multiplying an electron + // derivative by -1.0. Handling this sign is left to LCAOrbitalSet. For + // now, just note this is the electron VGL function. + + LOBasisSet[IonID[jion]]->evaluateVGHGH(P.getLattice(), dist[jion], displ[jion], BasisOffset[jion], vghgh); } -template -void -SoaLocalizedBasisSetT::add(int icenter, std::unique_ptr aos) +template +void SoaLocalizedBasisSetT::add(int icenter, std::unique_ptr aos) { - LOBasisSet[icenter] = std::move(aos); + LOBasisSet[icenter] = std::move(aos); } // TODO: this should be redone with template template parameters #ifndef QMC_COMPLEX template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT, SoaCartesianTensor, - double>, + SoaAtomicBasisSetT, SoaCartesianTensor, double>, double>; -template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT, SoaCartesianTensor, - float>, - float>; +template class SoaLocalizedBasisSetT, SoaCartesianTensor, float>, + float>; #else template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT, SoaCartesianTensor, - std::complex>, + SoaAtomicBasisSetT, SoaCartesianTensor, std::complex>, std::complex>; template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT, SoaCartesianTensor, - std::complex>, + SoaAtomicBasisSetT, SoaCartesianTensor, std::complex>, std::complex>; #endif #ifndef 
QMC_COMPLEX template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT, SoaSphericalTensor, - double>, + SoaAtomicBasisSetT, SoaSphericalTensor, double>, double>; -template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT, SoaSphericalTensor, - float>, - float>; +template class SoaLocalizedBasisSetT, SoaSphericalTensor, float>, + float>; #else template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT, SoaSphericalTensor, - std::complex>, + SoaAtomicBasisSetT, SoaSphericalTensor, std::complex>, std::complex>; template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT, SoaSphericalTensor, - std::complex>, + SoaAtomicBasisSetT, SoaSphericalTensor, std::complex>, std::complex>; #endif #ifndef QMC_COMPLEX template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT>, - SoaCartesianTensor, double>, + SoaAtomicBasisSetT>, SoaCartesianTensor, double>, double>; template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT>, - SoaCartesianTensor, float>, + SoaAtomicBasisSetT>, SoaCartesianTensor, float>, float>; #else template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT>, - SoaCartesianTensor, std::complex>, + SoaAtomicBasisSetT>, SoaCartesianTensor, std::complex>, std::complex>; template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT>, - SoaCartesianTensor, std::complex>, + SoaAtomicBasisSetT>, SoaCartesianTensor, std::complex>, std::complex>; #endif #ifndef QMC_COMPLEX template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT>, - SoaSphericalTensor, double>, + SoaAtomicBasisSetT>, SoaSphericalTensor, double>, double>; template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT>, - SoaSphericalTensor, float>, + SoaAtomicBasisSetT>, SoaSphericalTensor, float>, float>; #else template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT>, - SoaSphericalTensor, std::complex>, + SoaAtomicBasisSetT>, SoaSphericalTensor, std::complex>, std::complex>; template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT>, - SoaSphericalTensor, std::complex>, + SoaAtomicBasisSetT>, 
SoaSphericalTensor, std::complex>, std::complex>; #endif #ifndef QMC_COMPLEX template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT>, - SoaCartesianTensor, double>, + SoaAtomicBasisSetT>, SoaCartesianTensor, double>, double>; template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT>, - SoaCartesianTensor, float>, + SoaAtomicBasisSetT>, SoaCartesianTensor, float>, float>; #else template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT>, - SoaCartesianTensor, std::complex>, + SoaAtomicBasisSetT>, SoaCartesianTensor, std::complex>, std::complex>; template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT>, - SoaCartesianTensor, std::complex>, + SoaAtomicBasisSetT>, SoaCartesianTensor, std::complex>, std::complex>; #endif #ifndef QMC_COMPLEX template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT>, - SoaSphericalTensor, double>, + SoaAtomicBasisSetT>, SoaSphericalTensor, double>, double>; template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT>, - SoaSphericalTensor, float>, + SoaAtomicBasisSetT>, SoaSphericalTensor, float>, float>; #else template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT>, - SoaSphericalTensor, std::complex>, + SoaAtomicBasisSetT>, SoaSphericalTensor, std::complex>, std::complex>; template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT>, - SoaSphericalTensor, std::complex>, + SoaAtomicBasisSetT>, SoaSphericalTensor, std::complex>, std::complex>; #endif diff --git a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h index 6839fef181f..5fd6276c419 100644 --- a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h +++ b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h @@ -33,130 +33,114 @@ namespace qmcplusplus * a set of localized orbitals associated with a center. 
* The template parameter ORBT denotes the orbital value return type */ -template +template class SoaLocalizedBasisSetT : public SoaBasisSetBaseT { public: - using RealType = typename COT::RealType; - using BaseType = SoaBasisSetBaseT; - using ValueType = ORBT; - - using vgl_type = typename BaseType::vgl_type; - using vgh_type = typename BaseType::vgh_type; - using vghgh_type = typename BaseType::vghgh_type; - using PosType = typename ParticleSetT::PosType; - using OffloadMWVGLArray = typename BaseType::OffloadMWVGLArray; - using OffloadMWVArray = typename BaseType::OffloadMWVArray; - - using BaseType::BasisSetSize; - - /// number of centers, e.g., ions - size_t NumCenters; - /// number of quantum particles - size_t NumTargets; - /// ion particle set - const ParticleSetT& ions_; - /// number of quantum particles - const int myTableIndex; - /// Global Coordinate of Supertwist read from HDF5 - PosType SuperTwist; - - /** container to store the offsets of the basis functions for each center + using RealType = typename COT::RealType; + using BaseType = SoaBasisSetBaseT; + using ValueType = ORBT; + + using vgl_type = typename BaseType::vgl_type; + using vgh_type = typename BaseType::vgh_type; + using vghgh_type = typename BaseType::vghgh_type; + using PosType = typename ParticleSetT::PosType; + using OffloadMWVGLArray = typename BaseType::OffloadMWVGLArray; + using OffloadMWVArray = typename BaseType::OffloadMWVArray; + + using BaseType::BasisSetSize; + + /// number of centers, e.g., ions + size_t NumCenters; + /// number of quantum particles + size_t NumTargets; + /// ion particle set + const ParticleSetT& ions_; + /// number of quantum particles + const int myTableIndex; + /// Global Coordinate of Supertwist read from HDF5 + PosType SuperTwist; + + /** container to store the offsets of the basis functions for each center * Due to potential reordering of ions, offsets can be in any order. 
*/ - std::vector BasisOffset; + std::vector BasisOffset; - /** container of the unique pointers to the Atomic Orbitals + /** container of the unique pointers to the Atomic Orbitals * * size of LOBasisSet = number of unique centers */ - std::vector> LOBasisSet; + std::vector> LOBasisSet; - /** constructor + /** constructor * @param ions ionic system * @param els electronic system */ - SoaLocalizedBasisSetT(ParticleSetT& ions, ParticleSetT& els); + SoaLocalizedBasisSetT(ParticleSetT& ions, ParticleSetT& els); - /** copy constructor */ - SoaLocalizedBasisSetT(const SoaLocalizedBasisSetT& a); + /** copy constructor */ + SoaLocalizedBasisSetT(const SoaLocalizedBasisSetT& a); - /** makeClone */ - BaseType* - makeClone() const override - { - return new SoaLocalizedBasisSetT(*this); - } + /** makeClone */ + BaseType* makeClone() const override { return new SoaLocalizedBasisSetT(*this); } - /** set Number of periodic Images to evaluate the orbitals. + /** set Number of periodic Images to evaluate the orbitals. Set to 0 for non-PBC, and set manually in the input. Passes the pre-computed phase factor for evaluation of complex wavefunction. If WF is real Phase_factor is real and equals 1 if gamma or -1 if non-Gamma. */ - void - setPBCParams(const TinyVector& PBCImages, - const TinyVector Sup_Twist, - const std::vector& phase_factor); + void setPBCParams(const TinyVector& PBCImages, + const TinyVector Sup_Twist, + const std::vector& phase_factor); - /** set BasisSetSize and allocate mVGL container + /** set BasisSetSize and allocate mVGL container */ - void - setBasisSetSize(int nbs) override; + void setBasisSetSize(int nbs) override; - /** Determine which orbitals are S-type. Used by cusp correction. + /** Determine which orbitals are S-type. Used by cusp correction. 
*/ - void - queryOrbitalsForSType(const std::vector& corrCenter, - std::vector& is_s_orbital) const override; + void queryOrbitalsForSType(const std::vector& corrCenter, std::vector& is_s_orbital) const override; - /** compute VGL + /** compute VGL * @param P quantum particleset * @param iat active particle * @param vgl Matrix(5,BasisSetSize) * @param trialMove if true, use getTempDists()/getTempDispls() */ - void - evaluateVGL(const ParticleSetT& P, int iat, vgl_type& vgl) override; + void evaluateVGL(const ParticleSetT& P, int iat, vgl_type& vgl) override; - /** compute V using packed array with all walkers + /** compute V using packed array with all walkers * @param P_list list of quantum particleset (one for each walker) * @param iat active particle * @param v Array(n_walkers, BasisSetSize) */ - void - mw_evaluateValue(const RefVectorWithLeader>& P_list, - int iat, OffloadMWVArray& v) override; + void mw_evaluateValue(const RefVectorWithLeader>& P_list, int iat, OffloadMWVArray& v) override; - /** compute VGL using packed array with all walkers + /** compute VGL using packed array with all walkers * @param P_list list of quantum particleset (one for each walker) * @param iat active particle * @param vgl Array(n_walkers, 5, BasisSetSize) */ - void - mw_evaluateVGL(const RefVectorWithLeader>& P_list, - int iat, OffloadMWVGLArray& vgl) override; + void mw_evaluateVGL(const RefVectorWithLeader>& P_list, int iat, OffloadMWVGLArray& vgl) override; - /** compute VGH + /** compute VGH * @param P quantum particleset * @param iat active particle * @param vgl Matrix(10,BasisSetSize) * @param trialMove if true, use getTempDists()/getTempDispls() */ - void - evaluateVGH(const ParticleSetT& P, int iat, vgh_type& vgh) override; + void evaluateVGH(const ParticleSetT& P, int iat, vgh_type& vgh) override; - /** compute VGHGH + /** compute VGHGH * @param P quantum particleset * @param iat active particle * @param vghgh Matrix(20,BasisSetSize) * @param trialMove if true, use 
getTempDists()/getTempDispls() */ - void - evaluateVGHGH( - const ParticleSetT& P, int iat, vghgh_type& vghgh) override; + void evaluateVGHGH(const ParticleSetT& P, int iat, vghgh_type& vghgh) override; - /** compute values for the iat-paricle move + /** compute values for the iat-paricle move * * Always uses getTempDists() and getTempDispls() * Tv is a translation vector; In PBC, in order to reduce the number @@ -167,46 +151,47 @@ class SoaLocalizedBasisSetT : public SoaBasisSetBaseT * displacement. We need to keep track of Tv because it must be add * as a phase factor, i.e., exp(i*k*Tv). */ - void - evaluateV( - const ParticleSetT& P, int iat, ORBT* restrict vals) override; + void evaluateV(const ParticleSetT& P, int iat, ORBT* restrict vals) override; - void - evaluateGradSourceV(const ParticleSetT& P, int iat, - const ParticleSetT& ions, int jion, vgl_type& vgl) override; + void evaluateGradSourceV(const ParticleSetT& P, + int iat, + const ParticleSetT& ions, + int jion, + vgl_type& vgl) override; - void - evaluateGradSourceVGL(const ParticleSetT& P, int iat, - const ParticleSetT& ions, int jion, vghgh_type& vghgh) override; + void evaluateGradSourceVGL(const ParticleSetT& P, + int iat, + const ParticleSetT& ions, + int jion, + vghgh_type& vghgh) override; - /** add a new set of Centered Atomic Orbitals + /** add a new set of Centered Atomic Orbitals * @param icenter the index of the center * @param aos a set of Centered Atomic Orbitals */ - void - add(int icenter, std::unique_ptr aos); + void add(int icenter, std::unique_ptr aos); - /** initialize a shared resource and hand it to collection + /** initialize a shared resource and hand it to collection */ - void createResource(ResourceCollection& collection) const override; + void createResource(ResourceCollection& collection) const override; - /** acquire a shared resource from collection + /** acquire a shared resource from collection */ - void acquireResource(ResourceCollection& collection, - const 
RefVectorWithLeader>& basisset_list) const override; + void acquireResource(ResourceCollection& collection, + const RefVectorWithLeader>& basisset_list) const override; - /** return a shared resource to collection + /** return a shared resource to collection */ - void releaseResource(ResourceCollection& collection, - const RefVectorWithLeader>& basisset_list) const override; + void releaseResource(ResourceCollection& collection, + const RefVectorWithLeader>& basisset_list) const override; - /** helper function for extracting a list of atomic basis sets for a single species (indexed by `id`) + /** helper function for extracting a list of atomic basis sets for a single species (indexed by `id`) * from a list of basis sets */ - static RefVectorWithLeader extractOneSpeciesBasisRefList( - const RefVectorWithLeader>& basisset_list, - int id); + static RefVectorWithLeader extractOneSpeciesBasisRefList( + const RefVectorWithLeader>& basisset_list, + int id); }; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/OptimizableObjectT.h b/src/QMCWaveFunctions/OptimizableObjectT.h index 1ab14979b7e..d435db04023 100644 --- a/src/QMCWaveFunctions/OptimizableObjectT.h +++ b/src/QMCWaveFunctions/OptimizableObjectT.h @@ -23,35 +23,25 @@ namespace qmcplusplus template using OptVariablesTypeT = optimize::VariableSetT; -template +template class OptimizableObjectT { public: - OptimizableObjectT(const std::string& name) : name_(name) - { - } + OptimizableObjectT(const std::string& name) : name_(name) {} - const std::string& - getName() const - { - return name_; - } - bool - isOptimized() const - { - return is_optimized_; - } + const std::string& getName() const { return name_; } + bool isOptimized() const { return is_optimized_; } private: - /** Name of the optimizable object + /** Name of the optimizable object */ - const std::string name_; - /** If true, this object is actively modified during WFOpt + const std::string name_; + /** If true, this object is actively 
modified during WFOpt */ - bool is_optimized_ = false; + bool is_optimized_ = false; public: - /** check in variational parameters to the global list of parameters used by + /** check in variational parameters to the global list of parameters used by * the optimizer. * @param active a super set of optimizable variables * @@ -111,29 +101,22 @@ class OptimizableObjectT virtual void readVariationalParameters(hdf_archive& hin){}; }; -template +template class UniqueOptObjRefsT : public RefVector> { public: - OptimizableObjectT& - operator[](size_t i) const - { - return RefVector>::operator[](i); - } + OptimizableObjectT& operator[](size_t i) const { return RefVector>::operator[](i); } - void - push_back(OptimizableObjectT& obj) - { - if (obj.getName().empty()) - throw std::logic_error("BUG!! Only named OptimizableObject object " - "can be added to UniqueOptObjRefs!"); - auto result = std::find_if( - this->begin(), this->end(), [&](OptimizableObjectT& element) { - return element.getName() == obj.getName(); - }); - if (result == this->end()) - RefVector>::push_back(obj); - } + void push_back(OptimizableObjectT& obj) + { + if (obj.getName().empty()) + throw std::logic_error("BUG!! 
Only named OptimizableObject object " + "can be added to UniqueOptObjRefs!"); + auto result = std::find_if(this->begin(), this->end(), + [&](OptimizableObjectT& element) { return element.getName() == obj.getName(); }); + if (result == this->end()) + RefVector>::push_back(obj); + } }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/PlaneWave/PWBasisT.cpp b/src/QMCWaveFunctions/PlaneWave/PWBasisT.cpp index fe006553092..6641e457350 100644 --- a/src/QMCWaveFunctions/PlaneWave/PWBasisT.cpp +++ b/src/QMCWaveFunctions/PlaneWave/PWBasisT.cpp @@ -22,11 +22,11 @@ namespace qmcplusplus { template int PWBasisT::readbasis(hdf_archive& h5basisgroup, - RealType ecutoff, - const ParticleLayout& lat, - const std::string& pwname, - const std::string& pwmultname, - bool resizeContainer) + RealType ecutoff, + const ParticleLayout& lat, + const std::string& pwname, + const std::string& pwmultname, + bool resizeContainer) { ///make a local copy Lattice = lat; diff --git a/src/QMCWaveFunctions/PlaneWave/PWBasisT.h b/src/QMCWaveFunctions/PlaneWave/PWBasisT.h index e02706f7bfe..03ba6f3532f 100644 --- a/src/QMCWaveFunctions/PlaneWave/PWBasisT.h +++ b/src/QMCWaveFunctions/PlaneWave/PWBasisT.h @@ -40,66 +40,66 @@ namespace qmcplusplus * Rewrite of PlaneWaveBasis to utilize blas II or III * Support more general input tags */ -template +template class PWBasisT : public QMCTraits { public: - using RealType = typename RealAlias_impl::value_type; - using ComplexType = T; - using PosType = TinyVector; - using IndexType = QMCTraits::IndexType; - using ParticleLayout = typename ParticleSetT::ParticleLayout; - using GIndex_t = TinyVector; + using RealType = typename RealAlias_impl::value_type; + using ComplexType = T; + using PosType = TinyVector; + using IndexType = QMCTraits::IndexType; + using ParticleLayout = typename ParticleSetT::ParticleLayout; + using GIndex_t = TinyVector; private: - /// max of maxg[i] - int maxmaxg; - // Need to store the maximum translation in each dimension to 
use recursive - // PW generation. - GIndex_t maxg; - // The PlaneWave data - keep all of these strictly private to prevent - // inconsistencies. - RealType ecut; - /// twist angle in reduced - PosType twist; - /// twist angle in cartesian - PosType twist_cart; // Twist angle in reduced and Cartesian. + /// max of maxg[i] + int maxmaxg; + // Need to store the maximum translation in each dimension to use recursive + // PW generation. + GIndex_t maxg; + // The PlaneWave data - keep all of these strictly private to prevent + // inconsistencies. + RealType ecut; + /// twist angle in reduced + PosType twist; + /// twist angle in cartesian + PosType twist_cart; // Twist angle in reduced and Cartesian. - /// gvecs in reduced coordiates - std::vector gvecs; - /// Reduced coordinates with offset - /// gvecs_shifted[][idim]=gvecs[][idim]+maxg[idim] - std::vector gvecs_shifted; + /// gvecs in reduced coordiates + std::vector gvecs; + /// Reduced coordinates with offset + /// gvecs_shifted[][idim]=gvecs[][idim]+maxg[idim] + std::vector gvecs_shifted; - std::vector minusModKplusG2; - std::vector kplusgvecs_cart; // Cartesian. + std::vector minusModKplusG2; + std::vector kplusgvecs_cart; // Cartesian. - Matrix C; - // Real wavefunctions here. Now the basis states are cos(Gr) or sin(Gr), not - // exp(iGr) We need a way of switching between them for G -> -G, otherwise - // the determinant will have multiple rows that are equal (to within a - // constant factor) of others, giving a zero determinant. For this, we build - // a vector (negative) which stores whether a vector is "+" or "-" (with - // some criterion, to be defined). We the switch from cos() to sin() based - // on the value of this input. - std::vector negative; + Matrix C; + // Real wavefunctions here. 
Now the basis states are cos(Gr) or sin(Gr), not + // exp(iGr) We need a way of switching between them for G -> -G, otherwise + // the determinant will have multiple rows that are equal (to within a + // constant factor) of others, giving a zero determinant. For this, we build + // a vector (negative) which stores whether a vector is "+" or "-" (with + // some criterion, to be defined). We the switch from cos() to sin() based + // on the value of this input. + std::vector negative; public: - // enumeration for the value, laplacian, gradients and size - enum - { - PW_VALUE, - PW_LAP, - PW_GRADX, - PW_GRADY, - PW_GRADZ, - PW_MAXINDEX - }; + // enumeration for the value, laplacian, gradients and size + enum + { + PW_VALUE, + PW_LAP, + PW_GRADX, + PW_GRADY, + PW_GRADZ, + PW_MAXINDEX + }; - Matrix Z; + Matrix Z; - Vector Zv; - /* inputmap is used for a memory efficient way of + Vector Zv; + /* inputmap is used for a memory efficient way of * * importing the basis-set and coefficients when the desired energy cutoff * may be lower than that represented by all data in the wavefunction input @@ -115,70 +115,58 @@ class PWBasisT : public QMCTraits * twist-angle is used, the "sphere" of allowed planewaves is shifted. 
*/ - Vector phi; + Vector phi; - std::vector inputmap; + std::vector inputmap; - /// total number of basis functions - int NumPlaneWaves; + /// total number of basis functions + int NumPlaneWaves; - /// local copy of Lattice - ParticleLayout Lattice; + /// local copy of Lattice + ParticleLayout Lattice; - /// default constructor - PWBasisT() : maxmaxg(0), NumPlaneWaves(0) - { - } + /// default constructor + PWBasisT() : maxmaxg(0), NumPlaneWaves(0) {} - /// constructor - PWBasisT(const PosType& twistangle) : - maxmaxg(0), - twist(twistangle), - NumPlaneWaves(0) - { - } + /// constructor + PWBasisT(const PosType& twistangle) : maxmaxg(0), twist(twistangle), NumPlaneWaves(0) {} - ~PWBasisT() - { - } + ~PWBasisT() {} - /// set the twist angle - void - setTwistAngle(const PosType& tang); + /// set the twist angle + void setTwistAngle(const PosType& tang); - /// reset - void - reset(); + /// reset + void reset(); - /** Read basisset from hdf5 file. Apply ecut. + /** Read basisset from hdf5 file. Apply ecut. * @param h5basisgroup h5 node where basis is located * @param ecutoff cutoff energy * @param lat CrystalLattice * @param resizeContainer if true, resize internal storage. * @return the number of plane waves */ - int - readbasis(hdf_archive& h5basisgroup, RealType ecutoff, - const ParticleLayout& lat, const std::string& pwname = "planewaves", - const std::string& pwmultname = "multipliers", - bool resizeContainer = true); + int readbasis(hdf_archive& h5basisgroup, + RealType ecutoff, + const ParticleLayout& lat, + const std::string& pwname = "planewaves", + const std::string& pwmultname = "multipliers", + bool resizeContainer = true); - /** Remove basis elements if kinetic energy > ecut. + /** Remove basis elements if kinetic energy > ecut. * * Keep and indexmap so we know how to match coefficients on read. */ - void - trimforecut(); + void trimforecut(); #if defined(PWBASIS_USE_RECURSIVE) - /** Fill the recursion coefficients matrix. 
+ /** Fill the recursion coefficients matrix. * * @todo Generalize to non-orthorohmbic cells */ - inline void - BuildRecursionCoefs(const PosType& pos) - { - PosType tau_red(Lattice.toUnit(pos)); + inline void BuildRecursionCoefs(const PosType& pos) + { + PosType tau_red(Lattice.toUnit(pos)); // RealType phi=TWOPI*tau_red[0]; // RealType nphi=maxg0*phi; // ComplexType ct0(std::cos(phi),std::sin(phi)); @@ -200,78 +188,80 @@ class PWBasisT : public QMCTraits // C2[0]=t; // for(int n=1; n<=2*maxg2; n++) C2[n] = (t *= ct0); #pragma ivdep - for (int idim = 0; idim < 3; idim++) { - int ng = maxg[idim]; - RealType phi = TWOPI * tau_red[idim]; - RealType nphi = ng * phi; - ComplexType Ctemp(std::cos(phi), std::sin(phi)); - ComplexType t(std::cos(nphi), -std::sin(nphi)); - ComplexType* restrict cp_ptr = C[idim]; - *cp_ptr++ = t; - for (int n = 1; n <= 2 * ng; n++) { - *cp_ptr++ = (t *= Ctemp); - } - } - // Base version - // #pragma ivdep - // for(int idim=0; idim<3; idim++){ - // RealType phi=TWOPI*tau_red[idim]; - // ComplexType Ctemp(std::cos(phi),std::sin(phi)); - // int ng=maxg[idim]; - // ComplexType* restrict cp_ptr=C[idim]+ng; - // ComplexType* restrict cn_ptr=C[idim]+ng-1; - // *cp_ptr=1.0; - // for(int n=1; n<=ng; n++,cn_ptr--){ - // ComplexType t(Ctemp*(*cp_ptr++)); - // *cp_ptr = t; - // *cn_ptr = conj(t); - // } - // } - // Not valid for general supercell - // // Cartesian of twist for 1,1,1 (reduced coordinates) - // PosType G111(1.0,1.0,1.0); - // G111 = Lattice.k_cart(G111); - // - // //Precompute a small number of complex factors (PWs along - // b1,b2,b3 lines) - // //using a fast recursion algorithm - // #pragma ivdep - // for(int idim=0; idim<3; idim++){ - // //start the recursion with the 111 vector. 
- // RealType phi = pos[idim] * G111[idim]; - // register ComplexType Ctemp(std::cos(phi), std::sin(phi)); - // int ng=maxg[idim]; - // ComplexType* restrict cp_ptr=C[idim]+ng; - // ComplexType* restrict cn_ptr=C[idim]+ng-1; - // *cp_ptr=1.0; - // for(int n=1; n<=ng; n++,cn_ptr--){ - // ComplexType t(Ctemp*(*cp_ptr++)); - // *cp_ptr = t; - // *cn_ptr = conj(t); - // } - // } + for (int idim = 0; idim < 3; idim++) + { + int ng = maxg[idim]; + RealType phi = TWOPI * tau_red[idim]; + RealType nphi = ng * phi; + ComplexType Ctemp(std::cos(phi), std::sin(phi)); + ComplexType t(std::cos(nphi), -std::sin(nphi)); + ComplexType* restrict cp_ptr = C[idim]; + *cp_ptr++ = t; + for (int n = 1; n <= 2 * ng; n++) + { + *cp_ptr++ = (t *= Ctemp); + } } + // Base version + // #pragma ivdep + // for(int idim=0; idim<3; idim++){ + // RealType phi=TWOPI*tau_red[idim]; + // ComplexType Ctemp(std::cos(phi),std::sin(phi)); + // int ng=maxg[idim]; + // ComplexType* restrict cp_ptr=C[idim]+ng; + // ComplexType* restrict cn_ptr=C[idim]+ng-1; + // *cp_ptr=1.0; + // for(int n=1; n<=ng; n++,cn_ptr--){ + // ComplexType t(Ctemp*(*cp_ptr++)); + // *cp_ptr = t; + // *cn_ptr = conj(t); + // } + // } + // Not valid for general supercell + // // Cartesian of twist for 1,1,1 (reduced coordinates) + // PosType G111(1.0,1.0,1.0); + // G111 = Lattice.k_cart(G111); + // + // //Precompute a small number of complex factors (PWs along + // b1,b2,b3 lines) + // //using a fast recursion algorithm + // #pragma ivdep + // for(int idim=0; idim<3; idim++){ + // //start the recursion with the 111 vector. 
+ // RealType phi = pos[idim] * G111[idim]; + // register ComplexType Ctemp(std::cos(phi), std::sin(phi)); + // int ng=maxg[idim]; + // ComplexType* restrict cp_ptr=C[idim]+ng; + // ComplexType* restrict cn_ptr=C[idim]+ng-1; + // *cp_ptr=1.0; + // for(int n=1; n<=ng; n++,cn_ptr--){ + // ComplexType t(Ctemp*(*cp_ptr++)); + // *cp_ptr = t; + // *cn_ptr = conj(t); + // } + // } + } - inline void - evaluate(const PosType& pos) + inline void evaluate(const PosType& pos) + { + BuildRecursionCoefs(pos); + RealType twistdotr = dot(twist_cart, pos); + ComplexType pw0(std::cos(twistdotr), std::sin(twistdotr)); + // Evaluate the planewaves for particle iat. + for (int ig = 0; ig < NumPlaneWaves; ig++) { - BuildRecursionCoefs(pos); - RealType twistdotr = dot(twist_cart, pos); - ComplexType pw0(std::cos(twistdotr), std::sin(twistdotr)); - // Evaluate the planewaves for particle iat. - for (int ig = 0; ig < NumPlaneWaves; ig++) { - // PW is initialized as exp(i*twist.r) so that the final basis - // evaluations are for (twist+G).r - ComplexType pw(pw0); // std::cos(twistdotr),std::sin(twistdotr)); - for (int idim = 0; idim < 3; idim++) - pw *= C(idim, gvecs_shifted[ig][idim]); - // pw *= C0[gvecs_shifted[ig][0]]; - // pw *= C1[gvecs_shifted[ig][1]]; - // pw *= C2[gvecs_shifted[ig][2]]; - Zv[ig] = pw; - } + // PW is initialized as exp(i*twist.r) so that the final basis + // evaluations are for (twist+G).r + ComplexType pw(pw0); // std::cos(twistdotr),std::sin(twistdotr)); + for (int idim = 0; idim < 3; idim++) + pw *= C(idim, gvecs_shifted[ig][idim]); + // pw *= C0[gvecs_shifted[ig][0]]; + // pw *= C1[gvecs_shifted[ig][1]]; + // pw *= C2[gvecs_shifted[ig][2]]; + Zv[ig] = pw; } - /** Evaluate all planewaves and derivatives for the iat-th particle + } + /** Evaluate all planewaves and derivatives for the iat-th particle * * The basis functions are evaluated for particles iat: first <= iat < last * Evaluate the plane-waves at current particle coordinates using a fast @@ -279,91 
+269,84 @@ class PWBasisT : public QMCTraits * These can be "dotted" with coefficients later to complete orbital * evaluations. */ - inline void - evaluateAll(const ParticleSetT& P, int iat) + inline void evaluateAll(const ParticleSetT& P, int iat) + { + const PosType& r(P.activeR(iat)); + BuildRecursionCoefs(r); + RealType twistdotr = dot(twist_cart, r); + ComplexType pw0(std::cos(twistdotr), std::sin(twistdotr)); + // Evaluate the planewaves and derivatives. + ComplexType* restrict zptr = Z.data(); + for (int ig = 0; ig < NumPlaneWaves; ig++, zptr += 5) { - const PosType& r(P.activeR(iat)); - BuildRecursionCoefs(r); - RealType twistdotr = dot(twist_cart, r); - ComplexType pw0(std::cos(twistdotr), std::sin(twistdotr)); - // Evaluate the planewaves and derivatives. - ComplexType* restrict zptr = Z.data(); - for (int ig = 0; ig < NumPlaneWaves; ig++, zptr += 5) { - // PW is initialized as exp(i*twist.r) so that the final basis - // evaluations are for (twist+G).r - ComplexType pw(pw0); - // THE INDEX ORDER OF C DOESN'T LOOK TOO GOOD: this could be fixed - for (int idim = 0; idim < 3; idim++) - pw *= C(idim, gvecs_shifted[ig][idim]); - // pw *= C0[gvecs_shifted[ig][0]]; - // pw *= C1[gvecs_shifted[ig][1]]; - // pw *= C2[gvecs_shifted[ig][2]]; - zptr[0] = pw; - zptr[1] = minusModKplusG2[ig] * pw; - zptr[2] = - kplusgvecs_cart[ig][0] * ComplexType(-pw.imag(), pw.real()); - zptr[3] = - kplusgvecs_cart[ig][1] * ComplexType(-pw.imag(), pw.real()); - zptr[4] = - kplusgvecs_cart[ig][2] * ComplexType(-pw.imag(), pw.real()); - } + // PW is initialized as exp(i*twist.r) so that the final basis + // evaluations are for (twist+G).r + ComplexType pw(pw0); + // THE INDEX ORDER OF C DOESN'T LOOK TOO GOOD: this could be fixed + for (int idim = 0; idim < 3; idim++) + pw *= C(idim, gvecs_shifted[ig][idim]); + // pw *= C0[gvecs_shifted[ig][0]]; + // pw *= C1[gvecs_shifted[ig][1]]; + // pw *= C2[gvecs_shifted[ig][2]]; + zptr[0] = pw; + zptr[1] = minusModKplusG2[ig] * pw; + zptr[2] = 
kplusgvecs_cart[ig][0] * ComplexType(-pw.imag(), pw.real()); + zptr[3] = kplusgvecs_cart[ig][1] * ComplexType(-pw.imag(), pw.real()); + zptr[4] = kplusgvecs_cart[ig][2] * ComplexType(-pw.imag(), pw.real()); } + } #else - inline void - evaluate(const PosType& pos) - { - // Evaluate the planewaves for particle iat. - for (int ig = 0; ig < NumPlaneWaves; ig++) - phi[ig] = dot(kplusgvecs_cart[ig], pos); - eval_e2iphi(NumPlaneWaves, phi.data(), Zv.data()); - } - inline void - evaluateAll(const ParticleSetT& P, int iat) + inline void evaluate(const PosType& pos) + { + // Evaluate the planewaves for particle iat. + for (int ig = 0; ig < NumPlaneWaves; ig++) + phi[ig] = dot(kplusgvecs_cart[ig], pos); + eval_e2iphi(NumPlaneWaves, phi.data(), Zv.data()); + } + inline void evaluateAll(const ParticleSetT& P, int iat) + { + const PosType& r(P.activeR(iat)); + evaluate(r); + ComplexType* restrict zptr = Z.data(); + for (int ig = 0; ig < NumPlaneWaves; ig++, zptr += 5) { - const PosType& r(P.activeR(iat)); - evaluate(r); - ComplexType* restrict zptr = Z.data(); - for (int ig = 0; ig < NumPlaneWaves; ig++, zptr += 5) { - // PW is initialized as exp(i*twist.r) so that the final basis - // evaluations are for (twist+G).r - ComplexType& pw = Zv[ig]; - zptr[0] = pw; - zptr[1] = minusModKplusG2[ig] * pw; - zptr[2] = - kplusgvecs_cart[ig][0] * ComplexType(-pw.imag(), pw.real()); - zptr[3] = - kplusgvecs_cart[ig][1] * ComplexType(-pw.imag(), pw.real()); - zptr[4] = - kplusgvecs_cart[ig][2] * ComplexType(-pw.imag(), pw.real()); - } + // PW is initialized as exp(i*twist.r) so that the final basis + // evaluations are for (twist+G).r + ComplexType& pw = Zv[ig]; + zptr[0] = pw; + zptr[1] = minusModKplusG2[ig] * pw; + zptr[2] = kplusgvecs_cart[ig][0] * ComplexType(-pw.imag(), pw.real()); + zptr[3] = kplusgvecs_cart[ig][1] * ComplexType(-pw.imag(), pw.real()); + zptr[4] = kplusgvecs_cart[ig][2] * ComplexType(-pw.imag(), pw.real()); } + } #endif - // /** Fill the recursion coefficients matrix. 
- // * - // * @todo Generalize to non-orthorohmbic cells - // */ - // void BuildRecursionCoefsByAdd(const PosType& pos) - // { - // // Cartesian of twist for 1,1,1 (reduced coordinates) - // PosType G111(1.0,1.0,1.0); - // G111 = Lattice.k_cart(G111); - // //PosType redP=P.Lattice.toUnit(P.R[iat]); - // //Precompute a small number of complex factors (PWs along b1,b2,b3 - // lines) for(int idim=0; idim<3; idim++){ - // //start the recursion with the 111 vector. - // RealType phi = pos[idim] * G111[idim]; - // int ng(maxg[idim]); - // RealType* restrict cp_ptr=logC[idim]+ng; - // RealType* restrict cn_ptr=logC[idim]+ng-1; - // *cp_ptr=0.0; - // //add INTEL vectorization - // for(int n=1; n<=ng; n++,cn_ptr--){ - // RealType t(phi+*cp_ptr++); - // *cp_ptr = t; - // *cn_ptr = -t; - // } - // } - // } + // /** Fill the recursion coefficients matrix. + // * + // * @todo Generalize to non-orthorohmbic cells + // */ + // void BuildRecursionCoefsByAdd(const PosType& pos) + // { + // // Cartesian of twist for 1,1,1 (reduced coordinates) + // PosType G111(1.0,1.0,1.0); + // G111 = Lattice.k_cart(G111); + // //PosType redP=P.Lattice.toUnit(P.R[iat]); + // //Precompute a small number of complex factors (PWs along b1,b2,b3 + // lines) for(int idim=0; idim<3; idim++){ + // //start the recursion with the 111 vector. 
+ // RealType phi = pos[idim] * G111[idim]; + // int ng(maxg[idim]); + // RealType* restrict cp_ptr=logC[idim]+ng; + // RealType* restrict cn_ptr=logC[idim]+ng-1; + // *cp_ptr=0.0; + // //add INTEL vectorization + // for(int n=1; n<=ng; n++,cn_ptr--){ + // RealType t(phi+*cp_ptr++); + // *cp_ptr = t; + // *cn_ptr = -t; + // } + // } + // } }; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp index 6d82f8fdac1..2e025fc99d8 100644 --- a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp +++ b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp @@ -23,128 +23,126 @@ namespace qmcplusplus { -template +template PWOrbitalSetT::~PWOrbitalSetT() { - if (OwnBasisSet && myBasisSet) - delete myBasisSet; - if (!IsCloned && this->C != nullptr) - delete this->C; + if (OwnBasisSet && myBasisSet) + delete myBasisSet; + if (!IsCloned && this->C != nullptr) + delete this->C; } -template -std::unique_ptr> -PWOrbitalSetT::makeClone() const +template +std::unique_ptr> PWOrbitalSetT::makeClone() const { - auto myclone = std::make_unique>(*this); - myclone->myBasisSet = new PWBasisT(*myBasisSet); - myclone->IsCloned = true; - return myclone; + auto myclone = std::make_unique>(*this); + myclone->myBasisSet = new PWBasisT(*myBasisSet); + myclone->IsCloned = true; + return myclone; } -template -void -PWOrbitalSetT::setOrbitalSetSize(int norbs) +template +void PWOrbitalSetT::setOrbitalSetSize(int norbs) +{} + +template +void PWOrbitalSetT::resize(PWBasisPtr bset, int nbands, bool cleanup) { + myBasisSet = bset; + this->OrbitalSetSize = nbands; + OwnBasisSet = cleanup; + BasisSetSize = myBasisSet->NumPlaneWaves; + this->C = new ValueMatrix(this->OrbitalSetSize, BasisSetSize); + this->Temp.resize(this->OrbitalSetSize, PW_MAXINDEX); + app_log() << " PWOrbitalSetT::resize OrbitalSetSize =" << this->OrbitalSetSize + << " BasisSetSize = " << BasisSetSize << std::endl; } -template -void 
-PWOrbitalSetT::resize(PWBasisPtr bset, int nbands, bool cleanup) +template +void PWOrbitalSetT::addVector(const std::vector& coefs, int jorb) { - myBasisSet = bset; - this->OrbitalSetSize = nbands; - OwnBasisSet = cleanup; - BasisSetSize = myBasisSet->NumPlaneWaves; - this->C = new ValueMatrix(this->OrbitalSetSize, BasisSetSize); - this->Temp.resize(this->OrbitalSetSize, PW_MAXINDEX); - app_log() << " PWOrbitalSetT::resize OrbitalSetSize =" - << this->OrbitalSetSize << " BasisSetSize = " << BasisSetSize - << std::endl; + int ng = myBasisSet->inputmap.size(); + if (ng != coefs.size()) + { + app_error() << " Input G map does not match the basis size of wave functions " << std::endl; + OHMMS::Controller->abort(); + } + // drop G points for the given TwistAngle + const std::vector& inputmap(myBasisSet->inputmap); + for (int ig = 0; ig < ng; ig++) + { + if (inputmap[ig] > -1) + (*(this->C))[jorb][inputmap[ig]] = coefs[ig]; + } } -template -void -PWOrbitalSetT::addVector(const std::vector& coefs, int jorb) +template +void PWOrbitalSetT::addVector(const std::vector& coefs, int jorb) { - int ng = myBasisSet->inputmap.size(); - if (ng != coefs.size()) { - app_error() - << " Input G map does not match the basis size of wave functions " - << std::endl; - OHMMS::Controller->abort(); - } - // drop G points for the given TwistAngle - const std::vector& inputmap(myBasisSet->inputmap); - for (int ig = 0; ig < ng; ig++) { - if (inputmap[ig] > -1) - (*(this->C))[jorb][inputmap[ig]] = coefs[ig]; - } + int ng = myBasisSet->inputmap.size(); + if (ng != coefs.size()) + { + app_error() << " Input G map does not match the basis size of wave functions " << std::endl; + OHMMS::Controller->abort(); + } + // drop G points for the given TwistAngle + const std::vector& inputmap(myBasisSet->inputmap); + for (int ig = 0; ig < ng; ig++) + { + if (inputmap[ig] > -1) + (*(this->C))[jorb][inputmap[ig]] = coefs[ig]; + } } -template -void -PWOrbitalSetT::addVector(const std::vector& coefs, int jorb) 
+template +void PWOrbitalSetT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) { - int ng = myBasisSet->inputmap.size(); - if (ng != coefs.size()) { - app_error() - << " Input G map does not match the basis size of wave functions " - << std::endl; - OHMMS::Controller->abort(); - } - // drop G points for the given TwistAngle - const std::vector& inputmap(myBasisSet->inputmap); - for (int ig = 0; ig < ng; ig++) { - if (inputmap[ig] > -1) - (*(this->C))[jorb][inputmap[ig]] = coefs[ig]; - } + // Evaluate every orbital for particle iat. + // Evaluate the basis-set at these coordinates: + // myBasisSet->evaluate(P,iat); + myBasisSet->evaluate(P.activeR(iat)); + MatrixOperators::product(*(this->C), myBasisSet->Zv, psi); } -template -void -PWOrbitalSetT::evaluateValue( - const ParticleSetT& P, int iat, ValueVector& psi) +template +void PWOrbitalSetT::evaluateVGL(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) { - // Evaluate every orbital for particle iat. - // Evaluate the basis-set at these coordinates: - // myBasisSet->evaluate(P,iat); - myBasisSet->evaluate(P.activeR(iat)); - MatrixOperators::product(*(this->C), myBasisSet->Zv, psi); + // Evaluate the orbitals and derivatives for particle iat only. 
+ myBasisSet->evaluateAll(P, iat); + MatrixOperators::product(*(this->C), myBasisSet->Z, this->Temp); + const T* restrict tptr = this->Temp.data(); + for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX) + { + psi[j] = tptr[PW_VALUE]; + d2psi[j] = tptr[PW_LAP]; + dpsi[j] = GradType(tptr[PW_GRADX], tptr[PW_GRADY], tptr[PW_GRADZ]); + } } -template -void -PWOrbitalSetT::evaluateVGL(const ParticleSetT& P, int iat, - ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +template +void PWOrbitalSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) { - // Evaluate the orbitals and derivatives for particle iat only. + for (int iat = first, i = 0; iat < last; iat++, i++) + { myBasisSet->evaluateAll(P, iat); MatrixOperators::product(*(this->C), myBasisSet->Z, this->Temp); const T* restrict tptr = this->Temp.data(); - for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX) { - psi[j] = tptr[PW_VALUE]; - d2psi[j] = tptr[PW_LAP]; - dpsi[j] = GradType(tptr[PW_GRADX], tptr[PW_GRADY], tptr[PW_GRADZ]); - } -} - -template -void -PWOrbitalSetT::evaluate_notranspose(const ParticleSetT& P, int first, - int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) -{ - for (int iat = first, i = 0; iat < last; iat++, i++) { - myBasisSet->evaluateAll(P, iat); - MatrixOperators::product(*(this->C), myBasisSet->Z, this->Temp); - const T* restrict tptr = this->Temp.data(); - for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX) { - logdet(i, j) = tptr[PW_VALUE]; - d2logdet(i, j) = tptr[PW_LAP]; - dlogdet(i, j) = - GradType(tptr[PW_GRADX], tptr[PW_GRADY], tptr[PW_GRADZ]); - } + for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX) + { + logdet(i, j) = tptr[PW_VALUE]; + d2logdet(i, j) = tptr[PW_LAP]; + dlogdet(i, j) = GradType(tptr[PW_GRADX], tptr[PW_GRADY], tptr[PW_GRADZ]); } + } } // Class concrete types from T diff --git 
a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h index 9103a16ee2b..743a4744b69 100644 --- a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h +++ b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h @@ -29,118 +29,101 @@ namespace qmcplusplus { -template +template class PWOrbitalSetT : public SPOSetT { public: - using RealType = typename SPOSetT::RealType; - using ComplexType = T; - using PosType = typename SPOSetT::PosType; - using ValueVector = typename SPOSetT::ValueVector; - using GradVector = typename SPOSetT::GradVector; - using ValueMatrix = typename SPOSetT::ValueMatrix; - using GradMatrix = typename SPOSetT::GradMatrix; - using GradType = typename SPOSetT::GradType; - using IndexType = typename SPOSetT::IndexType; - - using BasisSet_t = PWBasisT; - using PWBasisPtr = PWBasisT*; - - /** inherit the enum of BasisSet_t */ - enum - { - PW_VALUE = BasisSet_t::PW_VALUE, - PW_LAP = BasisSet_t::PW_LAP, - PW_GRADX = BasisSet_t::PW_GRADX, - PW_GRADY = BasisSet_t::PW_GRADY, - PW_GRADZ = BasisSet_t::PW_GRADZ, - PW_MAXINDEX = BasisSet_t::PW_MAXINDEX - }; - - /** default constructor + using RealType = typename SPOSetT::RealType; + using ComplexType = T; + using PosType = typename SPOSetT::PosType; + using ValueVector = typename SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + using GradType = typename SPOSetT::GradType; + using IndexType = typename SPOSetT::IndexType; + + using BasisSet_t = PWBasisT; + using PWBasisPtr = PWBasisT*; + + /** inherit the enum of BasisSet_t */ + enum + { + PW_VALUE = BasisSet_t::PW_VALUE, + PW_LAP = BasisSet_t::PW_LAP, + PW_GRADX = BasisSet_t::PW_GRADX, + PW_GRADY = BasisSet_t::PW_GRADY, + PW_GRADZ = BasisSet_t::PW_GRADZ, + PW_MAXINDEX = BasisSet_t::PW_MAXINDEX + }; + + /** default constructor */ - PWOrbitalSetT(const std::string& my_name) : - SPOSetT(my_name), - 
OwnBasisSet(false), - myBasisSet(nullptr), - BasisSetSize(0), - C(nullptr), - IsCloned(false) - { - } - - std::string - getClassName() const override - { - return "PWOrbitalSetT"; - } - - /** delete BasisSet only it owns this + PWOrbitalSetT(const std::string& my_name) + : SPOSetT(my_name), OwnBasisSet(false), myBasisSet(nullptr), BasisSetSize(0), C(nullptr), IsCloned(false) + {} + + std::string getClassName() const override { return "PWOrbitalSetT"; } + + /** delete BasisSet only it owns this * * Builder takes care of who owns what */ - ~PWOrbitalSetT() override; + ~PWOrbitalSetT() override; - std::unique_ptr> - makeClone() const override; - /** resize the orbital base + std::unique_ptr> makeClone() const override; + /** resize the orbital base * @param bset PWBasis * @param nbands number of bands * @param cleaup if true, owns PWBasis. Will clean up. */ - void - resize(PWBasisPtr bset, int nbands, bool cleanup = false); + void resize(PWBasisPtr bset, int nbands, bool cleanup = false); - /** Builder class takes care of the assertion + /** Builder class takes care of the assertion */ - void - addVector(const std::vector& coefs, int jorb); - void - addVector(const std::vector& coefs, int jorb); - - void - setOrbitalSetSize(int norbs) override; - - inline T - evaluate(int ib, const PosType& pos) - { - myBasisSet->evaluate(pos); - return BLAS::dot(BasisSetSize, (*C)[ib], myBasisSet->Zv.data()); - } - - void - evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override; - - void - evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi) override; - - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - - /** boolean + void addVector(const std::vector& coefs, int jorb); + void addVector(const std::vector& coefs, int jorb); + + void setOrbitalSetSize(int norbs) override; + + inline T evaluate(int ib, const PosType& 
pos) + { + myBasisSet->evaluate(pos); + return BLAS::dot(BasisSetSize, (*C)[ib], myBasisSet->Zv.data()); + } + + void evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override; + + void evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) override; + + /** boolean * * If true, this has to delete the BasisSet */ - bool OwnBasisSet; - /// TwistAngle of this PWOrbitalSetT - PosType TwistAngle; - /// My basis set - PWBasisPtr myBasisSet; - /// number of basis - IndexType BasisSetSize; - /** pointer to matrix containing the coefficients + bool OwnBasisSet; + /// TwistAngle of this PWOrbitalSetT + PosType TwistAngle; + /// My basis set + PWBasisPtr myBasisSet; + /// number of basis + IndexType BasisSetSize; + /** pointer to matrix containing the coefficients * * makeClone makes a shallow copy and flag IsCloned */ - ValueMatrix* C; - /// if true, do not clean up - bool IsCloned; + ValueMatrix* C; + /// if true, do not clean up + bool IsCloned; - /** temporary array to perform gemm operation */ - Matrix Temp; + /** temporary array to perform gemm operation */ + Matrix Temp; }; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/RotatedSPOsT.cpp b/src/QMCWaveFunctions/RotatedSPOsT.cpp index 116cc3d1438..e1fa8d6f125 100644 --- a/src/QMCWaveFunctions/RotatedSPOsT.cpp +++ b/src/QMCWaveFunctions/RotatedSPOsT.cpp @@ -21,292 +21,280 @@ namespace qmcplusplus { -template -RotatedSPOsT::RotatedSPOsT( - const std::string& my_name, std::unique_ptr>&& spos) : - SPOSetT(my_name), - OptimizableObjectT(my_name), - Phi(std::move(spos)), - nel_major_(0), - params_supplied(false) +template +RotatedSPOsT::RotatedSPOsT(const std::string& my_name, std::unique_ptr>&& spos) + : SPOSetT(my_name), OptimizableObjectT(my_name), Phi(std::move(spos)), nel_major_(0), 
params_supplied(false) { - this->OrbitalSetSize = Phi->getOrbitalSetSize(); + this->OrbitalSetSize = Phi->getOrbitalSetSize(); } -template +template RotatedSPOsT::~RotatedSPOsT() -{ -} +{} -template -void -RotatedSPOsT::setRotationParameters(const std::vector& param_list) +template +void RotatedSPOsT::setRotationParameters(const std::vector& param_list) { - params = param_list; - params_supplied = true; + params = param_list; + params_supplied = true; } -template -void -RotatedSPOsT::createRotationIndices( - int nel, int nmo, RotationIndices& rot_indices) +template +void RotatedSPOsT::createRotationIndices(int nel, int nmo, RotationIndices& rot_indices) { - for (int i = 0; i < nel; i++) - for (int j = nel; j < nmo; j++) - rot_indices.emplace_back(i, j); + for (int i = 0; i < nel; i++) + for (int j = nel; j < nmo; j++) + rot_indices.emplace_back(i, j); } -template -void -RotatedSPOsT::createRotationIndicesFull( - int nel, int nmo, RotationIndices& rot_indices) +template +void RotatedSPOsT::createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices) { - rot_indices.reserve(nmo * (nmo - 1) / 2); - - // start with core-active rotations - put them at the beginning of the list - // so it matches the other list of rotation indices - for (int i = 0; i < nel; i++) - for (int j = nel; j < nmo; j++) - rot_indices.emplace_back(i, j); - - // Add core-core rotations - put them at the end of the list - for (int i = 0; i < nel; i++) - for (int j = i + 1; j < nel; j++) - rot_indices.emplace_back(i, j); - - // Add active-active rotations - put them at the end of the list - for (int i = nel; i < nmo; i++) - for (int j = i + 1; j < nmo; j++) - rot_indices.emplace_back(i, j); + rot_indices.reserve(nmo * (nmo - 1) / 2); + + // start with core-active rotations - put them at the beginning of the list + // so it matches the other list of rotation indices + for (int i = 0; i < nel; i++) + for (int j = nel; j < nmo; j++) + rot_indices.emplace_back(i, j); + + // Add core-core 
rotations - put them at the end of the list + for (int i = 0; i < nel; i++) + for (int j = i + 1; j < nel; j++) + rot_indices.emplace_back(i, j); + + // Add active-active rotations - put them at the end of the list + for (int i = nel; i < nmo; i++) + for (int j = i + 1; j < nmo; j++) + rot_indices.emplace_back(i, j); } -template -void -RotatedSPOsT::constructAntiSymmetricMatrix( - const RotationIndices& rot_indices, const std::vector& param, - ValueMatrix& rot_mat) +template +void RotatedSPOsT::constructAntiSymmetricMatrix(const RotationIndices& rot_indices, + const std::vector& param, + ValueMatrix& rot_mat) { - assert(rot_indices.size() == param.size()); - // Assumes rot_mat is of the correct size + assert(rot_indices.size() == param.size()); + // Assumes rot_mat is of the correct size - rot_mat = 0.0; + rot_mat = 0.0; - for (int i = 0; i < rot_indices.size(); i++) { - const int p = rot_indices[i].first; - const int q = rot_indices[i].second; - const RealType x = param[i]; + for (int i = 0; i < rot_indices.size(); i++) + { + const int p = rot_indices[i].first; + const int q = rot_indices[i].second; + const RealType x = param[i]; - rot_mat[q][p] = x; - rot_mat[p][q] = -x; - } + rot_mat[q][p] = x; + rot_mat[p][q] = -x; + } } -template -void -RotatedSPOsT::extractParamsFromAntiSymmetricMatrix( - const RotationIndices& rot_indices, const ValueMatrix& rot_mat, - std::vector& param) +template +void RotatedSPOsT::extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices, + const ValueMatrix& rot_mat, + std::vector& param) { - assert(rot_indices.size() == param.size()); - // Assumes rot_mat is of the correct size - - for (int i = 0; i < rot_indices.size(); i++) { - const int p = rot_indices[i].first; - const int q = rot_indices[i].second; - param[i] = rot_mat[q][p]; - } + assert(rot_indices.size() == param.size()); + // Assumes rot_mat is of the correct size + + for (int i = 0; i < rot_indices.size(); i++) + { + const int p = rot_indices[i].first; + const 
int q = rot_indices[i].second; + param[i] = rot_mat[q][p]; + } } template void RotatedSPOsT::resetParametersExclusive(const OptVariablesTypeT& active) { - std::vector delta_param(m_act_rot_inds.size()); - - size_t psize = m_act_rot_inds.size(); - - if (use_global_rot_) { - psize = m_full_rot_inds.size(); - assert(psize >= m_act_rot_inds.size()); - } - - std::vector old_param(psize); - std::vector new_param(psize); - - for (int i = 0; i < m_act_rot_inds.size(); i++) { - int loc = this->myVars.where(i); - delta_param[i] = active[loc] - this->myVars[i]; - this->myVars[i] = active[loc]; - } - - if (use_global_rot_) { - for (int i = 0; i < m_full_rot_inds.size(); i++) - old_param[i] = myVarsFull[i]; - - applyDeltaRotation(delta_param, old_param, new_param); - - // Save the the params - for (int i = 0; i < m_full_rot_inds.size(); i++) - myVarsFull[i] = new_param[i]; - } - else { - apply_rotation(delta_param, false); - - // Save the parameters in the history list - history_params_.push_back(delta_param); - } + std::vector delta_param(m_act_rot_inds.size()); + + size_t psize = m_act_rot_inds.size(); + + if (use_global_rot_) + { + psize = m_full_rot_inds.size(); + assert(psize >= m_act_rot_inds.size()); + } + + std::vector old_param(psize); + std::vector new_param(psize); + + for (int i = 0; i < m_act_rot_inds.size(); i++) + { + int loc = this->myVars.where(i); + delta_param[i] = active[loc] - this->myVars[i]; + this->myVars[i] = active[loc]; + } + + if (use_global_rot_) + { + for (int i = 0; i < m_full_rot_inds.size(); i++) + old_param[i] = myVarsFull[i]; + + applyDeltaRotation(delta_param, old_param, new_param); + + // Save the the params + for (int i = 0; i < m_full_rot_inds.size(); i++) + myVarsFull[i] = new_param[i]; + } + else + { + apply_rotation(delta_param, false); + + // Save the parameters in the history list + history_params_.push_back(delta_param); + } } -template -void -RotatedSPOsT::writeVariationalParameters(hdf_archive& hout) +template +void 
RotatedSPOsT::writeVariationalParameters(hdf_archive& hout) { - hout.push("RotatedSPOsT"); - if (use_global_rot_) { - hout.push("rotation_global"); - std::string rot_global_name = - std::string("rotation_global_") + SPOSetT::getName(); - - int nparam_full = myVarsFull.size(); - std::vector full_params(nparam_full); - for (int i = 0; i < nparam_full; i++) - full_params[i] = myVarsFull[i]; - - hout.write(full_params, rot_global_name); - hout.pop(); - } - else { - hout.push("rotation_history"); - size_t rows = history_params_.size(); - size_t cols = 0; - if (rows > 0) - cols = history_params_[0].size(); - - Matrix tmp(rows, cols); - for (size_t i = 0; i < rows; i++) - for (size_t j = 0; j < cols; j++) - tmp(i, j) = history_params_[i][j]; - - std::string rot_hist_name = - std::string("rotation_history_") + SPOSetT::getName(); - hout.write(tmp, rot_hist_name); - hout.pop(); - } + hout.push("RotatedSPOsT"); + if (use_global_rot_) + { + hout.push("rotation_global"); + std::string rot_global_name = std::string("rotation_global_") + SPOSetT::getName(); + + int nparam_full = myVarsFull.size(); + std::vector full_params(nparam_full); + for (int i = 0; i < nparam_full; i++) + full_params[i] = myVarsFull[i]; + + hout.write(full_params, rot_global_name); + hout.pop(); + } + else + { + hout.push("rotation_history"); + size_t rows = history_params_.size(); + size_t cols = 0; + if (rows > 0) + cols = history_params_[0].size(); + + Matrix tmp(rows, cols); + for (size_t i = 0; i < rows; i++) + for (size_t j = 0; j < cols; j++) + tmp(i, j) = history_params_[i][j]; + + std::string rot_hist_name = std::string("rotation_history_") + SPOSetT::getName(); + hout.write(tmp, rot_hist_name); + hout.pop(); + } - // Save myVars in order to restore object state exactly - // The values aren't meaningful, but they need to match those saved in - // VariableSet - hout.push("rotation_params"); - std::string rot_params_name = - std::string("rotation_params_") + SPOSetT::getName(); + // Save myVars in 
order to restore object state exactly + // The values aren't meaningful, but they need to match those saved in + // VariableSet + hout.push("rotation_params"); + std::string rot_params_name = std::string("rotation_params_") + SPOSetT::getName(); - int nparam = this->myVars.size(); - std::vector params(nparam); - for (int i = 0; i < nparam; i++) - params[i] = this->myVars[i]; + int nparam = this->myVars.size(); + std::vector params(nparam); + for (int i = 0; i < nparam; i++) + params[i] = this->myVars[i]; - hout.write(params, rot_params_name); - hout.pop(); + hout.write(params, rot_params_name); + hout.pop(); - hout.pop(); + hout.pop(); } -template -void -RotatedSPOsT::readVariationalParameters(hdf_archive& hin) +template +void RotatedSPOsT::readVariationalParameters(hdf_archive& hin) { - hin.push("RotatedSPOsT", false); - - bool grp_hist_exists = hin.is_group("rotation_history"); - bool grp_global_exists = hin.is_group("rotation_global"); - if (!grp_hist_exists && !grp_global_exists) - app_warning() << "Rotation parameters not found in VP file"; - - if (grp_global_exists) { - hin.push("rotation_global", false); - std::string rot_global_name = - std::string("rotation_global_") + SPOSetT::getName(); - - std::vector sizes(1); - if (!hin.getShape(rot_global_name, sizes)) - throw std::runtime_error( - "Failed to read rotation_global in VP file"); - - int nparam_full_actual = sizes[0]; - int nparam_full = myVarsFull.size(); - - if (nparam_full != nparam_full_actual) { - std::ostringstream tmp_err; - tmp_err << "Expected number of full rotation parameters (" - << nparam_full << ") does not match number in file (" - << nparam_full_actual << ")"; - throw std::runtime_error(tmp_err.str()); - } - std::vector full_params(nparam_full); - hin.read(full_params, rot_global_name); - for (int i = 0; i < nparam_full; i++) - myVarsFull[i] = full_params[i]; + hin.push("RotatedSPOsT", false); - hin.pop(); + bool grp_hist_exists = hin.is_group("rotation_history"); + bool 
grp_global_exists = hin.is_group("rotation_global"); + if (!grp_hist_exists && !grp_global_exists) + app_warning() << "Rotation parameters not found in VP file"; - applyFullRotation(full_params, true); + if (grp_global_exists) + { + hin.push("rotation_global", false); + std::string rot_global_name = std::string("rotation_global_") + SPOSetT::getName(); + + std::vector sizes(1); + if (!hin.getShape(rot_global_name, sizes)) + throw std::runtime_error("Failed to read rotation_global in VP file"); + + int nparam_full_actual = sizes[0]; + int nparam_full = myVarsFull.size(); + + if (nparam_full != nparam_full_actual) + { + std::ostringstream tmp_err; + tmp_err << "Expected number of full rotation parameters (" << nparam_full << ") does not match number in file (" + << nparam_full_actual << ")"; + throw std::runtime_error(tmp_err.str()); } - else if (grp_hist_exists) { - hin.push("rotation_history", false); - std::string rot_hist_name = - std::string("rotation_history_") + SPOSetT::getName(); - std::vector sizes(2); - if (!hin.getShape(rot_hist_name, sizes)) - throw std::runtime_error( - "Failed to read rotation history in VP file"); - - int rows = sizes[0]; - int cols = sizes[1]; - history_params_.resize(rows); - Matrix tmp(rows, cols); - hin.read(tmp, rot_hist_name); - for (size_t i = 0; i < rows; i++) { - history_params_[i].resize(cols); - for (size_t j = 0; j < cols; j++) - history_params_[i][j] = tmp(i, j); - } + std::vector full_params(nparam_full); + hin.read(full_params, rot_global_name); + for (int i = 0; i < nparam_full; i++) + myVarsFull[i] = full_params[i]; - hin.pop(); + hin.pop(); - applyRotationHistory(); + applyFullRotation(full_params, true); + } + else if (grp_hist_exists) + { + hin.push("rotation_history", false); + std::string rot_hist_name = std::string("rotation_history_") + SPOSetT::getName(); + std::vector sizes(2); + if (!hin.getShape(rot_hist_name, sizes)) + throw std::runtime_error("Failed to read rotation history in VP file"); + + int rows = 
sizes[0]; + int cols = sizes[1]; + history_params_.resize(rows); + Matrix tmp(rows, cols); + hin.read(tmp, rot_hist_name); + for (size_t i = 0; i < rows; i++) + { + history_params_[i].resize(cols); + for (size_t j = 0; j < cols; j++) + history_params_[i][j] = tmp(i, j); } - hin.push("rotation_params", false); - std::string rot_param_name = - std::string("rotation_params_") + SPOSetT::getName(); + hin.pop(); + + applyRotationHistory(); + } - std::vector sizes(1); - if (!hin.getShape(rot_param_name, sizes)) - throw std::runtime_error("Failed to read rotation_params in VP file"); - - int nparam_actual = sizes[0]; - int nparam = this->myVars.size(); - if (nparam != nparam_actual) { - std::ostringstream tmp_err; - tmp_err << "Expected number of rotation parameters (" << nparam - << ") does not match number in file (" << nparam_actual << ")"; - throw std::runtime_error(tmp_err.str()); - } + hin.push("rotation_params", false); + std::string rot_param_name = std::string("rotation_params_") + SPOSetT::getName(); - std::vector params(nparam); - hin.read(params, rot_param_name); - for (int i = 0; i < nparam; i++) - this->myVars[i] = params[i]; + std::vector sizes(1); + if (!hin.getShape(rot_param_name, sizes)) + throw std::runtime_error("Failed to read rotation_params in VP file"); - hin.pop(); + int nparam_actual = sizes[0]; + int nparam = this->myVars.size(); + if (nparam != nparam_actual) + { + std::ostringstream tmp_err; + tmp_err << "Expected number of rotation parameters (" << nparam << ") does not match number in file (" + << nparam_actual << ")"; + throw std::runtime_error(tmp_err.str()); + } - hin.pop(); + std::vector params(nparam); + hin.read(params, rot_param_name); + for (int i = 0; i < nparam; i++) + this->myVars[i] = params[i]; + + hin.pop(); + + hin.pop(); } -template -void -RotatedSPOsT::buildOptVariables(const size_t nel) +template +void RotatedSPOsT::buildOptVariables(const size_t nel) { - /* Only rebuild optimized variables if more after-rotation orbitals 
are + /* Only rebuild optimized variables if more after-rotation orbitals are * needed Consider ROHF, there is only one set of SPO for both spin up and * down Nup > Ndown. nel_major_ will be set Nup. * @@ -314,347 +302,338 @@ RotatedSPOsT::buildOptVariables(const size_t nel) * parameters again when a clone is made (the DiracDeterminant constructor * calls buildOptVariables) */ - if (nel > nel_major_ && this->myVars.size() == 0) { - nel_major_ = nel; - - const size_t nmo = Phi->getOrbitalSetSize(); - - // create active rotation parameter indices - RotationIndices created_m_act_rot_inds; - - RotationIndices created_full_rot_inds; - if (use_global_rot_) - createRotationIndicesFull(nel, nmo, created_full_rot_inds); + if (nel > nel_major_ && this->myVars.size() == 0) + { + nel_major_ = nel; - createRotationIndices(nel, nmo, created_m_act_rot_inds); - - buildOptVariables(created_m_act_rot_inds, created_full_rot_inds); - } -} - -template -void -RotatedSPOsT::buildOptVariables( - const RotationIndices& rotations, const RotationIndices& full_rotations) -{ const size_t nmo = Phi->getOrbitalSetSize(); - // create active rotations - m_act_rot_inds = rotations; + // create active rotation parameter indices + RotationIndices created_m_act_rot_inds; + RotationIndices created_full_rot_inds; if (use_global_rot_) - m_full_rot_inds = full_rotations; + createRotationIndicesFull(nel, nmo, created_full_rot_inds); - if (use_global_rot_) - app_log() << "Orbital rotation using global rotation" << std::endl; - else - app_log() << "Orbital rotation using history" << std::endl; + createRotationIndices(nel, nmo, created_m_act_rot_inds); - // This will add the orbital rotation parameters to myVars - // and will also read in initial parameter values supplied in input file - int p, q; - int nparams_active = m_act_rot_inds.size(); + buildOptVariables(created_m_act_rot_inds, created_full_rot_inds); + } +} - app_log() << "nparams_active: " << nparams_active - << " params2.size(): " << params.size() 
<< std::endl; +template +void RotatedSPOsT::buildOptVariables(const RotationIndices& rotations, const RotationIndices& full_rotations) +{ + const size_t nmo = Phi->getOrbitalSetSize(); + + // create active rotations + m_act_rot_inds = rotations; + + if (use_global_rot_) + m_full_rot_inds = full_rotations; + + if (use_global_rot_) + app_log() << "Orbital rotation using global rotation" << std::endl; + else + app_log() << "Orbital rotation using history" << std::endl; + + // This will add the orbital rotation parameters to myVars + // and will also read in initial parameter values supplied in input file + int p, q; + int nparams_active = m_act_rot_inds.size(); + + app_log() << "nparams_active: " << nparams_active << " params2.size(): " << params.size() << std::endl; + if (params_supplied) + if (nparams_active != params.size()) + throw std::runtime_error("The number of supplied orbital rotation parameters does not " + "match number prdouced by the slater " + "expansion. \n"); + + this->myVars.clear(); + for (int i = 0; i < nparams_active; i++) + { + p = m_act_rot_inds[i].first; + q = m_act_rot_inds[i].second; + std::stringstream sstr; + sstr << SPOSetT::getName() << "_orb_rot_" << (p < 10 ? "0" : "") << (p < 100 ? "0" : "") << (p < 1000 ? "0" : "") + << p << "_" << (q < 10 ? "0" : "") << (q < 100 ? "0" : "") << (q < 1000 ? "0" : "") << q; + + // If the user input parameters, use those. Otherwise, initialize the + // parameters to zero if (params_supplied) - if (nparams_active != params.size()) - throw std::runtime_error( - "The number of supplied orbital rotation parameters does not " - "match number prdouced by the slater " - "expansion. \n"); - - this->myVars.clear(); - for (int i = 0; i < nparams_active; i++) { - p = m_act_rot_inds[i].first; - q = m_act_rot_inds[i].second; - std::stringstream sstr; - sstr << SPOSetT::getName() << "_orb_rot_" << (p < 10 ? "0" : "") - << (p < 100 ? "0" : "") << (p < 1000 ? "0" : "") << p << "_" - << (q < 10 ? "0" : "") << (q < 100 ? 
"0" : "") - << (q < 1000 ? "0" : "") << q; - - // If the user input parameters, use those. Otherwise, initialize the - // parameters to zero - if (params_supplied) { - this->myVars.insert(sstr.str(), params[i]); - } - else { - this->myVars.insert(sstr.str(), 0.0); - } + { + this->myVars.insert(sstr.str(), params[i]); } - - if (use_global_rot_) { - myVarsFull.clear(); - for (int i = 0; i < m_full_rot_inds.size(); i++) { - p = m_full_rot_inds[i].first; - q = m_full_rot_inds[i].second; - std::stringstream sstr; - sstr << SPOSetT::getName() << "_orb_rot_" << (p < 10 ? "0" : "") - << (p < 100 ? "0" : "") << (p < 1000 ? "0" : "") << p << "_" - << (q < 10 ? "0" : "") << (q < 100 ? "0" : "") - << (q < 1000 ? "0" : "") << q; - - if (params_supplied && i < m_act_rot_inds.size()) - myVarsFull.insert(sstr.str(), params[i]); - else - myVarsFull.insert(sstr.str(), 0.0); - } + else + { + this->myVars.insert(sstr.str(), 0.0); } - - // Printing the parameters - if (true) { - app_log() << std::string(16, ' ') << "Parameter name" - << std::string(15, ' ') << "Value\n"; - this->myVars.print(app_log()); - } - - if (params_supplied) { - std::vector param(m_act_rot_inds.size()); - for (int i = 0; i < m_act_rot_inds.size(); i++) - param[i] = this->myVars[i]; - apply_rotation(param, false); + } + + if (use_global_rot_) + { + myVarsFull.clear(); + for (int i = 0; i < m_full_rot_inds.size(); i++) + { + p = m_full_rot_inds[i].first; + q = m_full_rot_inds[i].second; + std::stringstream sstr; + sstr << SPOSetT::getName() << "_orb_rot_" << (p < 10 ? "0" : "") << (p < 100 ? "0" : "") + << (p < 1000 ? "0" : "") << p << "_" << (q < 10 ? "0" : "") << (q < 100 ? "0" : "") << (q < 1000 ? 
"0" : "") + << q; + + if (params_supplied && i < m_act_rot_inds.size()) + myVarsFull.insert(sstr.str(), params[i]); + else + myVarsFull.insert(sstr.str(), 0.0); } + } + + // Printing the parameters + if (true) + { + app_log() << std::string(16, ' ') << "Parameter name" << std::string(15, ' ') << "Value\n"; + this->myVars.print(app_log()); + } + + if (params_supplied) + { + std::vector param(m_act_rot_inds.size()); + for (int i = 0; i < m_act_rot_inds.size(); i++) + param[i] = this->myVars[i]; + apply_rotation(param, false); + } } -template -void -RotatedSPOsT::apply_rotation( - const std::vector& param, bool use_stored_copy) +template +void RotatedSPOsT::apply_rotation(const std::vector& param, bool use_stored_copy) { - assert(param.size() == m_act_rot_inds.size()); + assert(param.size() == m_act_rot_inds.size()); - const size_t nmo = Phi->getOrbitalSetSize(); - ValueMatrix rot_mat(nmo, nmo); + const size_t nmo = Phi->getOrbitalSetSize(); + ValueMatrix rot_mat(nmo, nmo); - constructAntiSymmetricMatrix(m_act_rot_inds, param, rot_mat); + constructAntiSymmetricMatrix(m_act_rot_inds, param, rot_mat); - /* + /* rot_mat is now an anti-hermitian matrix. Now we convert it into a unitary matrix via rot_mat = exp(-rot_mat). Finally, apply unitary matrix to orbs. 
*/ - exponentiate_antisym_matrix(rot_mat); - Phi->applyRotation(rot_mat, use_stored_copy); + exponentiate_antisym_matrix(rot_mat); + Phi->applyRotation(rot_mat, use_stored_copy); } -template -void -RotatedSPOsT::applyDeltaRotation(const std::vector& delta_param, - const std::vector& old_param, std::vector& new_param) +template +void RotatedSPOsT::applyDeltaRotation(const std::vector& delta_param, + const std::vector& old_param, + std::vector& new_param) { - const size_t nmo = Phi->getOrbitalSetSize(); - ValueMatrix new_rot_mat(nmo, nmo); - constructDeltaRotation(delta_param, old_param, m_act_rot_inds, - m_full_rot_inds, new_param, new_rot_mat); + const size_t nmo = Phi->getOrbitalSetSize(); + ValueMatrix new_rot_mat(nmo, nmo); + constructDeltaRotation(delta_param, old_param, m_act_rot_inds, m_full_rot_inds, new_param, new_rot_mat); - Phi->applyRotation(new_rot_mat, true); + Phi->applyRotation(new_rot_mat, true); } -template -void -RotatedSPOsT::constructDeltaRotation( - const std::vector& delta_param, - const std::vector& old_param, const RotationIndices& act_rot_inds, - const RotationIndices& full_rot_inds, std::vector& new_param, - ValueMatrix& new_rot_mat) +template +void RotatedSPOsT::constructDeltaRotation(const std::vector& delta_param, + const std::vector& old_param, + const RotationIndices& act_rot_inds, + const RotationIndices& full_rot_inds, + std::vector& new_param, + ValueMatrix& new_rot_mat) { - assert(delta_param.size() == act_rot_inds.size()); - assert(old_param.size() == full_rot_inds.size()); - assert(new_param.size() == full_rot_inds.size()); + assert(delta_param.size() == act_rot_inds.size()); + assert(old_param.size() == full_rot_inds.size()); + assert(new_param.size() == full_rot_inds.size()); - const size_t nmo = new_rot_mat.rows(); - assert(new_rot_mat.rows() == new_rot_mat.cols()); + const size_t nmo = new_rot_mat.rows(); + assert(new_rot_mat.rows() == new_rot_mat.cols()); - ValueMatrix old_rot_mat(nmo, nmo); + ValueMatrix old_rot_mat(nmo, 
nmo); - constructAntiSymmetricMatrix(full_rot_inds, old_param, old_rot_mat); - exponentiate_antisym_matrix(old_rot_mat); + constructAntiSymmetricMatrix(full_rot_inds, old_param, old_rot_mat); + exponentiate_antisym_matrix(old_rot_mat); - ValueMatrix delta_rot_mat(nmo, nmo); + ValueMatrix delta_rot_mat(nmo, nmo); - constructAntiSymmetricMatrix(act_rot_inds, delta_param, delta_rot_mat); - exponentiate_antisym_matrix(delta_rot_mat); + constructAntiSymmetricMatrix(act_rot_inds, delta_param, delta_rot_mat); + exponentiate_antisym_matrix(delta_rot_mat); - // Apply delta rotation to old rotation. - BLAS::gemm('N', 'N', nmo, nmo, nmo, 1.0, delta_rot_mat.data(), nmo, - old_rot_mat.data(), nmo, 0.0, new_rot_mat.data(), nmo); + // Apply delta rotation to old rotation. + BLAS::gemm('N', 'N', nmo, nmo, nmo, 1.0, delta_rot_mat.data(), nmo, old_rot_mat.data(), nmo, 0.0, new_rot_mat.data(), + nmo); - ValueMatrix log_rot_mat(nmo, nmo); - log_antisym_matrix(new_rot_mat, log_rot_mat); - extractParamsFromAntiSymmetricMatrix(full_rot_inds, log_rot_mat, new_param); + ValueMatrix log_rot_mat(nmo, nmo); + log_antisym_matrix(new_rot_mat, log_rot_mat); + extractParamsFromAntiSymmetricMatrix(full_rot_inds, log_rot_mat, new_param); } -template -void -RotatedSPOsT::applyFullRotation( - const std::vector& full_param, bool use_stored_copy) +template +void RotatedSPOsT::applyFullRotation(const std::vector& full_param, bool use_stored_copy) { - assert(full_param.size() == m_full_rot_inds.size()); + assert(full_param.size() == m_full_rot_inds.size()); - const size_t nmo = Phi->getOrbitalSetSize(); - ValueMatrix rot_mat(nmo, nmo); - rot_mat = T(0); + const size_t nmo = Phi->getOrbitalSetSize(); + ValueMatrix rot_mat(nmo, nmo); + rot_mat = T(0); - constructAntiSymmetricMatrix(m_full_rot_inds, full_param, rot_mat); + constructAntiSymmetricMatrix(m_full_rot_inds, full_param, rot_mat); - /* + /* rot_mat is now an anti-hermitian matrix. Now we convert it into a unitary matrix via rot_mat = exp(-rot_mat). 
Finally, apply unitary matrix to orbs. */ - exponentiate_antisym_matrix(rot_mat); - Phi->applyRotation(rot_mat, use_stored_copy); + exponentiate_antisym_matrix(rot_mat); + Phi->applyRotation(rot_mat, use_stored_copy); } -template -void -RotatedSPOsT::applyRotationHistory() +template +void RotatedSPOsT::applyRotationHistory() { - for (auto delta_param : history_params_) { - apply_rotation(delta_param, false); - } + for (auto delta_param : history_params_) + { + apply_rotation(delta_param, false); + } } // compute exponential of a real, antisymmetric matrix by diagonalizing and // exponentiating eigenvalues -template -void -RotatedSPOsT::exponentiate_antisym_matrix(ValueMatrix& mat) +template +void RotatedSPOsT::exponentiate_antisym_matrix(ValueMatrix& mat) { - const int n = mat.rows(); - std::vector> mat_h(n * n, 0); - std::vector eval(n, 0); - std::vector> work(2 * n, 0); - std::vector rwork(3 * n, 0); - std::vector> mat_d(n * n, 0); - std::vector> mat_t(n * n, 0); - // exponentiating e^X = e^iY (Y hermitian) - // i(-iX) = X, so -iX is hermitian - // diagonalize -iX = UDU^T, exponentiate e^iD, and return U e^iD U^T - // construct hermitian analogue of mat by multiplying by -i - for (int i = 0; i < n; ++i) { - for (int j = i; j < n; ++j) { - mat_h[i + n * j] = std::complex(0, -1.0 * mat[j][i]); - mat_h[j + n * i] = std::complex(0, 1.0 * mat[j][i]); - } + const int n = mat.rows(); + std::vector> mat_h(n * n, 0); + std::vector eval(n, 0); + std::vector> work(2 * n, 0); + std::vector rwork(3 * n, 0); + std::vector> mat_d(n * n, 0); + std::vector> mat_t(n * n, 0); + // exponentiating e^X = e^iY (Y hermitian) + // i(-iX) = X, so -iX is hermitian + // diagonalize -iX = UDU^T, exponentiate e^iD, and return U e^iD U^T + // construct hermitian analogue of mat by multiplying by -i + for (int i = 0; i < n; ++i) + { + for (int j = i; j < n; ++j) + { + mat_h[i + n * j] = std::complex(0, -1.0 * mat[j][i]); + mat_h[j + n * i] = std::complex(0, 1.0 * mat[j][i]); } - // diagonalize 
the matrix - char JOBZ('V'); - char UPLO('U'); - int N(n); - int LDA(n); - int LWORK(2 * n); - int info = 0; - LAPACK::heev(JOBZ, UPLO, N, &mat_h.at(0), LDA, &eval.at(0), &work.at(0), - LWORK, &rwork.at(0), info); - if (info != 0) { - std::ostringstream msg; - msg << "heev failed with info = " << info - << " in RotatedSPOsT::exponentiate_antisym_matrix"; - throw std::runtime_error(msg.str()); + } + // diagonalize the matrix + char JOBZ('V'); + char UPLO('U'); + int N(n); + int LDA(n); + int LWORK(2 * n); + int info = 0; + LAPACK::heev(JOBZ, UPLO, N, &mat_h.at(0), LDA, &eval.at(0), &work.at(0), LWORK, &rwork.at(0), info); + if (info != 0) + { + std::ostringstream msg; + msg << "heev failed with info = " << info << " in RotatedSPOsT::exponentiate_antisym_matrix"; + throw std::runtime_error(msg.str()); + } + // iterate through diagonal matrix, exponentiate terms + for (int i = 0; i < n; ++i) + { + for (int j = 0; j < n; ++j) + { + mat_d[i + j * n] = (i == j) ? std::exp(std::complex(0.0, eval[i])) : std::complex(0.0, 0.0); } - // iterate through diagonal matrix, exponentiate terms - for (int i = 0; i < n; ++i) { - for (int j = 0; j < n; ++j) { - mat_d[i + j * n] = (i == j) ? 
- std::exp(std::complex(0.0, eval[i])) : - std::complex(0.0, 0.0); - } + } + // perform matrix multiplication + // assume row major + BLAS::gemm('N', 'C', n, n, n, std::complex(1.0, 0), &mat_d.at(0), n, &mat_h.at(0), n, + std::complex(0.0, 0.0), &mat_t.at(0), n); + BLAS::gemm('N', 'N', n, n, n, std::complex(1.0, 0), &mat_h.at(0), n, &mat_t.at(0), n, + std::complex(0.0, 0.0), &mat_d.at(0), n); + for (int i = 0; i < n; ++i) + for (int j = 0; j < n; ++j) + { + if (mat_d[i + n * j].imag() > 1e-12) + { + app_log() << "warning: large imaginary value in orbital " + "rotation matrix: (i,j) = (" + << i << "," << j << "), im = " << mat_d[i + n * j].imag() << std::endl; + } + mat[j][i] = mat_d[i + n * j].real(); } - // perform matrix multiplication - // assume row major - BLAS::gemm('N', 'C', n, n, n, std::complex(1.0, 0), &mat_d.at(0), - n, &mat_h.at(0), n, std::complex(0.0, 0.0), &mat_t.at(0), n); - BLAS::gemm('N', 'N', n, n, n, std::complex(1.0, 0), &mat_h.at(0), - n, &mat_t.at(0), n, std::complex(0.0, 0.0), &mat_d.at(0), n); - for (int i = 0; i < n; ++i) - for (int j = 0; j < n; ++j) { - if (mat_d[i + n * j].imag() > 1e-12) { - app_log() << "warning: large imaginary value in orbital " - "rotation matrix: (i,j) = (" - << i << "," << j - << "), im = " << mat_d[i + n * j].imag() << std::endl; - } - mat[j][i] = mat_d[i + n * j].real(); - } } -template -void -RotatedSPOsT::log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output) +template +void RotatedSPOsT::log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output) { - const int n = mat.rows(); - std::vector mat_h(n * n, 0); - std::vector eval_r(n, 0); - std::vector eval_i(n, 0); - std::vector mat_l(n * n, 0); - std::vector work(4 * n, 0); - - std::vector> mat_cd(n * n, 0); - std::vector> mat_cl(n * n, 0); - std::vector> mat_ch(n * n, 0); - - for (int i = 0; i < n; ++i) - for (int j = 0; j < n; ++j) - mat_h[i + n * j] = mat[i][j]; - - // diagonalize the matrix - char JOBL('V'); - char JOBR('N'); - int N(n); - int 
LDA(n); - int LWORK(4 * n); - int info = 0; - LAPACK::geev(&JOBL, &JOBR, &N, &mat_h.at(0), &LDA, &eval_r.at(0), - &eval_i.at(0), &mat_l.at(0), &LDA, nullptr, &LDA, &work.at(0), &LWORK, - &info); - if (info != 0) { - std::ostringstream msg; - msg << "heev failed with info = " << info - << " in RotatedSPOsT::log_antisym_matrix"; - throw std::runtime_error(msg.str()); + const int n = mat.rows(); + std::vector mat_h(n * n, 0); + std::vector eval_r(n, 0); + std::vector eval_i(n, 0); + std::vector mat_l(n * n, 0); + std::vector work(4 * n, 0); + + std::vector> mat_cd(n * n, 0); + std::vector> mat_cl(n * n, 0); + std::vector> mat_ch(n * n, 0); + + for (int i = 0; i < n; ++i) + for (int j = 0; j < n; ++j) + mat_h[i + n * j] = mat[i][j]; + + // diagonalize the matrix + char JOBL('V'); + char JOBR('N'); + int N(n); + int LDA(n); + int LWORK(4 * n); + int info = 0; + LAPACK::geev(&JOBL, &JOBR, &N, &mat_h.at(0), &LDA, &eval_r.at(0), &eval_i.at(0), &mat_l.at(0), &LDA, nullptr, &LDA, + &work.at(0), &LWORK, &info); + if (info != 0) + { + std::ostringstream msg; + msg << "heev failed with info = " << info << " in RotatedSPOsT::log_antisym_matrix"; + throw std::runtime_error(msg.str()); + } + + // iterate through diagonal matrix, take log + for (int i = 0; i < n; ++i) + { + for (int j = 0; j < n; ++j) + { + auto tmp = (i == j) ? std::log(std::complex(eval_r[i], eval_i[i])) : std::complex(0.0, 0.0); + mat_cd[i + j * n] = tmp; + + if (eval_i[j] > 0.0) + { + mat_cl[i + j * n] = std::complex(mat_l[i + j * n], mat_l[i + (j + 1) * n]); + mat_cl[i + (j + 1) * n] = std::complex(mat_l[i + j * n], -mat_l[i + (j + 1) * n]); + } + else if (!(eval_i[j] < 0.0)) + { + mat_cl[i + j * n] = std::complex(mat_l[i + j * n], 0.0); + } } - - // iterate through diagonal matrix, take log - for (int i = 0; i < n; ++i) { - for (int j = 0; j < n; ++j) { - auto tmp = (i == j) ? 
- std::log(std::complex(eval_r[i], eval_i[i])) : - std::complex(0.0, 0.0); - mat_cd[i + j * n] = tmp; - - if (eval_i[j] > 0.0) { - mat_cl[i + j * n] = std::complex( - mat_l[i + j * n], mat_l[i + (j + 1) * n]); - mat_cl[i + (j + 1) * n] = std::complex( - mat_l[i + j * n], -mat_l[i + (j + 1) * n]); - } - else if (!(eval_i[j] < 0.0)) { - mat_cl[i + j * n] = - std::complex(mat_l[i + j * n], 0.0); - } - } + } + + RealType one(1.0); + RealType zero(0.0); + BLAS::gemm('N', 'N', n, n, n, one, &mat_cl.at(0), n, &mat_cd.at(0), n, zero, &mat_ch.at(0), n); + BLAS::gemm('N', 'C', n, n, n, one, &mat_ch.at(0), n, &mat_cl.at(0), n, zero, &mat_cd.at(0), n); + + for (int i = 0; i < n; ++i) + for (int j = 0; j < n; ++j) + { + if (mat_cd[i + n * j].imag() > 1e-12) + { + app_log() << "warning: large imaginary value in antisymmetric " + "matrix: (i,j) = (" + << i << "," << j << "), im = " << mat_cd[i + n * j].imag() << std::endl; + } + output[i][j] = mat_cd[i + n * j].real(); } - - RealType one(1.0); - RealType zero(0.0); - BLAS::gemm('N', 'N', n, n, n, one, &mat_cl.at(0), n, &mat_cd.at(0), n, zero, - &mat_ch.at(0), n); - BLAS::gemm('N', 'C', n, n, n, one, &mat_ch.at(0), n, &mat_cl.at(0), n, zero, - &mat_cd.at(0), n); - - for (int i = 0; i < n; ++i) - for (int j = 0; j < n; ++j) { - if (mat_cd[i + n * j].imag() > 1e-12) { - app_log() << "warning: large imaginary value in antisymmetric " - "matrix: (i,j) = (" - << i << "," << j - << "), im = " << mat_cd[i + n * j].imag() - << std::endl; - } - output[i][j] = mat_cd[i + n * j].real(); - } } template @@ -667,79 +646,78 @@ void RotatedSPOsT::evaluateDerivRatios(const VirtualParticleSetT& VP, int FirstIndex, int LastIndex) { - Phi->evaluateDetRatios(VP, psi, psiinv, ratios); + Phi->evaluateDetRatios(VP, psi, psiinv, ratios); - const size_t nel = LastIndex - FirstIndex; - const size_t nmo = Phi->getOrbitalSetSize(); + const size_t nel = LastIndex - FirstIndex; + const size_t nmo = Phi->getOrbitalSetSize(); - psiM_inv.resize(nel, nel); - 
psiM_all.resize(nel, nmo); - dpsiM_all.resize(nel, nmo); - d2psiM_all.resize(nel, nmo); + psiM_inv.resize(nel, nel); + psiM_all.resize(nel, nmo); + dpsiM_all.resize(nel, nmo); + d2psiM_all.resize(nel, nmo); - psiM_inv = 0; - psiM_all = 0; - dpsiM_all = 0; - d2psiM_all = 0; + psiM_inv = 0; + psiM_all = 0; + dpsiM_all = 0; + d2psiM_all = 0; - const ParticleSetT& P = VP.getRefPS(); - int iel = VP.refPtcl; + const ParticleSetT& P = VP.getRefPS(); + int iel = VP.refPtcl; - Phi->evaluate_notranspose( - P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all); + Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all); - for (int i = 0; i < nel; i++) - for (int j = 0; j < nel; j++) - psiM_inv(i, j) = psiM_all(i, j); + for (int i = 0; i < nel; i++) + for (int j = 0; j < nel; j++) + psiM_inv(i, j) = psiM_all(i, j); - Invert(psiM_inv.data(), nel, nel); + Invert(psiM_inv.data(), nel, nel); - const T* const A(psiM_all.data()); - const T* const Ainv(psiM_inv.data()); - ValueMatrix T_orig; - T_orig.resize(nel, nmo); - - BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), - T_orig.data(), nmo); + const T* const A(psiM_all.data()); + const T* const Ainv(psiM_inv.data()); + ValueMatrix T_orig; + T_orig.resize(nel, nmo); - ValueMatrix T_mat; - T_mat.resize(nel, nmo); + BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_orig.data(), nmo); - ValueVector tmp_psi; - tmp_psi.resize(nmo); + ValueMatrix T_mat; + T_mat.resize(nel, nmo); - for (int iat = 0; iat < VP.getTotalNum(); iat++) { - Phi->evaluateValue(VP, iat, tmp_psi); + ValueVector tmp_psi; + tmp_psi.resize(nmo); - for (int j = 0; j < nmo; j++) - psiM_all(iel - FirstIndex, j) = tmp_psi[j]; + for (int iat = 0; iat < VP.getTotalNum(); iat++) + { + Phi->evaluateValue(VP, iat, tmp_psi); - for (int i = 0; i < nel; i++) - for (int j = 0; j < nel; j++) - psiM_inv(i, j) = psiM_all(i, j); + for (int j = 0; j < nmo; j++) + psiM_all(iel - FirstIndex, j) = tmp_psi[j]; - 
Invert(psiM_inv.data(), nel, nel); + for (int i = 0; i < nel; i++) + for (int j = 0; j < nel; j++) + psiM_inv(i, j) = psiM_all(i, j); - const T* const A(psiM_all.data()); - const T* const Ainv(psiM_inv.data()); + Invert(psiM_inv.data(), nel, nel); - // The matrix A is rectangular. Ainv is the inverse of the square part - // of the matrix. The multiply of Ainv and the square part of A is just - // the identity. This multiply could be reduced to Ainv and the - // non-square part of A. - BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), - T_mat.data(), nmo); + const T* const A(psiM_all.data()); + const T* const Ainv(psiM_inv.data()); - for (int i = 0; i < m_act_rot_inds.size(); i++) { - int kk = this->myVars.where(i); - if (kk >= 0) { - const int p = m_act_rot_inds.at(i).first; - const int q = m_act_rot_inds.at(i).second; - dratios(iat, kk) = T_mat(p, q) - - T_orig(p, q); // dratio size is (nknot, num_vars) - } - } + // The matrix A is rectangular. Ainv is the inverse of the square part + // of the matrix. The multiply of Ainv and the square part of A is just + // the identity. This multiply could be reduced to Ainv and the + // non-square part of A. 
+ BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_mat.data(), nmo); + + for (int i = 0; i < m_act_rot_inds.size(); i++) + { + int kk = this->myVars.where(i); + if (kk >= 0) + { + const int p = m_act_rot_inds.at(i).first; + const int q = m_act_rot_inds.at(i).second; + dratios(iat, kk) = T_mat(p, q) - T_orig(p, q); // dratio size is (nknot, num_vars) + } } + } } template @@ -749,47 +727,47 @@ void RotatedSPOsT::evaluateDerivativesWF(ParticleSetT& P, int FirstIndex, int LastIndex) { - const size_t nel = LastIndex - FirstIndex; - const size_t nmo = Phi->getOrbitalSetSize(); + const size_t nel = LastIndex - FirstIndex; + const size_t nmo = Phi->getOrbitalSetSize(); - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1 + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1 - psiM_inv.resize(nel, nel); - psiM_all.resize(nel, nmo); - dpsiM_all.resize(nel, nmo); - d2psiM_all.resize(nel, nmo); + psiM_inv.resize(nel, nel); + psiM_all.resize(nel, nmo); + dpsiM_all.resize(nel, nmo); + d2psiM_all.resize(nel, nmo); - psiM_inv = 0; - psiM_all = 0; - dpsiM_all = 0; - d2psiM_all = 0; + psiM_inv = 0; + psiM_all = 0; + dpsiM_all = 0; + d2psiM_all = 0; - Phi->evaluate_notranspose( - P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all); + Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all); - for (int i = 0; i < nel; i++) - for (int j = 0; j < nel; j++) - psiM_inv(i, j) = psiM_all(i, j); + for (int i = 0; i < nel; i++) + for (int j = 0; j < nel; j++) + psiM_inv(i, j) = psiM_all(i, j); - Invert(psiM_inv.data(), nel, nel); + Invert(psiM_inv.data(), nel, nel); - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2 - const T* const A(psiM_all.data()); - const T* const Ainv(psiM_inv.data()); - ValueMatrix T_mat; - T_mat.resize(nel, nmo); - - BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), - T_mat.data(), nmo); - - for (int i = 0; i < m_act_rot_inds.size(); i++) { - int kk = this->myVars.where(i); - if (kk >= 0) { - const int p = 
m_act_rot_inds.at(i).first; - const int q = m_act_rot_inds.at(i).second; - dlogpsi[kk] = T_mat(p, q); - } + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2 + const T* const A(psiM_all.data()); + const T* const Ainv(psiM_inv.data()); + ValueMatrix T_mat; + T_mat.resize(nel, nmo); + + BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_mat.data(), nmo); + + for (int i = 0; i < m_act_rot_inds.size(); i++) + { + int kk = this->myVars.where(i); + if (kk >= 0) + { + const int p = m_act_rot_inds.at(i).first; + const int q = m_act_rot_inds.at(i).second; + dlogpsi[kk] = T_mat(p, q); } + } } template @@ -800,104 +778,102 @@ void RotatedSPOsT::evaluateDerivatives(ParticleSetT& P, const int& FirstIndex, const int& LastIndex) { - const size_t nel = LastIndex - FirstIndex; - const size_t nmo = Phi->getOrbitalSetSize(); - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1 - myG_temp.resize(nel); - myG_J.resize(nel); - myL_temp.resize(nel); - myL_J.resize(nel); - - myG_temp = 0; - myG_J = 0; - myL_temp = 0; - myL_J = 0; - - Bbar.resize(nel, nmo); - psiM_inv.resize(nel, nel); - psiM_all.resize(nel, nmo); - dpsiM_all.resize(nel, nmo); - d2psiM_all.resize(nel, nmo); - - Bbar = 0; - psiM_inv = 0; - psiM_all = 0; - dpsiM_all = 0; - d2psiM_all = 0; - - Phi->evaluate_notranspose( - P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all); - + const size_t nel = LastIndex - FirstIndex; + const size_t nmo = Phi->getOrbitalSetSize(); + + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1 + myG_temp.resize(nel); + myG_J.resize(nel); + myL_temp.resize(nel); + myL_J.resize(nel); + + myG_temp = 0; + myG_J = 0; + myL_temp = 0; + myL_J = 0; + + Bbar.resize(nel, nmo); + psiM_inv.resize(nel, nel); + psiM_all.resize(nel, nmo); + dpsiM_all.resize(nel, nmo); + d2psiM_all.resize(nel, nmo); + + Bbar = 0; + psiM_inv = 0; + psiM_all = 0; + dpsiM_all = 0; + d2psiM_all = 0; + + Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all); + + for (int i = 0; i < nel; i++) + for (int j 
= 0; j < nel; j++) + psiM_inv(i, j) = psiM_all(i, j); + + Invert(psiM_inv.data(), nel, nel); + + // current value of Gradient and Laplacian + // gradient components + for (int a = 0; a < nel; a++) for (int i = 0; i < nel; i++) - for (int j = 0; j < nel; j++) - psiM_inv(i, j) = psiM_all(i, j); - - Invert(psiM_inv.data(), nel, nel); - - // current value of Gradient and Laplacian - // gradient components - for (int a = 0; a < nel; a++) - for (int i = 0; i < nel; i++) - for (int k = 0; k < 3; k++) - myG_temp[a][k] += psiM_inv(i, a) * dpsiM_all(a, i)[k]; - // laplacian components - for (int a = 0; a < nel; a++) { - for (int i = 0; i < nel; i++) - myL_temp[a] += psiM_inv(i, a) * d2psiM_all(a, i); - } - - // calculation of myG_J which will be used to represent - // \frac{\nabla\psi_{J}}{\psi_{J}} calculation of myL_J will be used to - // represent \frac{\nabla^2\psi_{J}}{\psi_{J}} IMPORTANT NOTE: The value of - // P.L holds \nabla^2 ln[\psi] but we need \frac{\nabla^2 \psi}{\psi} and - // this is what myL_J will hold - for (int a = 0, iat = FirstIndex; a < nel; a++, iat++) { - myG_J[a] = (P.G[iat] - myG_temp[a]); - myL_J[a] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[a]); - } - // possibly replace wit BLAS calls + for (int k = 0; k < 3; k++) + myG_temp[a][k] += psiM_inv(i, a) * dpsiM_all(a, i)[k]; + // laplacian components + for (int a = 0; a < nel; a++) + { for (int i = 0; i < nel; i++) - for (int j = 0; j < nmo; j++) - Bbar(i, j) = d2psiM_all(i, j) + 2 * dot(myG_J[i], dpsiM_all(i, j)) + - myL_J[i] * psiM_all(i, j); - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2 - const ValueType* const A(psiM_all.data()); - const ValueType* const Ainv(psiM_inv.data()); - const ValueType* const B(Bbar.data()); - ValueMatrix t; - ValueMatrix Y1; - ValueMatrix Y2; - ValueMatrix Y3; - ValueMatrix Y4; - t.resize(nel, nmo); - Y1.resize(nel, nel); - Y2.resize(nel, nmo); - Y3.resize(nel, nmo); - Y4.resize(nel, nmo); - - BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), A, nmo, Ainv, nel, - 
ValueType(0.0), t.data(), nmo); - BLAS::gemm('N', 'N', nel, nel, nel, ValueType(1.0), B, nmo, Ainv, nel, - ValueType(0.0), Y1.data(), nel); - BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), t.data(), nmo, - Y1.data(), nel, ValueType(0.0), Y2.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), B, nmo, Ainv, nel, - ValueType(0.0), Y3.data(), nmo); - - // possibly replace with BLAS call - Y4 = Y3 - Y2; - - for (int i = 0; i < m_act_rot_inds.size(); i++) { - int kk = this->myVars.where(i); - if (kk >= 0) { - const int p = m_act_rot_inds.at(i).first; - const int q = m_act_rot_inds.at(i).second; - dlogpsi[kk] += t(p, q); - dhpsioverpsi[kk] += ValueType(-0.5) * Y4(p, q); - } + myL_temp[a] += psiM_inv(i, a) * d2psiM_all(a, i); + } + + // calculation of myG_J which will be used to represent + // \frac{\nabla\psi_{J}}{\psi_{J}} calculation of myL_J will be used to + // represent \frac{\nabla^2\psi_{J}}{\psi_{J}} IMPORTANT NOTE: The value of + // P.L holds \nabla^2 ln[\psi] but we need \frac{\nabla^2 \psi}{\psi} and + // this is what myL_J will hold + for (int a = 0, iat = FirstIndex; a < nel; a++, iat++) + { + myG_J[a] = (P.G[iat] - myG_temp[a]); + myL_J[a] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[a]); + } + // possibly replace wit BLAS calls + for (int i = 0; i < nel; i++) + for (int j = 0; j < nmo; j++) + Bbar(i, j) = d2psiM_all(i, j) + 2 * dot(myG_J[i], dpsiM_all(i, j)) + myL_J[i] * psiM_all(i, j); + + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2 + const ValueType* const A(psiM_all.data()); + const ValueType* const Ainv(psiM_inv.data()); + const ValueType* const B(Bbar.data()); + ValueMatrix t; + ValueMatrix Y1; + ValueMatrix Y2; + ValueMatrix Y3; + ValueMatrix Y4; + t.resize(nel, nmo); + Y1.resize(nel, nel); + Y2.resize(nel, nmo); + Y3.resize(nel, nmo); + Y4.resize(nel, nmo); + + BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), A, nmo, Ainv, nel, ValueType(0.0), t.data(), nmo); + BLAS::gemm('N', 'N', nel, nel, nel, ValueType(1.0), B, nmo, Ainv, 
nel, ValueType(0.0), Y1.data(), nel); + BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), t.data(), nmo, Y1.data(), nel, ValueType(0.0), Y2.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), B, nmo, Ainv, nel, ValueType(0.0), Y3.data(), nmo); + + // possibly replace with BLAS call + Y4 = Y3 - Y2; + + for (int i = 0; i < m_act_rot_inds.size(); i++) + { + int kk = this->myVars.where(i); + if (kk >= 0) + { + const int p = m_act_rot_inds.at(i).first; + const int q = m_act_rot_inds.at(i).second; + dlogpsi[kk] += t(p, q); + dhpsioverpsi[kk] += ValueType(-0.5) * Y4(p, q); } + } } template @@ -928,63 +904,68 @@ void RotatedSPOsT::evaluateDerivatives(ParticleSetT& P, const size_t NP2, const std::vector>& lookup_tbl) { - bool recalculate(false); - for (int k = 0; k < this->myVars.size(); ++k) { - int kk = this->myVars.where(k); - if (kk < 0) - continue; - if (optvars.recompute(kk)) - recalculate = true; + bool recalculate(false); + for (int k = 0; k < this->myVars.size(); ++k) + { + int kk = this->myVars.where(k); + if (kk < 0) + continue; + if (optvars.recompute(kk)) + recalculate = true; + } + if (recalculate) + { + typename ParticleSetT::ParticleGradient myG_temp, myG_J; + typename ParticleSetT::ParticleLaplacian myL_temp, myL_J; + const int NP = P.getTotalNum(); + myG_temp.resize(NP); + myG_temp = 0.0; + myL_temp.resize(NP); + myL_temp = 0.0; + myG_J.resize(NP); + myG_J = 0.0; + myL_J.resize(NP); + myL_J = 0.0; + const size_t nmo = Phi->getOrbitalSetSize(); + const size_t nel = P.last(0) - P.first(0); + + const T* restrict C_p = Coeff.data(); + for (int i = 0; i < Coeff.size(); i++) + { + const size_t upC = C2node_up[i]; + const size_t dnC = C2node_dn[i]; + const T tmp1 = C_p[i] * detValues_dn[dnC]; + const T tmp2 = C_p[i] * detValues_up[upC]; + for (size_t k = 0, j = N1; k < NP1; k++, j++) + { + myG_temp[j] += tmp1 * grads_up(upC, k); + myL_temp[j] += tmp1 * lapls_up(upC, k); + } + for (size_t k = 0, j = N2; k < NP2; k++, j++) + { + myG_temp[j] += 
tmp2 * grads_dn(dnC, k); + myL_temp[j] += tmp2 * lapls_dn(dnC, k); + } } - if (recalculate) { - typename ParticleSetT::ParticleGradient myG_temp, myG_J; - typename ParticleSetT::ParticleLaplacian myL_temp, myL_J; - const int NP = P.getTotalNum(); - myG_temp.resize(NP); - myG_temp = 0.0; - myL_temp.resize(NP); - myL_temp = 0.0; - myG_J.resize(NP); - myG_J = 0.0; - myL_J.resize(NP); - myL_J = 0.0; - const size_t nmo = Phi->getOrbitalSetSize(); - const size_t nel = P.last(0) - P.first(0); - - const T* restrict C_p = Coeff.data(); - for (int i = 0; i < Coeff.size(); i++) { - const size_t upC = C2node_up[i]; - const size_t dnC = C2node_dn[i]; - const T tmp1 = C_p[i] * detValues_dn[dnC]; - const T tmp2 = C_p[i] * detValues_up[upC]; - for (size_t k = 0, j = N1; k < NP1; k++, j++) { - myG_temp[j] += tmp1 * grads_up(upC, k); - myL_temp[j] += tmp1 * lapls_up(upC, k); - } - for (size_t k = 0, j = N2; k < NP2; k++, j++) { - myG_temp[j] += tmp2 * grads_dn(dnC, k); - myL_temp[j] += tmp2 * lapls_dn(dnC, k); - } - } - myG_temp *= (1 / psiCurrent); - myL_temp *= (1 / psiCurrent); - - // calculation of myG_J which will be used to represent - // \frac{\nabla\psi_{J}}{\psi_{J}} calculation of myL_J will be used to - // represent \frac{\nabla^2\psi_{J}}{\psi_{J}} IMPORTANT NOTE: The - // value of P.L holds \nabla^2 ln[\psi] but we need \frac{\nabla^2 - // \psi}{\psi} and this is what myL_J will hold - for (int iat = 0; iat < (myL_temp.size()); iat++) { - myG_J[iat] = (P.G[iat] - myG_temp[iat]); - myL_J[iat] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[iat]); - } + myG_temp *= (1 / psiCurrent); + myL_temp *= (1 / psiCurrent); - table_method_eval(dlogpsi, dhpsioverpsi, myL_J, myG_J, nel, nmo, - psiCurrent, Coeff, C2node_up, C2node_dn, detValues_up, detValues_dn, - grads_up, grads_dn, lapls_up, lapls_dn, M_up, M_dn, Minv_up, - Minv_dn, B_grad, B_lapl, detData_up, N1, N2, NP1, NP2, lookup_tbl); + // calculation of myG_J which will be used to represent + // 
\frac{\nabla\psi_{J}}{\psi_{J}} calculation of myL_J will be used to + // represent \frac{\nabla^2\psi_{J}}{\psi_{J}} IMPORTANT NOTE: The + // value of P.L holds \nabla^2 ln[\psi] but we need \frac{\nabla^2 + // \psi}{\psi} and this is what myL_J will hold + for (int iat = 0; iat < (myL_temp.size()); iat++) + { + myG_J[iat] = (P.G[iat] - myG_temp[iat]); + myL_J[iat] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[iat]); } + + table_method_eval(dlogpsi, dhpsioverpsi, myL_J, myG_J, nel, nmo, psiCurrent, Coeff, C2node_up, C2node_dn, + detValues_up, detValues_dn, grads_up, grads_dn, lapls_up, lapls_dn, M_up, M_dn, Minv_up, Minv_dn, + B_grad, B_lapl, detData_up, N1, N2, NP1, NP2, lookup_tbl); + } } template @@ -1004,40 +985,54 @@ void RotatedSPOsT::evaluateDerivativesWF(ParticleSetT& P, const std::vector& detData_up, const std::vector>& lookup_tbl) { - bool recalculate(false); - for (int k = 0; k < this->myVars.size(); ++k) { - int kk = this->myVars.where(k); - if (kk < 0) - continue; - if (optvars.recompute(kk)) - recalculate = true; - } - if (recalculate) { - const size_t nmo = Phi->getOrbitalSetSize(); - const size_t nel = P.last(0) - P.first(0); + bool recalculate(false); + for (int k = 0; k < this->myVars.size(); ++k) + { + int kk = this->myVars.where(k); + if (kk < 0) + continue; + if (optvars.recompute(kk)) + recalculate = true; + } + if (recalculate) + { + const size_t nmo = Phi->getOrbitalSetSize(); + const size_t nel = P.last(0) - P.first(0); - table_method_evalWF(dlogpsi, nel, nmo, psiCurrent, Coeff, C2node_up, - C2node_dn, detValues_up, detValues_dn, M_up, M_dn, Minv_up, Minv_dn, - detData_up, lookup_tbl); - } + table_method_evalWF(dlogpsi, nel, nmo, psiCurrent, Coeff, C2node_up, C2node_dn, detValues_up, detValues_dn, M_up, + M_dn, Minv_up, Minv_dn, detData_up, lookup_tbl); + } } -template -void -RotatedSPOsT::table_method_eval(Vector& dlogpsi, Vector& dhpsioverpsi, - const typename ParticleSetT::ParticleLaplacian& myL_J, - const typename 
ParticleSetT::ParticleGradient& myG_J, const size_t nel, - const size_t nmo, const T& psiCurrent, const std::vector& Coeff, - const std::vector& C2node_up, const std::vector& C2node_dn, - const ValueVector& detValues_up, const ValueVector& detValues_dn, - const GradMatrix& grads_up, const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, const ValueMatrix& B_lapl, - const std::vector& detData_up, const size_t N1, const size_t N2, - const size_t NP1, const size_t NP2, - const std::vector>& lookup_tbl) +template +void RotatedSPOsT::table_method_eval(Vector& dlogpsi, + Vector& dhpsioverpsi, + const typename ParticleSetT::ParticleLaplacian& myL_J, + const typename ParticleSetT::ParticleGradient& myG_J, + const size_t nel, + const size_t nmo, + const T& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const GradMatrix& grads_up, + const GradMatrix& grads_dn, + const ValueMatrix& lapls_up, + const ValueMatrix& lapls_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const GradMatrix& B_grad, + const ValueMatrix& B_lapl, + const std::vector& detData_up, + const size_t N1, + const size_t N2, + const size_t NP1, + const size_t NP2, + const std::vector>& lookup_tbl) /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GUIDE TO THE MATICES BEING BUILT ---------------------------------------------- @@ -1188,667 +1183,633 @@ to each element will be called B_bar $ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ { - ValueMatrix Table; - ValueMatrix Bbar; - ValueMatrix Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y11, Y23, Y24, Y25, Y26; - ValueMatrix pK1, K1T, TK1T, pK2, K2AiB, TK2AiB, K2XA, TK2XA, K2T, TK2T, - 
MK2T, pK3, K3T, TK3T, pK5, K5T, TK5T; - - Table.resize(nel, nmo); - - Bbar.resize(nel, nmo); - - Y1.resize(nel, nel); - Y2.resize(nel, nmo); - Y3.resize(nel, nmo); - Y4.resize(nel, nmo); - - pK1.resize(nmo, nel); - K1T.resize(nmo, nmo); - TK1T.resize(nel, nmo); - - pK2.resize(nmo, nel); - K2AiB.resize(nmo, nmo); - TK2AiB.resize(nel, nmo); - K2XA.resize(nmo, nmo); - TK2XA.resize(nel, nmo); - K2T.resize(nmo, nmo); - TK2T.resize(nel, nmo); - MK2T.resize(nel, nmo); - - pK3.resize(nmo, nel); - K3T.resize(nmo, nmo); - TK3T.resize(nel, nmo); - - pK5.resize(nmo, nel); - K5T.resize(nmo, nmo); - TK5T.resize(nel, nmo); - - const int parameters_size(m_act_rot_inds.size()); - const int parameter_start_index(0); - - const size_t num_unique_up_dets(detValues_up.size()); - const size_t num_unique_dn_dets(detValues_dn.size()); - - const T* restrict cptr = Coeff.data(); - const size_t nc = Coeff.size(); - const size_t* restrict upC(C2node_up.data()); - const size_t* restrict dnC(C2node_dn.data()); - // B_grad holds the gradient operator - // B_lapl holds the laplacian operator - // B_bar will hold our special O operator - - const int offset1(N1); - const int offset2(N2); - const int NPother(NP2); - - T* T_(Table.data()); - - // possibly replace wit BLAS calls - for (int i = 0; i < nel; i++) - for (int j = 0; j < nmo; j++) - Bbar(i, j) = B_lapl(i, j) + - 2.0 * dot(myG_J[i + offset1], B_grad(i, j)) + - myL_J[i + offset1] * M_up(i, j); - - const T* restrict B(Bbar.data()); - const T* restrict A(M_up.data()); - const T* restrict Ainv(Minv_up.data()); - // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF - // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. 
NEED A FIX FOR - // THIS CASE - // The T matrix should be calculated and stored for use - // T = A^{-1} \widetilde A - // REMINDER: that the ValueMatrix "matrix" stores data in a row major order - // and that BLAS commands assume column major - BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, - RealType(0.0), T_, nmo); - - BLAS::gemm('N', 'N', nel, nel, nel, T(1.0), B, nmo, Ainv, nel, - RealType(0.0), Y1.data(), nel); - BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), T_, nmo, Y1.data(), nel, - RealType(0.0), Y2.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), B, nmo, Ainv, nel, - RealType(0.0), Y3.data(), nmo); - - // possibly replace with BLAS call - Y4 = Y3 - Y2; - - // Need to create the constants: (Oi, const0, const1, const2)to take - // advantage of minimal BLAS commands; Oi is the special operator applied to - // the slater matrix "A subscript i" from the total CI expansion \hat{O_{i}} - //= \hat{O}D_{i} with D_{i}=det(A_{i}) and Multi-Slater component defined as - //\sum_{i=0} C_{i} D_{i\uparrow}D_{i\downarrow} - std::vector Oi(num_unique_dn_dets); - - for (int index = 0; index < num_unique_dn_dets; index++) - for (int iat = 0; iat < NPother; iat++) - Oi[index] += lapls_dn(index, iat) + - 2.0 * dot(grads_dn(index, iat), myG_J[offset2 + iat]) + - myL_J[offset2 + iat] * detValues_dn[index]; - - // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1} - // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow}) const1 = - // C_{0}*\hat{O} det(A_{0\downarrow})+\sum_{i=1} - // C_{i}*\hat{O}det(A_{i\downarrow})* det(\alpha_{i\uparrow}) const2 = - // \sum_{i=1} C_{i}*det(A_{i\downarrow})* - // Tr[\alpha_{i}^{-1}M_{i}]*det(\alpha_{i}) - RealType const0(0.0), const1(0.0), const2(0.0); - for (size_t i = 0; i < nc; ++i) { - const RealType c = cptr[i]; - const size_t up = upC[i]; - const size_t down = dnC[i]; - - const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]); - const1 += c * Oi[down] * (detValues_up[up] / detValues_up[0]); + 
ValueMatrix Table; + ValueMatrix Bbar; + ValueMatrix Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y11, Y23, Y24, Y25, Y26; + ValueMatrix pK1, K1T, TK1T, pK2, K2AiB, TK2AiB, K2XA, TK2XA, K2T, TK2T, MK2T, pK3, K3T, TK3T, pK5, K5T, TK5T; + + Table.resize(nel, nmo); + + Bbar.resize(nel, nmo); + + Y1.resize(nel, nel); + Y2.resize(nel, nmo); + Y3.resize(nel, nmo); + Y4.resize(nel, nmo); + + pK1.resize(nmo, nel); + K1T.resize(nmo, nmo); + TK1T.resize(nel, nmo); + + pK2.resize(nmo, nel); + K2AiB.resize(nmo, nmo); + TK2AiB.resize(nel, nmo); + K2XA.resize(nmo, nmo); + TK2XA.resize(nel, nmo); + K2T.resize(nmo, nmo); + TK2T.resize(nel, nmo); + MK2T.resize(nel, nmo); + + pK3.resize(nmo, nel); + K3T.resize(nmo, nmo); + TK3T.resize(nel, nmo); + + pK5.resize(nmo, nel); + K5T.resize(nmo, nmo); + TK5T.resize(nel, nmo); + + const int parameters_size(m_act_rot_inds.size()); + const int parameter_start_index(0); + + const size_t num_unique_up_dets(detValues_up.size()); + const size_t num_unique_dn_dets(detValues_dn.size()); + + const T* restrict cptr = Coeff.data(); + const size_t nc = Coeff.size(); + const size_t* restrict upC(C2node_up.data()); + const size_t* restrict dnC(C2node_dn.data()); + // B_grad holds the gradient operator + // B_lapl holds the laplacian operator + // B_bar will hold our special O operator + + const int offset1(N1); + const int offset2(N2); + const int NPother(NP2); + + T* T_(Table.data()); + + // possibly replace wit BLAS calls + for (int i = 0; i < nel; i++) + for (int j = 0; j < nmo; j++) + Bbar(i, j) = B_lapl(i, j) + 2.0 * dot(myG_J[i + offset1], B_grad(i, j)) + myL_J[i + offset1] * M_up(i, j); + + const T* restrict B(Bbar.data()); + const T* restrict A(M_up.data()); + const T* restrict Ainv(Minv_up.data()); + // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF + // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. 
NEED A FIX FOR + // THIS CASE + // The T matrix should be calculated and stored for use + // T = A^{-1} \widetilde A + // REMINDER: that the ValueMatrix "matrix" stores data in a row major order + // and that BLAS commands assume column major + BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, RealType(0.0), T_, nmo); + + BLAS::gemm('N', 'N', nel, nel, nel, T(1.0), B, nmo, Ainv, nel, RealType(0.0), Y1.data(), nel); + BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), T_, nmo, Y1.data(), nel, RealType(0.0), Y2.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), B, nmo, Ainv, nel, RealType(0.0), Y3.data(), nmo); + + // possibly replace with BLAS call + Y4 = Y3 - Y2; + + // Need to create the constants: (Oi, const0, const1, const2)to take + // advantage of minimal BLAS commands; Oi is the special operator applied to + // the slater matrix "A subscript i" from the total CI expansion \hat{O_{i}} + //= \hat{O}D_{i} with D_{i}=det(A_{i}) and Multi-Slater component defined as + //\sum_{i=0} C_{i} D_{i\uparrow}D_{i\downarrow} + std::vector Oi(num_unique_dn_dets); + + for (int index = 0; index < num_unique_dn_dets; index++) + for (int iat = 0; iat < NPother; iat++) + Oi[index] += lapls_dn(index, iat) + 2.0 * dot(grads_dn(index, iat), myG_J[offset2 + iat]) + + myL_J[offset2 + iat] * detValues_dn[index]; + + // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1} + // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow}) const1 = + // C_{0}*\hat{O} det(A_{0\downarrow})+\sum_{i=1} + // C_{i}*\hat{O}det(A_{i\downarrow})* det(\alpha_{i\uparrow}) const2 = + // \sum_{i=1} C_{i}*det(A_{i\downarrow})* + // Tr[\alpha_{i}^{-1}M_{i}]*det(\alpha_{i}) + RealType const0(0.0), const1(0.0), const2(0.0); + for (size_t i = 0; i < nc; ++i) + { + const RealType c = cptr[i]; + const size_t up = upC[i]; + const size_t down = dnC[i]; + + const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]); + const1 += c * Oi[down] * (detValues_up[up] / detValues_up[0]); + } + + 
std::fill(pK1.begin(), pK1.end(), 0.0); + std::fill(pK2.begin(), pK2.end(), 0.0); + std::fill(pK3.begin(), pK3.end(), 0.0); + std::fill(pK5.begin(), pK5.end(), 0.0); + + // Now we are going to loop through all unique determinants. + // The few lines above are for the reference matrix contribution. + // Although I start the loop below from index 0, the loop only performs + // actions when the index is >= 1 the detData object contains all the + // information about the P^T and Q matrices (projection matrices) needed in + // the table method + const int* restrict data_it = detData_up.data(); + for (int index = 0, datum = 0; index < num_unique_up_dets; index++) + { + const int k = data_it[datum]; + + if (k == 0) + { + datum += 3 * k + 1; } - std::fill(pK1.begin(), pK1.end(), 0.0); - std::fill(pK2.begin(), pK2.end(), 0.0); - std::fill(pK3.begin(), pK3.end(), 0.0); - std::fill(pK5.begin(), pK5.end(), 0.0); - - // Now we are going to loop through all unique determinants. - // The few lines above are for the reference matrix contribution. 
- // Although I start the loop below from index 0, the loop only performs - // actions when the index is >= 1 the detData object contains all the - // information about the P^T and Q matrices (projection matrices) needed in - // the table method - const int* restrict data_it = detData_up.data(); - for (int index = 0, datum = 0; index < num_unique_up_dets; index++) { - const int k = data_it[datum]; - - if (k == 0) { - datum += 3 * k + 1; - } - - else { - // Number of rows and cols of P^T - const int prows = k; - const int pcols = nel; - // Number of rows and cols of Q - const int qrows = nmo; - const int qcols = k; - - Y5.resize(nel, k); - Y6.resize(k, k); - - // Any matrix multiplication of P^T or Q is simply a projection - // Explicit matrix multiplication can be avoided; instead column or - // row copying can be done BlAS::copy(size of col/row being copied, - // Matrix pointer + place to begin copying, - // storage spacing (number of elements btw next row/col - // element), Pointer to resultant matrix + place to begin - // pasting, storage spacing of resultant matrix) - // For example the next 4 lines is the matrix multiplication of T*Q - // = Y5 - std::fill(Y5.begin(), Y5.end(), 0.0); - for (int i = 0; i < k; i++) { - BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo, - Y5.data() + i, k); - } - - std::fill(Y6.begin(), Y6.end(), 0.0); - for (int i = 0; i < k; i++) { - BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, - (Y6.data() + i * k), 1); - } - - Vector WS; - Vector Piv; - WS.resize(k); - Piv.resize(k); - std::complex logdet = 0.0; - InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet); - - Y11.resize(nel, k); - Y23.resize(k, k); - Y24.resize(k, k); - Y25.resize(k, k); - Y26.resize(k, nel); - - std::fill(Y11.begin(), Y11.end(), 0.0); - for (int i = 0; i < k; i++) { - BLAS::copy(nel, Y4.data() + (data_it[datum + 1 + k + i]), nmo, - Y11.data() + i, k); - } - - std::fill(Y23.begin(), Y23.end(), 0.0); - for (int i = 0; i < k; i++) { - 
BLAS::copy(k, Y11.data() + (data_it[datum + 1 + i]) * k, 1, - (Y23.data() + i * k), 1); - } - - BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y23.data(), k, - Y6.data(), k, RealType(0.0), Y24.data(), k); - BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y6.data(), k, - Y24.data(), k, RealType(0.0), Y25.data(), k); - - Y26.resize(k, nel); - - std::fill(Y26.begin(), Y26.end(), 0.0); - for (int i = 0; i < k; i++) { - BLAS::copy(k, Y25.data() + i, k, - Y26.data() + (data_it[datum + 1 + i]), nel); - } - - Y7.resize(k, nel); - - std::fill(Y7.begin(), Y7.end(), 0.0); - for (int i = 0; i < k; i++) { - BLAS::copy(k, Y6.data() + i, k, - Y7.data() + (data_it[datum + 1 + i]), nel); - } - - // c_Tr_AlphaI_MI is a constant contributing to constant const2 - // c_Tr_AlphaI_MI = Tr[\alpha_{I}^{-1}(P^{T}\widetilde{M} Q)] - RealType c_Tr_AlphaI_MI = 0.0; - for (int i = 0; i < k; i++) { - c_Tr_AlphaI_MI += Y24(i, i); - } - - for (int p = 0; p < lookup_tbl[index].size(); p++) { - // el_p is the element position that contains information about - // the CI coefficient, and det up/dn values associated with the - // current unique determinant - const int el_p(lookup_tbl[index][p]); - const RealType c = cptr[el_p]; - const size_t up = upC[el_p]; - const size_t down = dnC[el_p]; - - const RealType alpha_1(c * detValues_dn[down] * - detValues_up[up] / detValues_up[0] * c_Tr_AlphaI_MI); - const RealType alpha_2(c * detValues_dn[down] * - detValues_up[up] / detValues_up[0]); - const RealType alpha_3( - c * Oi[down] * detValues_up[up] / detValues_up[0]); - - const2 += alpha_1; - - for (int i = 0; i < k; i++) { - BLAS::axpy(nel, alpha_1, Y7.data() + i * nel, 1, - pK1.data() + (data_it[datum + 1 + k + i]) * nel, 1); - BLAS::axpy(nel, alpha_2, Y7.data() + i * nel, 1, - pK2.data() + (data_it[datum + 1 + k + i]) * nel, 1); - BLAS::axpy(nel, alpha_3, Y7.data() + i * nel, 1, - pK3.data() + (data_it[datum + 1 + k + i]) * nel, 1); - BLAS::axpy(nel, alpha_2, Y26.data() + i * nel, 1, - pK5.data() + 
(data_it[datum + 1 + k + i]) * nel, 1); - } - } - datum += 3 * k + 1; + else + { + // Number of rows and cols of P^T + const int prows = k; + const int pcols = nel; + // Number of rows and cols of Q + const int qrows = nmo; + const int qcols = k; + + Y5.resize(nel, k); + Y6.resize(k, k); + + // Any matrix multiplication of P^T or Q is simply a projection + // Explicit matrix multiplication can be avoided; instead column or + // row copying can be done BlAS::copy(size of col/row being copied, + // Matrix pointer + place to begin copying, + // storage spacing (number of elements btw next row/col + // element), Pointer to resultant matrix + place to begin + // pasting, storage spacing of resultant matrix) + // For example the next 4 lines is the matrix multiplication of T*Q + // = Y5 + std::fill(Y5.begin(), Y5.end(), 0.0); + for (int i = 0; i < k; i++) + { + BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo, Y5.data() + i, k); + } + + std::fill(Y6.begin(), Y6.end(), 0.0); + for (int i = 0; i < k; i++) + { + BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, (Y6.data() + i * k), 1); + } + + Vector WS; + Vector Piv; + WS.resize(k); + Piv.resize(k); + std::complex logdet = 0.0; + InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet); + + Y11.resize(nel, k); + Y23.resize(k, k); + Y24.resize(k, k); + Y25.resize(k, k); + Y26.resize(k, nel); + + std::fill(Y11.begin(), Y11.end(), 0.0); + for (int i = 0; i < k; i++) + { + BLAS::copy(nel, Y4.data() + (data_it[datum + 1 + k + i]), nmo, Y11.data() + i, k); + } + + std::fill(Y23.begin(), Y23.end(), 0.0); + for (int i = 0; i < k; i++) + { + BLAS::copy(k, Y11.data() + (data_it[datum + 1 + i]) * k, 1, (Y23.data() + i * k), 1); + } + + BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y23.data(), k, Y6.data(), k, RealType(0.0), Y24.data(), k); + BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y6.data(), k, Y24.data(), k, RealType(0.0), Y25.data(), k); + + Y26.resize(k, nel); + + std::fill(Y26.begin(), Y26.end(), 0.0); + for 
(int i = 0; i < k; i++) + { + BLAS::copy(k, Y25.data() + i, k, Y26.data() + (data_it[datum + 1 + i]), nel); + } + + Y7.resize(k, nel); + + std::fill(Y7.begin(), Y7.end(), 0.0); + for (int i = 0; i < k; i++) + { + BLAS::copy(k, Y6.data() + i, k, Y7.data() + (data_it[datum + 1 + i]), nel); + } + + // c_Tr_AlphaI_MI is a constant contributing to constant const2 + // c_Tr_AlphaI_MI = Tr[\alpha_{I}^{-1}(P^{T}\widetilde{M} Q)] + RealType c_Tr_AlphaI_MI = 0.0; + for (int i = 0; i < k; i++) + { + c_Tr_AlphaI_MI += Y24(i, i); + } + + for (int p = 0; p < lookup_tbl[index].size(); p++) + { + // el_p is the element position that contains information about + // the CI coefficient, and det up/dn values associated with the + // current unique determinant + const int el_p(lookup_tbl[index][p]); + const RealType c = cptr[el_p]; + const size_t up = upC[el_p]; + const size_t down = dnC[el_p]; + + const RealType alpha_1(c * detValues_dn[down] * detValues_up[up] / detValues_up[0] * c_Tr_AlphaI_MI); + const RealType alpha_2(c * detValues_dn[down] * detValues_up[up] / detValues_up[0]); + const RealType alpha_3(c * Oi[down] * detValues_up[up] / detValues_up[0]); + + const2 += alpha_1; + + for (int i = 0; i < k; i++) + { + BLAS::axpy(nel, alpha_1, Y7.data() + i * nel, 1, pK1.data() + (data_it[datum + 1 + k + i]) * nel, 1); + BLAS::axpy(nel, alpha_2, Y7.data() + i * nel, 1, pK2.data() + (data_it[datum + 1 + k + i]) * nel, 1); + BLAS::axpy(nel, alpha_3, Y7.data() + i * nel, 1, pK3.data() + (data_it[datum + 1 + k + i]) * nel, 1); + BLAS::axpy(nel, alpha_2, Y26.data() + i * nel, 1, pK5.data() + (data_it[datum + 1 + k + i]) * nel, 1); } + } + datum += 3 * k + 1; } - - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK1.data(), nel, - RealType(0.0), K1T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K1T.data(), nmo, T_, nmo, - RealType(0.0), TK1T.data(), nmo); - - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y3.data(), nmo, - pK2.data(), nel, RealType(0.0), 
K2AiB.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2AiB.data(), nmo, T_, - nmo, RealType(0.0), TK2AiB.data(), nmo); - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y2.data(), nmo, - pK2.data(), nel, RealType(0.0), K2XA.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2XA.data(), nmo, T_, - nmo, RealType(0.0), TK2XA.data(), nmo); - - BLAS::gemm('N', 'N', nmo, nmo, nel, const1 / (const0 * const0), T_, nmo, - pK2.data(), nel, RealType(0.0), K2T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2T.data(), nmo, T_, nmo, - RealType(0.0), TK2T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, const0 / const1, K2T.data(), nmo, - Y4.data(), nmo, RealType(0.0), MK2T.data(), nmo); - - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK3.data(), nel, - RealType(0.0), K3T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K3T.data(), nmo, T_, nmo, - RealType(0.0), TK3T.data(), nmo); - - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK5.data(), nel, - RealType(0.0), K5T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K5T.data(), nmo, T_, nmo, - RealType(0.0), TK5T.data(), nmo); - - for (int mu = 0, k = parameter_start_index; - k < (parameter_start_index + parameters_size); k++, mu++) { - int kk = this->myVars.where(k); - if (kk >= 0) { - const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second); - if (i <= nel - 1 && j > nel - 1) { - dhpsioverpsi[kk] += T(-0.5 * Y4(i, j) - - 0.5 * - (-K5T(i, j) + K5T(j, i) + TK5T(i, j) + K2AiB(i, j) - - K2AiB(j, i) - TK2AiB(i, j) - K2XA(i, j) + - K2XA(j, i) + TK2XA(i, j) - MK2T(i, j) + K1T(i, j) - - K1T(j, i) - TK1T(i, j) - - const2 / const1 * K2T(i, j) + - const2 / const1 * K2T(j, i) + - const2 / const1 * TK2T(i, j) + K3T(i, j) - - K3T(j, i) - TK3T(i, j) - K2T(i, j) + K2T(j, i) + - TK2T(i, j))); - } - else if (i <= nel - 1 && j <= nel - 1) { - dhpsioverpsi[kk] += T(-0.5 * (Y4(i, j) - Y4(j, i)) - - 0.5 * - 
(-K5T(i, j) + K5T(j, i) + TK5T(i, j) - TK5T(j, i) + - K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) + - TK2AiB(j, i) - K2XA(i, j) + K2XA(j, i) + - TK2XA(i, j) - TK2XA(j, i) - MK2T(i, j) + - MK2T(j, i) + K1T(i, j) - K1T(j, i) - TK1T(i, j) + - TK1T(j, i) - const2 / const1 * K2T(i, j) + - const2 / const1 * K2T(j, i) + - const2 / const1 * TK2T(i, j) - - const2 / const1 * TK2T(j, i) + K3T(i, j) - - K3T(j, i) - TK3T(i, j) + TK3T(j, i) - K2T(i, j) + - K2T(j, i) + TK2T(i, j) - TK2T(j, i))); - } - else { - dhpsioverpsi[kk] += T(-0.5 * - (-K5T(i, j) + K5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - - K2XA(i, j) + K2XA(j, i) - - + K1T(i, j) - K1T(j, i) - const2 / const1 * K2T(i, j) + - const2 / const1 * K2T(j, i) + K3T(i, j) - K3T(j, i) - - K2T(i, j) + K2T(j, i))); - } - } + } + + BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK1.data(), nel, RealType(0.0), K1T.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K1T.data(), nmo, T_, nmo, RealType(0.0), TK1T.data(), nmo); + + BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y3.data(), nmo, pK2.data(), nel, RealType(0.0), K2AiB.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2AiB.data(), nmo, T_, nmo, RealType(0.0), TK2AiB.data(), nmo); + BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y2.data(), nmo, pK2.data(), nel, RealType(0.0), K2XA.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2XA.data(), nmo, T_, nmo, RealType(0.0), TK2XA.data(), nmo); + + BLAS::gemm('N', 'N', nmo, nmo, nel, const1 / (const0 * const0), T_, nmo, pK2.data(), nel, RealType(0.0), K2T.data(), + nmo); + BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2T.data(), nmo, T_, nmo, RealType(0.0), TK2T.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nmo, const0 / const1, K2T.data(), nmo, Y4.data(), nmo, RealType(0.0), MK2T.data(), + nmo); + + BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK3.data(), nel, RealType(0.0), K3T.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), 
K3T.data(), nmo, T_, nmo, RealType(0.0), TK3T.data(), nmo); + + BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK5.data(), nel, RealType(0.0), K5T.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K5T.data(), nmo, T_, nmo, RealType(0.0), TK5T.data(), nmo); + + for (int mu = 0, k = parameter_start_index; k < (parameter_start_index + parameters_size); k++, mu++) + { + int kk = this->myVars.where(k); + if (kk >= 0) + { + const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second); + if (i <= nel - 1 && j > nel - 1) + { + dhpsioverpsi[kk] += + T(-0.5 * Y4(i, j) - + 0.5 * + (-K5T(i, j) + K5T(j, i) + TK5T(i, j) + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) - K2XA(i, j) + + K2XA(j, i) + TK2XA(i, j) - MK2T(i, j) + K1T(i, j) - K1T(j, i) - TK1T(i, j) - + const2 / const1 * K2T(i, j) + const2 / const1 * K2T(j, i) + const2 / const1 * TK2T(i, j) + + K3T(i, j) - K3T(j, i) - TK3T(i, j) - K2T(i, j) + K2T(j, i) + TK2T(i, j))); + } + else if (i <= nel - 1 && j <= nel - 1) + { + dhpsioverpsi[kk] += + T(-0.5 * (Y4(i, j) - Y4(j, i)) - + 0.5 * + (-K5T(i, j) + K5T(j, i) + TK5T(i, j) - TK5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) + + TK2AiB(j, i) - K2XA(i, j) + K2XA(j, i) + TK2XA(i, j) - TK2XA(j, i) - MK2T(i, j) + MK2T(j, i) + + K1T(i, j) - K1T(j, i) - TK1T(i, j) + TK1T(j, i) - const2 / const1 * K2T(i, j) + + const2 / const1 * K2T(j, i) + const2 / const1 * TK2T(i, j) - const2 / const1 * TK2T(j, i) + + K3T(i, j) - K3T(j, i) - TK3T(i, j) + TK3T(j, i) - K2T(i, j) + K2T(j, i) + TK2T(i, j) - TK2T(j, i))); + } + else + { + dhpsioverpsi[kk] += T(-0.5 * + (-K5T(i, j) + K5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - K2XA(i, j) + K2XA(j, i) + + + K1T(i, j) - K1T(j, i) - const2 / const1 * K2T(i, j) + const2 / const1 * K2T(j, i) + + K3T(i, j) - K3T(j, i) - K2T(i, j) + K2T(j, i))); + } } + } } -template -void -RotatedSPOsT::table_method_evalWF(Vector& dlogpsi, const size_t nel, - const size_t nmo, const T& psiCurrent, const std::vector& Coeff, - const std::vector& 
C2node_up, const std::vector& C2node_dn, - const ValueVector& detValues_up, const ValueVector& detValues_dn, - const ValueMatrix& M_up, const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl) +template +void RotatedSPOsT::table_method_evalWF(Vector& dlogpsi, + const size_t nel, + const size_t nmo, + const T& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const std::vector& detData_up, + const std::vector>& lookup_tbl) { - ValueMatrix Table; - ValueMatrix Y5, Y6, Y7; - ValueMatrix pK4, K4T, TK4T; - - Table.resize(nel, nmo); - - Bbar.resize(nel, nmo); - - pK4.resize(nmo, nel); - K4T.resize(nmo, nmo); - TK4T.resize(nel, nmo); - - const int parameters_size(m_act_rot_inds.size()); - const int parameter_start_index(0); - - const size_t num_unique_up_dets(detValues_up.size()); - const size_t num_unique_dn_dets(detValues_dn.size()); - - const T* restrict cptr = Coeff.data(); - const size_t nc = Coeff.size(); - const size_t* restrict upC(C2node_up.data()); - const size_t* restrict dnC(C2node_dn.data()); - - T* T_(Table.data()); - - const T* restrict A(M_up.data()); - const T* restrict Ainv(Minv_up.data()); - // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF - // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. 
NEED A FIX FOR - // THIS CASE - // The T matrix should be calculated and stored for use - // T = A^{-1} \widetilde A - // REMINDER: that the ValueMatrix "matrix" stores data in a row major order - // and that BLAS commands assume column major - BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), A, nmo, Ainv, nel, - RealType(0.0), T_, nmo); - - // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1} - // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow}) - RealType const0(0.0), const1(0.0), const2(0.0); - for (size_t i = 0; i < nc; ++i) { - const RealType c = cptr[i]; - const size_t up = upC[i]; - const size_t down = dnC[i]; - - const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]); + ValueMatrix Table; + ValueMatrix Y5, Y6, Y7; + ValueMatrix pK4, K4T, TK4T; + + Table.resize(nel, nmo); + + Bbar.resize(nel, nmo); + + pK4.resize(nmo, nel); + K4T.resize(nmo, nmo); + TK4T.resize(nel, nmo); + + const int parameters_size(m_act_rot_inds.size()); + const int parameter_start_index(0); + + const size_t num_unique_up_dets(detValues_up.size()); + const size_t num_unique_dn_dets(detValues_dn.size()); + + const T* restrict cptr = Coeff.data(); + const size_t nc = Coeff.size(); + const size_t* restrict upC(C2node_up.data()); + const size_t* restrict dnC(C2node_dn.data()); + + T* T_(Table.data()); + + const T* restrict A(M_up.data()); + const T* restrict Ainv(Minv_up.data()); + // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF + // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. 
NEED A FIX FOR + // THIS CASE + // The T matrix should be calculated and stored for use + // T = A^{-1} \widetilde A + // REMINDER: that the ValueMatrix "matrix" stores data in a row major order + // and that BLAS commands assume column major + BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), A, nmo, Ainv, nel, RealType(0.0), T_, nmo); + + // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1} + // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow}) + RealType const0(0.0), const1(0.0), const2(0.0); + for (size_t i = 0; i < nc; ++i) + { + const RealType c = cptr[i]; + const size_t up = upC[i]; + const size_t down = dnC[i]; + + const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]); + } + + std::fill(pK4.begin(), pK4.end(), 0.0); + + // Now we are going to loop through all unique determinants. + // The few lines above are for the reference matrix contribution. + // Although I start the loop below from index 0, the loop only performs + // actions when the index is >= 1 the detData object contains all the + // information about the P^T and Q matrices (projection matrices) needed in + // the table method + const int* restrict data_it = detData_up.data(); + for (int index = 0, datum = 0; index < num_unique_up_dets; index++) + { + const int k = data_it[datum]; + + if (k == 0) + { + datum += 3 * k + 1; } - std::fill(pK4.begin(), pK4.end(), 0.0); - - // Now we are going to loop through all unique determinants. - // The few lines above are for the reference matrix contribution. 
- // Although I start the loop below from index 0, the loop only performs - // actions when the index is >= 1 the detData object contains all the - // information about the P^T and Q matrices (projection matrices) needed in - // the table method - const int* restrict data_it = detData_up.data(); - for (int index = 0, datum = 0; index < num_unique_up_dets; index++) { - const int k = data_it[datum]; - - if (k == 0) { - datum += 3 * k + 1; - } - - else { - // Number of rows and cols of P^T - const int prows = k; - const int pcols = nel; - // Number of rows and cols of Q - const int qrows = nmo; - const int qcols = k; - - Y5.resize(nel, k); - Y6.resize(k, k); - - // Any matrix multiplication of P^T or Q is simply a projection - // Explicit matrix multiplication can be avoided; instead column or - // row copying can be done BlAS::copy(size of col/row being copied, - // Matrix pointer + place to begin copying, - // storage spacing (number of elements btw next row/col - // element), Pointer to resultant matrix + place to begin - // pasting, storage spacing of resultant matrix) - // For example the next 4 lines is the matrix multiplication of T*Q - // = Y5 - std::fill(Y5.begin(), Y5.end(), 0.0); - for (int i = 0; i < k; i++) { - BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo, - Y5.data() + i, k); - } - - std::fill(Y6.begin(), Y6.end(), 0.0); - for (int i = 0; i < k; i++) { - BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, - (Y6.data() + i * k), 1); - } - - Vector WS; - Vector Piv; - WS.resize(k); - Piv.resize(k); - std::complex logdet = 0.0; - InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet); - - Y7.resize(k, nel); - - std::fill(Y7.begin(), Y7.end(), 0.0); - for (int i = 0; i < k; i++) { - BLAS::copy(k, Y6.data() + i, k, - Y7.data() + (data_it[datum + 1 + i]), nel); - } - - for (int p = 0; p < lookup_tbl[index].size(); p++) { - // el_p is the element position that contains information about - // the CI coefficient, and det up/dn values 
associated with the - // current unique determinant - const int el_p(lookup_tbl[index][p]); - const RealType c = cptr[el_p]; - const size_t up = upC[el_p]; - const size_t down = dnC[el_p]; - - const RealType alpha_4(c * detValues_dn[down] * - detValues_up[up] * (1 / psiCurrent)); - - for (int i = 0; i < k; i++) { - BLAS::axpy(nel, alpha_4, Y7.data() + i * nel, 1, - pK4.data() + (data_it[datum + 1 + k + i]) * nel, 1); - } - } - datum += 3 * k + 1; + else + { + // Number of rows and cols of P^T + const int prows = k; + const int pcols = nel; + // Number of rows and cols of Q + const int qrows = nmo; + const int qcols = k; + + Y5.resize(nel, k); + Y6.resize(k, k); + + // Any matrix multiplication of P^T or Q is simply a projection + // Explicit matrix multiplication can be avoided; instead column or + // row copying can be done BlAS::copy(size of col/row being copied, + // Matrix pointer + place to begin copying, + // storage spacing (number of elements btw next row/col + // element), Pointer to resultant matrix + place to begin + // pasting, storage spacing of resultant matrix) + // For example the next 4 lines is the matrix multiplication of T*Q + // = Y5 + std::fill(Y5.begin(), Y5.end(), 0.0); + for (int i = 0; i < k; i++) + { + BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo, Y5.data() + i, k); + } + + std::fill(Y6.begin(), Y6.end(), 0.0); + for (int i = 0; i < k; i++) + { + BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, (Y6.data() + i * k), 1); + } + + Vector WS; + Vector Piv; + WS.resize(k); + Piv.resize(k); + std::complex logdet = 0.0; + InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet); + + Y7.resize(k, nel); + + std::fill(Y7.begin(), Y7.end(), 0.0); + for (int i = 0; i < k; i++) + { + BLAS::copy(k, Y6.data() + i, k, Y7.data() + (data_it[datum + 1 + i]), nel); + } + + for (int p = 0; p < lookup_tbl[index].size(); p++) + { + // el_p is the element position that contains information about + // the CI coefficient, and det up/dn values 
associated with the + // current unique determinant + const int el_p(lookup_tbl[index][p]); + const RealType c = cptr[el_p]; + const size_t up = upC[el_p]; + const size_t down = dnC[el_p]; + + const RealType alpha_4(c * detValues_dn[down] * detValues_up[up] * (1 / psiCurrent)); + + for (int i = 0; i < k; i++) + { + BLAS::axpy(nel, alpha_4, Y7.data() + i * nel, 1, pK4.data() + (data_it[datum + 1 + k + i]) * nel, 1); } + } + datum += 3 * k + 1; } - - BLAS::gemm('N', 'N', nmo, nmo, nel, RealType(1.0), T_, nmo, pK4.data(), nel, - RealType(0.0), K4T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K4T.data(), nmo, T_, nmo, - RealType(0.0), TK4T.data(), nmo); - - for (int mu = 0, k = parameter_start_index; - k < (parameter_start_index + parameters_size); k++, mu++) { - int kk = this->myVars.where(k); - if (kk >= 0) { - const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second); - if (i <= nel - 1 && j > nel - 1) { - dlogpsi[kk] += T(detValues_up[0] * (Table(i, j)) * const0 * - (1 / psiCurrent) + - (K4T(i, j) - K4T(j, i) - TK4T(i, j))); - } - else if (i <= nel - 1 && j <= nel - 1) { - dlogpsi[kk] += T(detValues_up[0] * (Table(i, j) - Table(j, i)) * - const0 * (1 / psiCurrent) + - (K4T(i, j) - TK4T(i, j) - K4T(j, i) + TK4T(j, i))); - } - else { - dlogpsi[kk] += T((K4T(i, j) - K4T(j, i))); - } - } + } + + BLAS::gemm('N', 'N', nmo, nmo, nel, RealType(1.0), T_, nmo, pK4.data(), nel, RealType(0.0), K4T.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K4T.data(), nmo, T_, nmo, RealType(0.0), TK4T.data(), nmo); + + for (int mu = 0, k = parameter_start_index; k < (parameter_start_index + parameters_size); k++, mu++) + { + int kk = this->myVars.where(k); + if (kk >= 0) + { + const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second); + if (i <= nel - 1 && j > nel - 1) + { + dlogpsi[kk] += + T(detValues_up[0] * (Table(i, j)) * const0 * (1 / psiCurrent) + (K4T(i, j) - K4T(j, i) - TK4T(i, j))); + } + else if (i <= nel - 1 && j <= 
nel - 1) + { + dlogpsi[kk] += T(detValues_up[0] * (Table(i, j) - Table(j, i)) * const0 * (1 / psiCurrent) + + (K4T(i, j) - TK4T(i, j) - K4T(j, i) + TK4T(j, i))); + } + else + { + dlogpsi[kk] += T((K4T(i, j) - K4T(j, i))); + } } + } } -template -std::unique_ptr> -RotatedSPOsT::makeClone() const +template +std::unique_ptr> RotatedSPOsT::makeClone() const { - auto myclone = std::make_unique( - SPOSetT::getName(), std::unique_ptr>(Phi->makeClone())); - - myclone->params = this->params; - myclone->params_supplied = this->params_supplied; - myclone->m_act_rot_inds = this->m_act_rot_inds; - myclone->m_full_rot_inds = this->m_full_rot_inds; - myclone->myVars = this->myVars; - myclone->myVarsFull = this->myVarsFull; - myclone->history_params_ = this->history_params_; - myclone->use_global_rot_ = this->use_global_rot_; - return myclone; + auto myclone = std::make_unique(SPOSetT::getName(), std::unique_ptr>(Phi->makeClone())); + + myclone->params = this->params; + myclone->params_supplied = this->params_supplied; + myclone->m_act_rot_inds = this->m_act_rot_inds; + myclone->m_full_rot_inds = this->m_full_rot_inds; + myclone->myVars = this->myVars; + myclone->myVarsFull = this->myVarsFull; + myclone->history_params_ = this->history_params_; + myclone->use_global_rot_ = this->use_global_rot_; + return myclone; } -template -void -RotatedSPOsT::mw_evaluateDetRatios( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const +template +void RotatedSPOsT::mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const { - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.mw_evaluateDetRatios( - phi_list, vp_list, psi_list, invRow_ptr_list, ratios_list); + auto phi_list = 
extractPhiRefList(spo_list); + auto& leader = phi_list.getLeader(); + leader.mw_evaluateDetRatios(phi_list, vp_list, psi_list, invRow_ptr_list, ratios_list); } -template -void -RotatedSPOsT::mw_evaluateValue( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const RefVector& psi_v_list) const +template +void RotatedSPOsT::mw_evaluateValue(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list) const { - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.mw_evaluateValue(phi_list, P_list, iat, psi_v_list); + auto phi_list = extractPhiRefList(spo_list); + auto& leader = phi_list.getLeader(); + leader.mw_evaluateValue(phi_list, P_list, iat, psi_v_list); } -template -void -RotatedSPOsT::mw_evaluateVGL(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const +template +void RotatedSPOsT::mw_evaluateVGL(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const { - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.mw_evaluateVGL( - phi_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); + auto phi_list = extractPhiRefList(spo_list); + auto& leader = phi_list.getLeader(); + leader.mw_evaluateVGL(phi_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); } -template -void -RotatedSPOsT::mw_evaluateVGLWithSpin( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list, - OffloadMatrix& mw_dspin) const +template +void RotatedSPOsT::mw_evaluateVGLWithSpin(const RefVectorWithLeader>& spo_list, + const 
RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list, + OffloadMatrix& mw_dspin) const { - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.mw_evaluateVGLWithSpin( - phi_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list, mw_dspin); + auto phi_list = extractPhiRefList(spo_list); + auto& leader = phi_list.getLeader(); + leader.mw_evaluateVGLWithSpin(phi_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list, mw_dspin); } -template -void -RotatedSPOsT::mw_evaluateVGLandDetRatioGrads( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, - std::vector& grads) const +template +void RotatedSPOsT::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const { - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.mw_evaluateVGLandDetRatioGrads( - phi_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads); + auto phi_list = extractPhiRefList(spo_list); + auto& leader = phi_list.getLeader(); + leader.mw_evaluateVGLandDetRatioGrads(phi_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads); } -template -void -RotatedSPOsT::mw_evaluateVGLandDetRatioGradsWithSpin( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, - std::vector& grads, std::vector& spingrads) const +template +void RotatedSPOsT::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + 
OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads, + std::vector& spingrads) const { - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.mw_evaluateVGLandDetRatioGradsWithSpin(phi_list, P_list, iat, - invRow_ptr_list, phi_vgl_v, ratios, grads, spingrads); + auto phi_list = extractPhiRefList(spo_list); + auto& leader = phi_list.getLeader(); + leader.mw_evaluateVGLandDetRatioGradsWithSpin(phi_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads, + spingrads); } -template -void -RotatedSPOsT::mw_evaluate_notranspose( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int first, int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const +template +void RotatedSPOsT::mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int first, + int last, + const RefVector& logdet_list, + const RefVector& dlogdet_list, + const RefVector& d2logdet_list) const { - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.mw_evaluate_notranspose(phi_list, P_list, first, last, logdet_list, - dlogdet_list, d2logdet_list); + auto phi_list = extractPhiRefList(spo_list); + auto& leader = phi_list.getLeader(); + leader.mw_evaluate_notranspose(phi_list, P_list, first, last, logdet_list, dlogdet_list, d2logdet_list); } -template -void -RotatedSPOsT::createResource(ResourceCollection& collection) const +template +void RotatedSPOsT::createResource(ResourceCollection& collection) const { - Phi->createResource(collection); + Phi->createResource(collection); } -template -void -RotatedSPOsT::acquireResource(ResourceCollection& collection, - const RefVectorWithLeader>& spo_list) const +template +void RotatedSPOsT::acquireResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const { - auto phi_list = extractPhiRefList(spo_list); - auto& 
leader = phi_list.getLeader(); - leader.acquireResource(collection, phi_list); + auto phi_list = extractPhiRefList(spo_list); + auto& leader = phi_list.getLeader(); + leader.acquireResource(collection, phi_list); } -template -void -RotatedSPOsT::releaseResource(ResourceCollection& collection, - const RefVectorWithLeader>& spo_list) const +template +void RotatedSPOsT::releaseResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const { - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.releaseResource(collection, phi_list); + auto phi_list = extractPhiRefList(spo_list); + auto& leader = phi_list.getLeader(); + leader.releaseResource(collection, phi_list); } -template -RefVectorWithLeader> -RotatedSPOsT::extractPhiRefList( - const RefVectorWithLeader>& spo_list) +template +RefVectorWithLeader> RotatedSPOsT::extractPhiRefList(const RefVectorWithLeader>& spo_list) { - auto& spo_leader = spo_list.template getCastedLeader(); - const auto nw = spo_list.size(); - RefVectorWithLeader> phi_list(*spo_leader.Phi); - phi_list.reserve(nw); - for (int iw = 0; iw < nw; iw++) { - RotatedSPOsT& rot = - spo_list.template getCastedElement(iw); - phi_list.emplace_back(*rot.Phi); - } - return phi_list; + auto& spo_leader = spo_list.template getCastedLeader(); + const auto nw = spo_list.size(); + RefVectorWithLeader> phi_list(*spo_leader.Phi); + phi_list.reserve(nw); + for (int iw = 0; iw < nw; iw++) + { + RotatedSPOsT& rot = spo_list.template getCastedElement(iw); + phi_list.emplace_back(*rot.Phi); + } + return phi_list; } // Class concrete types from ValueType diff --git a/src/QMCWaveFunctions/RotatedSPOsT.h b/src/QMCWaveFunctions/RotatedSPOsT.h index 1ee3b075332..ae5a6cb16fb 100644 --- a/src/QMCWaveFunctions/RotatedSPOsT.h +++ b/src/QMCWaveFunctions/RotatedSPOsT.h @@ -19,494 +19,465 @@ namespace qmcplusplus { -template +template class RotatedSPOsT; namespace testing { OptVariablesTypeT& getMyVarsFull(RotatedSPOsT& 
rot); OptVariablesTypeT& getMyVarsFull(RotatedSPOsT& rot); -std::vector>& -getHistoryParams(RotatedSPOsT& rot); -std::vector>& -getHistoryParams(RotatedSPOsT& rot); +std::vector>& getHistoryParams(RotatedSPOsT& rot); +std::vector>& getHistoryParams(RotatedSPOsT& rot); } // namespace testing -template +template class RotatedSPOsT : public SPOSetT, public OptimizableObjectT { public: - using IndexType = typename SPOSetT::IndexType; - using RealType = typename SPOSetT::RealType; - using ValueType = typename SPOSetT::ValueType; - using FullValueType = typename SPOSetT::FullValueType; - using GradType = typename SPOSetT::GradType; - using ComplexType = typename SPOSetT::ComplexType; - using FullRealType = typename SPOSetT::FullRealType; - using ValueVector = typename SPOSetT::ValueVector; - using ValueMatrix = typename SPOSetT::ValueMatrix; - using GradVector = typename SPOSetT::GradVector; - using GradMatrix = typename SPOSetT::GradMatrix; - using HessVector = typename SPOSetT::HessVector; - using HessMatrix = typename SPOSetT::HessMatrix; - using GGGVector = typename SPOSetT::GGGVector; - using GGGMatrix = typename SPOSetT::GGGMatrix; - using OffloadMWVGLArray = typename SPOSetT::OffloadMWVGLArray; - template - using OffloadMatrix = Matrix>; - - // constructor - RotatedSPOsT( - const std::string& my_name, std::unique_ptr>&& spos); - // destructor - ~RotatedSPOsT() override; - - std::string - getClassName() const override - { - return "RotatedSPOsT"; - } - bool - isOptimizable() const override - { - return true; - } - bool - isOMPoffload() const override - { - return Phi->isOMPoffload(); - } - bool - hasIonDerivs() const override - { - return Phi->hasIonDerivs(); - } - - // Vector of rotation matrix indices - using RotationIndices = std::vector>; - - // Active orbital rotation parameter indices - RotationIndices m_act_rot_inds; - - // Full set of rotation values for global rotation - RotationIndices m_full_rot_inds; - - // Construct a list of the matrix indices for 
non-zero rotation parameters. - // (The structure for a sparse representation of the matrix) - // Only core->active rotations are created. - static void - createRotationIndices(int nel, int nmo, RotationIndices& rot_indices); - - // Construct a list for all the matrix indices, including core->active, - // core->core and active->active - static void - createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices); - - // Fill in antisymmetric matrix from the list of rotation parameter indices - // and a list of parameter values. - // This function assumes rot_mat is properly sized upon input and is set to - // zero. - static void - constructAntiSymmetricMatrix(const RotationIndices& rot_indices, - const std::vector& param, ValueMatrix& rot_mat); - - // Extract the list of rotation parameters from the entries in an - // antisymmetric matrix This function expects rot_indices and param are the - // same length. - static void - extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices, - const ValueMatrix& rot_mat, std::vector& param); - - // function to perform orbital rotations - void - apply_rotation(const std::vector& param, bool use_stored_copy); - - // For global rotation, inputs are the old parameters and the delta - // parameters. The corresponding rotation matrices are constructed, - // multiplied together, and the new parameters extracted. The new rotation - // is applied to the underlying SPO coefficients - void - applyDeltaRotation(const std::vector& delta_param, - const std::vector& old_param, - std::vector& new_param); - - // Perform the construction of matrices and extraction of parameters for a - // delta rotation. Split out and made static for testing. 
- static void - constructDeltaRotation(const std::vector& delta_param, - const std::vector& old_param, - const RotationIndices& act_rot_inds, - const RotationIndices& full_rot_inds, std::vector& new_param, - ValueMatrix& new_rot_mat); - - // When initializing the rotation from VP files - // This function applies the rotation history - void - applyRotationHistory(); - - // This function applies the global rotation (similar to apply_rotation, but - // for the full set of rotation parameters) - void - applyFullRotation( - const std::vector& full_param, bool use_stored_copy); - - // Compute matrix exponential of an antisymmetric matrix (result is rotation - // matrix) - static void - exponentiate_antisym_matrix(ValueMatrix& mat); - - // Compute matrix log of rotation matrix to produce antisymmetric matrix - static void - log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output); - - // A particular SPOSet used for Orbitals - std::unique_ptr> Phi; - - /// Set the rotation parameters (usually from input file) - void - setRotationParameters(const std::vector& param_list); - - /// the number of electrons of the majority spin - size_t nel_major_; - - std::unique_ptr> - makeClone() const override; - - // myG_temp (myL_temp) is the Gradient (Laplacian) value of of the - // Determinant part of the wfn myG_J is the Gradient of the all other parts - // of the wavefunction (typically just the Jastrow). - // It represents \frac{\nabla\psi_{J}}{\psi_{J}} - // myL_J will be used to represent \frac{\nabla^2\psi_{J}}{\psi_{J}} . 
The - // Laplacian portion IMPORTANT NOTE: The value of P.L holds \nabla^2 - // ln[\psi] but we need \frac{\nabla^2 \psi}{\psi} and this is what myL_J - // will hold - typename ParticleSetT::ParticleGradient myG_temp, myG_J; - typename ParticleSetT::ParticleLaplacian myL_temp, myL_J; - - ValueMatrix Bbar; - ValueMatrix psiM_inv; - ValueMatrix psiM_all; - GradMatrix dpsiM_all; - ValueMatrix d2psiM_all; - - // Single Slater creation - void - buildOptVariables(size_t nel); - - // For the MSD case rotations must be created in MultiSlaterDetTableMethod - // class - void - buildOptVariables(const RotationIndices& rotations, - const RotationIndices& full_rotations); - - void evaluateDerivatives(ParticleSetT& P, + using IndexType = typename SPOSetT::IndexType; + using RealType = typename SPOSetT::RealType; + using ValueType = typename SPOSetT::ValueType; + using FullValueType = typename SPOSetT::FullValueType; + using GradType = typename SPOSetT::GradType; + using ComplexType = typename SPOSetT::ComplexType; + using FullRealType = typename SPOSetT::FullRealType; + using ValueVector = typename SPOSetT::ValueVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradVector = typename SPOSetT::GradVector; + using GradMatrix = typename SPOSetT::GradMatrix; + using HessVector = typename SPOSetT::HessVector; + using HessMatrix = typename SPOSetT::HessMatrix; + using GGGVector = typename SPOSetT::GGGVector; + using GGGMatrix = typename SPOSetT::GGGMatrix; + using OffloadMWVGLArray = typename SPOSetT::OffloadMWVGLArray; + template + using OffloadMatrix = Matrix>; + + // constructor + RotatedSPOsT(const std::string& my_name, std::unique_ptr>&& spos); + // destructor + ~RotatedSPOsT() override; + + std::string getClassName() const override { return "RotatedSPOsT"; } + bool isOptimizable() const override { return true; } + bool isOMPoffload() const override { return Phi->isOMPoffload(); } + bool hasIonDerivs() const override { return Phi->hasIonDerivs(); } + + // Vector 
of rotation matrix indices + using RotationIndices = std::vector>; + + // Active orbital rotation parameter indices + RotationIndices m_act_rot_inds; + + // Full set of rotation values for global rotation + RotationIndices m_full_rot_inds; + + // Construct a list of the matrix indices for non-zero rotation parameters. + // (The structure for a sparse representation of the matrix) + // Only core->active rotations are created. + static void createRotationIndices(int nel, int nmo, RotationIndices& rot_indices); + + // Construct a list for all the matrix indices, including core->active, + // core->core and active->active + static void createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices); + + // Fill in antisymmetric matrix from the list of rotation parameter indices + // and a list of parameter values. + // This function assumes rot_mat is properly sized upon input and is set to + // zero. + static void constructAntiSymmetricMatrix(const RotationIndices& rot_indices, + const std::vector& param, + ValueMatrix& rot_mat); + + // Extract the list of rotation parameters from the entries in an + // antisymmetric matrix This function expects rot_indices and param are the + // same length. + static void extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices, + const ValueMatrix& rot_mat, + std::vector& param); + + // function to perform orbital rotations + void apply_rotation(const std::vector& param, bool use_stored_copy); + + // For global rotation, inputs are the old parameters and the delta + // parameters. The corresponding rotation matrices are constructed, + // multiplied together, and the new parameters extracted. The new rotation + // is applied to the underlying SPO coefficients + void applyDeltaRotation(const std::vector& delta_param, + const std::vector& old_param, + std::vector& new_param); + + // Perform the construction of matrices and extraction of parameters for a + // delta rotation. Split out and made static for testing. 
+ static void constructDeltaRotation(const std::vector& delta_param, + const std::vector& old_param, + const RotationIndices& act_rot_inds, + const RotationIndices& full_rot_inds, + std::vector& new_param, + ValueMatrix& new_rot_mat); + + // When initializing the rotation from VP files + // This function applies the rotation history + void applyRotationHistory(); + + // This function applies the global rotation (similar to apply_rotation, but + // for the full set of rotation parameters) + void applyFullRotation(const std::vector& full_param, bool use_stored_copy); + + // Compute matrix exponential of an antisymmetric matrix (result is rotation + // matrix) + static void exponentiate_antisym_matrix(ValueMatrix& mat); + + // Compute matrix log of rotation matrix to produce antisymmetric matrix + static void log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output); + + // A particular SPOSet used for Orbitals + std::unique_ptr> Phi; + + /// Set the rotation parameters (usually from input file) + void setRotationParameters(const std::vector& param_list); + + /// the number of electrons of the majority spin + size_t nel_major_; + + std::unique_ptr> makeClone() const override; + + // myG_temp (myL_temp) is the Gradient (Laplacian) value of of the + // Determinant part of the wfn myG_J is the Gradient of the all other parts + // of the wavefunction (typically just the Jastrow). + // It represents \frac{\nabla\psi_{J}}{\psi_{J}} + // myL_J will be used to represent \frac{\nabla^2\psi_{J}}{\psi_{J}} . 
The + // Laplacian portion IMPORTANT NOTE: The value of P.L holds \nabla^2 + // ln[\psi] but we need \frac{\nabla^2 \psi}{\psi} and this is what myL_J + // will hold + typename ParticleSetT::ParticleGradient myG_temp, myG_J; + typename ParticleSetT::ParticleLaplacian myL_temp, myL_J; + + ValueMatrix Bbar; + ValueMatrix psiM_inv; + ValueMatrix psiM_all; + GradMatrix dpsiM_all; + ValueMatrix d2psiM_all; + + // Single Slater creation + void buildOptVariables(size_t nel); + + // For the MSD case rotations must be created in MultiSlaterDetTableMethod + // class + void buildOptVariables(const RotationIndices& rotations, const RotationIndices& full_rotations); + + void evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const int& FirstIndex, + const int& LastIndex) override; + + void evaluateDerivativesWF(ParticleSetT& P, const OptVariablesTypeT& optvars, Vector& dlogpsi, - Vector& dhpsioverpsi, - const int& FirstIndex, - const int& LastIndex) override; - - void evaluateDerivativesWF(ParticleSetT& P, - const OptVariablesTypeT& optvars, - Vector& dlogpsi, - int FirstIndex, - int LastIndex) override; + int FirstIndex, + int LastIndex) override; - void evaluateDerivatives(ParticleSetT& P, + void evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const T& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const GradMatrix& grads_up, + const GradMatrix& grads_dn, + const ValueMatrix& lapls_up, + const ValueMatrix& lapls_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const GradMatrix& B_grad, + const ValueMatrix& B_lapl, + const std::vector& detData_up, + const size_t N1, + const size_t N2, + const size_t NP1, + const size_t NP2, + const std::vector>& 
lookup_tbl) override; + + void evaluateDerivativesWF(ParticleSetT& P, const OptVariablesTypeT& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const T& psiCurrent, - const std::vector& Coeff, + Vector& dlogpsi, + const FullValueType& psiCurrent, + const std::vector& Coeff, const std::vector& C2node_up, const std::vector& C2node_dn, const ValueVector& detValues_up, const ValueVector& detValues_dn, - const GradMatrix& grads_up, - const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, const ValueMatrix& M_up, const ValueMatrix& M_dn, const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, - const ValueMatrix& B_lapl, const std::vector& detData_up, - const size_t N1, - const size_t N2, - const size_t NP1, - const size_t NP2, const std::vector>& lookup_tbl) override; - void evaluateDerivativesWF(ParticleSetT& P, - const OptVariablesTypeT& optvars, - Vector& dlogpsi, - const FullValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl) override; - - // helper function to evaluatederivative; evaluate orbital rotation - // parameter derivative using table method - void - table_method_eval(Vector& dlogpsi, Vector& dhpsioverpsi, - const typename ParticleSetT::ParticleLaplacian& myL_J, - const typename ParticleSetT::ParticleGradient& myG_J, - const size_t nel, const size_t nmo, const T& psiCurrent, - const std::vector& Coeff, const std::vector& C2node_up, - const std::vector& C2node_dn, const ValueVector& detValues_up, - const ValueVector& detValues_dn, const GradMatrix& grads_up, - const GradMatrix& grads_dn, const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, const ValueMatrix& M_up, - const 
ValueMatrix& M_dn, const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, const GradMatrix& B_grad, - const ValueMatrix& B_lapl, const std::vector& detData_up, - const size_t N1, const size_t N2, const size_t NP1, const size_t NP2, - const std::vector>& lookup_tbl); - - void - table_method_evalWF(Vector& dlogpsi, const size_t nel, const size_t nmo, - const T& psiCurrent, const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, const ValueVector& detValues_up, - const ValueVector& detValues_dn, const ValueMatrix& M_up, - const ValueMatrix& M_dn, const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, const std::vector& detData_up, - const std::vector>& lookup_tbl); - - void - extractOptimizableObjectRefs(UniqueOptObjRefsT& opt_obj_refs) override - { - opt_obj_refs.push_back(*this); - } - - void checkInVariablesExclusive(OptVariablesTypeT& active) override - { - if (this->myVars.size()) - active.insertFrom(this->myVars); - } - - void checkOutVariables(const OptVariablesTypeT& active) override { this->myVars.getIndex(active); } - - /// reset - void resetParametersExclusive(const OptVariablesTypeT& active) override; - - void - writeVariationalParameters(hdf_archive& hout) override; - - void - readVariationalParameters(hdf_archive& hin) override; - - //********************************************************************************* - // the following functions simply call Phi's corresponding functions - void - setOrbitalSetSize(int norbs) override - { - Phi->setOrbitalSetSize(norbs); - } - - void - checkObject() const override - { - Phi->checkObject(); - } - - void - evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override - { - assert(psi.size() <= this->OrbitalSetSize); - Phi->evaluateValue(P, iat, psi); - } - - void - evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi) override - { - assert(psi.size() <= this->OrbitalSetSize); - Phi->evaluateVGL(P, iat, psi, dpsi, 
d2psi); - } - - void - evaluateDetRatios(const VirtualParticleSetT& VP, ValueVector& psi, - const ValueVector& psiinv, std::vector& ratios) override - { - Phi->evaluateDetRatios(VP, psi, psiinv, ratios); - } - - void evaluateDerivRatios(const VirtualParticleSetT& VP, - const OptVariablesTypeT& optvars, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios, - Matrix& dratios, - int FirstIndex, - int LastIndex) override; - - void - evaluateVGH(const ParticleSetT& P, int iat, ValueVector& psi, - GradVector& dpsi, HessVector& grad_grad_psi) override - { - assert(psi.size() <= this->OrbitalSetSize); - Phi->evaluateVGH(P, iat, psi, dpsi, grad_grad_psi); - } - - void - evaluateVGHGH(const ParticleSetT& P, int iat, ValueVector& psi, - GradVector& dpsi, HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override - { - Phi->evaluateVGHGH( - P, iat, psi, dpsi, grad_grad_psi, grad_grad_grad_psi); - } - - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, - ValueMatrix& d2logdet) override - { - Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet); - } - - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) override - { - Phi->evaluate_notranspose( - P, first, last, logdet, dlogdet, grad_grad_logdet); - } - - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) override - { - Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, - grad_grad_logdet, grad_grad_grad_logdet); - } - - void - evaluateGradSource(const ParticleSetT& P, int first, int last, - const ParticleSetT& source, int iat_src, - GradMatrix& grad_phi) override - { - Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi); - } - - void - evaluateGradSource(const ParticleSetT& 
P, int first, int last, - const ParticleSetT& source, int iat_src, GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi) override - { - Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi, - grad_grad_phi, grad_lapl_phi); - } - - // void evaluateThirdDeriv(const ParticleSet& P, int first, int last, - // GGGMatrix& grad_grad_grad_logdet) {Phi->evaluateThridDeriv(P, first, - // last, grad_grad_grad_logdet); } - - /// Use history list (false) or global rotation (true) - void - set_use_global_rotation(bool use_global_rotation) - { - use_global_rot_ = use_global_rotation; - } - - void - mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const override; - - void - mw_evaluateValue(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const RefVector& psi_v_list) const override; - - void - mw_evaluateVGL(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const override; - - void - mw_evaluateVGLWithSpin(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list, - OffloadMatrix& mw_dspin) const override; - - void - mw_evaluateVGLandDetRatioGrads( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, - std::vector& grads) const override; - - void - mw_evaluateVGLandDetRatioGradsWithSpin( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, - std::vector& grads, - 
std::vector& spingrads) const override; - - void - mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int first, int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const override; - - void - createResource(ResourceCollection& collection) const override; - - void - acquireResource(ResourceCollection& collection, - const RefVectorWithLeader>& spo_list) const override; - - void - releaseResource(ResourceCollection& collection, - const RefVectorWithLeader>& spo_list) const override; + // helper function to evaluatederivative; evaluate orbital rotation + // parameter derivative using table method + void table_method_eval(Vector& dlogpsi, + Vector& dhpsioverpsi, + const typename ParticleSetT::ParticleLaplacian& myL_J, + const typename ParticleSetT::ParticleGradient& myG_J, + const size_t nel, + const size_t nmo, + const T& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const GradMatrix& grads_up, + const GradMatrix& grads_dn, + const ValueMatrix& lapls_up, + const ValueMatrix& lapls_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const GradMatrix& B_grad, + const ValueMatrix& B_lapl, + const std::vector& detData_up, + const size_t N1, + const size_t N2, + const size_t NP1, + const size_t NP2, + const std::vector>& lookup_tbl); + + void table_method_evalWF(Vector& dlogpsi, + const size_t nel, + const size_t nmo, + const T& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const std::vector& detData_up, + const std::vector>& 
lookup_tbl); + + void extractOptimizableObjectRefs(UniqueOptObjRefsT& opt_obj_refs) override { opt_obj_refs.push_back(*this); } + + void checkInVariablesExclusive(OptVariablesTypeT& active) override + { + if (this->myVars.size()) + active.insertFrom(this->myVars); + } + + void checkOutVariables(const OptVariablesTypeT& active) override { this->myVars.getIndex(active); } + + /// reset + void resetParametersExclusive(const OptVariablesTypeT& active) override; + + void writeVariationalParameters(hdf_archive& hout) override; + + void readVariationalParameters(hdf_archive& hin) override; + + //********************************************************************************* + // the following functions simply call Phi's corresponding functions + void setOrbitalSetSize(int norbs) override { Phi->setOrbitalSetSize(norbs); } + + void checkObject() const override { Phi->checkObject(); } + + void evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override + { + assert(psi.size() <= this->OrbitalSetSize); + Phi->evaluateValue(P, iat, psi); + } + + void evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override + { + assert(psi.size() <= this->OrbitalSetSize); + Phi->evaluateVGL(P, iat, psi, dpsi, d2psi); + } + + void evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) override + { + Phi->evaluateDetRatios(VP, psi, psiinv, ratios); + } + + void evaluateDerivRatios(const VirtualParticleSetT& VP, + const OptVariablesTypeT& optvars, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios, + Matrix& dratios, + int FirstIndex, + int LastIndex) override; + + void evaluateVGH(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) override + { + assert(psi.size() <= this->OrbitalSetSize); + Phi->evaluateVGH(P, iat, psi, dpsi, grad_grad_psi); + } + + void evaluateVGHGH(const ParticleSetT& P, + 
int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) override + { + Phi->evaluateVGHGH(P, iat, psi, dpsi, grad_grad_psi, grad_grad_grad_psi); + } + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) override + { + Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet); + } + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet) override + { + Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, grad_grad_logdet); + } + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet, + GGGMatrix& grad_grad_grad_logdet) override + { + Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet); + } + + void evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& grad_phi) override + { + Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi); + } + + void evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& grad_phi, + HessMatrix& grad_grad_phi, + GradMatrix& grad_lapl_phi) override + { + Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi, grad_grad_phi, grad_lapl_phi); + } + + // void evaluateThirdDeriv(const ParticleSet& P, int first, int last, + // GGGMatrix& grad_grad_grad_logdet) {Phi->evaluateThridDeriv(P, first, + // last, grad_grad_grad_logdet); } + + /// Use history list (false) or global rotation (true) + void set_use_global_rotation(bool use_global_rotation) { use_global_rot_ = use_global_rotation; } + + void mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const 
RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const override; + + void mw_evaluateValue(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list) const override; + + void mw_evaluateVGL(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const override; + + void mw_evaluateVGLWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list, + OffloadMatrix& mw_dspin) const override; + + void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const override; + + void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads, + std::vector& spingrads) const override; + + void mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int first, + int last, + const RefVector& logdet_list, + const RefVector& dlogdet_list, + const RefVector& d2logdet_list) const override; + + void createResource(ResourceCollection& collection) const override; + + void acquireResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const override; + + void releaseResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const override; private: - /// true if SPO parameters (orbital rotation parameters) have been supplied - /// by input - bool 
params_supplied; - /// list of supplied orbital rotation parameters - std::vector params; - - /// Full set of rotation matrix parameters for use in global rotation method - OptVariablesTypeT myVarsFull; - - /// List of previously applied parameters - std::vector> history_params_; - - /// Use global rotation or history list - bool use_global_rot_ = true; - - static RefVectorWithLeader> - extractPhiRefList(const RefVectorWithLeader>& spo_list); - friend OptVariablesTypeT& testing::getMyVarsFull(RotatedSPOsT& rot); - friend OptVariablesTypeT& testing::getMyVarsFull(RotatedSPOsT& rot); - friend std::vector>& - testing::getHistoryParams(RotatedSPOsT& rot); - friend std::vector>& - testing::getHistoryParams(RotatedSPOsT& rot); + /// true if SPO parameters (orbital rotation parameters) have been supplied + /// by input + bool params_supplied; + /// list of supplied orbital rotation parameters + std::vector params; + + /// Full set of rotation matrix parameters for use in global rotation method + OptVariablesTypeT myVarsFull; + + /// List of previously applied parameters + std::vector> history_params_; + + /// Use global rotation or history list + bool use_global_rot_ = true; + + static RefVectorWithLeader> extractPhiRefList(const RefVectorWithLeader>& spo_list); + friend OptVariablesTypeT& testing::getMyVarsFull(RotatedSPOsT& rot); + friend OptVariablesTypeT& testing::getMyVarsFull(RotatedSPOsT& rot); + friend std::vector>& testing::getHistoryParams(RotatedSPOsT& rot); + friend std::vector>& testing::getHistoryParams(RotatedSPOsT& rot); }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp index e8342a693ba..21a26cf4130 100644 --- a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp +++ b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp @@ -144,10 +144,9 @@ std::unique_ptr> SPOSetBuilderFactoryT::createSPOSetBuilder if (targetPtcl.isSpinor()) { #ifdef QMC_COMPLEX - app_log() << "Einspline 
Spinor Set\n"; - // FIXME - bb = std::make_unique>(targetPtcl, - ptclPool, myComm, rootNode); + app_log() << "Einspline Spinor Set\n"; + // FIXME + bb = std::make_unique>(targetPtcl, ptclPool, myComm, rootNode); #else PRE.error("Use of einspline spinors requires QMC_COMPLEX=1. " "Rebuild with this option"); @@ -156,11 +155,10 @@ std::unique_ptr> SPOSetBuilderFactoryT::createSPOSetBuilder else { #if defined(HAVE_EINSPLINE) - PRE << "EinsplineSetBuilder: using libeinspline for B-spline " - "orbitals.\n"; - // FIXME - bb = std::make_unique>(targetPtcl, ptclPool, - myComm, rootNode); + PRE << "EinsplineSetBuilder: using libeinspline for B-spline " + "orbitals.\n"; + // FIXME + bb = std::make_unique>(targetPtcl, ptclPool, myComm, rootNode); #else PRE.error("Einspline is missing for B-spline orbitals", true); #endif diff --git a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.h b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.h index 9841988d003..b2d503b0458 100644 --- a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.h +++ b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.h @@ -24,66 +24,53 @@ namespace qmcplusplus { -template +template class SPOSetBuilderFactoryT : public MPIObjectBase { public: - using SPOMap = typename SPOSetT::SPOMap; - using PSetMap = - std::map>>; + using SPOMap = typename SPOSetT::SPOMap; + using PSetMap = std::map>>; - /** constructor + /** constructor * \param comm communicator * \param els reference to the electrons * \param ions reference to the ions */ - SPOSetBuilderFactoryT( - Communicate* comm, ParticleSetT& els, const PSetMap& psets); + SPOSetBuilderFactoryT(Communicate* comm, ParticleSetT& els, const PSetMap& psets); - ~SPOSetBuilderFactoryT(); + ~SPOSetBuilderFactoryT(); - std::unique_ptr> - createSPOSetBuilder(xmlNodePtr rootNode); + std::unique_ptr> createSPOSetBuilder(xmlNodePtr rootNode); - /** returns a named sposet from the pool + /** returns a named sposet from the pool * only use in serial portion of execution * ie during initialization prior to 
threaded code */ - const SPOSetT* - getSPOSet(const std::string& name) const; + const SPOSetT* getSPOSet(const std::string& name) const; - void - buildSPOSetCollection(xmlNodePtr cur); + void buildSPOSetCollection(xmlNodePtr cur); - bool - empty() const - { - return sposets.empty(); - } + bool empty() const { return sposets.empty(); } - /** add an SPOSet to sposets map. + /** add an SPOSet to sposets map. * This is only used to handle legacy SPOSet input styles without using * sposet_collection */ - void addSPOSet(std::unique_ptr>); + void addSPOSet(std::unique_ptr>); - SPOMap&& - exportSPOSets() - { - return std::move(sposets); - } + SPOMap&& exportSPOSets() { return std::move(sposets); } private: - /// reference to the target particle - ParticleSetT& targetPtcl; + /// reference to the target particle + ParticleSetT& targetPtcl; - /// reference to the particle pool - const PSetMap& ptclPool; + /// reference to the particle pool + const PSetMap& ptclPool; - /// list of all sposets created by the builders of this factory - SPOMap sposets; + /// list of all sposets created by the builders of this factory + SPOMap sposets; - static std::string basisset_tag; + static std::string basisset_tag; }; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/SPOSetScannerT.h b/src/QMCWaveFunctions/SPOSetScannerT.h index 814601bbacb..9c9b0bdb729 100644 --- a/src/QMCWaveFunctions/SPOSetScannerT.h +++ b/src/QMCWaveFunctions/SPOSetScannerT.h @@ -19,266 +19,235 @@ namespace qmcplusplus { -template +template struct OutputReportMakerBase { - using ValueVector = typename SPOSetT::ValueVector; - using GradVector = typename SPOSetT::GradVector; + using ValueVector = typename SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; - const ValueVector& SPO_v_avg; - const ValueVector& SPO_l_avg; - const GradVector& SPO_g_avg; - int nknots; + const ValueVector& SPO_v_avg; + const ValueVector& SPO_l_avg; + const GradVector& SPO_g_avg; + int nknots; }; -template 
+template struct OutputReportMaker : OutputReportMakerBase { - using RealType = typename SPOSetT::RealType; + using RealType = typename SPOSetT::RealType; - void - operator()(std::ofstream& output_report) const + void operator()(std::ofstream& output_report) const + { + output_report << "# Report: Orb Value_avg Gradients_avg Laplacian_avg" << std::endl; + for (int iorb = 0; iorb < this->SPO_v_avg.size(); iorb++) { - output_report - << "# Report: Orb Value_avg Gradients_avg Laplacian_avg" - << std::endl; - for (int iorb = 0; iorb < this->SPO_v_avg.size(); iorb++) { - auto one_over_nknots = static_cast(1.0 / this->nknots); - output_report << "\t" << iorb << " " << std::scientific - << this->SPO_v_avg[iorb] * one_over_nknots << " " - << this->SPO_g_avg[iorb][0] * one_over_nknots << " " - << this->SPO_g_avg[iorb][1] * one_over_nknots << " " - << this->SPO_g_avg[iorb][2] * one_over_nknots << " " - << this->SPO_l_avg[iorb] * one_over_nknots - << std::fixed << std::endl; - } + auto one_over_nknots = static_cast(1.0 / this->nknots); + output_report << "\t" << iorb << " " << std::scientific << this->SPO_v_avg[iorb] * one_over_nknots << " " + << this->SPO_g_avg[iorb][0] * one_over_nknots << " " << this->SPO_g_avg[iorb][1] * one_over_nknots + << " " << this->SPO_g_avg[iorb][2] * one_over_nknots << " " + << this->SPO_l_avg[iorb] * one_over_nknots << std::fixed << std::endl; } + } }; -template -struct OutputReportMaker> : - OutputReportMakerBase> +template +struct OutputReportMaker> : OutputReportMakerBase> { - using RealType = typename SPOSetT::RealType; + using RealType = typename SPOSetT::RealType; - void - operator()(std::ofstream& output_report) const + void operator()(std::ofstream& output_report) const + { + output_report << "# Report: Orb Value_avg I/R Gradients_avg Laplacian_avg" << std::endl; + for (int iorb = 0; iorb < this->SPO_v_avg.size(); iorb++) { - output_report - << "# Report: Orb Value_avg I/R Gradients_avg Laplacian_avg" - << std::endl; - for (int iorb = 0; 
iorb < this->SPO_v_avg.size(); iorb++) { - auto one_over_nknots = static_cast(1.0 / this->nknots); - output_report << "\t" << iorb << " " << std::scientific - << this->SPO_v_avg[iorb] * one_over_nknots << " " - << this->SPO_v_avg[iorb].imag() / - this->SPO_v_avg[iorb].real() - << " " << this->SPO_g_avg[iorb][0] * one_over_nknots - << " " << this->SPO_g_avg[iorb][1] * one_over_nknots - << " " << this->SPO_g_avg[iorb][2] * one_over_nknots - << " " << this->SPO_l_avg[iorb] * one_over_nknots - << std::fixed << std::endl; - } + auto one_over_nknots = static_cast(1.0 / this->nknots); + output_report << "\t" << iorb << " " << std::scientific << this->SPO_v_avg[iorb] * one_over_nknots << " " + << this->SPO_v_avg[iorb].imag() / this->SPO_v_avg[iorb].real() << " " + << this->SPO_g_avg[iorb][0] * one_over_nknots << " " << this->SPO_g_avg[iorb][1] * one_over_nknots + << " " << this->SPO_g_avg[iorb][2] * one_over_nknots << " " + << this->SPO_l_avg[iorb] * one_over_nknots << std::fixed << std::endl; } + } }; /** a scanner for all the SPO sets. 
*/ -template +template class SPOSetScannerT { public: - using PtclPool = - std::map>>; - using SPOSetMap = typename SPOSetT::SPOMap; - using RealType = typename SPOSetT::RealType; - using ValueVector = typename SPOSetT::ValueVector; - using GradVector = typename SPOSetT::GradVector; - using HessVector = typename SPOSetT::HessVector; - - RealType - myfabs(RealType s) - { - return std::fabs(s); - } - template - std::complex - myfabs(std::complex& s) - { - return std::complex(myfabs(s.real()), myfabs(s.imag())); - } - template - TinyVector - myfabs(TinyVector& s) - { - return TinyVector( - myfabs(s[0]), myfabs(s[1]), myfabs(s[2])); - } - - const SPOSetMap& sposets; - ParticleSetT& target; - const PtclPool& ptcl_pool_; - ParticleSetT* ions; - - // construction/destruction - SPOSetScannerT(const SPOSetMap& sposets_in, ParticleSetT& targetPtcl, - const PtclPool& psets) : - sposets(sposets_in), - target(targetPtcl), - ptcl_pool_(psets), - ions(0){}; - //~SPOSetScannerT(){}; - - // processing scanning - void - put(xmlNodePtr cur) + using PtclPool = std::map>>; + using SPOSetMap = typename SPOSetT::SPOMap; + using RealType = typename SPOSetT::RealType; + using ValueVector = typename SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; + using HessVector = typename SPOSetT::HessVector; + + RealType myfabs(RealType s) { return std::fabs(s); } + template + std::complex myfabs(std::complex& s) + { + return std::complex(myfabs(s.real()), myfabs(s.imag())); + } + template + TinyVector myfabs(TinyVector& s) + { + return TinyVector(myfabs(s[0]), myfabs(s[1]), myfabs(s[2])); + } + + const SPOSetMap& sposets; + ParticleSetT& target; + const PtclPool& ptcl_pool_; + ParticleSetT* ions; + + // construction/destruction + SPOSetScannerT(const SPOSetMap& sposets_in, ParticleSetT& targetPtcl, const PtclPool& psets) + : sposets(sposets_in), target(targetPtcl), ptcl_pool_(psets), ions(0){}; + //~SPOSetScannerT(){}; + + // processing scanning + void put(xmlNodePtr cur) + { 
+ app_log() << "Entering the SPO set scanner!" << std::endl; + // check in the source particle set and search for it in the pool. + std::string sourcePtcl("ion0"); + OhmmsAttributeSet aAttrib; + aAttrib.add(sourcePtcl, "source"); + aAttrib.put(cur); + auto pit(ptcl_pool_.find(sourcePtcl)); + if (pit == ptcl_pool_.end()) + app_log() << "Source particle set not found. Can not be used as " + "reference point." + << std::endl; + else + ions = pit->second.get(); + + // scanning the SPO sets + xmlNodePtr cur_save = cur; + for (const auto& [name, sposet] : sposets) { - app_log() << "Entering the SPO set scanner!" << std::endl; - // check in the source particle set and search for it in the pool. - std::string sourcePtcl("ion0"); + app_log() << " Processing SPO " << sposet->getName() << std::endl; + // scanning the paths + cur = cur_save->children; + while (cur != NULL) + { + std::string trace_name("no name"); OhmmsAttributeSet aAttrib; - aAttrib.add(sourcePtcl, "source"); + aAttrib.add(trace_name, "name"); aAttrib.put(cur); - auto pit(ptcl_pool_.find(sourcePtcl)); - if (pit == ptcl_pool_.end()) - app_log() << "Source particle set not found. Can not be used as " - "reference point." 
- << std::endl; + std::string cname(getNodeName(cur)); + std::string prefix(sposet->getName() + "_" + cname + "_" + trace_name); + if (cname == "path") + { + app_log() << " Scanning a " << cname << " called " << trace_name << " and writing to " + << prefix + "_v/g/l/report.dat" << std::endl; + auto spo = sposet->makeClone(); + scan_path(cur, *spo, prefix); + } else - ions = pit->second.get(); - - // scanning the SPO sets - xmlNodePtr cur_save = cur; - for (const auto& [name, sposet] : sposets) { - app_log() << " Processing SPO " << sposet->getName() << std::endl; - // scanning the paths - cur = cur_save->children; - while (cur != NULL) { - std::string trace_name("no name"); - OhmmsAttributeSet aAttrib; - aAttrib.add(trace_name, "name"); - aAttrib.put(cur); - std::string cname(getNodeName(cur)); - std::string prefix( - sposet->getName() + "_" + cname + "_" + trace_name); - if (cname == "path") { - app_log() << " Scanning a " << cname << " called " - << trace_name << " and writing to " - << prefix + "_v/g/l/report.dat" << std::endl; - auto spo = sposet->makeClone(); - scan_path(cur, *spo, prefix); - } - else { - if (cname != "text" && cname != "comment") - app_log() << " Unknown type of scanning " << cname - << std::endl; - } - cur = cur->next; - } + { + if (cname != "text" && cname != "comment") + app_log() << " Unknown type of scanning " << cname << std::endl; } - app_log() << "Exiting the SPO set scanner!" << std::endl << std::endl; + cur = cur->next; + } } - - // scanning a path - void - scan_path(xmlNodePtr cur, SPOSetT& sposet, std::string prefix) + app_log() << "Exiting the SPO set scanner!" 
<< std::endl << std::endl; + } + + // scanning a path + void scan_path(xmlNodePtr cur, SPOSetT& sposet, std::string prefix) + { + std::string file_name; + file_name = prefix + "_v.dat"; + std::ofstream output_v(file_name.c_str()); + file_name = prefix + "_g.dat"; + std::ofstream output_g(file_name.c_str()); + file_name = prefix + "_l.dat"; + std::ofstream output_l(file_name.c_str()); + file_name = prefix + "_report.dat"; + std::ofstream output_report(file_name.c_str()); + + int nknots(2); + int from_atom(-1); + int to_atom(-1); + TinyVector from_pos(0.0, 0.0, 0.0); + TinyVector to_pos(0.0, 0.0, 0.0); + + OhmmsAttributeSet aAttrib; + aAttrib.add(nknots, "nknots"); + aAttrib.add(from_atom, "from_atom"); + aAttrib.add(to_atom, "to_atom"); + aAttrib.add(from_pos, "from_pos"); + aAttrib.add(to_pos, "to_pos"); + aAttrib.put(cur); + + // sanity check + if (nknots < 2) + nknots = 2; + // check out the reference atom coordinates + if (ions) { - std::string file_name; - file_name = prefix + "_v.dat"; - std::ofstream output_v(file_name.c_str()); - file_name = prefix + "_g.dat"; - std::ofstream output_g(file_name.c_str()); - file_name = prefix + "_l.dat"; - std::ofstream output_l(file_name.c_str()); - file_name = prefix + "_report.dat"; - std::ofstream output_report(file_name.c_str()); - - int nknots(2); - int from_atom(-1); - int to_atom(-1); - TinyVector from_pos(0.0, 0.0, 0.0); - TinyVector to_pos(0.0, 0.0, 0.0); - - OhmmsAttributeSet aAttrib; - aAttrib.add(nknots, "nknots"); - aAttrib.add(from_atom, "from_atom"); - aAttrib.add(to_atom, "to_atom"); - aAttrib.add(from_pos, "from_pos"); - aAttrib.add(to_pos, "to_pos"); - aAttrib.put(cur); - - // sanity check - if (nknots < 2) - nknots = 2; - // check out the reference atom coordinates - if (ions) { - if (from_atom >= 0 && from_atom < ions->R.size()) - from_pos = ions->R[from_atom]; - if (to_atom >= 0 && to_atom < ions->R.size()) - to_pos = ions->R[to_atom]; - } + if (from_atom >= 0 && from_atom < ions->R.size()) + from_pos = 
ions->R[from_atom]; + if (to_atom >= 0 && to_atom < ions->R.size()) + to_pos = ions->R[to_atom]; + } - // prepare a fake particle set - ValueVector SPO_v, SPO_l, SPO_v_avg, SPO_l_avg; - GradVector SPO_g, SPO_g_avg; - int OrbitalSize(sposet.size()); - SPO_v.resize(OrbitalSize); - SPO_g.resize(OrbitalSize); - SPO_l.resize(OrbitalSize); - SPO_v_avg.resize(OrbitalSize); - SPO_g_avg.resize(OrbitalSize); - SPO_l_avg.resize(OrbitalSize); - SPO_v_avg = 0.0; - SPO_g_avg = 0.0; - SPO_l_avg = 0.0; - double Delta = 1.0 / (nknots - 1); - int elec_count = target.R.size(); - auto R_saved = target.R; - typename ParticleSetT::SingleParticlePos zero_pos(0.0, 0.0, 0.0); - for (int icount = 0, ind = 0; icount < nknots; icount++, ind++) { - if (ind == elec_count) - ind = 0; - target.R[ind][0] = - (to_pos[0] - from_pos[0]) * Delta * icount + from_pos[0]; - target.R[ind][1] = - (to_pos[1] - from_pos[1]) * Delta * icount + from_pos[1]; - target.R[ind][2] = - (to_pos[2] - from_pos[2]) * Delta * icount + from_pos[2]; - target.makeMove(ind, zero_pos); - sposet.evaluateVGL(target, ind, SPO_v, SPO_g, SPO_l); - std::ostringstream o; - o << "x_y_z " << std::fixed << std::setprecision(7) - << target.R[ind][0] << " " << target.R[ind][1] << " " - << target.R[ind][2]; - output_v << o.str() << " : " << std::scientific - << std::setprecision(12); - output_g << o.str() << " : " << std::scientific - << std::setprecision(12); - output_l << o.str() << " : " << std::scientific - << std::setprecision(12); - for (int iorb = 0; iorb < OrbitalSize; iorb++) { - SPO_v_avg[iorb] += myfabs(SPO_v[iorb]); - SPO_g_avg[iorb] += myfabs(SPO_g[iorb]); - SPO_l_avg[iorb] += myfabs(SPO_l[iorb]); - output_v << SPO_v[iorb] << " "; - output_g << SPO_g[iorb][0] << " " << SPO_g[iorb][1] << " " - << SPO_g[iorb][2] << " "; - output_l << SPO_l[iorb] << " "; - } - output_v << std::endl; - output_g << std::endl; - output_l << std::endl; - } - // restore the whole target. 
- target.R = R_saved; - target.update(); - OutputReportMaker{SPO_v_avg, SPO_l_avg, SPO_g_avg, nknots}( - output_report); - output_v.close(); - output_g.close(); - output_l.close(); - output_report.close(); + // prepare a fake particle set + ValueVector SPO_v, SPO_l, SPO_v_avg, SPO_l_avg; + GradVector SPO_g, SPO_g_avg; + int OrbitalSize(sposet.size()); + SPO_v.resize(OrbitalSize); + SPO_g.resize(OrbitalSize); + SPO_l.resize(OrbitalSize); + SPO_v_avg.resize(OrbitalSize); + SPO_g_avg.resize(OrbitalSize); + SPO_l_avg.resize(OrbitalSize); + SPO_v_avg = 0.0; + SPO_g_avg = 0.0; + SPO_l_avg = 0.0; + double Delta = 1.0 / (nknots - 1); + int elec_count = target.R.size(); + auto R_saved = target.R; + typename ParticleSetT::SingleParticlePos zero_pos(0.0, 0.0, 0.0); + for (int icount = 0, ind = 0; icount < nknots; icount++, ind++) + { + if (ind == elec_count) + ind = 0; + target.R[ind][0] = (to_pos[0] - from_pos[0]) * Delta * icount + from_pos[0]; + target.R[ind][1] = (to_pos[1] - from_pos[1]) * Delta * icount + from_pos[1]; + target.R[ind][2] = (to_pos[2] - from_pos[2]) * Delta * icount + from_pos[2]; + target.makeMove(ind, zero_pos); + sposet.evaluateVGL(target, ind, SPO_v, SPO_g, SPO_l); + std::ostringstream o; + o << "x_y_z " << std::fixed << std::setprecision(7) << target.R[ind][0] << " " << target.R[ind][1] << " " + << target.R[ind][2]; + output_v << o.str() << " : " << std::scientific << std::setprecision(12); + output_g << o.str() << " : " << std::scientific << std::setprecision(12); + output_l << o.str() << " : " << std::scientific << std::setprecision(12); + for (int iorb = 0; iorb < OrbitalSize; iorb++) + { + SPO_v_avg[iorb] += myfabs(SPO_v[iorb]); + SPO_g_avg[iorb] += myfabs(SPO_g[iorb]); + SPO_l_avg[iorb] += myfabs(SPO_l[iorb]); + output_v << SPO_v[iorb] << " "; + output_g << SPO_g[iorb][0] << " " << SPO_g[iorb][1] << " " << SPO_g[iorb][2] << " "; + output_l << SPO_l[iorb] << " "; + } + output_v << std::endl; + output_g << std::endl; + output_l << std::endl; } + 
// restore the whole target. + target.R = R_saved; + target.update(); + OutputReportMaker{SPO_v_avg, SPO_l_avg, SPO_g_avg, nknots}(output_report); + output_v.close(); + output_g.close(); + output_l.close(); + output_report.close(); + } }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/SPOSetT.h b/src/QMCWaveFunctions/SPOSetT.h index 4900c0499fd..67167a54da2 100644 --- a/src/QMCWaveFunctions/SPOSetT.h +++ b/src/QMCWaveFunctions/SPOSetT.h @@ -37,7 +37,7 @@ namespace qmcplusplus { class ResourceCollection; -template +template class SPOSetT; namespace testing { @@ -53,217 +53,175 @@ OptVariablesTypeT>& getMyVars(SPOSetT> * a number of single-particle orbitals with capabilities of evaluating \f$ * \psi_j({\bf r}_i)\f$ */ -template +template class SPOSetT : public QMCTraits { public: - using ValueVector = typename OrbitalSetTraits::ValueVector; - using ValueMatrix = typename OrbitalSetTraits::ValueMatrix; - using GradVector = typename OrbitalSetTraits::GradVector; - using GradMatrix = typename OrbitalSetTraits::GradMatrix; - using GradType = TinyVector; - using HessVector = typename OrbitalSetTraits::HessVector; - using HessMatrix = typename OrbitalSetTraits::HessMatrix; - using GGGVector = typename OrbitalSetTraits::GradHessVector; - using GGGMatrix = typename OrbitalSetTraits::GradHessMatrix; - using SPOMap = - std::map>>; - using OffloadMWVGLArray = - Array>; // [VGL, walker, Orbs] - using OffloadMWVArray = - Array>; // [walker, Orbs] - using PosType = typename OrbitalSetTraits::PosType; - using RealType = typename OrbitalSetTraits::RealType; - using ComplexType = typename OrbitalSetTraits::ComplexType; - using ValueType = typename OrbitalSetTraits::ValueType; - using FullRealType = typename OrbitalSetTraits::RealType; - using FullValueType = typename OrbitalSetTraits::FullValueType; - ; - template - using OffloadMatrix = Matrix>; - - /** constructor */ - SPOSetT(const std::string& my_name); - - /** destructor + using ValueVector = typename 
OrbitalSetTraits::ValueVector; + using ValueMatrix = typename OrbitalSetTraits::ValueMatrix; + using GradVector = typename OrbitalSetTraits::GradVector; + using GradMatrix = typename OrbitalSetTraits::GradMatrix; + using GradType = TinyVector; + using HessVector = typename OrbitalSetTraits::HessVector; + using HessMatrix = typename OrbitalSetTraits::HessMatrix; + using GGGVector = typename OrbitalSetTraits::GradHessVector; + using GGGMatrix = typename OrbitalSetTraits::GradHessMatrix; + using SPOMap = std::map>>; + using OffloadMWVGLArray = Array>; // [VGL, walker, Orbs] + using OffloadMWVArray = Array>; // [walker, Orbs] + using PosType = typename OrbitalSetTraits::PosType; + using RealType = typename OrbitalSetTraits::RealType; + using ComplexType = typename OrbitalSetTraits::ComplexType; + using ValueType = typename OrbitalSetTraits::ValueType; + using FullRealType = typename OrbitalSetTraits::RealType; + using FullValueType = typename OrbitalSetTraits::FullValueType; + ; + template + using OffloadMatrix = Matrix>; + + /** constructor */ + SPOSetT(const std::string& my_name); + + /** destructor * * Derived class destructor needs to pay extra attention to freeing memory * shared among clones of SPOSet. 
*/ - virtual ~SPOSetT() = default; + virtual ~SPOSetT() = default; - /** return the size of the orbital set + /** return the size of the orbital set * Ye: this needs to be replaced by getOrbitalSetSize(); */ - inline int - size() const - { - return OrbitalSetSize; - } + inline int size() const { return OrbitalSetSize; } - /** print basic SPOSet information + /** print basic SPOSet information */ - void - basic_report(const std::string& pad = "") const; + void basic_report(const std::string& pad = "") const; - /** print SPOSet information + /** print SPOSet information */ - virtual void - report(const std::string& pad = "") const - { - basic_report(pad); - } + virtual void report(const std::string& pad = "") const { basic_report(pad); } - /** return the size of the orbitals + /** return the size of the orbitals */ - inline int - getOrbitalSetSize() const - { - return OrbitalSetSize; - } + inline int getOrbitalSetSize() const { return OrbitalSetSize; } - /// Query if this SPOSet is optimizable - virtual bool - isOptimizable() const - { - return false; - } + /// Query if this SPOSet is optimizable + virtual bool isOptimizable() const { return false; } - /** extract underlying OptimizableObject references + /** extract underlying OptimizableObject references * @param opt_obj_refs aggregated list of optimizable object references */ - virtual void - extractOptimizableObjectRefs(UniqueOptObjRefsT& opt_obj_refs); + virtual void extractOptimizableObjectRefs(UniqueOptObjRefsT& opt_obj_refs); - /** check out variational optimizable variables + /** check out variational optimizable variables * @param active a super set of optimizable variables */ - virtual void checkOutVariables(const OptVariablesTypeT& active); + virtual void checkOutVariables(const OptVariablesTypeT& active); - /// Query if this SPOSet uses OpenMP offload - virtual bool - isOMPoffload() const - { - return false; - } + /// Query if this SPOSet uses OpenMP offload + virtual bool isOMPoffload() const { return 
false; } - /** Query if this SPOSet has an explicit ion dependence. returns true if it + /** Query if this SPOSet has an explicit ion dependence. returns true if it * does. */ - virtual bool - hasIonDerivs() const - { - return false; - } - - /// check a few key parameters before putting the SPO into a determinant - virtual void - checkObject() const - { - } - - /// return true if this SPOSet can be wrappered by RotatedSPO - virtual bool - isRotationSupported() const - { - return false; - } - /// store parameters before getting destroyed by rotation. - virtual void - storeParamsBeforeRotation() - { - } - /// apply rotation to all the orbitals - virtual void - applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy = false); - - /// Parameter derivatives of the wavefunction and the Laplacian of the - /// wavefunction - virtual void evaluateDerivatives(ParticleSetT& P, + virtual bool hasIonDerivs() const { return false; } + + /// check a few key parameters before putting the SPO into a determinant + virtual void checkObject() const {} + + /// return true if this SPOSet can be wrappered by RotatedSPO + virtual bool isRotationSupported() const { return false; } + /// store parameters before getting destroyed by rotation. 
+ virtual void storeParamsBeforeRotation() {} + /// apply rotation to all the orbitals + virtual void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy = false); + + /// Parameter derivatives of the wavefunction and the Laplacian of the + /// wavefunction + virtual void evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const int& FirstIndex, + const int& LastIndex); + + /// Parameter derivatives of the wavefunction + virtual void evaluateDerivativesWF(ParticleSetT& P, const OptVariablesTypeT& optvars, Vector& dlogpsi, - Vector& dhpsioverpsi, - const int& FirstIndex, - const int& LastIndex); - - /// Parameter derivatives of the wavefunction - virtual void evaluateDerivativesWF(ParticleSetT& P, - const OptVariablesTypeT& optvars, - Vector& dlogpsi, - int FirstIndex, - int LastIndex); - - /** Evaluate the derivative of the optimized orbitals with respect to the + int FirstIndex, + int LastIndex); + + /** Evaluate the derivative of the optimized orbitals with respect to the + * parameters this is used only for MSD, to be refined for better serving + * both single and multi SD + */ + virtual void evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const T& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const GradMatrix& grads_up, + const GradMatrix& grads_dn, + const ValueMatrix& lapls_up, + const ValueMatrix& lapls_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const GradMatrix& B_grad, + const ValueMatrix& B_lapl, + const std::vector& detData_up, + const size_t N1, + const size_t N2, + const size_t NP1, + const size_t NP2, + const std::vector>& lookup_tbl); + + /** Evaluate the derivative of the optimized orbitals with 
respect to the * parameters this is used only for MSD, to be refined for better serving * both single and multi SD */ - virtual void evaluateDerivatives(ParticleSetT& P, + virtual void evaluateDerivativesWF(ParticleSetT& P, const OptVariablesTypeT& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const T& psiCurrent, + Vector& dlogpsi, + const FullValueType& psiCurrent, const std::vector& Coeff, const std::vector& C2node_up, const std::vector& C2node_dn, const ValueVector& detValues_up, const ValueVector& detValues_dn, - const GradMatrix& grads_up, - const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, const ValueMatrix& M_up, const ValueMatrix& M_dn, const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, - const ValueMatrix& B_lapl, const std::vector& detData_up, - const size_t N1, - const size_t N2, - const size_t NP1, - const size_t NP2, const std::vector>& lookup_tbl); - /** Evaluate the derivative of the optimized orbitals with respect to the - * parameters this is used only for MSD, to be refined for better serving - * both single and multi SD - */ - virtual void evaluateDerivativesWF(ParticleSetT& P, - const OptVariablesTypeT& optvars, - Vector& dlogpsi, - const FullValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl); - - /** set the OrbitalSetSize + /** set the OrbitalSetSize * @param norbs number of single-particle orbitals * Ye: I prefer to remove this interface in the future. SPOSet builders need * to handle the size correctly. It doesn't make sense allowing to set the * value at any place in the code. 
* @TODO make it purely virtual */ - virtual void - setOrbitalSetSize(int norbs){}; + virtual void setOrbitalSetSize(int norbs){}; - /** evaluate the values of this single-particle orbital set + /** evaluate the values of this single-particle orbital set * @param P current ParticleSet * @param iat active particle * @param psi values of the SPO * @TODO make it purely virtual */ - virtual void - evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi){}; + virtual void evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi){}; - /** evaluate determinant ratios for virtual moves, e.g., sphere move for + /** evaluate determinant ratios for virtual moves, e.g., sphere move for * nonlocalPP * @param VP virtual particle set * @param psi values of the SPO, used as a scratch space if needed @@ -271,22 +229,23 @@ class SPOSetT : public QMCTraits * particle moved virtually * @param ratios return determinant ratios */ - virtual void - evaluateDetRatios(const VirtualParticleSetT& VP, ValueVector& psi, - const ValueVector& psiinv, std::vector& ratios); - - /// Determinant ratios and parameter derivatives of the wavefunction for - /// virtual moves - virtual void evaluateDerivRatios(const VirtualParticleSetT& VP, - const OptVariablesTypeT& optvars, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios, - Matrix& dratios, - int FirstIndex, - int LastIndex); - - /** evaluate determinant ratios for virtual moves, e.g., sphere move for + virtual void evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios); + + /// Determinant ratios and parameter derivatives of the wavefunction for + /// virtual moves + virtual void evaluateDerivRatios(const VirtualParticleSetT& VP, + const OptVariablesTypeT& optvars, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios, + Matrix& dratios, + int FirstIndex, + int LastIndex); + + /** evaluate determinant ratios for virtual moves, e.g., 
sphere move for * nonlocalPP, of multiple walkers * @param spo_list the list of SPOSet pointers in a walker batch * @param vp_list a list of virtual particle sets in a walker batch @@ -296,14 +255,13 @@ class SPOSetT : public QMCTraits * matrix corresponding to the particles moved virtually * @param ratios_list a list of returning determinant ratios */ - virtual void - mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const; + virtual void mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const; - /** evaluate the values, gradients and laplacians of this single-particle + /** evaluate the values, gradients and laplacians of this single-particle * orbital set * @param P current ParticleSet * @param iat active particle @@ -312,11 +270,9 @@ class SPOSetT : public QMCTraits * @param d2psi laplacians of the SPO * @TODO make this purely virtual */ - virtual void - evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi){}; + virtual void evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi){}; - /** evaluate the values, gradients and laplacians and spin gradient of this + /** evaluate the values, gradients and laplacians and spin gradient of this * single-particle orbital set * @param P current ParticleSet * @param iat active particle @@ -325,23 +281,26 @@ class SPOSetT : public QMCTraits * @param d2psi laplacians of the SPO * @param dspin spin gradients of the SPO */ - virtual void - evaluateVGL_spin(const ParticleSetT& P, int iat, ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi, ValueVector& dspin); + virtual void evaluateVGL_spin(const ParticleSetT& P, + int iat, + ValueVector& psi, + 
GradVector& dpsi, + ValueVector& d2psi, + ValueVector& dspin); - /** evaluate the values this single-particle orbital sets of multiple + /** evaluate the values this single-particle orbital sets of multiple * walkers * @param spo_list the list of SPOSet pointers in a walker batch * @param P_list the list of ParticleSet pointers in a walker batch * @param iat active particle * @param psi_v_list the list of value vector pointers in a walker batch */ - virtual void - mw_evaluateValue(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const RefVector& psi_v_list) const; + virtual void mw_evaluateValue(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list) const; - /** evaluate the values, gradients and laplacians of this single-particle + /** evaluate the values, gradients and laplacians of this single-particle * orbital sets of multiple walkers * @param spo_list the list of SPOSet pointers in a walker batch * @param P_list the list of ParticleSet pointers in a walker batch @@ -351,14 +310,14 @@ class SPOSetT : public QMCTraits * @param d2psi_v_list the list of laplacian vector pointers in a walker * batch */ - virtual void - mw_evaluateVGL(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const; + virtual void mw_evaluateVGL(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const; - /** evaluate the values, gradients and laplacians and spin gradient of this + /** evaluate the values, gradients and laplacians and spin gradient of this * single-particle orbital sets of multiple walkers * @param spo_list the list of SPOSet pointers in a walker batch * @param P_list the list of ParticleSet pointers in a walker batch 
@@ -370,15 +329,15 @@ class SPOSetT : public QMCTraits * @param mw_dspin is a dual matrix of spin gradients [nw][norb] * Note that the device side of mw_dspin is up to date */ - virtual void - mw_evaluateVGLWithSpin(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list, - OffloadMatrix& mw_dspin) const; + virtual void mw_evaluateVGLWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list, + OffloadMatrix& mw_dspin) const; - /** evaluate the values, gradients and laplacians of this single-particle + /** evaluate the values, gradients and laplacians of this single-particle * orbital sets and determinant ratio and grads of multiple walkers. Device * data of phi_vgl_v must be up-to-date upon return * @param spo_list the list of SPOSet pointers in a walker batch @@ -388,15 +347,15 @@ class SPOSetT : public QMCTraits * walkers * @param psi_ratio_grads_v determinant ratio and grads of all the walkers */ - virtual void - mw_evaluateVGLandDetRatioGrads( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, - std::vector& grads) const; + virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const; - /** evaluate the values, gradients and laplacians of this single-particle + /** evaluate the values, gradients and laplacians of this single-particle * orbital sets and determinant ratio and grads of multiple walkers. Device * data of phi_vgl_v must be up-to-date upon return. 
Includes spin gradients * @param spo_list the list of SPOSet pointers in a walker batch @@ -408,15 +367,16 @@ class SPOSetT : public QMCTraits * @param grads, spatial gradients of all walkers * @param spingrads, spin gradients of all walkers */ - virtual void - mw_evaluateVGLandDetRatioGradsWithSpin( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, - std::vector& grads, std::vector& spingrads) const; + virtual void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads, + std::vector& spingrads) const; - /** evaluate the values, gradients and hessians of this single-particle + /** evaluate the values, gradients and hessians of this single-particle * orbital set * @param P current ParticleSet * @param iat active particle @@ -424,11 +384,13 @@ class SPOSetT : public QMCTraits * @param dpsi gradients of the SPO * @param grad_grad_psi hessians of the SPO */ - virtual void - evaluateVGH(const ParticleSetT& P, int iat, ValueVector& psi, - GradVector& dpsi, HessVector& grad_grad_psi); + virtual void evaluateVGH(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi); - /** evaluate the values, gradients, hessians, and grad hessians of this + /** evaluate the values, gradients, hessians, and grad hessians of this * single-particle orbital set * @param P current ParticleSet * @param iat active particle @@ -437,31 +399,29 @@ class SPOSetT : public QMCTraits * @param grad_grad_psi hessians of the SPO * @param grad_grad_grad_psi grad hessians of the SPO */ - virtual void - evaluateVGHGH(const ParticleSetT& P, int iat, ValueVector& psi, - GradVector& dpsi, HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi); + 
virtual void evaluateVGHGH(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi); - /** evaluate the values of this single-particle orbital set + /** evaluate the values of this single-particle orbital set * @param P current ParticleSet * @param iat active particle * @param psi values of the SPO */ - virtual void - evaluate_spin( - const ParticleSetT& P, int iat, ValueVector& psi, ValueVector& dpsi); + virtual void evaluate_spin(const ParticleSetT& P, int iat, ValueVector& psi, ValueVector& dpsi); - /** evaluate the third derivatives of this single-particle orbital set + /** evaluate the third derivatives of this single-particle orbital set * @param P current ParticleSet * @param first first particle * @param last last particle * @param grad_grad_grad_logdet third derivatives of the SPO */ - virtual void - evaluateThirdDeriv(const ParticleSetT& P, int first, int last, - GGGMatrix& grad_grad_grad_logdet); + virtual void evaluateThirdDeriv(const ParticleSetT& P, int first, int last, GGGMatrix& grad_grad_grad_logdet); - /** evaluate the values, gradients and laplacians of this single-particle + /** evaluate the values, gradients and laplacians of this single-particle * orbital for [first,last) particles * @param[in] P current ParticleSet * @param[in] first starting index of the particles @@ -471,11 +431,14 @@ class SPOSetT : public QMCTraits * @param[out] d2logdet laplacians * @TODO make this pure virtual */ - virtual void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet){}; + virtual void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet){}; - /** evaluate the values, gradients and laplacians of this single-particle + /** evaluate the values, gradients and laplacians of this single-particle * orbital for [first,last) 
particles, including the spin gradient * @param P current ParticleSet * @param first starting index of the particles @@ -488,19 +451,23 @@ class SPOSetT : public QMCTraits * default implementation will abort for all SPOSets except SpinorSet * */ - virtual void - evaluate_notranspose_spin(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet, - ValueMatrix& dspinlogdet); - - virtual void - mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int first, int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const; - - /** evaluate the values, gradients and hessians of this single-particle + virtual void evaluate_notranspose_spin(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet, + ValueMatrix& dspinlogdet); + + virtual void mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int first, + int last, + const RefVector& logdet_list, + const RefVector& dlogdet_list, + const RefVector& d2logdet_list) const; + + /** evaluate the values, gradients and hessians of this single-particle * orbital for [first,last) particles * @param P current ParticleSet * @param first starting index of the particles @@ -510,11 +477,14 @@ class SPOSetT : public QMCTraits * @param grad_grad_logdet hessians * */ - virtual void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet); + virtual void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet); - /** evaluate the values, gradients, hessians and third derivatives of this + /** evaluate the values, gradients, hessians and third derivatives of this * single-particle orbital for [first,last) 
particles * @param P current ParticleSet * @param first starting index of the particles @@ -525,12 +495,15 @@ class SPOSetT : public QMCTraits * @param grad_grad_grad_logdet third derivatives * */ - virtual void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet); + virtual void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet, + GGGMatrix& grad_grad_grad_logdet); - /** evaluate the gradients of this single-particle orbital + /** evaluate the gradients of this single-particle orbital * for [first,last) target particles with respect to the given source * particle * @param P current ParticleSet @@ -540,11 +513,14 @@ class SPOSetT : public QMCTraits * @param gradphi gradients * */ - virtual void - evaluateGradSource(const ParticleSetT& P, int first, int last, - const ParticleSetT& source, int iat_src, GradMatrix& gradphi); + virtual void evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& gradphi); - /** evaluate the gradients of values, gradients, laplacians of this + /** evaluate the gradients of values, gradients, laplacians of this * single-particle orbital for [first,last) target particles with respect to * the given source particle * @param P current ParticleSet @@ -556,12 +532,16 @@ class SPOSetT : public QMCTraits * @param grad_lapl_phi gradients of laplacians * */ - virtual void - evaluateGradSource(const ParticleSetT& P, int first, int last, - const ParticleSetT& source, int iat_src, GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi); + virtual void evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& grad_phi, + HessMatrix& grad_grad_phi, + GradMatrix& grad_lapl_phi); 
- /** @brief Returns a row of d/dR_iat phi_j(r) evaluated at position r. + /** @brief Returns a row of d/dR_iat phi_j(r) evaluated at position r. * * @param[in] P particle set. * @param[in] iel The electron at which to evaluate phi(r_iel) @@ -570,96 +550,67 @@ class SPOSetT : public QMCTraits * @param[in,out] gradphi Vector of d/dR_iat phi_j(r). * @return Void */ - virtual void - evaluateGradSourceRow(const ParticleSetT& P, int iel, - const ParticleSetT& source, int iat_src, GradVector& gradphi); + virtual void evaluateGradSourceRow(const ParticleSetT& P, + int iel, + const ParticleSetT& source, + int iat_src, + GradVector& gradphi); - /** access the k point related to the given orbital */ - virtual PosType - get_k(int orb) - { - return PosType(); - } + /** access the k point related to the given orbital */ + virtual PosType get_k(int orb) { return PosType(); } - /** initialize a shared resource and hand it to collection + /** initialize a shared resource and hand it to collection */ - virtual void - createResource(ResourceCollection& collection) const - { - } + virtual void createResource(ResourceCollection& collection) const {} - /** acquire a shared resource from collection + /** acquire a shared resource from collection */ - virtual void - acquireResource(ResourceCollection& collection, - const RefVectorWithLeader>& spo_list) const - { - } + virtual void acquireResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const {} - /** return a shared resource to collection + /** return a shared resource to collection */ - virtual void - releaseResource(ResourceCollection& collection, - const RefVectorWithLeader>& spo_list) const - { - } + virtual void releaseResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const {} - /** make a clone of itself + /** make a clone of itself * every derived class must implement this to have threading working * correctly. 
*/ - [[noreturn]] virtual std::unique_ptr> - makeClone() const; + [[noreturn]] virtual std::unique_ptr> makeClone() const; - /** Used only by cusp correction in AOS LCAO. + /** Used only by cusp correction in AOS LCAO. * Ye: the SoA LCAO moves all this responsibility to the builder. * This interface should be removed with AoS. */ - virtual bool - transformSPOSet() - { - return true; - } + virtual bool transformSPOSet() { return true; } - /** finalize the construction of SPOSet + /** finalize the construction of SPOSet * * for example, classes serving accelerators may need to transfer data from * host to device after the host side objects are built. */ - virtual void - finalizeConstruction() - { - } - - /// return object name - const std::string& - getName() const - { - return my_name_; - } - - /// @TODO make this purely virutal return class name - virtual std::string - getClassName() const - { - return ""; - }; + virtual void finalizeConstruction() {} + + /// return object name + const std::string& getName() const { return my_name_; } + + /// @TODO make this purely virutal return class name + virtual std::string getClassName() const { return ""; }; protected: - /// name of the object, unique identifier - const std::string my_name_; - /// number of Single-particle orbitals - IndexType OrbitalSetSize; - /// Optimizable variables - OptVariablesTypeT myVars; - - friend OptVariablesTypeT& testing::getMyVars(SPOSetT& spo); - friend OptVariablesTypeT& testing::getMyVars(SPOSetT& spo); - friend OptVariablesTypeT>& testing::getMyVars(SPOSetT>& spo); - friend OptVariablesTypeT>& testing::getMyVars(SPOSetT>& spo); + /// name of the object, unique identifier + const std::string my_name_; + /// number of Single-particle orbitals + IndexType OrbitalSetSize; + /// Optimizable variables + OptVariablesTypeT myVars; + + friend OptVariablesTypeT& testing::getMyVars(SPOSetT& spo); + friend OptVariablesTypeT& testing::getMyVars(SPOSetT& spo); + friend OptVariablesTypeT>& 
testing::getMyVars(SPOSetT>& spo); + friend OptVariablesTypeT>& testing::getMyVars(SPOSetT>& spo); }; -template +template using SPOSetTPtr = SPOSetT*; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/SpinorSetT.cpp b/src/QMCWaveFunctions/SpinorSetT.cpp index bac10a6ec8a..239d9939258 100644 --- a/src/QMCWaveFunctions/SpinorSetT.cpp +++ b/src/QMCWaveFunctions/SpinorSetT.cpp @@ -20,602 +20,573 @@ namespace qmcplusplus { -template +template struct SpinorSetT::SpinorSetMultiWalkerResource : public Resource { - SpinorSetMultiWalkerResource() : Resource("SpinorSet") - { - } - SpinorSetMultiWalkerResource(const SpinorSetMultiWalkerResource&) : - SpinorSetMultiWalkerResource() - { - } - std::unique_ptr - makeClone() const override - { - return std::make_unique(*this); - } - OffloadMWVGLArray up_phi_vgl_v, dn_phi_vgl_v; - std::vector up_ratios, dn_ratios; - std::vector up_grads, dn_grads; - std::vector spins; + SpinorSetMultiWalkerResource() : Resource("SpinorSet") {} + SpinorSetMultiWalkerResource(const SpinorSetMultiWalkerResource&) : SpinorSetMultiWalkerResource() {} + std::unique_ptr makeClone() const override { return std::make_unique(*this); } + OffloadMWVGLArray up_phi_vgl_v, dn_phi_vgl_v; + std::vector up_ratios, dn_ratios; + std::vector up_grads, dn_grads; + std::vector spins; }; -template -SpinorSetT::SpinorSetT(const std::string& my_name) : - SPOSetT(my_name), - spo_up(nullptr), - spo_dn(nullptr) -{ -} +template +SpinorSetT::SpinorSetT(const std::string& my_name) : SPOSetT(my_name), spo_up(nullptr), spo_dn(nullptr) +{} -template +template SpinorSetT::~SpinorSetT() = default; -template -void -SpinorSetT::set_spos( - std::unique_ptr>&& up, std::unique_ptr>&& dn) +template +void SpinorSetT::set_spos(std::unique_ptr>&& up, std::unique_ptr>&& dn) { - // Sanity check for input SPO's. They need to be the same size or - IndexType spo_size_up = up->getOrbitalSetSize(); - IndexType spo_size_down = dn->getOrbitalSetSize(); + // Sanity check for input SPO's. 
They need to be the same size or + IndexType spo_size_up = up->getOrbitalSetSize(); + IndexType spo_size_down = dn->getOrbitalSetSize(); - if (spo_size_up != spo_size_down) - throw std::runtime_error("SpinorSet::set_spos(...): up and down SPO " - "components have different sizes."); + if (spo_size_up != spo_size_down) + throw std::runtime_error("SpinorSet::set_spos(...): up and down SPO " + "components have different sizes."); - setOrbitalSetSize(spo_size_up); + setOrbitalSetSize(spo_size_up); - spo_up = std::move(up); - spo_dn = std::move(dn); + spo_up = std::move(up); + spo_dn = std::move(dn); - psi_work_up.resize(this->OrbitalSetSize); - psi_work_down.resize(this->OrbitalSetSize); + psi_work_up.resize(this->OrbitalSetSize); + psi_work_down.resize(this->OrbitalSetSize); - dpsi_work_up.resize(this->OrbitalSetSize); - dpsi_work_down.resize(this->OrbitalSetSize); + dpsi_work_up.resize(this->OrbitalSetSize); + dpsi_work_down.resize(this->OrbitalSetSize); - d2psi_work_up.resize(this->OrbitalSetSize); - d2psi_work_down.resize(this->OrbitalSetSize); + d2psi_work_up.resize(this->OrbitalSetSize); + d2psi_work_down.resize(this->OrbitalSetSize); } -template -void -SpinorSetT::setOrbitalSetSize(int norbs) +template +void SpinorSetT::setOrbitalSetSize(int norbs) { - this->OrbitalSetSize = norbs; + this->OrbitalSetSize = norbs; }; -template -void -SpinorSetT::evaluateValue( - const ParticleSetT& P, int iat, ValueVector& psi) +template +void SpinorSetT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) { - psi_work_up = 0.0; - psi_work_down = 0.0; + psi_work_up = 0.0; + psi_work_down = 0.0; - spo_up->evaluateValue(P, iat, psi_work_up); - spo_dn->evaluateValue(P, iat, psi_work_down); + spo_up->evaluateValue(P, iat, psi_work_up); + spo_dn->evaluateValue(P, iat, psi_work_down); - typename ParticleSetT::Scalar_t s = P.activeSpin(iat); + typename ParticleSetT::Scalar_t s = P.activeSpin(iat); - RealType coss(0.0), sins(0.0); + RealType coss(0.0), sins(0.0); - coss = 
std::cos(s); - sins = std::sin(s); + coss = std::cos(s); + sins = std::sin(s); - // This is only supported in the complex build, so T is some complex number - // depending on the precision. - T eis(coss, sins); - T emis(coss, -sins); + // This is only supported in the complex build, so T is some complex number + // depending on the precision. + T eis(coss, sins); + T emis(coss, -sins); - psi = eis * psi_work_up + emis * psi_work_down; + psi = eis * psi_work_up + emis * psi_work_down; } -template -void -SpinorSetT::evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi) +template +void SpinorSetT::evaluateVGL(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) { - psi_work_up = 0.0; - psi_work_down = 0.0; - dpsi_work_up = 0.0; - dpsi_work_down = 0.0; - d2psi_work_up = 0.0; - d2psi_work_down = 0.0; + psi_work_up = 0.0; + psi_work_down = 0.0; + dpsi_work_up = 0.0; + dpsi_work_down = 0.0; + d2psi_work_up = 0.0; + d2psi_work_down = 0.0; - spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up); - spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down); + spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up); + spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down); - typename ParticleSetT::Scalar_t s = P.activeSpin(iat); + typename ParticleSetT::Scalar_t s = P.activeSpin(iat); - RealType coss(0.0), sins(0.0); + RealType coss(0.0), sins(0.0); - coss = std::cos(s); - sins = std::sin(s); + coss = std::cos(s); + sins = std::sin(s); - T eis(coss, sins); - T emis(coss, -sins); + T eis(coss, sins); + T emis(coss, -sins); - psi = eis * psi_work_up + emis * psi_work_down; - dpsi = eis * dpsi_work_up + emis * dpsi_work_down; - d2psi = eis * d2psi_work_up + emis * d2psi_work_down; + psi = eis * psi_work_up + emis * psi_work_down; + dpsi = eis * dpsi_work_up + emis * dpsi_work_down; + d2psi = eis * d2psi_work_up + emis * 
d2psi_work_down; } -template -void -SpinorSetT::evaluateVGL_spin(const ParticleSetT& P, int iat, - ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, ValueVector& dspin) +template +void SpinorSetT::evaluateVGL_spin(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi, + ValueVector& dspin) { - psi_work_up = 0.0; - psi_work_down = 0.0; - dpsi_work_up = 0.0; - dpsi_work_down = 0.0; - d2psi_work_up = 0.0; - d2psi_work_down = 0.0; + psi_work_up = 0.0; + psi_work_down = 0.0; + dpsi_work_up = 0.0; + dpsi_work_down = 0.0; + d2psi_work_up = 0.0; + d2psi_work_down = 0.0; - spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up); - spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down); + spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up); + spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down); - typename ParticleSetT::Scalar_t s = P.activeSpin(iat); + typename ParticleSetT::Scalar_t s = P.activeSpin(iat); - RealType coss(0.0), sins(0.0); + RealType coss(0.0), sins(0.0); - coss = std::cos(s); - sins = std::sin(s); + coss = std::cos(s); + sins = std::sin(s); - T eis(coss, sins); - T emis(coss, -sins); - T eye(0, 1.0); + T eis(coss, sins); + T emis(coss, -sins); + T eye(0, 1.0); - psi = eis * psi_work_up + emis * psi_work_down; - dpsi = eis * dpsi_work_up + emis * dpsi_work_down; - d2psi = eis * d2psi_work_up + emis * d2psi_work_down; - dspin = eye * (eis * psi_work_up - emis * psi_work_down); + psi = eis * psi_work_up + emis * psi_work_down; + dpsi = eis * dpsi_work_up + emis * dpsi_work_down; + d2psi = eis * d2psi_work_up + emis * d2psi_work_down; + dspin = eye * (eis * psi_work_up - emis * psi_work_down); } -template -void -SpinorSetT::mw_evaluateVGLWithSpin( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list, - 
OffloadMatrix& mw_dspin) const +template +void SpinorSetT::mw_evaluateVGLWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list, + OffloadMatrix& mw_dspin) const { - auto& spo_leader = spo_list.template getCastedLeader>(); - auto& P_leader = P_list.getLeader(); - assert(this == &spo_leader); - - IndexType nw = spo_list.size(); - auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); - auto& up_spo_leader = up_spo_list.getLeader(); - auto& dn_spo_leader = dn_spo_list.getLeader(); - - RefVector up_psi_v_list, dn_psi_v_list; - RefVector up_dpsi_v_list, dn_dpsi_v_list; - RefVector up_d2psi_v_list, dn_d2psi_v_list; - for (int iw = 0; iw < nw; iw++) { - auto& spo = spo_list.template getCastedElement>(iw); - up_psi_v_list.push_back(spo.psi_work_up); - dn_psi_v_list.push_back(spo.psi_work_down); - up_dpsi_v_list.push_back(spo.dpsi_work_up); - dn_dpsi_v_list.push_back(spo.dpsi_work_down); - up_d2psi_v_list.push_back(spo.d2psi_work_up); - dn_d2psi_v_list.push_back(spo.d2psi_work_down); - } + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& P_leader = P_list.getLeader(); + assert(this == &spo_leader); + + IndexType nw = spo_list.size(); + auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); + auto& up_spo_leader = up_spo_list.getLeader(); + auto& dn_spo_leader = dn_spo_list.getLeader(); + + RefVector up_psi_v_list, dn_psi_v_list; + RefVector up_dpsi_v_list, dn_dpsi_v_list; + RefVector up_d2psi_v_list, dn_d2psi_v_list; + for (int iw = 0; iw < nw; iw++) + { + auto& spo = spo_list.template getCastedElement>(iw); + up_psi_v_list.push_back(spo.psi_work_up); + dn_psi_v_list.push_back(spo.psi_work_down); + up_dpsi_v_list.push_back(spo.dpsi_work_up); + dn_dpsi_v_list.push_back(spo.dpsi_work_down); + up_d2psi_v_list.push_back(spo.d2psi_work_up); + dn_d2psi_v_list.push_back(spo.d2psi_work_down); + } 
+ + up_spo_leader.mw_evaluateVGL(up_spo_list, P_list, iat, up_psi_v_list, up_dpsi_v_list, up_d2psi_v_list); + dn_spo_leader.mw_evaluateVGL(dn_spo_list, P_list, iat, dn_psi_v_list, dn_dpsi_v_list, dn_d2psi_v_list); + + for (int iw = 0; iw < nw; iw++) + { + typename ParticleSetT::Scalar_t s = P_list[iw].activeSpin(iat); + RealType coss = std::cos(s); + RealType sins = std::sin(s); - up_spo_leader.mw_evaluateVGL(up_spo_list, P_list, iat, up_psi_v_list, - up_dpsi_v_list, up_d2psi_v_list); - dn_spo_leader.mw_evaluateVGL(dn_spo_list, P_list, iat, dn_psi_v_list, - dn_dpsi_v_list, dn_d2psi_v_list); - - for (int iw = 0; iw < nw; iw++) { - typename ParticleSetT::Scalar_t s = P_list[iw].activeSpin(iat); - RealType coss = std::cos(s); - RealType sins = std::sin(s); - - T eis(coss, sins); - T emis(coss, -sins); - T eye(0, 1.0); - - psi_v_list[iw].get() = - eis * up_psi_v_list[iw].get() + emis * dn_psi_v_list[iw].get(); - dpsi_v_list[iw].get() = - eis * up_dpsi_v_list[iw].get() + emis * dn_dpsi_v_list[iw].get(); - d2psi_v_list[iw].get() = - eis * up_d2psi_v_list[iw].get() + emis * dn_d2psi_v_list[iw].get(); - for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++) - mw_dspin(iw, iorb) = eye * - (eis * (up_psi_v_list[iw].get())[iorb] - - emis * (dn_psi_v_list[iw].get())[iorb]); - } - // Data above is all on host, but since mw_dspin is DualMatrix we need to - // sync the host and device - mw_dspin.updateTo(); + T eis(coss, sins); + T emis(coss, -sins); + T eye(0, 1.0); + + psi_v_list[iw].get() = eis * up_psi_v_list[iw].get() + emis * dn_psi_v_list[iw].get(); + dpsi_v_list[iw].get() = eis * up_dpsi_v_list[iw].get() + emis * dn_dpsi_v_list[iw].get(); + d2psi_v_list[iw].get() = eis * up_d2psi_v_list[iw].get() + emis * dn_d2psi_v_list[iw].get(); + for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++) + mw_dspin(iw, iorb) = eye * (eis * (up_psi_v_list[iw].get())[iorb] - emis * (dn_psi_v_list[iw].get())[iorb]); + } + // Data above is all on host, but since mw_dspin is DualMatrix we 
need to + // sync the host and device + mw_dspin.updateTo(); } -template -void -SpinorSetT::mw_evaluateVGLandDetRatioGradsWithSpin( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const std::vector& invRow_ptr_list, OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, std::vector& grads, - std::vector& spingrads) const +template +void SpinorSetT::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads, + std::vector& spingrads) const { - auto& spo_leader = spo_list.template getCastedLeader>(); - auto& P_leader = P_list.getLeader(); - assert(this == &spo_leader); - assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL); - assert(phi_vgl_v.size(1) == spo_list.size()); - const size_t nw = spo_list.size(); - const size_t norb_requested = phi_vgl_v.size(2); - - auto& mw_res = spo_leader.mw_res_handle_.getResource(); - auto& up_phi_vgl_v = mw_res.up_phi_vgl_v; - auto& dn_phi_vgl_v = mw_res.dn_phi_vgl_v; - auto& up_ratios = mw_res.up_ratios; - auto& dn_ratios = mw_res.dn_ratios; - auto& up_grads = mw_res.up_grads; - auto& dn_grads = mw_res.dn_grads; - auto& spins = mw_res.spins; - - up_phi_vgl_v.resize(QMCTraits::DIM_VGL, nw, norb_requested); - dn_phi_vgl_v.resize(QMCTraits::DIM_VGL, nw, norb_requested); - up_ratios.resize(nw); - dn_ratios.resize(nw); - up_grads.resize(nw); - dn_grads.resize(nw); - spins.resize(nw); - - auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); - auto& up_spo_leader = up_spo_list.getLeader(); - auto& dn_spo_leader = dn_spo_list.getLeader(); - - up_spo_leader.mw_evaluateVGLandDetRatioGrads(up_spo_list, P_list, iat, - invRow_ptr_list, up_phi_vgl_v, up_ratios, up_grads); - dn_spo_leader.mw_evaluateVGLandDetRatioGrads(dn_spo_list, P_list, iat, - invRow_ptr_list, dn_phi_vgl_v, dn_ratios, dn_grads); - for (int iw = 
0; iw < nw; iw++) { - typename ParticleSetT::Scalar_t s = P_list[iw].activeSpin(iat); - spins[iw] = s; - RealType coss = std::cos(s); - RealType sins = std::sin(s); - - T eis(coss, sins); - T emis(coss, -sins); - T eye(0, 1.0); - - ratios[iw] = eis * up_ratios[iw] + emis * dn_ratios[iw]; - grads[iw] = (eis * up_grads[iw] * up_ratios[iw] + - emis * dn_grads[iw] * dn_ratios[iw]) / - ratios[iw]; - spingrads[iw] = - eye * (eis * up_ratios[iw] - emis * dn_ratios[iw]) / ratios[iw]; - } + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& P_leader = P_list.getLeader(); + assert(this == &spo_leader); + assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL); + assert(phi_vgl_v.size(1) == spo_list.size()); + const size_t nw = spo_list.size(); + const size_t norb_requested = phi_vgl_v.size(2); + + auto& mw_res = spo_leader.mw_res_handle_.getResource(); + auto& up_phi_vgl_v = mw_res.up_phi_vgl_v; + auto& dn_phi_vgl_v = mw_res.dn_phi_vgl_v; + auto& up_ratios = mw_res.up_ratios; + auto& dn_ratios = mw_res.dn_ratios; + auto& up_grads = mw_res.up_grads; + auto& dn_grads = mw_res.dn_grads; + auto& spins = mw_res.spins; + + up_phi_vgl_v.resize(QMCTraits::DIM_VGL, nw, norb_requested); + dn_phi_vgl_v.resize(QMCTraits::DIM_VGL, nw, norb_requested); + up_ratios.resize(nw); + dn_ratios.resize(nw); + up_grads.resize(nw); + dn_grads.resize(nw); + spins.resize(nw); + + auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); + auto& up_spo_leader = up_spo_list.getLeader(); + auto& dn_spo_leader = dn_spo_list.getLeader(); + + up_spo_leader.mw_evaluateVGLandDetRatioGrads(up_spo_list, P_list, iat, invRow_ptr_list, up_phi_vgl_v, up_ratios, + up_grads); + dn_spo_leader.mw_evaluateVGLandDetRatioGrads(dn_spo_list, P_list, iat, invRow_ptr_list, dn_phi_vgl_v, dn_ratios, + dn_grads); + for (int iw = 0; iw < nw; iw++) + { + typename ParticleSetT::Scalar_t s = P_list[iw].activeSpin(iat); + spins[iw] = s; + RealType coss = std::cos(s); + RealType sins = std::sin(s); - auto* 
spins_ptr = spins.data(); - // This data lives on the device - auto* phi_vgl_ptr = phi_vgl_v.data(); - auto* up_phi_vgl_ptr = up_phi_vgl_v.data(); - auto* dn_phi_vgl_ptr = dn_phi_vgl_v.data(); - PRAGMA_OFFLOAD("omp target teams distribute map(to:spins_ptr[0:nw])") - for (int iw = 0; iw < nw; iw++) { - RealType c, s; - omptarget::sincos(spins_ptr[iw], &s, &c); - T eis(c, s), emis(c, -s); - PRAGMA_OFFLOAD("omp parallel for collapse(2)") - for (int idim = 0; idim < QMCTraits::DIM_VGL; idim++) - for (int iorb = 0; iorb < norb_requested; iorb++) { - auto offset = - idim * nw * norb_requested + iw * norb_requested + iorb; - phi_vgl_ptr[offset] = eis * up_phi_vgl_ptr[offset] + - emis * dn_phi_vgl_ptr[offset]; - } - } + T eis(coss, sins); + T emis(coss, -sins); + T eye(0, 1.0); + + ratios[iw] = eis * up_ratios[iw] + emis * dn_ratios[iw]; + grads[iw] = (eis * up_grads[iw] * up_ratios[iw] + emis * dn_grads[iw] * dn_ratios[iw]) / ratios[iw]; + spingrads[iw] = eye * (eis * up_ratios[iw] - emis * dn_ratios[iw]) / ratios[iw]; + } + + auto* spins_ptr = spins.data(); + // This data lives on the device + auto* phi_vgl_ptr = phi_vgl_v.data(); + auto* up_phi_vgl_ptr = up_phi_vgl_v.data(); + auto* dn_phi_vgl_ptr = dn_phi_vgl_v.data(); + PRAGMA_OFFLOAD("omp target teams distribute map(to:spins_ptr[0:nw])") + for (int iw = 0; iw < nw; iw++) + { + RealType c, s; + omptarget::sincos(spins_ptr[iw], &s, &c); + T eis(c, s), emis(c, -s); + PRAGMA_OFFLOAD("omp parallel for collapse(2)") + for (int idim = 0; idim < QMCTraits::DIM_VGL; idim++) + for (int iorb = 0; iorb < norb_requested; iorb++) + { + auto offset = idim * nw * norb_requested + iw * norb_requested + iorb; + phi_vgl_ptr[offset] = eis * up_phi_vgl_ptr[offset] + emis * dn_phi_vgl_ptr[offset]; + } + } } -template -void -SpinorSetT::evaluate_notranspose(const ParticleSetT& P, int first, - int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) +template +void SpinorSetT::evaluate_notranspose(const ParticleSetT& P, + 
int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) { - IndexType nelec = P.getTotalNum(); + IndexType nelec = P.getTotalNum(); - logpsi_work_up.resize(nelec, this->OrbitalSetSize); - logpsi_work_down.resize(nelec, this->OrbitalSetSize); + logpsi_work_up.resize(nelec, this->OrbitalSetSize); + logpsi_work_down.resize(nelec, this->OrbitalSetSize); - dlogpsi_work_up.resize(nelec, this->OrbitalSetSize); - dlogpsi_work_down.resize(nelec, this->OrbitalSetSize); + dlogpsi_work_up.resize(nelec, this->OrbitalSetSize); + dlogpsi_work_down.resize(nelec, this->OrbitalSetSize); - d2logpsi_work_up.resize(nelec, this->OrbitalSetSize); - d2logpsi_work_down.resize(nelec, this->OrbitalSetSize); + d2logpsi_work_up.resize(nelec, this->OrbitalSetSize); + d2logpsi_work_down.resize(nelec, this->OrbitalSetSize); - spo_up->evaluate_notranspose( - P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up); - spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down, - dlogpsi_work_down, d2logpsi_work_down); + spo_up->evaluate_notranspose(P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up); + spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down, dlogpsi_work_down, d2logpsi_work_down); - for (int iat = 0; iat < nelec; iat++) { - typename ParticleSetT::Scalar_t s = P.activeSpin(iat); + for (int iat = 0; iat < nelec; iat++) + { + typename ParticleSetT::Scalar_t s = P.activeSpin(iat); - RealType coss(0.0), sins(0.0); + RealType coss(0.0), sins(0.0); - coss = std::cos(s); - sins = std::sin(s); + coss = std::cos(s); + sins = std::sin(s); - T eis(coss, sins); - T emis(coss, -sins); + T eis(coss, sins); + T emis(coss, -sins); - for (int no = 0; no < this->OrbitalSetSize; no++) { - logdet(iat, no) = eis * logpsi_work_up(iat, no) + - emis * logpsi_work_down(iat, no); - dlogdet(iat, no) = eis * dlogpsi_work_up(iat, no) + - emis * dlogpsi_work_down(iat, no); - d2logdet(iat, no) = eis * d2logpsi_work_up(iat, no) + - emis * 
d2logpsi_work_down(iat, no); - } + for (int no = 0; no < this->OrbitalSetSize; no++) + { + logdet(iat, no) = eis * logpsi_work_up(iat, no) + emis * logpsi_work_down(iat, no); + dlogdet(iat, no) = eis * dlogpsi_work_up(iat, no) + emis * dlogpsi_work_down(iat, no); + d2logdet(iat, no) = eis * d2logpsi_work_up(iat, no) + emis * d2logpsi_work_down(iat, no); } + } } -template -void -SpinorSetT::mw_evaluate_notranspose( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int first, int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const +template +void SpinorSetT::mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int first, + int last, + const RefVector& logdet_list, + const RefVector& dlogdet_list, + const RefVector& d2logdet_list) const { - auto& spo_leader = spo_list.template getCastedLeader>(); - auto& P_leader = P_list.getLeader(); - assert(this == &spo_leader); - - IndexType nw = spo_list.size(); - IndexType nelec = P_leader.getTotalNum(); - - auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); - auto& up_spo_leader = up_spo_list.getLeader(); - auto& dn_spo_leader = dn_spo_list.getLeader(); - - std::vector mw_up_logdet, mw_dn_logdet; - std::vector mw_up_dlogdet, mw_dn_dlogdet; - std::vector mw_up_d2logdet, mw_dn_d2logdet; - mw_up_logdet.reserve(nw); - mw_dn_logdet.reserve(nw); - mw_up_dlogdet.reserve(nw); - mw_dn_dlogdet.reserve(nw); - mw_up_d2logdet.reserve(nw); - mw_dn_d2logdet.reserve(nw); - - RefVector up_logdet_list, dn_logdet_list; - RefVector up_dlogdet_list, dn_dlogdet_list; - RefVector up_d2logdet_list, dn_d2logdet_list; - up_logdet_list.reserve(nw); - dn_logdet_list.reserve(nw); - up_dlogdet_list.reserve(nw); - dn_dlogdet_list.reserve(nw); - up_d2logdet_list.reserve(nw); - dn_d2logdet_list.reserve(nw); - - ValueMatrix tmp_val_mat(nelec, this->OrbitalSetSize); - GradMatrix tmp_grad_mat(nelec, 
this->OrbitalSetSize); - for (int iw = 0; iw < nw; iw++) { - mw_up_logdet.emplace_back(tmp_val_mat); - up_logdet_list.emplace_back(mw_up_logdet.back()); - mw_dn_logdet.emplace_back(tmp_val_mat); - dn_logdet_list.emplace_back(mw_dn_logdet.back()); - - mw_up_dlogdet.emplace_back(tmp_grad_mat); - up_dlogdet_list.emplace_back(mw_up_dlogdet.back()); - mw_dn_dlogdet.emplace_back(tmp_grad_mat); - dn_dlogdet_list.emplace_back(mw_dn_dlogdet.back()); - - mw_up_d2logdet.emplace_back(tmp_val_mat); - up_d2logdet_list.emplace_back(mw_up_d2logdet.back()); - mw_dn_d2logdet.emplace_back(tmp_val_mat); - dn_d2logdet_list.emplace_back(mw_dn_d2logdet.back()); + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& P_leader = P_list.getLeader(); + assert(this == &spo_leader); + + IndexType nw = spo_list.size(); + IndexType nelec = P_leader.getTotalNum(); + + auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); + auto& up_spo_leader = up_spo_list.getLeader(); + auto& dn_spo_leader = dn_spo_list.getLeader(); + + std::vector mw_up_logdet, mw_dn_logdet; + std::vector mw_up_dlogdet, mw_dn_dlogdet; + std::vector mw_up_d2logdet, mw_dn_d2logdet; + mw_up_logdet.reserve(nw); + mw_dn_logdet.reserve(nw); + mw_up_dlogdet.reserve(nw); + mw_dn_dlogdet.reserve(nw); + mw_up_d2logdet.reserve(nw); + mw_dn_d2logdet.reserve(nw); + + RefVector up_logdet_list, dn_logdet_list; + RefVector up_dlogdet_list, dn_dlogdet_list; + RefVector up_d2logdet_list, dn_d2logdet_list; + up_logdet_list.reserve(nw); + dn_logdet_list.reserve(nw); + up_dlogdet_list.reserve(nw); + dn_dlogdet_list.reserve(nw); + up_d2logdet_list.reserve(nw); + dn_d2logdet_list.reserve(nw); + + ValueMatrix tmp_val_mat(nelec, this->OrbitalSetSize); + GradMatrix tmp_grad_mat(nelec, this->OrbitalSetSize); + for (int iw = 0; iw < nw; iw++) + { + mw_up_logdet.emplace_back(tmp_val_mat); + up_logdet_list.emplace_back(mw_up_logdet.back()); + mw_dn_logdet.emplace_back(tmp_val_mat); + 
dn_logdet_list.emplace_back(mw_dn_logdet.back()); + + mw_up_dlogdet.emplace_back(tmp_grad_mat); + up_dlogdet_list.emplace_back(mw_up_dlogdet.back()); + mw_dn_dlogdet.emplace_back(tmp_grad_mat); + dn_dlogdet_list.emplace_back(mw_dn_dlogdet.back()); + + mw_up_d2logdet.emplace_back(tmp_val_mat); + up_d2logdet_list.emplace_back(mw_up_d2logdet.back()); + mw_dn_d2logdet.emplace_back(tmp_val_mat); + dn_d2logdet_list.emplace_back(mw_dn_d2logdet.back()); + } + + up_spo_leader.mw_evaluate_notranspose(up_spo_list, P_list, first, last, up_logdet_list, up_dlogdet_list, + up_d2logdet_list); + dn_spo_leader.mw_evaluate_notranspose(dn_spo_list, P_list, first, last, dn_logdet_list, dn_dlogdet_list, + dn_d2logdet_list); + + for (int iw = 0; iw < nw; iw++) + for (int iat = 0; iat < nelec; iat++) + { + typename ParticleSetT::Scalar_t s = P_list[iw].activeSpin(iat); + RealType coss = std::cos(s); + RealType sins = std::sin(s); + T eis(coss, sins); + T emis(coss, -sins); + + for (int no = 0; no < this->OrbitalSetSize; no++) + { + logdet_list[iw].get()(iat, no) = + eis * up_logdet_list[iw].get()(iat, no) + emis * dn_logdet_list[iw].get()(iat, no); + dlogdet_list[iw].get()(iat, no) = + eis * up_dlogdet_list[iw].get()(iat, no) + emis * dn_dlogdet_list[iw].get()(iat, no); + d2logdet_list[iw].get()(iat, no) = + eis * up_d2logdet_list[iw].get()(iat, no) + emis * dn_d2logdet_list[iw].get()(iat, no); + } } - - up_spo_leader.mw_evaluate_notranspose(up_spo_list, P_list, first, last, - up_logdet_list, up_dlogdet_list, up_d2logdet_list); - dn_spo_leader.mw_evaluate_notranspose(dn_spo_list, P_list, first, last, - dn_logdet_list, dn_dlogdet_list, dn_d2logdet_list); - - for (int iw = 0; iw < nw; iw++) - for (int iat = 0; iat < nelec; iat++) { - typename ParticleSetT::Scalar_t s = P_list[iw].activeSpin(iat); - RealType coss = std::cos(s); - RealType sins = std::sin(s); - T eis(coss, sins); - T emis(coss, -sins); - - for (int no = 0; no < this->OrbitalSetSize; no++) { - logdet_list[iw].get()(iat, no) = 
- eis * up_logdet_list[iw].get()(iat, no) + - emis * dn_logdet_list[iw].get()(iat, no); - dlogdet_list[iw].get()(iat, no) = - eis * up_dlogdet_list[iw].get()(iat, no) + - emis * dn_dlogdet_list[iw].get()(iat, no); - d2logdet_list[iw].get()(iat, no) = - eis * up_d2logdet_list[iw].get()(iat, no) + - emis * dn_d2logdet_list[iw].get()(iat, no); - } - } } -template -void -SpinorSetT::evaluate_notranspose_spin(const ParticleSetT& P, int first, - int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet, - ValueMatrix& dspinlogdet) +template +void SpinorSetT::evaluate_notranspose_spin(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet, + ValueMatrix& dspinlogdet) { - IndexType nelec = P.getTotalNum(); + IndexType nelec = P.getTotalNum(); - logpsi_work_up.resize(nelec, this->OrbitalSetSize); - logpsi_work_down.resize(nelec, this->OrbitalSetSize); + logpsi_work_up.resize(nelec, this->OrbitalSetSize); + logpsi_work_down.resize(nelec, this->OrbitalSetSize); - dlogpsi_work_up.resize(nelec, this->OrbitalSetSize); - dlogpsi_work_down.resize(nelec, this->OrbitalSetSize); + dlogpsi_work_up.resize(nelec, this->OrbitalSetSize); + dlogpsi_work_down.resize(nelec, this->OrbitalSetSize); - d2logpsi_work_up.resize(nelec, this->OrbitalSetSize); - d2logpsi_work_down.resize(nelec, this->OrbitalSetSize); + d2logpsi_work_up.resize(nelec, this->OrbitalSetSize); + d2logpsi_work_down.resize(nelec, this->OrbitalSetSize); - spo_up->evaluate_notranspose( - P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up); - spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down, - dlogpsi_work_down, d2logpsi_work_down); + spo_up->evaluate_notranspose(P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up); + spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down, dlogpsi_work_down, d2logpsi_work_down); - for (int iat = 0; iat < nelec; iat++) { - typename ParticleSetT::Scalar_t s = 
P.activeSpin(iat); + for (int iat = 0; iat < nelec; iat++) + { + typename ParticleSetT::Scalar_t s = P.activeSpin(iat); - RealType coss(0.0), sins(0.0); + RealType coss(0.0), sins(0.0); - coss = std::cos(s); - sins = std::sin(s); + coss = std::cos(s); + sins = std::sin(s); - T eis(coss, sins); - T emis(coss, -sins); - T eye(0, 1.0); + T eis(coss, sins); + T emis(coss, -sins); + T eye(0, 1.0); - for (int no = 0; no < this->OrbitalSetSize; no++) { - logdet(iat, no) = eis * logpsi_work_up(iat, no) + - emis * logpsi_work_down(iat, no); - dlogdet(iat, no) = eis * dlogpsi_work_up(iat, no) + - emis * dlogpsi_work_down(iat, no); - d2logdet(iat, no) = eis * d2logpsi_work_up(iat, no) + - emis * d2logpsi_work_down(iat, no); - dspinlogdet(iat, no) = eye * - (eis * logpsi_work_up(iat, no) - - emis * logpsi_work_down(iat, no)); - } + for (int no = 0; no < this->OrbitalSetSize; no++) + { + logdet(iat, no) = eis * logpsi_work_up(iat, no) + emis * logpsi_work_down(iat, no); + dlogdet(iat, no) = eis * dlogpsi_work_up(iat, no) + emis * dlogpsi_work_down(iat, no); + d2logdet(iat, no) = eis * d2logpsi_work_up(iat, no) + emis * d2logpsi_work_down(iat, no); + dspinlogdet(iat, no) = eye * (eis * logpsi_work_up(iat, no) - emis * logpsi_work_down(iat, no)); } + } } -template -void -SpinorSetT::evaluate_spin( - const ParticleSetT& P, int iat, ValueVector& psi, ValueVector& dpsi) +template +void SpinorSetT::evaluate_spin(const ParticleSetT& P, int iat, ValueVector& psi, ValueVector& dpsi) { - psi_work_up = 0.0; - psi_work_down = 0.0; + psi_work_up = 0.0; + psi_work_down = 0.0; - spo_up->evaluateValue(P, iat, psi_work_up); - spo_dn->evaluateValue(P, iat, psi_work_down); + spo_up->evaluateValue(P, iat, psi_work_up); + spo_dn->evaluateValue(P, iat, psi_work_down); - typename ParticleSetT::Scalar_t s = P.activeSpin(iat); + typename ParticleSetT::Scalar_t s = P.activeSpin(iat); - RealType coss(0.0), sins(0.0); + RealType coss(0.0), sins(0.0); - coss = std::cos(s); - sins = std::sin(s); + coss = 
std::cos(s); + sins = std::sin(s); - T eis(coss, sins); - T emis(coss, -sins); - T eye(0, 1.0); + T eis(coss, sins); + T emis(coss, -sins); + T eye(0, 1.0); - psi = eis * psi_work_up + emis * psi_work_down; - dpsi = eye * (eis * psi_work_up - emis * psi_work_down); + psi = eis * psi_work_up + emis * psi_work_down; + dpsi = eye * (eis * psi_work_up - emis * psi_work_down); } -template -void -SpinorSetT::evaluateGradSource(const ParticleSetT& P, int first, int last, - const ParticleSetT& source, int iat_src, GradMatrix& gradphi) +template +void SpinorSetT::evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& gradphi) { - IndexType nelec = P.getTotalNum(); - - GradMatrix gradphi_up(nelec, this->OrbitalSetSize); - GradMatrix gradphi_dn(nelec, this->OrbitalSetSize); - spo_up->evaluateGradSource(P, first, last, source, iat_src, gradphi_up); - spo_dn->evaluateGradSource(P, first, last, source, iat_src, gradphi_dn); - - for (int iat = 0; iat < nelec; iat++) { - typename ParticleSetT::Scalar_t s = P.activeSpin(iat); - RealType coss = std::cos(s); - RealType sins = std::sin(s); - T eis(coss, sins); - T emis(coss, -sins); - for (int imo = 0; imo < this->OrbitalSetSize; imo++) - gradphi(iat, imo) = - gradphi_up(iat, imo) * eis + gradphi_dn(iat, imo) * emis; - } + IndexType nelec = P.getTotalNum(); + + GradMatrix gradphi_up(nelec, this->OrbitalSetSize); + GradMatrix gradphi_dn(nelec, this->OrbitalSetSize); + spo_up->evaluateGradSource(P, first, last, source, iat_src, gradphi_up); + spo_dn->evaluateGradSource(P, first, last, source, iat_src, gradphi_dn); + + for (int iat = 0; iat < nelec; iat++) + { + typename ParticleSetT::Scalar_t s = P.activeSpin(iat); + RealType coss = std::cos(s); + RealType sins = std::sin(s); + T eis(coss, sins); + T emis(coss, -sins); + for (int imo = 0; imo < this->OrbitalSetSize; imo++) + gradphi(iat, imo) = gradphi_up(iat, imo) * eis + gradphi_dn(iat, imo) * emis; + } } -template 
-std::unique_ptr> -SpinorSetT::makeClone() const +template +std::unique_ptr> SpinorSetT::makeClone() const { - auto myclone = std::make_unique>(this->my_name_); - std::unique_ptr> cloneup(spo_up->makeClone()); - std::unique_ptr> clonedn(spo_dn->makeClone()); - myclone->set_spos(std::move(cloneup), std::move(clonedn)); - return myclone; + auto myclone = std::make_unique>(this->my_name_); + std::unique_ptr> cloneup(spo_up->makeClone()); + std::unique_ptr> clonedn(spo_dn->makeClone()); + myclone->set_spos(std::move(cloneup), std::move(clonedn)); + return myclone; } -template -void -SpinorSetT::createResource(ResourceCollection& collection) const +template +void SpinorSetT::createResource(ResourceCollection& collection) const { - spo_up->createResource(collection); - spo_dn->createResource(collection); - auto index = collection.addResource( - std::make_unique()); + spo_up->createResource(collection); + spo_dn->createResource(collection); + auto index = collection.addResource(std::make_unique()); } -template -void -SpinorSetT::acquireResource(ResourceCollection& collection, - const RefVectorWithLeader>& spo_list) const +template +void SpinorSetT::acquireResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const { - auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); - auto& spo_leader = spo_list.template getCastedLeader>(); - auto& up_spo_leader = up_spo_list.getLeader(); - auto& dn_spo_leader = dn_spo_list.getLeader(); - up_spo_leader.acquireResource(collection, up_spo_list); - dn_spo_leader.acquireResource(collection, dn_spo_list); - spo_leader.mw_res_handle_ = - collection.lendResource(); + auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& up_spo_leader = up_spo_list.getLeader(); + auto& dn_spo_leader = dn_spo_list.getLeader(); + up_spo_leader.acquireResource(collection, up_spo_list); + dn_spo_leader.acquireResource(collection, 
dn_spo_list); + spo_leader.mw_res_handle_ = collection.lendResource(); } -template -void -SpinorSetT::releaseResource(ResourceCollection& collection, - const RefVectorWithLeader>& spo_list) const +template +void SpinorSetT::releaseResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const { - auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); - auto& spo_leader = spo_list.template getCastedLeader>(); - auto& up_spo_leader = up_spo_list.getLeader(); - auto& dn_spo_leader = dn_spo_list.getLeader(); - up_spo_leader.releaseResource(collection, up_spo_list); - dn_spo_leader.releaseResource(collection, dn_spo_list); - collection.takebackResource(spo_leader.mw_res_handle_); + auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& up_spo_leader = up_spo_list.getLeader(); + auto& dn_spo_leader = dn_spo_list.getLeader(); + up_spo_leader.releaseResource(collection, up_spo_list); + dn_spo_leader.releaseResource(collection, dn_spo_list); + collection.takebackResource(spo_leader.mw_res_handle_); } -template -std::pair>, RefVectorWithLeader>> -SpinorSetT::extractSpinComponentRefList( +template +std::pair>, RefVectorWithLeader>> SpinorSetT::extractSpinComponentRefList( const RefVectorWithLeader>& spo_list) const { - SpinorSetT& spo_leader = - spo_list.template getCastedLeader>(); - IndexType nw = spo_list.size(); - SPOSetT& up_spo_leader = *(spo_leader.spo_up); - SPOSetT& dn_spo_leader = *(spo_leader.spo_dn); - RefVectorWithLeader> up_spo_list(up_spo_leader); - RefVectorWithLeader> dn_spo_list(dn_spo_leader); - up_spo_list.reserve(nw); - dn_spo_list.reserve(nw); - for (int iw = 0; iw < nw; iw++) { - SpinorSetT& spinor = - spo_list.template getCastedElement>(iw); - up_spo_list.emplace_back(*(spinor.spo_up)); - dn_spo_list.emplace_back(*(spinor.spo_dn)); - } - return std::make_pair(up_spo_list, dn_spo_list); + SpinorSetT& spo_leader = 
spo_list.template getCastedLeader>(); + IndexType nw = spo_list.size(); + SPOSetT& up_spo_leader = *(spo_leader.spo_up); + SPOSetT& dn_spo_leader = *(spo_leader.spo_dn); + RefVectorWithLeader> up_spo_list(up_spo_leader); + RefVectorWithLeader> dn_spo_list(dn_spo_leader); + up_spo_list.reserve(nw); + dn_spo_list.reserve(nw); + for (int iw = 0; iw < nw; iw++) + { + SpinorSetT& spinor = spo_list.template getCastedElement>(iw); + up_spo_list.emplace_back(*(spinor.spo_up)); + dn_spo_list.emplace_back(*(spinor.spo_dn)); + } + return std::make_pair(up_spo_list, dn_spo_list); } template class SpinorSetT>; diff --git a/src/QMCWaveFunctions/SpinorSetT.h b/src/QMCWaveFunctions/SpinorSetT.h index 08d869b1129..5bad28be2db 100644 --- a/src/QMCWaveFunctions/SpinorSetT.h +++ b/src/QMCWaveFunctions/SpinorSetT.h @@ -24,71 +24,50 @@ namespace qmcplusplus /** Class for Melton & Mitas style Spinors. * */ -template +template class SpinorSetT : public SPOSetT { public: - using ValueMatrix = typename SPOSetT::ValueMatrix; - using ValueVector = typename SPOSetT::ValueVector; - using GradMatrix = typename SPOSetT::GradMatrix; - using GradType = typename SPOSetT::GradType; - using GradVector = typename SPOSetT::GradVector; - using OffloadMWVGLArray = - Array>; // [VGL, walker, Orbs] - // using OffloadMWVGLArray = typename SPOSetT::template - // OffloadMWCGLArray; - template - using OffloadMatrix = typename SPOSetT::template OffloadMatrix
; - using RealType = typename SPOSetT::RealType; - using ComplexType = typename SPOSetT::ComplexType; - using IndexType = OHMMS_INDEXTYPE; - - /** constructor */ - SpinorSetT(const std::string& my_name); - ~SpinorSetT() override; - - std::string - getClassName() const override - { - return "SpinorSet"; - } - bool - isOptimizable() const override - { - return spo_up->isOptimizable() || spo_dn->isOptimizable(); - } - bool - isOMPoffload() const override - { - return spo_up->isOMPoffload() || spo_dn->isOMPoffload(); - } - bool - hasIonDerivs() const override - { - return spo_up->hasIonDerivs() || spo_dn->hasIonDerivs(); - } - - // This class is initialized by separately building the up and down channels - // of the spinor set and then registering them. - void - set_spos( - std::unique_ptr>&& up, std::unique_ptr>&& dn); - - /** set the OrbitalSetSize + using ValueMatrix = typename SPOSetT::ValueMatrix; + using ValueVector = typename SPOSetT::ValueVector; + using GradMatrix = typename SPOSetT::GradMatrix; + using GradType = typename SPOSetT::GradType; + using GradVector = typename SPOSetT::GradVector; + using OffloadMWVGLArray = Array>; // [VGL, walker, Orbs] + // using OffloadMWVGLArray = typename SPOSetT::template + // OffloadMWCGLArray; + template + using OffloadMatrix = typename SPOSetT::template OffloadMatrix
; + using RealType = typename SPOSetT::RealType; + using ComplexType = typename SPOSetT::ComplexType; + using IndexType = OHMMS_INDEXTYPE; + + /** constructor */ + SpinorSetT(const std::string& my_name); + ~SpinorSetT() override; + + std::string getClassName() const override { return "SpinorSet"; } + bool isOptimizable() const override { return spo_up->isOptimizable() || spo_dn->isOptimizable(); } + bool isOMPoffload() const override { return spo_up->isOMPoffload() || spo_dn->isOMPoffload(); } + bool hasIonDerivs() const override { return spo_up->hasIonDerivs() || spo_dn->hasIonDerivs(); } + + // This class is initialized by separately building the up and down channels + // of the spinor set and then registering them. + void set_spos(std::unique_ptr>&& up, std::unique_ptr>&& dn); + + /** set the OrbitalSetSize * @param norbs number of single-particle orbitals */ - void - setOrbitalSetSize(int norbs) override; + void setOrbitalSetSize(int norbs) override; - /** evaluate the values of this spinor set + /** evaluate the values of this spinor set * @param P current ParticleSet * @param iat active particle * @param psi values of the SPO */ - void - evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override; + void evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override; - /** evaluate the values, gradients and laplacians of this single-particle + /** evaluate the values, gradients and laplacians of this single-particle * orbital set * @param P current ParticleSet * @param iat active particle @@ -96,11 +75,9 @@ class SpinorSetT : public SPOSetT * @param dpsi gradients of the SPO * @param d2psi laplacians of the SPO */ - void - evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi) override; + void evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; - /** evaluate the values, gradients and laplacians of this single-particle + /** evaluate 
the values, gradients and laplacians of this single-particle * orbital set * @param P current ParticleSet * @param iat active particle @@ -109,11 +86,14 @@ class SpinorSetT : public SPOSetT * @param d2psi laplacians of the SPO * @param dspin spin gradient of the SPO */ - void - evaluateVGL_spin(const ParticleSetT& P, int iat, ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi, ValueVector& dspin) override; - - /** evaluate the values, gradients and laplacians and spin gradient of this + void evaluateVGL_spin(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi, + ValueVector& dspin) override; + + /** evaluate the values, gradients and laplacians and spin gradient of this * single-particle orbital sets of multiple walkers * @param spo_list the list of SPOSet pointers in a walker batch * @param P_list the list of ParticleSet pointers in a walker batch @@ -124,15 +104,15 @@ class SpinorSetT : public SPOSetT * batch * @param mw_dspin dual matrix of spin gradients. nw x num_orbitals */ - void - mw_evaluateVGLWithSpin(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list, - OffloadMatrix& mw_dspin) const override; - - /** evaluate the values, gradients and laplacians of this single-particle + void mw_evaluateVGLWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list, + OffloadMatrix& mw_dspin) const override; + + /** evaluate the values, gradients and laplacians of this single-particle * orbital sets and determinant ratio and grads of multiple walkers. Device * data of phi_vgl_v must be up-to-date upon return. 
Includes spin gradients * @param spo_list the list of SPOSet pointers in a walker batch @@ -144,15 +124,16 @@ class SpinorSetT : public SPOSetT * @param grads, spatial gradients of all walkers * @param spingrads, spin gradients of all walkers */ - void - mw_evaluateVGLandDetRatioGradsWithSpin( - const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, std::vector& ratios, - std::vector& grads, std::vector& spingrads) const override; - - /** evaluate the values, gradients and laplacians of this single-particle + void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads, + std::vector& spingrads) const override; + + /** evaluate the values, gradients and laplacians of this single-particle * orbital for [first,last) particles * @param P current ParticleSet * @param first starting index of the particles @@ -162,23 +143,29 @@ class SpinorSetT : public SPOSetT * @param d2logdet laplacians * */ - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - - void - mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, - const RefVectorWithLeader>& P_list, int first, int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const override; - - void - evaluate_notranspose_spin(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet, - ValueMatrix& dspinlogdet) override; - /** Evaluate the values, spin gradients, and spin laplacians of single + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) 
override; + + void mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int first, + int last, + const RefVector& logdet_list, + const RefVector& dlogdet_list, + const RefVector& d2logdet_list) const override; + + void evaluate_notranspose_spin(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet, + ValueMatrix& dspinlogdet) override; + /** Evaluate the values, spin gradients, and spin laplacians of single * particle spinors corresponding to electron iat. * @param P current particle set. * @param iat electron index. @@ -186,11 +173,9 @@ class SpinorSetT : public SPOSetT * @param spin gradient values. d/ds phi(r,s). * */ - void - evaluate_spin(const ParticleSetT& P, int iat, ValueVector& psi, - ValueVector& dpsi) override; + void evaluate_spin(const ParticleSetT& P, int iat, ValueVector& psi, ValueVector& dpsi) override; - /** evaluate the gradients of this single-particle orbital + /** evaluate the gradients of this single-particle orbital * for [first,last) target particles with respect to the given source * particle * @param P current ParticleSet @@ -200,69 +185,60 @@ class SpinorSetT : public SPOSetT * @param gradphi gradients * */ - virtual void - evaluateGradSource(const ParticleSetT& P, int first, int last, - const ParticleSetT& source, int iat_src, - GradMatrix& gradphi) override; + virtual void evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& gradphi) override; - std::unique_ptr> - makeClone() const override; + std::unique_ptr> makeClone() const override; - void - createResource(ResourceCollection& collection) const override; + void createResource(ResourceCollection& collection) const override; - void - acquireResource(ResourceCollection& collection, - const RefVectorWithLeader>& spo_list) const override; + void acquireResource(ResourceCollection& collection, const 
RefVectorWithLeader>& spo_list) const override; - void - releaseResource(ResourceCollection& collection, - const RefVectorWithLeader>& spo_list) const override; + void releaseResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const override; - /// check if the multi walker resource is owned. For testing only. - bool - isResourceOwned() const - { - return bool(mw_res_handle_); - } + /// check if the multi walker resource is owned. For testing only. + bool isResourceOwned() const { return bool(mw_res_handle_); } private: - struct SpinorSetMultiWalkerResource; - ResourceHandle mw_res_handle_; + struct SpinorSetMultiWalkerResource; + ResourceHandle mw_res_handle_; - std::pair>, RefVectorWithLeader>> - extractSpinComponentRefList( - const RefVectorWithLeader>& spo_list) const; + std::pair>, RefVectorWithLeader>> extractSpinComponentRefList( + const RefVectorWithLeader>& spo_list) const; - // Sposet for the up and down channels of our spinors. - std::unique_ptr> spo_up; - std::unique_ptr> spo_dn; + // Sposet for the up and down channels of our spinors. + std::unique_ptr> spo_up; + std::unique_ptr> spo_dn; - // temporary arrays for holding the values of the up and down channels - // respectively. - ValueVector psi_work_up; - ValueVector psi_work_down; + // temporary arrays for holding the values of the up and down channels + // respectively. + ValueVector psi_work_up; + ValueVector psi_work_down; - // temporary arrays for holding the gradients of the up and down channels - // respectively. - GradVector dpsi_work_up; - GradVector dpsi_work_down; + // temporary arrays for holding the gradients of the up and down channels + // respectively. + GradVector dpsi_work_up; + GradVector dpsi_work_down; - // temporary arrays for holding the laplacians of the up and down channels - // respectively. - ValueVector d2psi_work_up; - ValueVector d2psi_work_down; + // temporary arrays for holding the laplacians of the up and down channels + // respectively. 
+ ValueVector d2psi_work_up; + ValueVector d2psi_work_down; - // Same as above, but these are the full matrices containing all - // spinor/particle combinations. - ValueMatrix logpsi_work_up; - ValueMatrix logpsi_work_down; + // Same as above, but these are the full matrices containing all + // spinor/particle combinations. + ValueMatrix logpsi_work_up; + ValueMatrix logpsi_work_down; - GradMatrix dlogpsi_work_up; - GradMatrix dlogpsi_work_down; + GradMatrix dlogpsi_work_up; + GradMatrix dlogpsi_work_down; - ValueMatrix d2logpsi_work_up; - ValueMatrix d2logpsi_work_down; + ValueMatrix d2logpsi_work_up; + ValueMatrix d2logpsi_work_down; }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/VariableSetT.cpp b/src/QMCWaveFunctions/VariableSetT.cpp index 2c494010666..1444dd4e9b9 100644 --- a/src/QMCWaveFunctions/VariableSetT.cpp +++ b/src/QMCWaveFunctions/VariableSetT.cpp @@ -29,313 +29,305 @@ using std::setw; namespace optimize { -template -void -VariableSetT::clear() +template +void VariableSetT::clear() { - num_active_vars = 0; - Index.clear(); - NameAndValue.clear(); - Recompute.clear(); - ParameterType.clear(); + num_active_vars = 0; + Index.clear(); + NameAndValue.clear(); + Recompute.clear(); + ParameterType.clear(); } -template -void -VariableSetT::insertFrom(const VariableSetT& input) +template +void VariableSetT::insertFrom(const VariableSetT& input) { - for (int i = 0; i < input.size(); ++i) { - iterator loc = find(input.name(i)); - if (loc == NameAndValue.end()) { - Index.push_back(input.Index[i]); - NameAndValue.push_back(input.NameAndValue[i]); - ParameterType.push_back(input.ParameterType[i]); - Recompute.push_back(input.Recompute[i]); - } - else - (*loc).second = input.NameAndValue[i].second; + for (int i = 0; i < input.size(); ++i) + { + iterator loc = find(input.name(i)); + if (loc == NameAndValue.end()) + { + Index.push_back(input.Index[i]); + NameAndValue.push_back(input.NameAndValue[i]); + ParameterType.push_back(input.ParameterType[i]); 
+ Recompute.push_back(input.Recompute[i]); } - num_active_vars = input.num_active_vars; + else + (*loc).second = input.NameAndValue[i].second; + } + num_active_vars = input.num_active_vars; } -template -void -VariableSetT::insertFromSum( - const VariableSetT& input_1, const VariableSetT& input_2) +template +void VariableSetT::insertFromSum(const VariableSetT& input_1, const VariableSetT& input_2) { - value_type sum_val; - std::string vname; - - // Check that objects to be summed together have the same number of active - // variables. - if (input_1.num_active_vars != input_2.num_active_vars) - throw std::runtime_error( - "Inconsistent number of parameters in two provided " - "variable sets."); - - for (int i = 0; i < input_1.size(); ++i) { - // Check that each of the equivalent variables in both VariableSet - // objects have the same name - otherwise we certainly shouldn't be - // adding them. - if (input_1.NameAndValue[i].first != input_2.NameAndValue[i].first) - throw std::runtime_error( - "Inconsistent parameters exist in the two provided " - "variable sets."); - - sum_val = - input_1.NameAndValue[i].second + input_2.NameAndValue[i].second; - - iterator loc = find(input_1.name(i)); - if (loc == NameAndValue.end()) { - Index.push_back(input_1.Index[i]); - ParameterType.push_back(input_1.ParameterType[i]); - Recompute.push_back(input_1.Recompute[i]); - - // We can reuse the above values, which aren't summed between the - // objects, but the parameter values themselves need to use the - // summed values. - vname = input_1.NameAndValue[i].first; - NameAndValue.push_back(pair_type(vname, sum_val)); - } - else - (*loc).second = sum_val; + value_type sum_val; + std::string vname; + + // Check that objects to be summed together have the same number of active + // variables. 
+ if (input_1.num_active_vars != input_2.num_active_vars) + throw std::runtime_error("Inconsistent number of parameters in two provided " + "variable sets."); + + for (int i = 0; i < input_1.size(); ++i) + { + // Check that each of the equivalent variables in both VariableSet + // objects have the same name - otherwise we certainly shouldn't be + // adding them. + if (input_1.NameAndValue[i].first != input_2.NameAndValue[i].first) + throw std::runtime_error("Inconsistent parameters exist in the two provided " + "variable sets."); + + sum_val = input_1.NameAndValue[i].second + input_2.NameAndValue[i].second; + + iterator loc = find(input_1.name(i)); + if (loc == NameAndValue.end()) + { + Index.push_back(input_1.Index[i]); + ParameterType.push_back(input_1.ParameterType[i]); + Recompute.push_back(input_1.Recompute[i]); + + // We can reuse the above values, which aren't summed between the + // objects, but the parameter values themselves need to use the + // summed values. + vname = input_1.NameAndValue[i].first; + NameAndValue.push_back(pair_type(vname, sum_val)); } - num_active_vars = input_1.num_active_vars; + else + (*loc).second = sum_val; + } + num_active_vars = input_1.num_active_vars; } -template -void -VariableSetT::insertFromDiff( - const VariableSetT& input_1, const VariableSetT& input_2) +template +void VariableSetT::insertFromDiff(const VariableSetT& input_1, const VariableSetT& input_2) { - value_type diff_val; - std::string vname; - - // Check that objects to be subtracted have the same number of active - // variables. - if (input_1.num_active_vars != input_2.num_active_vars) - throw std::runtime_error( - "Inconsistent number of parameters in two provided " - "variable sets."); - - for (int i = 0; i < input_1.size(); ++i) { - // Check that each of the equivalent variables in both VariableSet - // objects have the same name - otherwise we certainly shouldn't be - // subtracting them. 
- if (input_1.NameAndValue[i].first != input_2.NameAndValue[i].first) - throw std::runtime_error( - "Inconsistent parameters exist in the two provided " - "variable sets."); - - diff_val = - input_1.NameAndValue[i].second - input_2.NameAndValue[i].second; - - iterator loc = find(input_1.name(i)); - if (loc == NameAndValue.end()) { - Index.push_back(input_1.Index[i]); - ParameterType.push_back(input_1.ParameterType[i]); - Recompute.push_back(input_1.Recompute[i]); - - // We can reuse the above values, which aren't subtracted between - // the objects, but the parameter values themselves need to use the - // subtracted values. - vname = input_1.NameAndValue[i].first; - NameAndValue.push_back(pair_type(vname, diff_val)); - } - else - (*loc).second = diff_val; + value_type diff_val; + std::string vname; + + // Check that objects to be subtracted have the same number of active + // variables. + if (input_1.num_active_vars != input_2.num_active_vars) + throw std::runtime_error("Inconsistent number of parameters in two provided " + "variable sets."); + + for (int i = 0; i < input_1.size(); ++i) + { + // Check that each of the equivalent variables in both VariableSet + // objects have the same name - otherwise we certainly shouldn't be + // subtracting them. + if (input_1.NameAndValue[i].first != input_2.NameAndValue[i].first) + throw std::runtime_error("Inconsistent parameters exist in the two provided " + "variable sets."); + + diff_val = input_1.NameAndValue[i].second - input_2.NameAndValue[i].second; + + iterator loc = find(input_1.name(i)); + if (loc == NameAndValue.end()) + { + Index.push_back(input_1.Index[i]); + ParameterType.push_back(input_1.ParameterType[i]); + Recompute.push_back(input_1.Recompute[i]); + + // We can reuse the above values, which aren't subtracted between + // the objects, but the parameter values themselves need to use the + // subtracted values. 
+ vname = input_1.NameAndValue[i].first; + NameAndValue.push_back(pair_type(vname, diff_val)); } - num_active_vars = input_1.num_active_vars; + else + (*loc).second = diff_val; + } + num_active_vars = input_1.num_active_vars; } -template -void -VariableSetT::removeInactive() +template +void VariableSetT::removeInactive() { - std::vector valid(Index); - std::vector acopy(NameAndValue); - std::vector bcopy(Recompute), ccopy(ParameterType); - num_active_vars = 0; - Index.clear(); - NameAndValue.clear(); - Recompute.clear(); - ParameterType.clear(); - for (int i = 0; i < valid.size(); ++i) { - if (valid[i] > -1) { - Index.push_back(num_active_vars++); - NameAndValue.push_back(acopy[i]); - Recompute.push_back(bcopy[i]); - ParameterType.push_back(ccopy[i]); - } + std::vector valid(Index); + std::vector acopy(NameAndValue); + std::vector bcopy(Recompute), ccopy(ParameterType); + num_active_vars = 0; + Index.clear(); + NameAndValue.clear(); + Recompute.clear(); + ParameterType.clear(); + for (int i = 0; i < valid.size(); ++i) + { + if (valid[i] > -1) + { + Index.push_back(num_active_vars++); + NameAndValue.push_back(acopy[i]); + Recompute.push_back(bcopy[i]); + ParameterType.push_back(ccopy[i]); } + } } -template -void -VariableSetT::resetIndex() +template +void VariableSetT::resetIndex() { - num_active_vars = 0; - for (int i = 0; i < Index.size(); ++i) { - Index[i] = (Index[i] < 0) ? -1 : num_active_vars++; - } + num_active_vars = 0; + for (int i = 0; i < Index.size(); ++i) + { + Index[i] = (Index[i] < 0) ? 
-1 : num_active_vars++; + } } -template -void -VariableSetT::getIndex(const VariableSetT& selected) +template +void VariableSetT::getIndex(const VariableSetT& selected) { - num_active_vars = 0; - for (int i = 0; i < NameAndValue.size(); ++i) { - Index[i] = selected.getIndex(NameAndValue[i].first); - if (Index[i] >= 0) - num_active_vars++; - } + num_active_vars = 0; + for (int i = 0; i < NameAndValue.size(); ++i) + { + Index[i] = selected.getIndex(NameAndValue[i].first); + if (Index[i] >= 0) + num_active_vars++; + } } -template -int -VariableSetT::getIndex(const std::string& vname) const +template +int VariableSetT::getIndex(const std::string& vname) const { - int loc = 0; - while (loc != NameAndValue.size()) { - if (NameAndValue[loc].first == vname) - return Index[loc]; - ++loc; - } - return -1; + int loc = 0; + while (loc != NameAndValue.size()) + { + if (NameAndValue[loc].first == vname) + return Index[loc]; + ++loc; + } + return -1; } -template -void -VariableSetT::setIndexDefault() +template +void VariableSetT::setIndexDefault() { - for (int i = 0; i < Index.size(); ++i) - Index[i] = i; + for (int i = 0; i < Index.size(); ++i) + Index[i] = i; } -template -void -VariableSetT::print( - std::ostream& os, int leftPadSpaces, bool printHeader) const +template +void VariableSetT::print(std::ostream& os, int leftPadSpaces, bool printHeader) const { - std::string pad_str = std::string(leftPadSpaces, ' '); - int max_name_len = 0; - if (NameAndValue.size() > 0) - max_name_len = std::max_element(NameAndValue.begin(), - NameAndValue.end(), [](const pair_type& e1, const pair_type& e2) { - return e1.first.length() < e2.first.length(); - })->first.length(); - - int max_value_len = 28; // 6 for the precision and 7 for minus sign, leading - // value, period, and exponent. 
- int max_type_len = 1; - int max_recompute_len = 1; - int max_use_len = 3; - int max_index_len = 1; - if (printHeader) { - max_name_len = std::max(max_name_len, 4); // size of "Name" header - max_type_len = 4; - max_recompute_len = 9; - max_index_len = 5; - os << pad_str << setw(max_name_len) << "Name" - << " " << setw(max_value_len) << "Value" - << " " << setw(max_type_len) << "Type" - << " " << setw(max_recompute_len) << "Recompute" - << " " << setw(max_use_len) << "Use" - << " " << setw(max_index_len) << "Index" << std::endl; - os << pad_str << std::setfill('-') << setw(max_name_len) << "" - << " " << setw(max_value_len) << "" - << " " << setw(max_type_len) << "" - << " " << setw(max_recompute_len) << "" - << " " << setw(max_use_len) << "" - << " " << setw(max_index_len) << "" << std::endl; - os << std::setfill(' '); - } - - for (int i = 0; i < NameAndValue.size(); ++i) { - os << pad_str << setw(max_name_len) << NameAndValue[i].first << " " - << std::setprecision(6) << std::scientific << setw(max_value_len) - << NameAndValue[i].second << " " << setw(max_type_len) - << ParameterType[i].second << " " << setw(max_recompute_len) - << Recompute[i].second << " "; - - os << std::defaultfloat; - - if (Index[i] < 0) - os << setw(max_use_len) << "OFF" << std::endl; - else - os << setw(max_use_len) << "ON" - << " " << setw(max_index_len) << Index[i] << std::endl; - } + std::string pad_str = std::string(leftPadSpaces, ' '); + int max_name_len = 0; + if (NameAndValue.size() > 0) + max_name_len = + std::max_element(NameAndValue.begin(), NameAndValue.end(), [](const pair_type& e1, const pair_type& e2) { + return e1.first.length() < e2.first.length(); + })->first.length(); + + int max_value_len = 28; // 6 for the precision and 7 for minus sign, leading + // value, period, and exponent. 
+ int max_type_len = 1; + int max_recompute_len = 1; + int max_use_len = 3; + int max_index_len = 1; + if (printHeader) + { + max_name_len = std::max(max_name_len, 4); // size of "Name" header + max_type_len = 4; + max_recompute_len = 9; + max_index_len = 5; + os << pad_str << setw(max_name_len) << "Name" + << " " << setw(max_value_len) << "Value" + << " " << setw(max_type_len) << "Type" + << " " << setw(max_recompute_len) << "Recompute" + << " " << setw(max_use_len) << "Use" + << " " << setw(max_index_len) << "Index" << std::endl; + os << pad_str << std::setfill('-') << setw(max_name_len) << "" + << " " << setw(max_value_len) << "" + << " " << setw(max_type_len) << "" + << " " << setw(max_recompute_len) << "" + << " " << setw(max_use_len) << "" + << " " << setw(max_index_len) << "" << std::endl; + os << std::setfill(' '); + } + + for (int i = 0; i < NameAndValue.size(); ++i) + { + os << pad_str << setw(max_name_len) << NameAndValue[i].first << " " << std::setprecision(6) << std::scientific + << setw(max_value_len) << NameAndValue[i].second << " " << setw(max_type_len) << ParameterType[i].second << " " + << setw(max_recompute_len) << Recompute[i].second << " "; + + os << std::defaultfloat; + + if (Index[i] < 0) + os << setw(max_use_len) << "OFF" << std::endl; + else + os << setw(max_use_len) << "ON" + << " " << setw(max_index_len) << Index[i] << std::endl; + } } -template -void -VariableSetT::writeToHDF( - const std::string& filename, qmcplusplus::hdf_archive& hout) const +template +void VariableSetT::writeToHDF(const std::string& filename, qmcplusplus::hdf_archive& hout) const { - hout.create(filename); - - // File Versioning - // 1.0.0 Initial file version - // 1.1.0 Files could have object-specific data from - // OptimizableObject::read/writeVariationalParameters - std::vector vp_file_version{1, 1, 0}; - hout.write(vp_file_version, "version"); - - std::string timestamp(getDateAndTime("%Y-%m-%d %H:%M:%S %Z")); - hout.write(timestamp, "timestamp"); - - 
hout.push("name_value_lists"); - - std::vector param_values; - std::vector param_names; - for (auto& pair_it : NameAndValue) { - param_names.push_back(pair_it.first); - param_values.push_back(pair_it.second); - } - - hout.write(param_names, "parameter_names"); - hout.write(param_values, "parameter_values"); - hout.pop(); + hout.create(filename); + + // File Versioning + // 1.0.0 Initial file version + // 1.1.0 Files could have object-specific data from + // OptimizableObject::read/writeVariationalParameters + std::vector vp_file_version{1, 1, 0}; + hout.write(vp_file_version, "version"); + + std::string timestamp(getDateAndTime("%Y-%m-%d %H:%M:%S %Z")); + hout.write(timestamp, "timestamp"); + + hout.push("name_value_lists"); + + std::vector param_values; + std::vector param_names; + for (auto& pair_it : NameAndValue) + { + param_names.push_back(pair_it.first); + param_values.push_back(pair_it.second); + } + + hout.write(param_names, "parameter_names"); + hout.write(param_values, "parameter_values"); + hout.pop(); } -template -void -VariableSetT::readFromHDF( - const std::string& filename, qmcplusplus::hdf_archive& hin) +template +void VariableSetT::readFromHDF(const std::string& filename, qmcplusplus::hdf_archive& hin) { - if (!hin.open(filename, H5F_ACC_RDONLY)) { - std::ostringstream err_msg; - err_msg << "Unable to open VP file: " << filename; - throw std::runtime_error(err_msg.str()); - } - - try { - hin.push("name_value_lists", false); - } - catch (std::runtime_error&) { - std::ostringstream err_msg; - err_msg << "The group name_value_lists in not present in file: " - << filename; - throw std::runtime_error(err_msg.str()); - } - - std::vector param_values; - hin.read(param_values, "parameter_values"); - - std::vector param_names; - hin.read(param_names, "parameter_names"); - - for (int i = 0; i < param_names.size(); i++) { - std::string& vp_name = param_names[i]; - // Find and set values by name. - // Values that are not present do not get added. 
- if (find(vp_name) != end()) - (*this)[vp_name] = param_values[i]; - } - - hin.pop(); + if (!hin.open(filename, H5F_ACC_RDONLY)) + { + std::ostringstream err_msg; + err_msg << "Unable to open VP file: " << filename; + throw std::runtime_error(err_msg.str()); + } + + try + { + hin.push("name_value_lists", false); + } + catch (std::runtime_error&) + { + std::ostringstream err_msg; + err_msg << "The group name_value_lists in not present in file: " << filename; + throw std::runtime_error(err_msg.str()); + } + + std::vector param_values; + hin.read(param_values, "parameter_values"); + + std::vector param_names; + hin.read(param_names, "parameter_names"); + + for (int i = 0; i < param_names.size(); i++) + { + std::string& vp_name = param_names[i]; + // Find and set values by name. + // Values that are not present do not get added. + if (find(vp_name) != end()) + (*this)[vp_name] = param_values[i]; + } + + hin.pop(); } template class VariableSetT; diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp b/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp index 5ada9b4f9d3..13dd42bb256 100644 --- a/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp +++ b/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp @@ -15,109 +15,106 @@ namespace qmcplusplus { -template -ConstantSPOSetT::ConstantSPOSetT( - const std::string& my_name, const int nparticles, const int norbitals) : - SPOSetT(my_name), - numparticles_(nparticles) +template +ConstantSPOSetT::ConstantSPOSetT(const std::string& my_name, const int nparticles, const int norbitals) + : SPOSetT(my_name), numparticles_(nparticles) { - this->OrbitalSetSize = norbitals; - ref_psi_.resize(numparticles_, this->OrbitalSetSize); - ref_egrad_.resize(numparticles_, this->OrbitalSetSize); - ref_elapl_.resize(numparticles_, this->OrbitalSetSize); + this->OrbitalSetSize = norbitals; + ref_psi_.resize(numparticles_, this->OrbitalSetSize); + ref_egrad_.resize(numparticles_, this->OrbitalSetSize); + ref_elapl_.resize(numparticles_, 
this->OrbitalSetSize); - ref_psi_ = 0.0; - ref_egrad_ = 0.0; - ref_elapl_ = 0.0; + ref_psi_ = 0.0; + ref_egrad_ = 0.0; + ref_elapl_ = 0.0; } -template -std::unique_ptr> -ConstantSPOSetT::makeClone() const +template +std::unique_ptr> ConstantSPOSetT::makeClone() const { - auto myclone = std::make_unique>( - this->my_name_, numparticles_, this->OrbitalSetSize); - myclone->setRefVals(ref_psi_); - myclone->setRefEGrads(ref_egrad_); - myclone->setRefELapls(ref_elapl_); - return myclone; + auto myclone = std::make_unique>(this->my_name_, numparticles_, this->OrbitalSetSize); + myclone->setRefVals(ref_psi_); + myclone->setRefEGrads(ref_egrad_); + myclone->setRefELapls(ref_elapl_); + return myclone; } template void ConstantSPOSetT::checkOutVariables(const OptVariablesTypeT& active) { - APP_ABORT("ConstantSPOSet should not call checkOutVariables"); + APP_ABORT("ConstantSPOSet should not call checkOutVariables"); }; -template -void -ConstantSPOSetT::setOrbitalSetSize(int norbs) +template +void ConstantSPOSetT::setOrbitalSetSize(int norbs) { - APP_ABORT("ConstantSPOSet should not call setOrbitalSetSize()"); + APP_ABORT("ConstantSPOSet should not call setOrbitalSetSize()"); } -template -void -ConstantSPOSetT::setRefVals(const ValueMatrix& vals) +template +void ConstantSPOSetT::setRefVals(const ValueMatrix& vals) { - assert(vals.cols() == this->OrbitalSetSize); - assert(vals.rows() == numparticles_); - ref_psi_ = vals; + assert(vals.cols() == this->OrbitalSetSize); + assert(vals.rows() == numparticles_); + ref_psi_ = vals; } -template -void -ConstantSPOSetT::setRefEGrads(const GradMatrix& grads) +template +void ConstantSPOSetT::setRefEGrads(const GradMatrix& grads) { - assert(grads.cols() == this->OrbitalSetSize); - assert(grads.rows() == numparticles_); - ref_egrad_ = grads; + assert(grads.cols() == this->OrbitalSetSize); + assert(grads.rows() == numparticles_); + ref_egrad_ = grads; } -template -void -ConstantSPOSetT::setRefELapls(const ValueMatrix& lapls) +template +void 
ConstantSPOSetT::setRefELapls(const ValueMatrix& lapls) { - assert(lapls.cols() == this->OrbitalSetSize); - assert(lapls.rows() == numparticles_); - ref_elapl_ = lapls; + assert(lapls.cols() == this->OrbitalSetSize); + assert(lapls.rows() == numparticles_); + ref_elapl_ = lapls; } -template -void -ConstantSPOSetT::evaluateValue( - const ParticleSetT& P, int iat, ValueVector& psi) +template +void ConstantSPOSetT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) { - const auto* vp = dynamic_cast*>(&P); - int ptcl = vp ? vp->refPtcl : iat; - assert(psi.size() == this->OrbitalSetSize); - for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++) - psi[iorb] = ref_psi_(ptcl, iorb); + const auto* vp = dynamic_cast*>(&P); + int ptcl = vp ? vp->refPtcl : iat; + assert(psi.size() == this->OrbitalSetSize); + for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++) + psi[iorb] = ref_psi_(ptcl, iorb); } -template -void -ConstantSPOSetT::evaluateVGL(const ParticleSetT& P, int iat, - ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +template +void ConstantSPOSetT::evaluateVGL(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) { - for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++) { - psi[iorb] = ref_psi_(iat, iorb); - dpsi[iorb] = ref_egrad_(iat, iorb); - d2psi[iorb] = ref_elapl_(iat, iorb); - } + for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++) + { + psi[iorb] = ref_psi_(iat, iorb); + dpsi[iorb] = ref_egrad_(iat, iorb); + d2psi[iorb] = ref_elapl_(iat, iorb); + } } -template -void -ConstantSPOSetT::evaluate_notranspose(const ParticleSetT& P, int first, - int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) +template +void ConstantSPOSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) { - for (int iat = first, i = 0; iat < last; ++iat, ++i) { - ValueVector v(logdet[i], logdet.cols()); - 
GradVector g(dlogdet[i], dlogdet.cols()); - ValueVector l(d2logdet[i], d2logdet.cols()); - evaluateVGL(P, iat, v, g, l); - } + for (int iat = first, i = 0; iat < last; ++iat, ++i) + { + ValueVector v(logdet[i], logdet.cols()); + GradVector g(dlogdet[i], dlogdet.cols()); + ValueVector l(d2logdet[i], d2logdet.cols()); + evaluateVGL(P, iat, v, g, l); + } } template class ConstantSPOSetT; diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSetT.h b/src/QMCWaveFunctions/tests/ConstantSPOSetT.h index 335796df964..1a23473b196 100644 --- a/src/QMCWaveFunctions/tests/ConstantSPOSetT.h +++ b/src/QMCWaveFunctions/tests/ConstantSPOSetT.h @@ -23,85 +23,75 @@ namespace qmcplusplus * deterministic and known output to objects requiring SPOSet evaluations. * */ -template +template class ConstantSPOSetT : public SPOSetT { public: - using ValueMatrix = typename SPOSetT::ValueMatrix; - using GradMatrix = typename SPOSetT::GradMatrix; - using ValueVector = typename SPOSetT::ValueVector; - using GradVector = typename SPOSetT::GradVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + using ValueVector = typename SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; - ConstantSPOSetT(const std::string& my_name) = delete; + ConstantSPOSetT(const std::string& my_name) = delete; - // Constructor needs number of particles and number of orbitals. This is - // the minimum amount of information needed to sanely construct all data - // members and perform size checks later. - ConstantSPOSetT( - const std::string& my_name, const int nparticles, const int norbitals); + // Constructor needs number of particles and number of orbitals. This is + // the minimum amount of information needed to sanely construct all data + // members and perform size checks later. 
+ ConstantSPOSetT(const std::string& my_name, const int nparticles, const int norbitals); - std::unique_ptr> - makeClone() const final; + std::unique_ptr> makeClone() const final; - std::string - getClassName() const final - { - return "ConstantSPOSet"; - }; + std::string getClassName() const final { return "ConstantSPOSet"; }; - void checkOutVariables(const OptVariablesTypeT& active) final; + void checkOutVariables(const OptVariablesTypeT& active) final; - void - setOrbitalSetSize(int norbs) final; + void setOrbitalSetSize(int norbs) final; - /** + /** * @brief Setter method to set \phi_j(r_i). Stores input matrix in ref_psi_. * @param Nelec x Nion ValueType matrix of \phi_j(r_i) * @return void */ - void - setRefVals(const ValueMatrix& vals); - /** + void setRefVals(const ValueMatrix& vals); + /** * @brief Setter method to set \nabla_i \phi_j(r_i). Stores input matrix in * ref_egrad_. * @param Nelec x Nion GradType matrix of \grad_i \phi_j(r_i) * @return void */ - void - setRefEGrads(const GradMatrix& grads); - /** + void setRefEGrads(const GradMatrix& grads); + /** * @brief Setter method to set \nabla^2_i \phi_j(r_i). Stores input matrix * in ref_elapl_. 
* @param Nelec x Nion GradType matrix of \grad^2_i \phi_j(r_i) * @return void */ - void - setRefELapls(const ValueMatrix& lapls); + void setRefELapls(const ValueMatrix& lapls); - void - evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) final; + void evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) final; - void - evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi) final; + void evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final; - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) final; + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) final; private: - const int numparticles_; /// evaluate_notranspose arrays are nparticle x - /// norb matrices. To ensure consistent array - /// sizing and enforcement, we agree at - /// construction how large these matrices will be. - /// norb is stored in SPOSet::OrbitalSetSize. - - // Value, electron gradient, and electron laplacian at "reference - // configuration". i.e. before any attempted moves. - - ValueMatrix ref_psi_; - GradMatrix ref_egrad_; - ValueMatrix ref_elapl_; + const int numparticles_; /// evaluate_notranspose arrays are nparticle x + /// norb matrices. To ensure consistent array + /// sizing and enforcement, we agree at + /// construction how large these matrices will be. + /// norb is stored in SPOSet::OrbitalSetSize. + + // Value, electron gradient, and electron laplacian at "reference + // configuration". i.e. before any attempted moves. 
+ + ValueMatrix ref_psi_; + GradMatrix ref_egrad_; + ValueMatrix ref_elapl_; }; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/tests/FakeSPOT.cpp b/src/QMCWaveFunctions/tests/FakeSPOT.cpp index 85678ce5f39..2f5e486082a 100644 --- a/src/QMCWaveFunctions/tests/FakeSPOT.cpp +++ b/src/QMCWaveFunctions/tests/FakeSPOT.cpp @@ -13,135 +13,141 @@ namespace qmcplusplus { -template +template FakeSPOT::FakeSPOT() : SPOSetT("one_FakeSPO") { - a.resize(3, 3); + a.resize(3, 3); - a(0, 0) = 2.3; - a(0, 1) = 4.5; - a(0, 2) = 2.6; - a(1, 0) = 0.5; - a(1, 1) = 8.5; - a(1, 2) = 3.3; - a(2, 0) = 1.8; - a(2, 1) = 4.4; - a(2, 2) = 4.9; + a(0, 0) = 2.3; + a(0, 1) = 4.5; + a(0, 2) = 2.6; + a(1, 0) = 0.5; + a(1, 1) = 8.5; + a(1, 2) = 3.3; + a(2, 0) = 1.8; + a(2, 1) = 4.4; + a(2, 2) = 4.9; - v.resize(3); - v[0] = 1.9; - v[1] = 2.0; - v[2] = 3.1; + v.resize(3); + v[0] = 1.9; + v[1] = 2.0; + v[2] = 3.1; - a2.resize(4, 4); - a2(0, 0) = 2.3; - a2(0, 1) = 4.5; - a2(0, 2) = 2.6; - a2(0, 3) = 1.2; - a2(1, 0) = 0.5; - a2(1, 1) = 8.5; - a2(1, 2) = 3.3; - a2(1, 3) = 0.3; - a2(2, 0) = 1.8; - a2(2, 1) = 4.4; - a2(2, 2) = 4.9; - a2(2, 3) = 2.8; - a2(3, 0) = 0.8; - a2(3, 1) = 4.1; - a2(3, 2) = 3.2; - a2(3, 3) = 1.1; + a2.resize(4, 4); + a2(0, 0) = 2.3; + a2(0, 1) = 4.5; + a2(0, 2) = 2.6; + a2(0, 3) = 1.2; + a2(1, 0) = 0.5; + a2(1, 1) = 8.5; + a2(1, 2) = 3.3; + a2(1, 3) = 0.3; + a2(2, 0) = 1.8; + a2(2, 1) = 4.4; + a2(2, 2) = 4.9; + a2(2, 3) = 2.8; + a2(3, 0) = 0.8; + a2(3, 1) = 4.1; + a2(3, 2) = 3.2; + a2(3, 3) = 1.1; - v2.resize(4, 4); + v2.resize(4, 4); - v2(0, 0) = 3.2; - v2(0, 1) = 0.5; - v2(0, 2) = 5.9; - v2(0, 3) = 3.7; - v2(1, 0) = 0.3; - v2(1, 1) = 1.4; - v2(1, 2) = 3.9; - v2(1, 3) = 8.2; - v2(2, 0) = 3.3; - v2(2, 1) = 5.4; - v2(2, 2) = 4.9; - v2(2, 3) = 2.2; - v2(3, 1) = 5.4; - v2(3, 2) = 4.9; - v2(3, 3) = 2.2; + v2(0, 0) = 3.2; + v2(0, 1) = 0.5; + v2(0, 2) = 5.9; + v2(0, 3) = 3.7; + v2(1, 0) = 0.3; + v2(1, 1) = 1.4; + v2(1, 2) = 3.9; + v2(1, 3) = 8.2; + v2(2, 0) = 3.3; + 
v2(2, 1) = 5.4; + v2(2, 2) = 4.9; + v2(2, 3) = 2.2; + v2(3, 1) = 5.4; + v2(3, 2) = 4.9; + v2(3, 3) = 2.2; - gv.resize(4); - gv[0] = GradType(1.0, 0.0, 0.1); - gv[1] = GradType(1.0, 2.0, 0.1); - gv[2] = GradType(2.0, 1.0, 0.1); - gv[3] = GradType(0.4, 0.3, 0.1); + gv.resize(4); + gv[0] = GradType(1.0, 0.0, 0.1); + gv[1] = GradType(1.0, 2.0, 0.1); + gv[2] = GradType(2.0, 1.0, 0.1); + gv[3] = GradType(0.4, 0.3, 0.1); } -template -std::unique_ptr> -FakeSPOT::makeClone() const +template +std::unique_ptr> FakeSPOT::makeClone() const { - return std::make_unique>(*this); + return std::make_unique>(*this); } -template -void -FakeSPOT::setOrbitalSetSize(int norbs) +template +void FakeSPOT::setOrbitalSetSize(int norbs) { - this->OrbitalSetSize = norbs; + this->OrbitalSetSize = norbs; } -template -void -FakeSPOT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) +template +void FakeSPOT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) { - if (iat < 0) - for (int i = 0; i < psi.size(); i++) - psi[i] = 1.2 * i - i * i; - else if (this->OrbitalSetSize == 3) - for (int i = 0; i < 3; i++) - psi[i] = a(iat, i); - else if (this->OrbitalSetSize == 4) - for (int i = 0; i < 4; i++) - psi[i] = a2(iat, i); + if (iat < 0) + for (int i = 0; i < psi.size(); i++) + psi[i] = 1.2 * i - i * i; + else if (this->OrbitalSetSize == 3) + for (int i = 0; i < 3; i++) + psi[i] = a(iat, i); + else if (this->OrbitalSetSize == 4) + for (int i = 0; i < 4; i++) + psi[i] = a2(iat, i); } -template -void -FakeSPOT::evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi) +template +void FakeSPOT::evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { - if (this->OrbitalSetSize == 3) { - for (int i = 0; i < 3; i++) { - psi[i] = v[i]; - dpsi[i] = gv[i]; - } + if (this->OrbitalSetSize == 3) + { + for (int i = 0; i < 3; i++) + { + psi[i] = v[i]; + dpsi[i] = gv[i]; } - else if 
(this->OrbitalSetSize == 4) { - for (int i = 0; i < 4; i++) { - psi[i] = v2(iat, i); - dpsi[i] = gv[i]; - } + } + else if (this->OrbitalSetSize == 4) + { + for (int i = 0; i < 4; i++) + { + psi[i] = v2(iat, i); + dpsi[i] = gv[i]; } + } } -template -void -FakeSPOT::evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) +template +void FakeSPOT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) { - if (this->OrbitalSetSize == 3) { - for (int i = 0; i < 3; i++) - for (int j = 0; j < 3; j++) { - logdet(j, i) = a(i, j); - dlogdet[i][j] = gv[j] + GradType(i); - } - } - else if (this->OrbitalSetSize == 4) { - for (int i = 0; i < 4; i++) - for (int j = 0; j < 4; j++) { - logdet(j, i) = a2(i, j); - dlogdet[i][j] = gv[j] + GradType(i); - } - } + if (this->OrbitalSetSize == 3) + { + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + { + logdet(j, i) = a(i, j); + dlogdet[i][j] = gv[j] + GradType(i); + } + } + else if (this->OrbitalSetSize == 4) + { + for (int i = 0; i < 4; i++) + for (int j = 0; j < 4; j++) + { + logdet(j, i) = a2(i, j); + dlogdet[i][j] = gv[j] + GradType(i); + } + } } // Class concrete types from ValueType diff --git a/src/QMCWaveFunctions/tests/FakeSPOT.h b/src/QMCWaveFunctions/tests/FakeSPOT.h index f0a6f1ef80a..6603f8f52a1 100644 --- a/src/QMCWaveFunctions/tests/FakeSPOT.h +++ b/src/QMCWaveFunctions/tests/FakeSPOT.h @@ -16,55 +16,45 @@ namespace qmcplusplus { -template +template class FakeSPOT : public SPOSetT { public: - Matrix a; - Matrix a2; - Vector v; - Matrix v2; + Matrix a; + Matrix a2; + Vector v; + Matrix v2; - using ValueVector = typename SPOSetT::ValueVector; - using ValueMatrix = typename SPOSetT::ValueMatrix; - using GradVector = typename SPOSetT::GradVector; - using GradMatrix = typename SPOSetT::GradMatrix; - using GradType = typename SPOSetT::GradType; + using 
ValueVector = typename SPOSetT::ValueVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradVector = typename SPOSetT::GradVector; + using GradMatrix = typename SPOSetT::GradMatrix; + using GradType = typename SPOSetT::GradType; - typename SPOSetT::GradVector gv; + typename SPOSetT::GradVector gv; - FakeSPOT(); + FakeSPOT(); - ~FakeSPOT() override = default; + ~FakeSPOT() override = default; - std::string - getClassName() const override - { - return "FakeSPO"; - } + std::string getClassName() const override { return "FakeSPO"; } - std::unique_ptr> - makeClone() const override; + std::unique_ptr> makeClone() const override; - virtual void - report() - { - } + virtual void report() {} - void - setOrbitalSetSize(int norbs) override; + void setOrbitalSetSize(int norbs) override; - void - evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override; + void evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override; - void - evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi) override; + void evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) override; }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp b/src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp index 708e4780535..43e3b2e8f1d 100644 --- a/src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp +++ b/src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp @@ -34,53 +34,29 @@ using std::string; namespace qmcplusplus { -template +template struct ValueApproxHelper { - using Type = Catch::Detail::Approx; + using Type = Catch::Detail::Approx; }; 
-template +template struct ValueApproxHelper> { - using Type = Catch::Detail::ComplexApprox; + using Type = Catch::Detail::ComplexApprox; }; -template +template using ValueApprox = typename ValueApproxHelper::Type; namespace testing { -OptVariablesTypeT& -getMyVars(SPOSetT& rot) -{ - return rot.myVars; -} -OptVariablesTypeT& -getMyVars(SPOSetT& rot) -{ - return rot.myVars; -} -OptVariablesTypeT& -getMyVarsFull(RotatedSPOsT& rot) -{ - return rot.myVarsFull; -} -OptVariablesTypeT& -getMyVarsFull(RotatedSPOsT& rot) -{ - return rot.myVarsFull; -} -std::vector>& -getHistoryParams(RotatedSPOsT& rot) -{ - return rot.history_params_; -} +OptVariablesTypeT& getMyVars(SPOSetT& rot) { return rot.myVars; } +OptVariablesTypeT& getMyVars(SPOSetT& rot) { return rot.myVars; } +OptVariablesTypeT& getMyVarsFull(RotatedSPOsT& rot) { return rot.myVarsFull; } +OptVariablesTypeT& getMyVarsFull(RotatedSPOsT& rot) { return rot.myVarsFull; } +std::vector>& getHistoryParams(RotatedSPOsT& rot) { return rot.history_params_; } -std::vector>& -getHistoryParams(RotatedSPOsT& rot) -{ - return rot.history_params_; -} +std::vector>& getHistoryParams(RotatedSPOsT& rot) { return rot.history_params_; } } // namespace testing #ifndef QMC_COMPLEX @@ -97,119 +73,104 @@ using TestTypeList = std::tuple<>; JPT 04.01.2022: Adapted from test_einset.cpp Test the spline rotated machinery for SplineR2R (extend to others later). */ -TEMPLATE_LIST_TEST_CASE( - "RotatedSPOs via SplineR2R", "[wavefunction][template]", TestTypeList) +TEMPLATE_LIST_TEST_CASE("RotatedSPOs via SplineR2R", "[wavefunction][template]", TestTypeList) { - using RealType = typename SPOSetT::RealType; + using RealType = typename SPOSetT::RealType; - /* + /* BEGIN Boilerplate stuff to make a simple SPOSet. Copied from test_einset.cpp */ - Communicate* c = OHMMS::Controller; - - // We get a "Mismatched supercell lattices" error due to default ctor? 
- typename ParticleSetT::ParticleLayout lattice; - - // diamondC_1x1x1 - lattice.R = {3.37316115, 3.37316115, 0.0, 0.0, 3.37316115, 3.37316115, - 3.37316115, 0.0, 3.37316115}; - - ParticleSetPoolT ptcl = ParticleSetPoolT(c); - ptcl.setSimulationCell(lattice); - // LAttice seems fine after this point... - - auto ions_uptr = - std::make_unique>(ptcl.getSimulationCell()); - auto elec_uptr = - std::make_unique>(ptcl.getSimulationCell()); - ParticleSetT& ions_(*ions_uptr); - ParticleSetT& elec_(*elec_uptr); - - ions_.setName("ion"); - ptcl.addParticleSet(std::move(ions_uptr)); - ions_.create({2}); - ions_.R[0] = {0.0, 0.0, 0.0}; - ions_.R[1] = {1.68658058, 1.68658058, 1.68658058}; - elec_.setName("elec"); - ptcl.addParticleSet(std::move(elec_uptr)); - elec_.create({2}); - elec_.R[0] = {0.0, 0.0, 0.0}; - elec_.R[1] = {0.0, 1.0, 0.0}; - SpeciesSet& tspecies = elec_.getSpeciesSet(); - int upIdx = tspecies.addSpecies("u"); - int chargeIdx = tspecies.addAttribute("charge"); - tspecies(chargeIdx, upIdx) = -1; - - // diamondC_1x1x1 - 8 bands available - const char* particles = R"( + Communicate* c = OHMMS::Controller; + + // We get a "Mismatched supercell lattices" error due to default ctor? + typename ParticleSetT::ParticleLayout lattice; + + // diamondC_1x1x1 + lattice.R = {3.37316115, 3.37316115, 0.0, 0.0, 3.37316115, 3.37316115, 3.37316115, 0.0, 3.37316115}; + + ParticleSetPoolT ptcl = ParticleSetPoolT(c); + ptcl.setSimulationCell(lattice); + // LAttice seems fine after this point... 
+ + auto ions_uptr = std::make_unique>(ptcl.getSimulationCell()); + auto elec_uptr = std::make_unique>(ptcl.getSimulationCell()); + ParticleSetT& ions_(*ions_uptr); + ParticleSetT& elec_(*elec_uptr); + + ions_.setName("ion"); + ptcl.addParticleSet(std::move(ions_uptr)); + ions_.create({2}); + ions_.R[0] = {0.0, 0.0, 0.0}; + ions_.R[1] = {1.68658058, 1.68658058, 1.68658058}; + elec_.setName("elec"); + ptcl.addParticleSet(std::move(elec_uptr)); + elec_.create({2}); + elec_.R[0] = {0.0, 0.0, 0.0}; + elec_.R[1] = {0.0, 1.0, 0.0}; + SpeciesSet& tspecies = elec_.getSpeciesSet(); + int upIdx = tspecies.addSpecies("u"); + int chargeIdx = tspecies.addAttribute("charge"); + tspecies(chargeIdx, upIdx) = -1; + + // diamondC_1x1x1 - 8 bands available + const char* particles = R"( )"; - Libxml2Document doc; - bool okay = doc.parseFromString(particles); - REQUIRE(okay); + Libxml2Document doc; + bool okay = doc.parseFromString(particles); + REQUIRE(okay); - xmlNodePtr root = doc.getRoot(); + xmlNodePtr root = doc.getRoot(); - xmlNodePtr ein1 = xmlFirstElementChild(root); + xmlNodePtr ein1 = xmlFirstElementChild(root); - EinsplineSetBuilderT einSet(elec_, ptcl.getPool(), c, ein1); - auto spo = einSet.createSPOSetFromXML(ein1); - REQUIRE(spo); + EinsplineSetBuilderT einSet(elec_, ptcl.getPool(), c, ein1); + auto spo = einSet.createSPOSetFromXML(ein1); + REQUIRE(spo); - /* + /* END Boilerplate stuff. Now we have a SplineR2R wavefunction ready for rotation. What follows is the actual test. */ - // SplineR2R only for the moment, so skip if QMC_COMPLEX is set + // SplineR2R only for the moment, so skip if QMC_COMPLEX is set #if !defined(QMC_COMPLEX) - spo->storeParamsBeforeRotation(); - // 1.) Make a RotatedSPOs object so that we can use the rotation routines - auto rot_spo = std::make_unique>( - "one_rotated_set", std::move(spo)); - - // Sanity check for orbs. Expect 2 electrons, 8 orbitals, & 79507 coefs/orb. 
- const auto orbitalsetsize = rot_spo->getOrbitalSetSize(); - REQUIRE(orbitalsetsize == 8); - - // 2.) Get data for unrotated orbitals. Check that there's no rotation - rot_spo->buildOptVariables(elec_.R.size()); - typename SPOSetT::ValueMatrix psiM_bare( - elec_.R.size(), orbitalsetsize); - typename SPOSetT::GradMatrix dpsiM_bare( - elec_.R.size(), orbitalsetsize); - typename SPOSetT::ValueMatrix d2psiM_bare( - elec_.R.size(), orbitalsetsize); - rot_spo->evaluate_notranspose( - elec_, 0, elec_.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare); - - // This stuff checks that no rotation was applied. Copied from - // test_einset.cpp. value - CHECK(std::real(psiM_bare[1][0]) == ValueApprox(-0.8886948824)); - CHECK(std::real(psiM_bare[1][1]) == ValueApprox(1.4194120169)); - // grad - CHECK( - std::real(dpsiM_bare[1][0][0]) == ValueApprox(-0.0000183403)); - CHECK( - std::real(dpsiM_bare[1][0][1]) == ValueApprox(0.1655139178)); - CHECK( - std::real(dpsiM_bare[1][0][2]) == ValueApprox(-0.0000193077)); - CHECK( - std::real(dpsiM_bare[1][1][0]) == ValueApprox(-1.3131694794)); - CHECK( - std::real(dpsiM_bare[1][1][1]) == ValueApprox(-1.1174004078)); - CHECK( - std::real(dpsiM_bare[1][1][2]) == ValueApprox(-0.8462534547)); - // lapl - CHECK(std::real(d2psiM_bare[1][0]) == ValueApprox(1.3313053846)); - CHECK(std::real(d2psiM_bare[1][1]) == ValueApprox(-4.712583065)); - - /* + spo->storeParamsBeforeRotation(); + // 1.) Make a RotatedSPOs object so that we can use the rotation routines + auto rot_spo = std::make_unique>("one_rotated_set", std::move(spo)); + + // Sanity check for orbs. Expect 2 electrons, 8 orbitals, & 79507 coefs/orb. + const auto orbitalsetsize = rot_spo->getOrbitalSetSize(); + REQUIRE(orbitalsetsize == 8); + + // 2.) Get data for unrotated orbitals. 
Check that there's no rotation + rot_spo->buildOptVariables(elec_.R.size()); + typename SPOSetT::ValueMatrix psiM_bare(elec_.R.size(), orbitalsetsize); + typename SPOSetT::GradMatrix dpsiM_bare(elec_.R.size(), orbitalsetsize); + typename SPOSetT::ValueMatrix d2psiM_bare(elec_.R.size(), orbitalsetsize); + rot_spo->evaluate_notranspose(elec_, 0, elec_.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare); + + // This stuff checks that no rotation was applied. Copied from + // test_einset.cpp. value + CHECK(std::real(psiM_bare[1][0]) == ValueApprox(-0.8886948824)); + CHECK(std::real(psiM_bare[1][1]) == ValueApprox(1.4194120169)); + // grad + CHECK(std::real(dpsiM_bare[1][0][0]) == ValueApprox(-0.0000183403)); + CHECK(std::real(dpsiM_bare[1][0][1]) == ValueApprox(0.1655139178)); + CHECK(std::real(dpsiM_bare[1][0][2]) == ValueApprox(-0.0000193077)); + CHECK(std::real(dpsiM_bare[1][1][0]) == ValueApprox(-1.3131694794)); + CHECK(std::real(dpsiM_bare[1][1][1]) == ValueApprox(-1.1174004078)); + CHECK(std::real(dpsiM_bare[1][1][2]) == ValueApprox(-0.8462534547)); + // lapl + CHECK(std::real(d2psiM_bare[1][0]) == ValueApprox(1.3313053846)); + CHECK(std::real(d2psiM_bare[1][1]) == ValueApprox(-4.712583065)); + + /* 3.) Apply a rotation to the orbitals To do this, construct a params vector and call the RotatedSPOs::apply_rotation(params) method. That should do the @@ -217,26 +178,22 @@ TEMPLATE_LIST_TEST_CASE( For 2 electrons in 8 orbs, we expect 2*(8-2) = 12 params. */ - const auto rot_size = rot_spo->m_act_rot_inds.size(); - REQUIRE(rot_size == 12); // = Nelec*(Norbs - Nelec) = 2*(8-2) = 12 - std::vector param(rot_size); - for (auto i = 0; i < rot_size; i++) { - param[i] = 0.01 * static_cast(i); - } - rot_spo->apply_rotation( - param, false); // Expect this to call SplineR2R::applyRotation() - - // 4.) Get data for rotated orbitals. 
- typename SPOSetT::ValueMatrix psiM_rot( - elec_.R.size(), orbitalsetsize); - typename SPOSetT::GradMatrix dpsiM_rot( - elec_.R.size(), orbitalsetsize); - typename SPOSetT::ValueMatrix d2psiM_rot( - elec_.R.size(), orbitalsetsize); - rot_spo->evaluate_notranspose( - elec_, 0, elec_.R.size(), psiM_rot, dpsiM_rot, d2psiM_rot); - - /* + const auto rot_size = rot_spo->m_act_rot_inds.size(); + REQUIRE(rot_size == 12); // = Nelec*(Norbs - Nelec) = 2*(8-2) = 12 + std::vector param(rot_size); + for (auto i = 0; i < rot_size; i++) + { + param[i] = 0.01 * static_cast(i); + } + rot_spo->apply_rotation(param, false); // Expect this to call SplineR2R::applyRotation() + + // 4.) Get data for rotated orbitals. + typename SPOSetT::ValueMatrix psiM_rot(elec_.R.size(), orbitalsetsize); + typename SPOSetT::GradMatrix dpsiM_rot(elec_.R.size(), orbitalsetsize); + typename SPOSetT::ValueMatrix d2psiM_rot(elec_.R.size(), orbitalsetsize); + rot_spo->evaluate_notranspose(elec_, 0, elec_.R.size(), psiM_rot, dpsiM_rot, d2psiM_rot); + + /* Manually encode the unitary transformation. Ugly, but it works. @TODO: Use the total rotation machinery when it's implemented @@ -246,232 +203,217 @@ TEMPLATE_LIST_TEST_CASE( probably be ditched once we have a way to grab the actual rotation matrix... 
*/ - typename SPOSetT::ValueMatrix rot_mat( - orbitalsetsize, orbitalsetsize); - rot_mat[0][0] = 0.99726; - rot_mat[0][1] = -0.00722; - rot_mat[0][2] = 0.00014; - rot_mat[0][3] = -0.00982; - rot_mat[0][4] = -0.01979; - rot_mat[0][5] = -0.02976; - rot_mat[0][6] = -0.03972; - rot_mat[0][7] = -0.04969; - rot_mat[1][0] = -0.00722; - rot_mat[1][1] = 0.97754; - rot_mat[1][2] = -0.05955; - rot_mat[1][3] = -0.06945; - rot_mat[1][4] = -0.07935; - rot_mat[1][5] = -0.08925; - rot_mat[1][6] = -0.09915; - rot_mat[1][7] = -0.10905; - rot_mat[2][0] = -0.00014; - rot_mat[2][1] = 0.05955; - rot_mat[2][2] = 0.99821; - rot_mat[2][3] = -0.00209; - rot_mat[2][4] = -0.00239; - rot_mat[2][5] = -0.00269; - rot_mat[2][6] = -0.00299; - rot_mat[2][7] = -0.00329; - rot_mat[3][0] = 0.00982; - rot_mat[3][1] = 0.06945; - rot_mat[3][2] = -0.00209; - rot_mat[3][3] = 0.99751; - rot_mat[3][4] = -0.00289; - rot_mat[3][5] = -0.00329; - rot_mat[3][6] = -0.00368; - rot_mat[3][7] = -0.00408; - rot_mat[4][0] = 0.01979; - rot_mat[4][1] = 0.07935; - rot_mat[4][2] = -0.00239; - rot_mat[4][3] = -0.00289; - rot_mat[4][4] = 0.99661; - rot_mat[4][5] = -0.00388; - rot_mat[4][6] = -0.00438; - rot_mat[4][7] = -0.00488; - rot_mat[5][0] = 0.02976; - rot_mat[5][1] = 0.08925; - rot_mat[5][2] = -0.00269; - rot_mat[5][3] = -0.00329; - rot_mat[5][4] = -0.00388; - rot_mat[5][5] = 0.99552; - rot_mat[5][6] = -0.00508; - rot_mat[5][7] = -0.00568; - rot_mat[6][0] = 0.03972; - rot_mat[6][1] = 0.09915; - rot_mat[6][2] = -0.00299; - rot_mat[6][3] = -0.00368; - rot_mat[6][4] = -0.00438; - rot_mat[6][5] = -0.00508; - rot_mat[6][6] = 0.99422; - rot_mat[6][7] = -0.00647; - rot_mat[7][0] = 0.04969; - rot_mat[7][1] = 0.10905; - rot_mat[7][2] = -0.00329; - rot_mat[7][3] = -0.00408; - rot_mat[7][4] = -0.00488; - rot_mat[7][5] = -0.00568; - rot_mat[7][6] = -0.00647; - rot_mat[7][7] = 0.99273; - - // Now compute the expected values by hand using the transformation above - double val1 = 0.; - double val2 = 0.; - for (auto i = 0; i < 
rot_mat.size1(); i++) { - val1 += psiM_bare[0][i] * rot_mat[i][0]; - val2 += psiM_bare[1][i] * rot_mat[i][0]; - } - - // value - CHECK(std::real(psiM_rot[0][0]) == ValueApprox(val1)); - CHECK(std::real(psiM_rot[1][0]) == ValueApprox(val2)); - - std::vector grad1(3); - std::vector grad2(3); - for (auto j = 0; j < grad1.size(); j++) { - for (auto i = 0; i < rot_mat.size1(); i++) { - grad1[j] += dpsiM_bare[0][i][j] * rot_mat[i][0]; - grad2[j] += dpsiM_bare[1][i][j] * rot_mat[i][0]; - } - } - - // grad - CHECK( - dpsiM_rot[0][0][0] == ValueApprox(grad1[0]).epsilon(0.0001)); - CHECK( - dpsiM_rot[0][0][1] == ValueApprox(grad1[1]).epsilon(0.0001)); - CHECK( - dpsiM_rot[0][0][2] == ValueApprox(grad1[2]).epsilon(0.0001)); - CHECK( - dpsiM_rot[1][0][0] == ValueApprox(grad2[0]).epsilon(0.0001)); - CHECK( - dpsiM_rot[1][0][1] == ValueApprox(grad2[1]).epsilon(0.0001)); - CHECK( - dpsiM_rot[1][0][2] == ValueApprox(grad2[2]).epsilon(0.0001)); - - double lap1 = 0.; - double lap2 = 0.; - for (auto i = 0; i < rot_mat.size1(); i++) { - lap1 += d2psiM_bare[0][i] * rot_mat[i][0]; - lap2 += d2psiM_bare[1][i] * rot_mat[i][0]; + typename SPOSetT::ValueMatrix rot_mat(orbitalsetsize, orbitalsetsize); + rot_mat[0][0] = 0.99726; + rot_mat[0][1] = -0.00722; + rot_mat[0][2] = 0.00014; + rot_mat[0][3] = -0.00982; + rot_mat[0][4] = -0.01979; + rot_mat[0][5] = -0.02976; + rot_mat[0][6] = -0.03972; + rot_mat[0][7] = -0.04969; + rot_mat[1][0] = -0.00722; + rot_mat[1][1] = 0.97754; + rot_mat[1][2] = -0.05955; + rot_mat[1][3] = -0.06945; + rot_mat[1][4] = -0.07935; + rot_mat[1][5] = -0.08925; + rot_mat[1][6] = -0.09915; + rot_mat[1][7] = -0.10905; + rot_mat[2][0] = -0.00014; + rot_mat[2][1] = 0.05955; + rot_mat[2][2] = 0.99821; + rot_mat[2][3] = -0.00209; + rot_mat[2][4] = -0.00239; + rot_mat[2][5] = -0.00269; + rot_mat[2][6] = -0.00299; + rot_mat[2][7] = -0.00329; + rot_mat[3][0] = 0.00982; + rot_mat[3][1] = 0.06945; + rot_mat[3][2] = -0.00209; + rot_mat[3][3] = 0.99751; + rot_mat[3][4] = -0.00289; + 
rot_mat[3][5] = -0.00329; + rot_mat[3][6] = -0.00368; + rot_mat[3][7] = -0.00408; + rot_mat[4][0] = 0.01979; + rot_mat[4][1] = 0.07935; + rot_mat[4][2] = -0.00239; + rot_mat[4][3] = -0.00289; + rot_mat[4][4] = 0.99661; + rot_mat[4][5] = -0.00388; + rot_mat[4][6] = -0.00438; + rot_mat[4][7] = -0.00488; + rot_mat[5][0] = 0.02976; + rot_mat[5][1] = 0.08925; + rot_mat[5][2] = -0.00269; + rot_mat[5][3] = -0.00329; + rot_mat[5][4] = -0.00388; + rot_mat[5][5] = 0.99552; + rot_mat[5][6] = -0.00508; + rot_mat[5][7] = -0.00568; + rot_mat[6][0] = 0.03972; + rot_mat[6][1] = 0.09915; + rot_mat[6][2] = -0.00299; + rot_mat[6][3] = -0.00368; + rot_mat[6][4] = -0.00438; + rot_mat[6][5] = -0.00508; + rot_mat[6][6] = 0.99422; + rot_mat[6][7] = -0.00647; + rot_mat[7][0] = 0.04969; + rot_mat[7][1] = 0.10905; + rot_mat[7][2] = -0.00329; + rot_mat[7][3] = -0.00408; + rot_mat[7][4] = -0.00488; + rot_mat[7][5] = -0.00568; + rot_mat[7][6] = -0.00647; + rot_mat[7][7] = 0.99273; + + // Now compute the expected values by hand using the transformation above + double val1 = 0.; + double val2 = 0.; + for (auto i = 0; i < rot_mat.size1(); i++) + { + val1 += psiM_bare[0][i] * rot_mat[i][0]; + val2 += psiM_bare[1][i] * rot_mat[i][0]; + } + + // value + CHECK(std::real(psiM_rot[0][0]) == ValueApprox(val1)); + CHECK(std::real(psiM_rot[1][0]) == ValueApprox(val2)); + + std::vector grad1(3); + std::vector grad2(3); + for (auto j = 0; j < grad1.size(); j++) + { + for (auto i = 0; i < rot_mat.size1(); i++) + { + grad1[j] += dpsiM_bare[0][i][j] * rot_mat[i][0]; + grad2[j] += dpsiM_bare[1][i][j] * rot_mat[i][0]; } - - // Lapl - CHECK(std::real(d2psiM_rot[0][0]) == - ValueApprox(lap1).epsilon(0.0001)); - CHECK(std::real(d2psiM_rot[1][0]) == - ValueApprox(lap2).epsilon(0.0001)); + } + + // grad + CHECK(dpsiM_rot[0][0][0] == ValueApprox(grad1[0]).epsilon(0.0001)); + CHECK(dpsiM_rot[0][0][1] == ValueApprox(grad1[1]).epsilon(0.0001)); + CHECK(dpsiM_rot[0][0][2] == ValueApprox(grad1[2]).epsilon(0.0001)); + 
CHECK(dpsiM_rot[1][0][0] == ValueApprox(grad2[0]).epsilon(0.0001)); + CHECK(dpsiM_rot[1][0][1] == ValueApprox(grad2[1]).epsilon(0.0001)); + CHECK(dpsiM_rot[1][0][2] == ValueApprox(grad2[2]).epsilon(0.0001)); + + double lap1 = 0.; + double lap2 = 0.; + for (auto i = 0; i < rot_mat.size1(); i++) + { + lap1 += d2psiM_bare[0][i] * rot_mat[i][0]; + lap2 += d2psiM_bare[1][i] * rot_mat[i][0]; + } + + // Lapl + CHECK(std::real(d2psiM_rot[0][0]) == ValueApprox(lap1).epsilon(0.0001)); + CHECK(std::real(d2psiM_rot[1][0]) == ValueApprox(lap2).epsilon(0.0001)); #endif } -TEMPLATE_LIST_TEST_CASE("RotatedSPOs createRotationIndices", - "[wavefunction][template]", TestTypeList) +TEMPLATE_LIST_TEST_CASE("RotatedSPOs createRotationIndices", "[wavefunction][template]", TestTypeList) { - // No active-active or virtual-virtual rotations - // Only active-virtual - typename RotatedSPOsT::RotationIndices rot_ind; - int nel = 1; - int nmo = 3; - RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind); - CHECK(rot_ind.size() == 2); - - // Full rotation contains all rotations - // Size should be number of pairs of orbitals: nmo*(nmo-1)/2 - typename RotatedSPOsT::RotationIndices full_rot_ind; - RotatedSPOsT::createRotationIndicesFull(nel, nmo, full_rot_ind); - CHECK(full_rot_ind.size() == 3); - - nel = 2; - typename RotatedSPOsT::RotationIndices rot_ind2; - RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind2); - CHECK(rot_ind2.size() == 2); - - typename RotatedSPOsT::RotationIndices full_rot_ind2; - RotatedSPOsT::createRotationIndicesFull(nel, nmo, full_rot_ind2); - CHECK(full_rot_ind2.size() == 3); - - nmo = 4; - typename RotatedSPOsT::RotationIndices rot_ind3; - RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind3); - CHECK(rot_ind3.size() == 4); - - typename RotatedSPOsT::RotationIndices full_rot_ind3; - RotatedSPOsT::createRotationIndicesFull(nel, nmo, full_rot_ind3); - CHECK(full_rot_ind3.size() == 6); + // No active-active or virtual-virtual rotations + // Only active-virtual + 
typename RotatedSPOsT::RotationIndices rot_ind; + int nel = 1; + int nmo = 3; + RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind); + CHECK(rot_ind.size() == 2); + + // Full rotation contains all rotations + // Size should be number of pairs of orbitals: nmo*(nmo-1)/2 + typename RotatedSPOsT::RotationIndices full_rot_ind; + RotatedSPOsT::createRotationIndicesFull(nel, nmo, full_rot_ind); + CHECK(full_rot_ind.size() == 3); + + nel = 2; + typename RotatedSPOsT::RotationIndices rot_ind2; + RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind2); + CHECK(rot_ind2.size() == 2); + + typename RotatedSPOsT::RotationIndices full_rot_ind2; + RotatedSPOsT::createRotationIndicesFull(nel, nmo, full_rot_ind2); + CHECK(full_rot_ind2.size() == 3); + + nmo = 4; + typename RotatedSPOsT::RotationIndices rot_ind3; + RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind3); + CHECK(rot_ind3.size() == 4); + + typename RotatedSPOsT::RotationIndices full_rot_ind3; + RotatedSPOsT::createRotationIndicesFull(nel, nmo, full_rot_ind3); + CHECK(full_rot_ind3.size() == 6); } -TEMPLATE_LIST_TEST_CASE("RotatedSPOs constructAntiSymmetricMatrix", - "[wavefunction][template]", TestTypeList) +TEMPLATE_LIST_TEST_CASE("RotatedSPOs constructAntiSymmetricMatrix", "[wavefunction][template]", TestTypeList) { - using ValueType = typename SPOSetT::ValueType; - using ValueMatrix = typename SPOSetT::ValueMatrix; + using ValueType = typename SPOSetT::ValueType; + using ValueMatrix = typename SPOSetT::ValueMatrix; - typename RotatedSPOsT::RotationIndices rot_ind; - int nel = 1; - int nmo = 3; - RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind); + typename RotatedSPOsT::RotationIndices rot_ind; + int nel = 1; + int nmo = 3; + RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind); - ValueMatrix m3(nmo, nmo); - m3 = ValueType(0); - std::vector params = {0.1, 0.2}; + ValueMatrix m3(nmo, nmo); + m3 = ValueType(0); + std::vector params = {0.1, 0.2}; - RotatedSPOsT::constructAntiSymmetricMatrix(rot_ind, params, 
m3); + RotatedSPOsT::constructAntiSymmetricMatrix(rot_ind, params, m3); - // clang-format off + // clang-format off std::vector expected_data = { 0.0, -0.1, -0.2, 0.1, 0.0, 0.0, 0.2, 0.0, 0.0 }; - // clang-format on + // clang-format on - ValueMatrix expected_m3(expected_data.data(), 3, 3); + ValueMatrix expected_m3(expected_data.data(), 3, 3); - CheckMatrixResult check_matrix_result = checkMatrix(m3, expected_m3, true); - CHECKED_ELSE(check_matrix_result.result) - { - FAIL(check_matrix_result.result_message); - } + CheckMatrixResult check_matrix_result = checkMatrix(m3, expected_m3, true); + CHECKED_ELSE(check_matrix_result.result) { FAIL(check_matrix_result.result_message); } - std::vector params_out(2); - RotatedSPOsT::extractParamsFromAntiSymmetricMatrix( - rot_ind, m3, params_out); - CHECK(params_out[0] == ValueApprox(0.1)); - CHECK(params_out[1] == ValueApprox(0.2)); + std::vector params_out(2); + RotatedSPOsT::extractParamsFromAntiSymmetricMatrix(rot_ind, m3, params_out); + CHECK(params_out[0] == ValueApprox(0.1)); + CHECK(params_out[1] == ValueApprox(0.2)); } // Expected values of the matrix exponential come from gen_matrix_ops.py -TEMPLATE_LIST_TEST_CASE( - "RotatedSPOs exponentiate matrix", "[wavefunction][template]", TestTypeList) +TEMPLATE_LIST_TEST_CASE("RotatedSPOs exponentiate matrix", "[wavefunction][template]", TestTypeList) { - using ValueType = typename SPOSetT::ValueType; - using ValueMatrix = typename SPOSetT::ValueMatrix; + using ValueType = typename SPOSetT::ValueType; + using ValueMatrix = typename SPOSetT::ValueMatrix; - std::vector::ValueType> mat1_data = {0.0}; - typename SPOSetT::ValueMatrix m1(mat1_data.data(), 1, 1); - RotatedSPOsT::exponentiate_antisym_matrix(m1); - // Always return 1.0 (the only possible anti-symmetric 1x1 matrix is 0) - CHECK(m1(0, 0) == ValueApprox(1.0)); + std::vector::ValueType> mat1_data = {0.0}; + typename SPOSetT::ValueMatrix m1(mat1_data.data(), 1, 1); + RotatedSPOsT::exponentiate_antisym_matrix(m1); + // 
Always return 1.0 (the only possible anti-symmetric 1x1 matrix is 0) + CHECK(m1(0, 0) == ValueApprox(1.0)); - // clang-format off + // clang-format off std::vector::ValueType> mat2_data = { 0.0, -0.1, 0.1, 0.0 }; - // clang-format on + // clang-format on - typename SPOSetT::ValueMatrix m2(mat2_data.data(), 2, 2); - RotatedSPOsT::exponentiate_antisym_matrix(m2); + typename SPOSetT::ValueMatrix m2(mat2_data.data(), 2, 2); + RotatedSPOsT::exponentiate_antisym_matrix(m2); - // clang-format off + // clang-format off std::vector expected_rot2 = { 0.995004165278026, -0.0998334166468282, 0.0998334166468282, 0.995004165278026 }; - // clang-format on + // clang-format on - ValueMatrix expected_m2(expected_rot2.data(), 2, 2); - CheckMatrixResult check_matrix_result2 = checkMatrix(m2, expected_m2, true); - CHECKED_ELSE(check_matrix_result2.result) - { - FAIL(check_matrix_result2.result_message); - } + ValueMatrix expected_m2(expected_rot2.data(), 2, 2); + CheckMatrixResult check_matrix_result2 = checkMatrix(m2, expected_m2, true); + CHECKED_ELSE(check_matrix_result2.result) { FAIL(check_matrix_result2.result_message); } - // clang-format off + // clang-format off std::vector m3_input_data = { 0.0, -0.3, -0.1, 0.3, 0.0, -0.2, 0.1, 0.2, 0.0 }; @@ -481,53 +423,46 @@ TEMPLATE_LIST_TEST_CASE( 0.283164960565074, 0.935754803277919, -0.210191705950743, 0.127334574917630, 0.180540076694398, 0.975290308953046 }; - // clang-format on + // clang-format on - ValueMatrix m3(m3_input_data.data(), 3, 3); - ValueMatrix expected_m3(expected_rot3.data(), 3, 3); + ValueMatrix m3(m3_input_data.data(), 3, 3); + ValueMatrix expected_m3(expected_rot3.data(), 3, 3); - RotatedSPOsT::exponentiate_antisym_matrix(m3); + RotatedSPOsT::exponentiate_antisym_matrix(m3); - CheckMatrixResult check_matrix_result3 = checkMatrix(m3, expected_m3, true); - CHECKED_ELSE(check_matrix_result3.result) - { - FAIL(check_matrix_result3.result_message); - } + CheckMatrixResult check_matrix_result3 = checkMatrix(m3, 
expected_m3, true); + CHECKED_ELSE(check_matrix_result3.result) { FAIL(check_matrix_result3.result_message); } } -TEMPLATE_LIST_TEST_CASE( - "RotatedSPOs log matrix", "[wavefunction][template]", TestTypeList) +TEMPLATE_LIST_TEST_CASE("RotatedSPOs log matrix", "[wavefunction][template]", TestTypeList) { - using ValueType = typename SPOSetT::ValueType; - using ValueMatrix = typename SPOSetT::ValueMatrix; + using ValueType = typename SPOSetT::ValueType; + using ValueMatrix = typename SPOSetT::ValueMatrix; - std::vector::ValueType> mat1_data = {1.0}; - typename SPOSetT::ValueMatrix m1(mat1_data.data(), 1, 1); - typename SPOSetT::ValueMatrix out_m1(1, 1); - RotatedSPOsT::log_antisym_matrix(m1, out_m1); - // Should always be 1.0 (the only possible anti-symmetric 1x1 matrix is 0) - CHECK(out_m1(0, 0) == ValueApprox(0.0)); + std::vector::ValueType> mat1_data = {1.0}; + typename SPOSetT::ValueMatrix m1(mat1_data.data(), 1, 1); + typename SPOSetT::ValueMatrix out_m1(1, 1); + RotatedSPOsT::log_antisym_matrix(m1, out_m1); + // Should always be 1.0 (the only possible anti-symmetric 1x1 matrix is 0) + CHECK(out_m1(0, 0) == ValueApprox(0.0)); - // clang-format off + // clang-format off std::vector start_rot2 = { 0.995004165278026, -0.0998334166468282, 0.0998334166468282, 0.995004165278026 }; std::vector::ValueType> mat2_data = { 0.0, -0.1, 0.1, 0.0 }; - // clang-format on + // clang-format on - ValueMatrix rot_m2(start_rot2.data(), 2, 2); - ValueMatrix out_m2(2, 2); - RotatedSPOsT::log_antisym_matrix(rot_m2, out_m2); + ValueMatrix rot_m2(start_rot2.data(), 2, 2); + ValueMatrix out_m2(2, 2); + RotatedSPOsT::log_antisym_matrix(rot_m2, out_m2); - typename SPOSetT::ValueMatrix m2(mat2_data.data(), 2, 2); - CheckMatrixResult check_matrix_result2 = checkMatrix(m2, out_m2, true); - CHECKED_ELSE(check_matrix_result2.result) - { - FAIL(check_matrix_result2.result_message); - } + typename SPOSetT::ValueMatrix m2(mat2_data.data(), 2, 2); + CheckMatrixResult check_matrix_result2 = 
checkMatrix(m2, out_m2, true); + CHECKED_ELSE(check_matrix_result2.result) { FAIL(check_matrix_result2.result_message); } - // clang-format off + // clang-format off std::vector start_rot3 = { 0.950580617906092, -0.302932713402637, -0.0680313164049401, 0.283164960565074, 0.935754803277919, -0.210191705950743, 0.127334574917630, 0.180540076694398, 0.975290308953046 }; @@ -535,502 +470,442 @@ TEMPLATE_LIST_TEST_CASE( std::vector m3_input_data = { 0.0, -0.3, -0.1, 0.3, 0.0, -0.2, 0.1, 0.2, 0.0 }; - // clang-format on - ValueMatrix rot_m3(start_rot3.data(), 3, 3); - ValueMatrix out_m3(3, 3); - RotatedSPOsT::log_antisym_matrix(rot_m3, out_m3); - - typename SPOSetT::ValueMatrix m3(m3_input_data.data(), 3, 3); - CheckMatrixResult check_matrix_result3 = checkMatrix(m3, out_m3, true); - CHECKED_ELSE(check_matrix_result3.result) - { - FAIL(check_matrix_result3.result_message); - } + // clang-format on + ValueMatrix rot_m3(start_rot3.data(), 3, 3); + ValueMatrix out_m3(3, 3); + RotatedSPOsT::log_antisym_matrix(rot_m3, out_m3); + + typename SPOSetT::ValueMatrix m3(m3_input_data.data(), 3, 3); + CheckMatrixResult check_matrix_result3 = checkMatrix(m3, out_m3, true); + CHECKED_ELSE(check_matrix_result3.result) { FAIL(check_matrix_result3.result_message); } } // Test round trip A -> exp(A) -> log(exp(A)) // The log is multi-valued so this test may fail if the rotation parameters are // too large. 
The exponentials will be the same, though // exp(log(exp(A))) == exp(A) -TEMPLATE_LIST_TEST_CASE( - "RotatedSPOs exp-log matrix", "[wavefunction][template]", TestTypeList) +TEMPLATE_LIST_TEST_CASE("RotatedSPOs exp-log matrix", "[wavefunction][template]", TestTypeList) { - using ValueType = typename SPOSetT::ValueType; - using ValueMatrix = typename SPOSetT::ValueMatrix; + using ValueType = typename SPOSetT::ValueType; + using ValueMatrix = typename SPOSetT::ValueMatrix; - typename RotatedSPOsT::RotationIndices rot_ind; - int nel = 2; - int nmo = 4; - RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind); + typename RotatedSPOsT::RotationIndices rot_ind; + int nel = 2; + int nmo = 4; + RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind); - ValueMatrix rot_m4(nmo, nmo); - rot_m4 = ValueType(0); + ValueMatrix rot_m4(nmo, nmo); + rot_m4 = ValueType(0); - std::vector params4 = {-1.1, 1.5, 0.2, -0.15}; + std::vector params4 = {-1.1, 1.5, 0.2, -0.15}; - RotatedSPOsT::constructAntiSymmetricMatrix( - rot_ind, params4, rot_m4); - ValueMatrix orig_rot_m4 = rot_m4; - ValueMatrix out_m4(nmo, nmo); + RotatedSPOsT::constructAntiSymmetricMatrix(rot_ind, params4, rot_m4); + ValueMatrix orig_rot_m4 = rot_m4; + ValueMatrix out_m4(nmo, nmo); - RotatedSPOsT::exponentiate_antisym_matrix(rot_m4); + RotatedSPOsT::exponentiate_antisym_matrix(rot_m4); - RotatedSPOsT::log_antisym_matrix(rot_m4, out_m4); + RotatedSPOsT::log_antisym_matrix(rot_m4, out_m4); - CheckMatrixResult check_matrix_result4 = - checkMatrix(out_m4, orig_rot_m4, true); - CHECKED_ELSE(check_matrix_result4.result) - { - FAIL(check_matrix_result4.result_message); - } + CheckMatrixResult check_matrix_result4 = checkMatrix(out_m4, orig_rot_m4, true); + CHECKED_ELSE(check_matrix_result4.result) { FAIL(check_matrix_result4.result_message); } - std::vector params4out(4); - RotatedSPOsT::extractParamsFromAntiSymmetricMatrix( - rot_ind, out_m4, params4out); - for (int i = 0; i < params4.size(); i++) { - CHECK(params4[i] == 
ValueApprox(params4out[i])); - } + std::vector params4out(4); + RotatedSPOsT::extractParamsFromAntiSymmetricMatrix(rot_ind, out_m4, params4out); + for (int i = 0; i < params4.size(); i++) + { + CHECK(params4[i] == ValueApprox(params4out[i])); + } } -TEMPLATE_LIST_TEST_CASE( - "RotatedSPOs hcpBe", "[wavefunction][template]", TestTypeList) +TEMPLATE_LIST_TEST_CASE("RotatedSPOs hcpBe", "[wavefunction][template]", TestTypeList) { - using RealType = typename OrbitalSetTraits::RealType; - Communicate* c = OHMMS::Controller; - - typename ParticleSetT::ParticleLayout lattice; - lattice.R = {4.32747284, 0.00000000, 0.00000000, -2.16373642, 3.74770142, - 0.00000000, 0.00000000, 0.00000000, 6.78114995}; - - ParticleSetPoolT ptcl = ParticleSetPoolT(c); - ptcl.setSimulationCell(lattice); - auto ions_uptr = - std::make_unique>(ptcl.getSimulationCell()); - auto elec_uptr = - std::make_unique>(ptcl.getSimulationCell()); - ParticleSetT& ions(*ions_uptr); - ParticleSetT& elec(*elec_uptr); - - ions.setName("ion"); - ptcl.addParticleSet(std::move(ions_uptr)); - ions.create({1}); - ions.R[0] = {0.0, 0.0, 0.0}; - - elec.setName("elec"); - ptcl.addParticleSet(std::move(elec_uptr)); - elec.create({1}); - elec.R[0] = {0.0, 0.0, 0.0}; - - SpeciesSet& tspecies = elec.getSpeciesSet(); - int upIdx = tspecies.addSpecies("u"); - int chargeIdx = tspecies.addAttribute("charge"); - tspecies(chargeIdx, upIdx) = -1; - - // Add the attribute save_coefs="yes" to the sposet_builder tag to generate - // the spline file for use in eval_bspline_spo.py - - const char* particles = R"( + using RealType = typename OrbitalSetTraits::RealType; + Communicate* c = OHMMS::Controller; + + typename ParticleSetT::ParticleLayout lattice; + lattice.R = {4.32747284, 0.00000000, 0.00000000, -2.16373642, 3.74770142, + 0.00000000, 0.00000000, 0.00000000, 6.78114995}; + + ParticleSetPoolT ptcl = ParticleSetPoolT(c); + ptcl.setSimulationCell(lattice); + auto ions_uptr = std::make_unique>(ptcl.getSimulationCell()); + auto 
elec_uptr = std::make_unique>(ptcl.getSimulationCell()); + ParticleSetT& ions(*ions_uptr); + ParticleSetT& elec(*elec_uptr); + + ions.setName("ion"); + ptcl.addParticleSet(std::move(ions_uptr)); + ions.create({1}); + ions.R[0] = {0.0, 0.0, 0.0}; + + elec.setName("elec"); + ptcl.addParticleSet(std::move(elec_uptr)); + elec.create({1}); + elec.R[0] = {0.0, 0.0, 0.0}; + + SpeciesSet& tspecies = elec.getSpeciesSet(); + int upIdx = tspecies.addSpecies("u"); + int chargeIdx = tspecies.addAttribute("charge"); + tspecies(chargeIdx, upIdx) = -1; + + // Add the attribute save_coefs="yes" to the sposet_builder tag to generate + // the spline file for use in eval_bspline_spo.py + + const char* particles = R"( )"; - Libxml2Document doc; - bool okay = doc.parseFromString(particles); - REQUIRE(okay); - - xmlNodePtr root = doc.getRoot(); - - xmlNodePtr sposet_builder = xmlFirstElementChild(root); - xmlNodePtr sposet_ptr = xmlFirstElementChild(sposet_builder); - - EinsplineSetBuilderT einSet( - elec, ptcl.getPool(), c, sposet_builder); - auto spo = einSet.createSPOSetFromXML(sposet_ptr); - REQUIRE(spo); - - spo->storeParamsBeforeRotation(); - auto rot_spo = std::make_unique>( - "one_rotated_set", std::move(spo)); - - // Sanity check for orbs. 
Expect 1 electron, 2 orbitals - const auto orbitalsetsize = rot_spo->getOrbitalSetSize(); - REQUIRE(orbitalsetsize == 2); - - rot_spo->buildOptVariables(elec.R.size()); - - typename SPOSetT::ValueMatrix psiM_bare( - elec.R.size(), orbitalsetsize); - typename SPOSetT::GradMatrix dpsiM_bare( - elec.R.size(), orbitalsetsize); - typename SPOSetT::ValueMatrix d2psiM_bare( - elec.R.size(), orbitalsetsize); - rot_spo->evaluate_notranspose( - elec, 0, elec.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare); - - // Values generated from eval_bspline_spo.py, the - // generate_point_values_hcpBe function - CHECK( - std::real(psiM_bare[0][0]) == ValueApprox(0.210221765375514)); - CHECK(std::real(psiM_bare[0][1]) == - ValueApprox(-2.984345024542937e-06)); - - CHECK(std::real(d2psiM_bare[0][0]) == - ValueApprox(5.303848362116568)); - - OptVariablesTypeT opt_vars; - rot_spo->checkInVariablesExclusive(opt_vars); - opt_vars.resetIndex(); - rot_spo->checkOutVariables(opt_vars); - rot_spo->resetParametersExclusive(opt_vars); - - using ValueType = TestType; - Vector dlogpsi(1); - Vector dhpsioverpsi(1); - rot_spo->evaluateDerivatives(elec, opt_vars, dlogpsi, dhpsioverpsi, 0, 1); - - CHECK(dlogpsi[0] == ValueApprox(-1.41961753e-05)); - CHECK(dhpsioverpsi[0] == ValueApprox(-0.00060853)); - - std::vector params = {0.1}; - rot_spo->apply_rotation(params, false); - - rot_spo->evaluate_notranspose( - elec, 0, elec.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare); - CHECK(std::real(psiM_bare[0][0]) == - ValueApprox(0.20917123424337608)); - CHECK(std::real(psiM_bare[0][1]) == - ValueApprox(-0.02099012652669549)); - - CHECK(std::real(d2psiM_bare[0][0]) == - ValueApprox(5.277362065087747)); - - dlogpsi[0] = 0.0; - dhpsioverpsi[0] = 0.0; - - rot_spo->evaluateDerivatives(elec, opt_vars, dlogpsi, dhpsioverpsi, 0, 1); - CHECK(dlogpsi[0] == ValueApprox(-0.10034901119468914)); - CHECK(dhpsioverpsi[0] == ValueApprox(32.96939041498753)); -} + Libxml2Document doc; + bool okay = doc.parseFromString(particles); + 
REQUIRE(okay); -// Test construction of delta rotation -TEMPLATE_LIST_TEST_CASE("RotatedSPOs construct delta matrix", - "[wavefunction][template]", TestTypeList) -{ - using ValueType = typename SPOSetT::ValueType; - using ValueMatrix = typename SPOSetT::ValueMatrix; - - int nel = 2; - int nmo = 4; - typename RotatedSPOsT::RotationIndices rot_ind; - RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind); - typename RotatedSPOsT::RotationIndices full_rot_ind; - RotatedSPOsT::createRotationIndicesFull(nel, nmo, full_rot_ind); - // rot_ind size is 4 and full rot_ind size is 6 - - ValueMatrix rot_m4(nmo, nmo); - rot_m4 = ValueType(0); - - // When comparing with gen_matrix_ops.py, be aware of the order of indices - // in full_rot - // rot_ind is (0,2) (0,3) (1,2) (1,3) - // full_rot_ind is (0,2) (0,3) (1,2) (1,3) (0,1) (2,3) - // The extra indices go at the back - std::vector old_params = {1.5, 0.2, -0.15, 0.03, -1.1, 0.05}; - std::vector delta_params = {0.1, 0.3, 0.2, -0.1}; - std::vector new_params(6); - - RotatedSPOsT::constructDeltaRotation( - delta_params, old_params, rot_ind, full_rot_ind, new_params, rot_m4); - - // clang-format off - std::vector rot_data4 = - { -0.371126931484737, 0.491586564957393, -0.784780958819798, 0.0687480658200083, - -0.373372784561548, 0.66111547793048, 0.610450337985578, 0.225542620014052, - 0.751270334458895, 0.566737323353515, -0.0297901110611425, -0.336918744155143, - 0.398058348785074, 0.00881931472604944, -0.102867783149713, 0.911531672428406 }; - // clang-format on + xmlNodePtr root = doc.getRoot(); - ValueMatrix new_rot_m4(rot_data4.data(), 4, 4); + xmlNodePtr sposet_builder = xmlFirstElementChild(root); + xmlNodePtr sposet_ptr = xmlFirstElementChild(sposet_builder); - CheckMatrixResult check_matrix_result4 = - checkMatrix(rot_m4, new_rot_m4, true); - CHECKED_ELSE(check_matrix_result4.result) - { - FAIL(check_matrix_result4.result_message); - } + EinsplineSetBuilderT einSet(elec, ptcl.getPool(), c, sposet_builder); + auto spo = 
einSet.createSPOSetFromXML(sposet_ptr); + REQUIRE(spo); - // Reminder: Ordering! - std::vector expected_new_param = {1.6813965019790489, - 0.3623564254653294, -0.05486544454559908, -0.20574472941408453, - -0.9542513302873077, 0.27497788909911774}; - for (int i = 0; i < new_params.size(); i++) - CHECK(new_params[i] == ValueApprox(expected_new_param[i])); - - // Rotated back to original position - - std::vector new_params2(6); - std::vector reverse_delta_params = {-0.1, -0.3, -0.2, 0.1}; - RotatedSPOsT::constructDeltaRotation(reverse_delta_params, - new_params, rot_ind, full_rot_ind, new_params2, rot_m4); - for (int i = 0; i < new_params2.size(); i++) - CHECK(new_params2[i] == ValueApprox(old_params[i])); -} + spo->storeParamsBeforeRotation(); + auto rot_spo = std::make_unique>("one_rotated_set", std::move(spo)); -// Test using global rotation -TEMPLATE_LIST_TEST_CASE("RotatedSPOs read and write parameters", - "[wavefunction][template]", TestTypeList) -{ - auto fake_spo = std::make_unique>(); - fake_spo->setOrbitalSetSize(4); - RotatedSPOsT rot("fake_rot", std::move(fake_spo)); - int nel = 2; - rot.buildOptVariables(nel); - - optimize::VariableSetT vs; - rot.checkInVariablesExclusive(vs); - vs[0] = 0.1; - vs[1] = 0.15; - vs[2] = 0.2; - vs[3] = 0.25; - rot.resetParametersExclusive(vs); + // Sanity check for orbs. 
Expect 1 electron, 2 orbitals + const auto orbitalsetsize = rot_spo->getOrbitalSetSize(); + REQUIRE(orbitalsetsize == 2); - { - hdf_archive hout; - vs.writeToHDF("rot_vp.h5", hout); + rot_spo->buildOptVariables(elec.R.size()); - rot.writeVariationalParameters(hout); - } + typename SPOSetT::ValueMatrix psiM_bare(elec.R.size(), orbitalsetsize); + typename SPOSetT::GradMatrix dpsiM_bare(elec.R.size(), orbitalsetsize); + typename SPOSetT::ValueMatrix d2psiM_bare(elec.R.size(), orbitalsetsize); + rot_spo->evaluate_notranspose(elec, 0, elec.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare); + + // Values generated from eval_bspline_spo.py, the + // generate_point_values_hcpBe function + CHECK(std::real(psiM_bare[0][0]) == ValueApprox(0.210221765375514)); + CHECK(std::real(psiM_bare[0][1]) == ValueApprox(-2.984345024542937e-06)); - auto fake_spo2 = std::make_unique>(); - fake_spo2->setOrbitalSetSize(4); + CHECK(std::real(d2psiM_bare[0][0]) == ValueApprox(5.303848362116568)); - RotatedSPOsT rot2("fake_rot", std::move(fake_spo2)); - rot2.buildOptVariables(nel); + OptVariablesTypeT opt_vars; + rot_spo->checkInVariablesExclusive(opt_vars); + opt_vars.resetIndex(); + rot_spo->checkOutVariables(opt_vars); + rot_spo->resetParametersExclusive(opt_vars); - optimize::VariableSetT vs2; - rot2.checkInVariablesExclusive(vs2); + using ValueType = TestType; + Vector dlogpsi(1); + Vector dhpsioverpsi(1); + rot_spo->evaluateDerivatives(elec, opt_vars, dlogpsi, dhpsioverpsi, 0, 1); - hdf_archive hin; - vs2.readFromHDF("rot_vp.h5", hin); - rot2.readVariationalParameters(hin); + CHECK(dlogpsi[0] == ValueApprox(-1.41961753e-05)); + CHECK(dhpsioverpsi[0] == ValueApprox(-0.00060853)); - auto& var = testing::getMyVars(rot2); - CHECK(var[0] == ValueApprox(vs[0])); - CHECK(var[1] == ValueApprox(vs[1])); - CHECK(var[2] == ValueApprox(vs[2])); - CHECK(var[3] == ValueApprox(vs[3])); + std::vector params = {0.1}; + rot_spo->apply_rotation(params, false); - auto& full_var = testing::getMyVarsFull(rot2); - 
CHECK(full_var[0] == ValueApprox(vs[0])); - CHECK(full_var[1] == ValueApprox(vs[1])); - CHECK(full_var[2] == ValueApprox(vs[2])); - CHECK(full_var[3] == ValueApprox(vs[3])); - CHECK(full_var[4] == ValueApprox(0.0)); - CHECK(full_var[5] == ValueApprox(0.0)); + rot_spo->evaluate_notranspose(elec, 0, elec.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare); + CHECK(std::real(psiM_bare[0][0]) == ValueApprox(0.20917123424337608)); + CHECK(std::real(psiM_bare[0][1]) == ValueApprox(-0.02099012652669549)); + + CHECK(std::real(d2psiM_bare[0][0]) == ValueApprox(5.277362065087747)); + + dlogpsi[0] = 0.0; + dhpsioverpsi[0] = 0.0; + + rot_spo->evaluateDerivatives(elec, opt_vars, dlogpsi, dhpsioverpsi, 0, 1); + CHECK(dlogpsi[0] == ValueApprox(-0.10034901119468914)); + CHECK(dhpsioverpsi[0] == ValueApprox(32.96939041498753)); } -// Test using history list. -TEMPLATE_LIST_TEST_CASE("RotatedSPOs read and write parameters history", - "[wavefunction][template]", TestTypeList) +// Test construction of delta rotation +TEMPLATE_LIST_TEST_CASE("RotatedSPOs construct delta matrix", "[wavefunction][template]", TestTypeList) { - auto fake_spo = std::make_unique>(); - fake_spo->setOrbitalSetSize(4); - RotatedSPOsT rot("fake_rot", std::move(fake_spo)); - rot.set_use_global_rotation(false); - int nel = 2; - rot.buildOptVariables(nel); - - optimize::VariableSetT vs; - rot.checkInVariablesExclusive(vs); - vs[0] = 0.1; - vs[1] = 0.15; - vs[2] = 0.2; - vs[3] = 0.25; - rot.resetParametersExclusive(vs); - - { - hdf_archive hout; - vs.writeToHDF("rot_vp_hist.h5", hout); + using ValueType = typename SPOSetT::ValueType; + using ValueMatrix = typename SPOSetT::ValueMatrix; + + int nel = 2; + int nmo = 4; + typename RotatedSPOsT::RotationIndices rot_ind; + RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind); + typename RotatedSPOsT::RotationIndices full_rot_ind; + RotatedSPOsT::createRotationIndicesFull(nel, nmo, full_rot_ind); + // rot_ind size is 4 and full rot_ind size is 6 + + ValueMatrix rot_m4(nmo, 
nmo); + rot_m4 = ValueType(0); + + // When comparing with gen_matrix_ops.py, be aware of the order of indices + // in full_rot + // rot_ind is (0,2) (0,3) (1,2) (1,3) + // full_rot_ind is (0,2) (0,3) (1,2) (1,3) (0,1) (2,3) + // The extra indices go at the back + std::vector old_params = {1.5, 0.2, -0.15, 0.03, -1.1, 0.05}; + std::vector delta_params = {0.1, 0.3, 0.2, -0.1}; + std::vector new_params(6); + + RotatedSPOsT::constructDeltaRotation(delta_params, old_params, rot_ind, full_rot_ind, new_params, rot_m4); + + // clang-format off + std::vector rot_data4 = + { -0.371126931484737, 0.491586564957393, -0.784780958819798, 0.0687480658200083, + -0.373372784561548, 0.66111547793048, 0.610450337985578, 0.225542620014052, + 0.751270334458895, 0.566737323353515, -0.0297901110611425, -0.336918744155143, + 0.398058348785074, 0.00881931472604944, -0.102867783149713, 0.911531672428406 }; + // clang-format on - rot.writeVariationalParameters(hout); - } + ValueMatrix new_rot_m4(rot_data4.data(), 4, 4); - auto fake_spo2 = std::make_unique>(); - fake_spo2->setOrbitalSetSize(4); + CheckMatrixResult check_matrix_result4 = checkMatrix(rot_m4, new_rot_m4, true); + CHECKED_ELSE(check_matrix_result4.result) { FAIL(check_matrix_result4.result_message); } - RotatedSPOsT rot2("fake_rot", std::move(fake_spo2)); - rot2.buildOptVariables(nel); + // Reminder: Ordering! 
+ std::vector expected_new_param = {1.6813965019790489, 0.3623564254653294, -0.05486544454559908, + -0.20574472941408453, -0.9542513302873077, 0.27497788909911774}; + for (int i = 0; i < new_params.size(); i++) + CHECK(new_params[i] == ValueApprox(expected_new_param[i])); - optimize::VariableSetT vs2; - rot2.checkInVariablesExclusive(vs2); + // Rotated back to original position - hdf_archive hin; - vs2.readFromHDF("rot_vp_hist.h5", hin); - rot2.readVariationalParameters(hin); + std::vector new_params2(6); + std::vector reverse_delta_params = {-0.1, -0.3, -0.2, 0.1}; + RotatedSPOsT::constructDeltaRotation(reverse_delta_params, new_params, rot_ind, full_rot_ind, new_params2, + rot_m4); + for (int i = 0; i < new_params2.size(); i++) + CHECK(new_params2[i] == ValueApprox(old_params[i])); +} - auto& var = testing::getMyVars(rot2); - CHECK(var[0] == ValueApprox(vs[0])); - CHECK(var[1] == ValueApprox(vs[1])); - CHECK(var[2] == ValueApprox(vs[2])); - CHECK(var[3] == ValueApprox(vs[3])); +// Test using global rotation +TEMPLATE_LIST_TEST_CASE("RotatedSPOs read and write parameters", "[wavefunction][template]", TestTypeList) +{ + auto fake_spo = std::make_unique>(); + fake_spo->setOrbitalSetSize(4); + RotatedSPOsT rot("fake_rot", std::move(fake_spo)); + int nel = 2; + rot.buildOptVariables(nel); + + optimize::VariableSetT vs; + rot.checkInVariablesExclusive(vs); + vs[0] = 0.1; + vs[1] = 0.15; + vs[2] = 0.2; + vs[3] = 0.25; + rot.resetParametersExclusive(vs); + + { + hdf_archive hout; + vs.writeToHDF("rot_vp.h5", hout); + + rot.writeVariationalParameters(hout); + } + + auto fake_spo2 = std::make_unique>(); + fake_spo2->setOrbitalSetSize(4); + + RotatedSPOsT rot2("fake_rot", std::move(fake_spo2)); + rot2.buildOptVariables(nel); + + optimize::VariableSetT vs2; + rot2.checkInVariablesExclusive(vs2); + + hdf_archive hin; + vs2.readFromHDF("rot_vp.h5", hin); + rot2.readVariationalParameters(hin); + + auto& var = testing::getMyVars(rot2); + CHECK(var[0] == ValueApprox(vs[0])); + 
CHECK(var[1] == ValueApprox(vs[1])); + CHECK(var[2] == ValueApprox(vs[2])); + CHECK(var[3] == ValueApprox(vs[3])); + + auto& full_var = testing::getMyVarsFull(rot2); + CHECK(full_var[0] == ValueApprox(vs[0])); + CHECK(full_var[1] == ValueApprox(vs[1])); + CHECK(full_var[2] == ValueApprox(vs[2])); + CHECK(full_var[3] == ValueApprox(vs[3])); + CHECK(full_var[4] == ValueApprox(0.0)); + CHECK(full_var[5] == ValueApprox(0.0)); +} - auto hist = testing::getHistoryParams(rot2); - REQUIRE(hist.size() == 1); - REQUIRE(hist[0].size() == 4); +// Test using history list. +TEMPLATE_LIST_TEST_CASE("RotatedSPOs read and write parameters history", "[wavefunction][template]", TestTypeList) +{ + auto fake_spo = std::make_unique>(); + fake_spo->setOrbitalSetSize(4); + RotatedSPOsT rot("fake_rot", std::move(fake_spo)); + rot.set_use_global_rotation(false); + int nel = 2; + rot.buildOptVariables(nel); + + optimize::VariableSetT vs; + rot.checkInVariablesExclusive(vs); + vs[0] = 0.1; + vs[1] = 0.15; + vs[2] = 0.2; + vs[3] = 0.25; + rot.resetParametersExclusive(vs); + + { + hdf_archive hout; + vs.writeToHDF("rot_vp_hist.h5", hout); + + rot.writeVariationalParameters(hout); + } + + auto fake_spo2 = std::make_unique>(); + fake_spo2->setOrbitalSetSize(4); + + RotatedSPOsT rot2("fake_rot", std::move(fake_spo2)); + rot2.buildOptVariables(nel); + + optimize::VariableSetT vs2; + rot2.checkInVariablesExclusive(vs2); + + hdf_archive hin; + vs2.readFromHDF("rot_vp_hist.h5", hin); + rot2.readVariationalParameters(hin); + + auto& var = testing::getMyVars(rot2); + CHECK(var[0] == ValueApprox(vs[0])); + CHECK(var[1] == ValueApprox(vs[1])); + CHECK(var[2] == ValueApprox(vs[2])); + CHECK(var[3] == ValueApprox(vs[3])); + + auto hist = testing::getHistoryParams(rot2); + REQUIRE(hist.size() == 1); + REQUIRE(hist[0].size() == 4); } -template +template class DummySPOSetWithoutMWT : public SPOSetT { public: - using ValueVector = typename SPOSetT::ValueVector; - using ValueMatrix = typename 
SPOSetT::ValueMatrix; - using GradVector = typename SPOSetT::GradVector; - using GradMatrix = typename SPOSetT::GradMatrix; - - DummySPOSetWithoutMWT(const std::string& my_name) : SPOSetT(my_name) - { - } - void - setOrbitalSetSize(int norbs) override - { - } - void - evaluateValue(const ParticleSetT& P, int iat, - typename SPOSetT::ValueVector& psi) override - { - assert(psi.size() == 3); - psi[0] = 123; - psi[1] = 456; - psi[2] = 789; - } - void - evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, - GradVector& dpsi, ValueVector& d2psi) override - { - } - void - evaluate_notranspose(const ParticleSetT& P, int first, int last, - ValueMatrix& logdet, GradMatrix& dlogdet, - ValueMatrix& d2logdet) override - { - } - std::string - getClassName() const override - { - return this->my_name_; - } + using ValueVector = typename SPOSetT::ValueVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradVector = typename SPOSetT::GradVector; + using GradMatrix = typename SPOSetT::GradMatrix; + + DummySPOSetWithoutMWT(const std::string& my_name) : SPOSetT(my_name) {} + void setOrbitalSetSize(int norbs) override {} + void evaluateValue(const ParticleSetT& P, int iat, typename SPOSetT::ValueVector& psi) override + { + assert(psi.size() == 3); + psi[0] = 123; + psi[1] = 456; + psi[2] = 789; + } + void evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override + {} + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) override + {} + std::string getClassName() const override { return this->my_name_; } }; -template +template class DummySPOSetWithMWT : public DummySPOSetWithoutMWT { public: - using ValueVector = typename DummySPOSetWithoutMWT::ValueVector; - - DummySPOSetWithMWT(const std::string& my_name) : - DummySPOSetWithoutMWT(my_name) - { - } - void - mw_evaluateValue(const RefVectorWithLeader>& spo_list, - const 
RefVectorWithLeader>& P_list, int iat, - const RefVector& psi_v_list) const override + using ValueVector = typename DummySPOSetWithoutMWT::ValueVector; + + DummySPOSetWithMWT(const std::string& my_name) : DummySPOSetWithoutMWT(my_name) {} + void mw_evaluateValue(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list) const override + { + for (auto& psi : psi_v_list) { - for (auto& psi : psi_v_list) { - assert(psi.get().size() == 3); - psi.get()[0] = 321; - psi.get()[1] = 654; - psi.get()[2] = 987; - } + assert(psi.get().size() == 3); + psi.get()[0] = 321; + psi.get()[1] = 654; + psi.get()[2] = 987; } + } }; -TEMPLATE_LIST_TEST_CASE( - "RotatedSPOs mw_ APIs", "[wavefunction][template]", TestTypeList) +TEMPLATE_LIST_TEST_CASE("RotatedSPOs mw_ APIs", "[wavefunction][template]", TestTypeList) { - // checking that mw_ API works in RotatedSPOs and is not defaulting to - // SPOSet default implementation + // checking that mw_ API works in RotatedSPOs and is not defaulting to + // SPOSet default implementation + { + // First check calling the mw_ APIs for RotatedSPOs, for which the + // underlying implementation just calls the underlying SPOSet mw_ API + // In the case that the underlying SPOSet doesn't specialize the mw_ + // API, the underlying SPOSet will fall back to the default SPOSet mw_, + // which is just a loop over the single walker API. 
+ RotatedSPOsT rot_spo0("rotated0", std::make_unique>("no mw 0")); + RotatedSPOsT rot_spo1("rotated1", std::make_unique>("no mw 1")); + RefVectorWithLeader> spo_list(rot_spo0, {rot_spo0, rot_spo1}); + + ResourceCollection spo_res("test_rot_res"); + rot_spo0.createResource(spo_res); + ResourceCollectionTeamLock> mw_sposet_lock(spo_res, spo_list); + + const SimulationCellT simulation_cell; + ParticleSetT elec0(simulation_cell); + ParticleSetT elec1(simulation_cell); + RefVectorWithLeader> p_list(elec0, {elec0, elec1}); + + typename SPOSetT::ValueVector psi0(3); + typename SPOSetT::ValueVector psi1(3); + RefVector::ValueVector> psi_v_list{psi0, psi1}; + + rot_spo0.mw_evaluateValue(spo_list, p_list, 0, psi_v_list); + for (int iw = 0; iw < spo_list.size(); iw++) { - // First check calling the mw_ APIs for RotatedSPOs, for which the - // underlying implementation just calls the underlying SPOSet mw_ API - // In the case that the underlying SPOSet doesn't specialize the mw_ - // API, the underlying SPOSet will fall back to the default SPOSet mw_, - // which is just a loop over the single walker API. 
- RotatedSPOsT rot_spo0("rotated0", - std::make_unique>("no mw 0")); - RotatedSPOsT rot_spo1("rotated1", - std::make_unique>("no mw 1")); - RefVectorWithLeader> spo_list( - rot_spo0, {rot_spo0, rot_spo1}); - - ResourceCollection spo_res("test_rot_res"); - rot_spo0.createResource(spo_res); - ResourceCollectionTeamLock> mw_sposet_lock( - spo_res, spo_list); - - const SimulationCellT simulation_cell; - ParticleSetT elec0(simulation_cell); - ParticleSetT elec1(simulation_cell); - RefVectorWithLeader> p_list( - elec0, {elec0, elec1}); - - typename SPOSetT::ValueVector psi0(3); - typename SPOSetT::ValueVector psi1(3); - RefVector::ValueVector> psi_v_list{ - psi0, psi1}; - - rot_spo0.mw_evaluateValue(spo_list, p_list, 0, psi_v_list); - for (int iw = 0; iw < spo_list.size(); iw++) { - CHECK(psi_v_list[iw].get()[0] == ValueApprox(123)); - CHECK(psi_v_list[iw].get()[1] == ValueApprox(456)); - CHECK(psi_v_list[iw].get()[2] == ValueApprox(789)); - } + CHECK(psi_v_list[iw].get()[0] == ValueApprox(123)); + CHECK(psi_v_list[iw].get()[1] == ValueApprox(456)); + CHECK(psi_v_list[iw].get()[2] == ValueApprox(789)); } + } + { + // In the case that the underlying SPOSet DOES have mw_ specializations, + // we want to make sure that RotatedSPOs are triggering that + // appropriately This will mean that the underlying SPOSets will do the + // appropriate offloading To check this, DummySPOSetWithMW has an + // explicit mw_evaluateValue which sets different values than what gets + // set in evaluateValue. 
By doing this, we are ensuring that + // RotatedSPOs->mw_evaluaeValue is calling the specialization in the + // underlying SPO and not using the default SPOSet implementation which + // loops over single walker APIs (which have different values enforced + // in + // DummySPOSetWithoutMW + + RotatedSPOsT rot_spo0("rotated0", std::make_unique>("mw 0")); + RotatedSPOsT rot_spo1("rotated1", std::make_unique>("mw 1")); + RefVectorWithLeader> spo_list(rot_spo0, {rot_spo0, rot_spo1}); + + ResourceCollection spo_res("test_rot_res"); + rot_spo0.createResource(spo_res); + ResourceCollectionTeamLock> mw_sposet_lock(spo_res, spo_list); + + const SimulationCellT simulation_cell; + ParticleSetT elec0(simulation_cell); + ParticleSetT elec1(simulation_cell); + RefVectorWithLeader> p_list(elec0, {elec0, elec1}); + + typename SPOSetT::ValueVector psi0(3); + typename SPOSetT::ValueVector psi1(3); + RefVector::ValueVector> psi_v_list{psi0, psi1}; + + rot_spo0.mw_evaluateValue(spo_list, p_list, 0, psi_v_list); + for (int iw = 0; iw < spo_list.size(); iw++) { - // In the case that the underlying SPOSet DOES have mw_ specializations, - // we want to make sure that RotatedSPOs are triggering that - // appropriately This will mean that the underlying SPOSets will do the - // appropriate offloading To check this, DummySPOSetWithMW has an - // explicit mw_evaluateValue which sets different values than what gets - // set in evaluateValue. 
By doing this, we are ensuring that - // RotatedSPOs->mw_evaluaeValue is calling the specialization in the - // underlying SPO and not using the default SPOSet implementation which - // loops over single walker APIs (which have different values enforced - // in - // DummySPOSetWithoutMW - - RotatedSPOsT rot_spo0( - "rotated0", std::make_unique>("mw 0")); - RotatedSPOsT rot_spo1( - "rotated1", std::make_unique>("mw 1")); - RefVectorWithLeader> spo_list( - rot_spo0, {rot_spo0, rot_spo1}); - - ResourceCollection spo_res("test_rot_res"); - rot_spo0.createResource(spo_res); - ResourceCollectionTeamLock> mw_sposet_lock( - spo_res, spo_list); - - const SimulationCellT simulation_cell; - ParticleSetT elec0(simulation_cell); - ParticleSetT elec1(simulation_cell); - RefVectorWithLeader> p_list( - elec0, {elec0, elec1}); - - typename SPOSetT::ValueVector psi0(3); - typename SPOSetT::ValueVector psi1(3); - RefVector::ValueVector> psi_v_list{ - psi0, psi1}; - - rot_spo0.mw_evaluateValue(spo_list, p_list, 0, psi_v_list); - for (int iw = 0; iw < spo_list.size(); iw++) { - CHECK(psi_v_list[iw].get()[0] == ValueApprox(321)); - CHECK(psi_v_list[iw].get()[1] == ValueApprox(654)); - CHECK(psi_v_list[iw].get()[2] == ValueApprox(987)); - } + CHECK(psi_v_list[iw].get()[0] == ValueApprox(321)); + CHECK(psi_v_list[iw].get()[1] == ValueApprox(654)); + CHECK(psi_v_list[iw].get()[2] == ValueApprox(987)); } + } } } // namespace qmcplusplus