diff --git a/src/Containers/OhmmsSoA/VectorSoaContainer.h b/src/Containers/OhmmsSoA/VectorSoaContainer.h index 00ee232ae2..9bacda3baa 100644 --- a/src/Containers/OhmmsSoA/VectorSoaContainer.h +++ b/src/Containers/OhmmsSoA/VectorSoaContainer.h @@ -220,6 +220,7 @@ struct VectorSoaContainer template void copyOut(Vector>& out) const { + static_assert(std::is_same_v); PosSoA2AoS(nLocal, D, myData, nGhosts, reinterpret_cast(out.first_address()), D); } diff --git a/src/Estimators/EstimatorManagerBase.h b/src/Estimators/EstimatorManagerBase.h index 565ff60c37..57f43bc9a1 100644 --- a/src/Estimators/EstimatorManagerBase.h +++ b/src/Estimators/EstimatorManagerBase.h @@ -30,10 +30,10 @@ #include "OhmmsPETE/OhmmsVector.h" #include "io/hdf/hdf_archive.h" #include +#include "Particle/MCWalkerConfiguration.h" namespace qmcplusplus { -class MCWalkerConfiguration; class QMCHamiltonian; class CollectablesEstimator; @@ -52,7 +52,7 @@ class EstimatorManagerBase using EstimatorType = ScalarEstimatorBase; using BufferType = std::vector; - using MCPWalker = Walker; + using MCPWalker = MCWalkerConfiguration::Walker_t; ///default constructor EstimatorManagerBase(Communicate* c = 0); diff --git a/src/Estimators/EstimatorManagerCrowd.h b/src/Estimators/EstimatorManagerCrowd.h index 3ca7eaf9eb..c92e8e6b3d 100644 --- a/src/Estimators/EstimatorManagerCrowd.h +++ b/src/Estimators/EstimatorManagerCrowd.h @@ -22,6 +22,7 @@ #include "Estimators/EstimatorManagerNew.h" #include "Particle/Walker.h" #include "OhmmsPETE/OhmmsVector.h" +#include "Particle/MCWalkerConfiguration.h" namespace qmcplusplus { @@ -38,7 +39,7 @@ class QMCHamiltonian; class EstimatorManagerCrowd { public: - using MCPWalker = Walker; + using MCPWalker = MCWalkerConfiguration::Walker_t; using RealType = EstimatorManagerNew::RealType; using FullPrecRealType = EstimatorManagerNew::FullPrecRealType; diff --git a/src/Estimators/OperatorEstBase.h b/src/Estimators/OperatorEstBase.h index fe107ea6ce..9ecca53c89 100644 --- 
a/src/Estimators/OperatorEstBase.h +++ b/src/Estimators/OperatorEstBase.h @@ -23,6 +23,7 @@ #include "QMCWaveFunctions/OrbitalSetTraits.h" #include "type_traits/DataLocality.h" #include "hdf/hdf_archive.h" +#include "Particle/MCWalkerConfiguration.h" #include namespace qmcplusplus @@ -41,7 +42,7 @@ class OperatorEstBase public: using QMCT = QMCTraits; using FullPrecRealType = QMCT::FullPrecRealType; - using MCPWalker = Walker; + using MCPWalker = MCWalkerConfiguration::Walker_t; using Data = std::vector; diff --git a/src/Estimators/ScalarEstimatorBase.h b/src/Estimators/ScalarEstimatorBase.h index 9ccaae2dc7..c848aee332 100644 --- a/src/Estimators/ScalarEstimatorBase.h +++ b/src/Estimators/ScalarEstimatorBase.h @@ -42,7 +42,7 @@ struct ScalarEstimatorBase using RealType = QMCTraits::FullPrecRealType; using accumulator_type = accumulator_set; using Walker_t = MCWalkerConfiguration::Walker_t; - using MCPWalker = Walker; + using MCPWalker = Walker_t; using WalkerIterator = MCWalkerConfiguration::const_iterator; using RecordListType = RecordNamedProperty; diff --git a/src/Estimators/tests/test_EstimatorManagerCrowd.cpp b/src/Estimators/tests/test_EstimatorManagerCrowd.cpp index d3f8d42a73..2ce592a0d0 100644 --- a/src/Estimators/tests/test_EstimatorManagerCrowd.cpp +++ b/src/Estimators/tests/test_EstimatorManagerCrowd.cpp @@ -105,7 +105,7 @@ TEST_CASE("EstimatorManagerCrowd PerParticleHamiltonianLogger integration", "[es EstimatorManagerCrowd emc(emn); - using MCPWalker = Walker; + using MCPWalker = EstimatorManagerCrowd::MCPWalker; std::vector walkers(num_walkers, MCPWalker(pset.getTotalNum())); diff --git a/src/Estimators/tests/test_MagnetizationDensity.cpp b/src/Estimators/tests/test_MagnetizationDensity.cpp index 68d97e8597..7bc55db1cd 100644 --- a/src/Estimators/tests/test_MagnetizationDensity.cpp +++ b/src/Estimators/tests/test_MagnetizationDensity.cpp @@ -289,7 +289,7 @@ TEST_CASE("MagnetizationDensity::IntegrationTest", "[estimators]") using GradVector = Vector; 
using ValueMatrix = Matrix; using PropertySetType = OperatorBase::PropertySetType; - using MCPWalker = Walker; + using MCPWalker = Walker, LatticeParticleTraits>; using Data = MagnetizationDensity::Data; using GradMatrix = Matrix; using namespace testing; diff --git a/src/Particle/CMakeLists.txt b/src/Particle/CMakeLists.txt index 9dc57daf01..56ac600ce3 100644 --- a/src/Particle/CMakeLists.txt +++ b/src/Particle/CMakeLists.txt @@ -13,35 +13,22 @@ # create libqmcparticle #################################### set(PARTICLE - InitMolecularSystem.cpp InitMolecularSystemT.cpp - SimulationCell.cpp SimulationCellT.cpp - ParticleSetPool.cpp ParticleSetPoolT.cpp - ParticleSet.cpp ParticleSetT.cpp PSdispatcher.cpp - VirtualParticleSet.cpp VirtualParticleSetT.cpp - ParticleSet.BC.cpp DynamicCoordinatesBuilder.cpp DynamicCoordinatesT.cpp - MCCoords.cpp MCCoordsT.cpp - MCWalkerConfiguration.cpp MCWalkerConfigurationT.cpp - WalkerConfigurations.cpp + WalkerConfigurationsT.cpp SpeciesSet.cpp - SampleStack.cpp SampleStackT.cpp - createDistanceTableAA.cpp - createDistanceTableAB.cpp createDistanceTableT.cpp HDFWalkerInputManager.cpp - LongRange/KContainer.cpp LongRange/KContainerT.cpp - LongRange/StructFact.cpp LongRange/StructFactT.cpp LongRange/LPQHIBasis.cpp LongRange/LPQHISRCoulombBasis.cpp @@ -63,9 +50,7 @@ target_include_directories(qmcparticle PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}") target_link_libraries(qmcparticle PRIVATE platform_cpu_LA) target_link_libraries(qmcparticle PUBLIC qmcnumerics qmcutil platform_runtime) set(PARTICLE_OMPTARGET_SRCS - createDistanceTableTOMPTarget.cpp - createDistanceTableAAOMPTarget.cpp - createDistanceTableABOMPTarget.cpp) + createDistanceTableTOMPTarget.cpp) if(USE_OBJECT_TARGET) add_library(qmcparticle_omptarget OBJECT ${PARTICLE_OMPTARGET_SRCS}) diff --git a/src/Particle/DistanceTable.h b/src/Particle/DistanceTable.h index 3175be4596..12cf0ac941 100644 --- a/src/Particle/DistanceTable.h +++ b/src/Particle/DistanceTable.h @@ -16,355 +16,13 @@ 
#ifndef QMCPLUSPLUS_DISTANCETABLEDATAIMPL_H #define QMCPLUSPLUS_DISTANCETABLEDATAIMPL_H -#include "Particle/ParticleSet.h" -#include -#include "OhmmsPETE/OhmmsVector.h" -#include "OhmmsPETE/OhmmsMatrix.h" -#include "CPU/SIMD/aligned_allocator.hpp" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "DTModes.h" +#include "Configuration.h" +#include "Particle/DistanceTableT.h" namespace qmcplusplus { -class ResourceCollection; - -/** @ingroup nnlist - * @brief Abstract class to manage operations on pair data between two ParticleSets. - * - * Each DistanceTable object is defined by Source and Target of ParticleSet types. - * This base class doesn't contain storage. It is intended for update/compute invoked by ParticleSet. - * Derived AA/AB classes handle the actual storage and data access. - */ -class DistanceTable -{ -public: - static constexpr unsigned DIM = OHMMS_DIM; - - using IndexType = QMCTraits::IndexType; - using RealType = QMCTraits::RealType; - using PosType = QMCTraits::PosType; - using DistRow = Vector>; - using DisplRow = VectorSoaContainer; - -protected: - // FIXME. once DT takes only DynamicCoordinates, change this type as well. - const ParticleSet& origin_; - - const size_t num_sources_; - const size_t num_targets_; - - ///name of the table - const std::string name_; - - ///operation modes defined by DTModes - DTModes modes_; - -public: - ///constructor using source and target ParticleSet - DistanceTable(const ParticleSet& source, const ParticleSet& target, DTModes modes) - : origin_(source), - num_sources_(source.getTotalNum()), - num_targets_(target.getTotalNum()), - name_(source.getName() + "_" + target.getName()), - modes_(modes) - {} - - /// copy constructor. 
deleted - DistanceTable(const DistanceTable&) = delete; - - ///virutal destructor - virtual ~DistanceTable() = default; - - ///get modes - inline DTModes getModes() const { return modes_; } - - ///set modes - inline void setModes(DTModes modes) { modes_ = modes; } - - ///return the name of table - inline const std::string& getName() const { return name_; } - - ///returns the reference the origin particleset - const ParticleSet& get_origin() const { return origin_; } - - ///returns the number of centers - inline size_t centers() const { return origin_.getTotalNum(); } - - ///returns the number of centers - inline size_t targets() const { return num_targets_; } - - ///returns the number of source particles - inline size_t sources() const { return num_sources_; } - - /** evaluate the full Distance Table - * @param P the target particle set - */ - virtual void evaluate(ParticleSet& P) = 0; - virtual void mw_evaluate(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list) const - { - for (int iw = 0; iw < dt_list.size(); iw++) - dt_list[iw].evaluate(p_list[iw]); - } - - /** recompute multi walker internal data, recompute - * @param dt_list the distance table batch - * @param p_list the target particle set batch - * @param recompute if true, must recompute. Otherwise, implementation dependent. - */ - virtual void mw_recompute(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list, - const std::vector& recompute) const - { - for (int iw = 0; iw < dt_list.size(); iw++) - if (recompute[iw]) - dt_list[iw].evaluate(p_list[iw]); - } - - /** evaluate the temporary pair relations when a move is proposed - * @param P the target particle set - * @param rnew proposed new position - * @param iat the particle to be moved - * @param prepare_old if true, prepare (temporary) old distances and displacements for using getOldDists and getOldDispls functions in acceptMove. 
- * - * Note: some distance table consumers (WaveFunctionComponent) have optimized code paths which require prepare_old = true for accepting a move. - * Drivers/Hamiltonians know whether moves will be accepted or not and manage this flag when calling ParticleSet::makeMoveXXX functions. - */ - virtual void move(const ParticleSet& P, const PosType& rnew, const IndexType iat, bool prepare_old = true) = 0; - - /** walker batched version of move. this function may be implemented asynchronously. - * Additional synchroniziation for collecting results should be handled by the caller. - * If DTModes::NEED_TEMP_DATA_ON_HOST, host data will be updated. - * If no consumer requests data on the host, the transfer is skipped. - */ - virtual void mw_move(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list, - const std::vector& rnew_list, - const IndexType iat, - bool prepare_old = true) const - { - for (int iw = 0; iw < dt_list.size(); iw++) - dt_list[iw].move(p_list[iw], rnew_list[iw], iat, prepare_old); - } - - /** update the distance table by the pair relations from the temporal position. - * Used when a move is accepted in regular mode - * @param iat the particle with an accepted move - */ - virtual void update(IndexType jat) = 0; - - /** fill partially the distance table by the pair relations from the temporary or old particle position. - * Used in forward mode when a move is reject - * @param iat the particle with an accepted move - * @param from_temp if true, copy from temp. if false, copy from old - */ - virtual void updatePartial(IndexType jat, bool from_temp) - { - if (from_temp) - update(jat); - } - - /** walker batched version of updatePartial. - * If not DTModes::NEED_TEMP_DATA_ON_HOST, host data is not up-to-date and host distance table will not be updated. 
- */ - virtual void mw_updatePartial(const RefVectorWithLeader& dt_list, - IndexType jat, - const std::vector& from_temp) - { - for (int iw = 0; iw < dt_list.size(); iw++) - dt_list[iw].updatePartial(jat, from_temp[iw]); - } - - /** finalize distance table calculation after particle-by-particle moves - * if update() doesn't make the table up-to-date during p-by-p moves - * finalizePbyP takes action to bring the table up-to-date - */ - virtual void finalizePbyP(const ParticleSet& P) {} - - /** walker batched version of finalizePbyP - * If not DTModes::NEED_TEMP_DATA_ON_HOST, host distance table data is not updated at all during p-by-p - * Thus, a recompute is necessary to update the whole host distance table for consumers like the Coulomb potential. - */ - virtual void mw_finalizePbyP(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list) const - { - for (int iw = 0; iw < dt_list.size(); iw++) - dt_list[iw].finalizePbyP(p_list[iw]); - } - - /** find the first nearest neighbor - * @param iat source particle id - * @param r distance - * @param dr displacement - * @param newpos if true, use the data in temp_r_ and temp_dr_ for the proposed move. 
- * if false, use the data in distance_[iat] and displacements_[iat] - * @return the id of the nearest particle, -1 not found - */ - virtual int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const = 0; - - [[noreturn]] inline void print(std::ostream& os) - { - throw std::runtime_error("DistanceTable::print is not supported"); - } - - /// initialize a shared resource and hand it to a collection - virtual void createResource(ResourceCollection& collection) const {} - - /// acquire a shared resource from a collection - virtual void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const - {} - - /// return a shared resource to a collection - virtual void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const - {} -}; - -/** AA type of DistanceTable containing storage */ -class DistanceTableAA : public DistanceTable -{ -protected: - /** distances_[num_targets_][num_sources_], [i][3][j] = |r_A2[j] - r_A1[i]| - * Note: Derived classes decide if it is a memory view or the actual storage - * For only the lower triangle (j=i terms as the nature of operator[]. - * When the storage of the table is allocated as a single memory segment, - * out-of-bound access is still within the segment and - * thus doesn't trigger an alarm by the address sanitizer. 
- */ - std::vector distances_; - - /** displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - r_A1[i] - * Note: Derived classes decide if it is a memory view or the actual storage - * only the lower triangle (j displacements_; - - /// temp_r - DistRow temp_r_; - - /// temp_dr - DisplRow temp_dr_; - - /// old distances - DistRow old_r_; - - /// old displacements - DisplRow old_dr_; - -public: - ///constructor using source and target ParticleSet - DistanceTableAA(const ParticleSet& target, DTModes modes) : DistanceTable(target, target, modes) {} - - /** return full table distances - */ - const std::vector& getDistances() const { return distances_; } - - /** return full table displacements - */ - const std::vector& getDisplacements() const { return displacements_; } - - /** return a row of distances for a given target particle - */ - const DistRow& getDistRow(int iel) const { return distances_[iel]; } - - /** return a row of displacements for a given target particle - */ - const DisplRow& getDisplRow(int iel) const { return displacements_[iel]; } - - /** return the temporary distances when a move is proposed - */ - const DistRow& getTempDists() const { return temp_r_; } - - /** return the temporary displacements when a move is proposed - */ - const DisplRow& getTempDispls() const { return temp_dr_; } - - /** return old distances set up by move() for optimized distance table consumers - */ - const DistRow& getOldDists() const { return old_r_; } - - /** return old displacements set up by move() for optimized distance table consumers - */ - const DisplRow& getOldDispls() const { return old_dr_; } - - virtual size_t get_num_particls_stored() const { return 0; } - - /// return multi walker temporary pair distance table data pointer - [[noreturn]] virtual const RealType* getMultiWalkerTempDataPtr() const - { - throw std::runtime_error(name_ + " multi walker data pointer for temp not supported"); - } - - virtual const RealType* mw_evalDistsInRange(const 
RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list, - size_t range_begin, - size_t range_end) const - { - return nullptr; - } -}; - -/** AB type of DistanceTable containing storage */ -class DistanceTableAB : public DistanceTable -{ -protected: - /** distances_[num_targets_][num_sources_], [i][3][j] = |r_A2[j] - r_A1[i]| - * Note: Derived classes decide if it is a memory view or the actual storage - */ - std::vector distances_; - - /** displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - r_A1[i] - * Note: Derived classes decide if it is a memory view or the actual storage - */ - std::vector displacements_; - - /// temp_r - DistRow temp_r_; - - /// temp_dr - DisplRow temp_dr_; - -public: - ///constructor using source and target ParticleSet - DistanceTableAB(const ParticleSet& source, const ParticleSet& target, DTModes modes) - : DistanceTable(source, target, modes) - {} - - /** return full table distances - */ - const std::vector& getDistances() const { return distances_; } - - /** return full table displacements - */ - const std::vector& getDisplacements() const { return displacements_; } - - /** return a row of distances for a given target particle - */ - const DistRow& getDistRow(int iel) const { return distances_[iel]; } - - /** return a row of displacements for a given target particle - */ - const DisplRow& getDisplRow(int iel) const { return displacements_[iel]; } - - /** return the temporary distances when a move is proposed - */ - const DistRow& getTempDists() const { return temp_r_; } - - /** return the temporary displacements when a move is proposed - */ - const DisplRow& getTempDispls() const { return temp_dr_; } - - /// return multi-walker full (all pairs) distance table data pointer - [[noreturn]] virtual const RealType* getMultiWalkerDataPtr() const - { - throw std::runtime_error(name_ + " multi walker data pointer not supported"); - } - - /// return stride of per target pctl data. 
full table data = stride * num of target particles - [[noreturn]] virtual size_t getPerTargetPctlStrideSize() const - { - throw std::runtime_error(name_ + " getPerTargetPctlStrideSize not supported"); - } -}; +using DistanceTable = DistanceTableT; +using DistanceTableAA = DistanceTableAAT; +using DistanceTableAB = DistanceTableABT; } // namespace qmcplusplus #endif diff --git a/src/Particle/DynamicCoordinates.h b/src/Particle/DynamicCoordinates.h index 3b53c4a4c6..0cadfddb86 100644 --- a/src/Particle/DynamicCoordinates.h +++ b/src/Particle/DynamicCoordinates.h @@ -15,102 +15,11 @@ #ifndef QMCPLUSPLUS_DYNAMICCOORDINATES_H #define QMCPLUSPLUS_DYNAMICCOORDINATES_H -#include #include "Configuration.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "type_traits/template_types.hpp" +#include "Particle/DynamicCoordinatesT.h" namespace qmcplusplus { -class ResourceCollection; - -/** enumerator for DynamicCoordinates kinds - */ -enum class DynamicCoordinateKind -{ - DC_POS, // SoA positions - DC_POS_OFFLOAD, // SoA positions with OpenMP offload -}; - -/** quantum variables of all the particles - */ -class DynamicCoordinates -{ -public: - using RealType = QMCTraits::RealType; - using PosType = QMCTraits::PosType; - using ParticlePos = PtclOnLatticeTraits::ParticlePos; - using PosVectorSoa = VectorSoaContainer; - - DynamicCoordinates(const DynamicCoordinateKind kind_in) : variable_kind_(kind_in) {} - - DynamicCoordinates(const DynamicCoordinates&) = default; - DynamicCoordinates& operator=(const DynamicCoordinates&) = delete; - - DynamicCoordinateKind getKind() const { return variable_kind_; } - - virtual ~DynamicCoordinates() = default; - - virtual std::unique_ptr makeClone() = 0; - - /** resize internal storages based on the number of particles - * @param n the number of particles - */ - virtual void resize(size_t n) = 0; - /// return the number of particles - virtual size_t size() const = 0; - - /// overwrite the positions of all the particles. 
- virtual void setAllParticlePos(const ParticlePos& R) = 0; - /// overwrite the position of one the particle. - virtual void setOneParticlePos(const PosType& pos, size_t iat) = 0; - /** copy the active positions of particles with a uniform id in all the walkers to a single internal buffer. - * @param coords_list a batch of DynamicCoordinates - * @param iat paricle id, uniform across coords_list - * @param new_positions proposed positions - */ - virtual void mw_copyActivePos(const RefVectorWithLeader& coords_list, - size_t iat, - const std::vector& new_positions) const - { - assert(this == &coords_list.getLeader()); - } - - /** overwrite the positions of particles with a uniform id in all the walkers upon acceptance. - * @param coords_list a batch of DynamicCoordinates - * @param iat paricle id, uniform across coords_list - * @param new_positions proposed positions - * @param isAccepted accept/reject info - */ - virtual void mw_acceptParticlePos(const RefVectorWithLeader& coords_list, - size_t iat, - const std::vector& new_positions, - const std::vector& isAccepted) const = 0; - - /// all particle position accessor - virtual const PosVectorSoa& getAllParticlePos() const = 0; - /// one particle position accessor - virtual PosType getOneParticlePos(size_t iat) const = 0; - - /// secure internal data consistency after p-by-p moves - virtual void donePbyP() {} - - /// initialize a shared resource and hand it to a collection - virtual void createResource(ResourceCollection& collection) const {} - - /// acquire a shared resource from a collection - virtual void acquireResource(ResourceCollection& collection, - const RefVectorWithLeader& coords_list) const - {} - - /// return a shared resource to a collection - virtual void releaseResource(ResourceCollection& collection, - const RefVectorWithLeader& coords_list) const - {} - -protected: - /// type of dynamic coordinates - const DynamicCoordinateKind variable_kind_; -}; +using DynamicCoordinates = DynamicCoordinatesT; } // 
namespace qmcplusplus #endif diff --git a/src/Particle/DynamicCoordinatesT.h b/src/Particle/DynamicCoordinatesT.h index d7fc1994fa..36d94868d6 100644 --- a/src/Particle/DynamicCoordinatesT.h +++ b/src/Particle/DynamicCoordinatesT.h @@ -19,7 +19,6 @@ #include "OhmmsSoA/VectorSoaContainer.h" #include "ParticleSetTraits.h" #include "type_traits/template_types.hpp" -#include "DynamicCoordinates.h" namespace qmcplusplus { @@ -27,11 +26,11 @@ class ResourceCollection; /** enumerator for DynamicCoordinates kinds */ -// enum class DynamicCoordinateKind -// { -// DC_POS, // SoA positions -// DC_POS_OFFLOAD, // SoA positions with OpenMP offload -// }; +enum class DynamicCoordinateKind +{ + DC_POS, // SoA positions + DC_POS_OFFLOAD, // SoA positions with OpenMP offload +}; /** quantum variables of all the particles */ diff --git a/src/Particle/InitMolecularSystem.cpp b/src/Particle/InitMolecularSystem.cpp deleted file mode 100644 index 9639bdbebf..0000000000 --- a/src/Particle/InitMolecularSystem.cpp +++ /dev/null @@ -1,275 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2020 QMCPACK developers. -// -// File developed by: Jordan E. Vincent, University of Illinois at Urbana-Champaign -// Luke Shulenburger, lshulen@sandia.gov, Sandia National Laboratories -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory -// Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -/**@file InitMolecularSystem.cpp - * @brief Implements InitMolecuarSystem operators. - */ -#include "InitMolecularSystem.h" -#include "Particle/ParticleSetPool.h" -#include "OhmmsData/AttributeSet.h" -#include "Particle/DistanceTable.h" -#include "ParticleBase/RandomSeqGeneratorGlobal.h" - -namespace qmcplusplus -{ -using RealType = QMCTraits::RealType; - -InitMolecularSystem::InitMolecularSystem(ParticleSetPool& pset, const char* aname) - : OhmmsElementBase(aname), ptclPool(pset) -{} - -bool InitMolecularSystem::put(xmlNodePtr cur) -{ - std::string target("e"), source("i"), volume("no"); - OhmmsAttributeSet hAttrib; - hAttrib.add(target, "target"); - hAttrib.add(source, "source"); - hAttrib.add(volume, "use_volume"); - hAttrib.put(cur); - ParticleSet* els = ptclPool.getParticleSet(target); - if (els == 0) - { - ERRORMSG("No target particle " << target << " exists.") - return false; - } - ParticleSet* ions = ptclPool.getParticleSet(source); - if (ions == 0) - { - ERRORMSG("No source particle " << source << " exists.") - return false; - } - - app_log() << "" << std::endl; - - if (volume == "yes") - initWithVolume(ions, els); - else - initMolecule(ions, els); - - makeUniformRandom(els->spins); - els->spins *= 2 * M_PI; - - app_log() << "" << std::endl; - app_log().flush(); - - return true; -} - -void InitMolecularSystem::initAtom(ParticleSet* ions, ParticleSet* els) -{ - //3N-dimensional Gaussian - ParticleSet::ParticlePos chi(els->getTotalNum()); - makeGaussRandom(chi); - RealType q = std::sqrt(static_cast(els->getTotalNum())) * 0.5; - int nel(els->getTotalNum()), items(0); - while (nel) - { - els->R[items] = ions->R[0] + q * chi[items]; - --nel; - ++items; - } -} - -struct LoneElectron -{ - int ID; - RealType 
BondLength; - inline LoneElectron(int id, RealType bl) : ID(id), BondLength(bl) {} -}; - -void InitMolecularSystem::initMolecule(ParticleSet* ions, ParticleSet* els) -{ - if (ions->getTotalNum() == 1) - return initAtom(ions, els); - - const int d_ii_ID = ions->addTable(*ions); - ions->update(); - const ParticleSet::ParticleIndex& grID(ions->GroupID); - SpeciesSet& Species(ions->getSpeciesSet()); - int Centers = ions->getTotalNum(); - std::vector Qtot(Centers), Qcore(Centers), Qval(Centers, 0); - //use charge as the core electrons first - int icharge = Species.addAttribute("charge"); - //Assign default core charge - for (int iat = 0; iat < Centers; iat++) - Qtot[iat] = static_cast(Species(icharge, grID[iat])); - //cutoff radius (Bohr) this a random choice - RealType cutoff = 4.0; - ParticleSet::ParticlePos chi(els->getTotalNum()); - //makeGaussRandom(chi); - makeSphereRandom(chi); - // the upper limit of the electron index with spin up - const int numUp = els->last(0); - // the upper limit of the electron index with spin down. Pay attention to the no spin down electron case. - const int numDown = els->last(els->groups() > 1 ? 1 : 0) - els->first(0); - // consumer counter of random numbers chi - int random_number_counter = 0; - int nup_tot = 0, ndown_tot = numUp; - std::vector loneQ; - RealType rmin = cutoff; - ParticleSet::SingleParticlePos cm; - - const auto& dist = ions->getDistTableAA(d_ii_ID).getDistances(); - // Step 1. Distribute even Q[iat] of atomic center iat. If Q[iat] is odd, put Q[iat]-1 and save the lone electron. 
- for (size_t iat = 0; iat < Centers; iat++) - { - cm += ions->R[iat]; - for (size_t jat = iat + 1; jat < Centers; ++jat) - { - rmin = std::min(rmin, dist[jat][iat]); - } - //use 40% of the minimum bond - RealType sep = rmin * 0.4; - int v2 = Qtot[iat] / 2; - if (Qtot[iat] > v2 * 2) - { - loneQ.push_back(LoneElectron(iat, sep)); - } - for (int k = 0; k < v2; k++) - { - // initialize electron positions in pairs - if (nup_tot < numUp) - els->R[nup_tot++] = ions->R[iat] + sep * chi[random_number_counter++]; - if (ndown_tot < numDown) - els->R[ndown_tot++] = ions->R[iat] + sep * chi[random_number_counter++]; - } - } - - // Step 2. Distribute the electrons left alone - // mmorales: changed order of spin assignment to help with spin - // imbalances in molecules at large distances. - // Not guaranteed to work, but should help in most cases - // as long as atoms in molecules are defined sequencially - std::vector::iterator it(loneQ.begin()); - std::vector::iterator it_end(loneQ.end()); - while (it != it_end && nup_tot != numUp && ndown_tot != numDown) - { - if (nup_tot < numUp) - { - els->R[nup_tot++] = ions->R[(*it).ID] + (*it).BondLength * chi[random_number_counter++]; - ++it; - } - if (ndown_tot < numDown && it != it_end) - { - els->R[ndown_tot++] = ions->R[(*it).ID] + (*it).BondLength * chi[random_number_counter++]; - ++it; - } - } - - // Step 3. Handle more than neutral electrons - //extra electrons around the geometric center - RealType cnorm = 1.0 / static_cast(Centers); - RealType sep = rmin * 2; - cm = cnorm * cm; - if (nup_tot < numUp) - while (nup_tot < numUp) - els->R[nup_tot++] = cm + sep * chi[random_number_counter++]; - if (ndown_tot < numDown) - while (ndown_tot < numDown) - els->R[ndown_tot++] = cm + sep * chi[random_number_counter++]; - - // safety check. all the random numbers should have been consumed once and only once. - if (random_number_counter != chi.size()) - throw std::runtime_error("initMolecule unexpected random number consumption. 
Please report a bug!"); - - //put all the electrons in a unit box - if (els->getLattice().SuperCellEnum != SUPERCELL_OPEN) - { - els->R.setUnit(PosUnit::Cartesian); - els->applyBC(els->R); - els->update(false); - } -} - -///helper function to determine the lower bound of a domain (need to move up) -template -inline TinyVector lower_bound(const TinyVector& a, const TinyVector& b) -{ - return TinyVector(std::min(a[0], b[0]), std::min(a[1], b[1]), std::min(a[2], b[2])); -} - -///helper function to determine the upper bound of a domain (need to move up) -template -inline TinyVector upper_bound(const TinyVector& a, const TinyVector& b) -{ - return TinyVector(std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2])); -} - -void InitMolecularSystem::initWithVolume(ParticleSet* ions, ParticleSet* els) -{ - TinyVector start(1.0); - TinyVector end(0.0); - - ParticleSet::ParticlePos Ru(ions->getTotalNum()); - Ru.setUnit(PosUnit::Lattice); - ions->applyBC(ions->R, Ru); - - for (int iat = 0; iat < Ru.size(); iat++) - { - start = lower_bound(Ru[iat], start); - end = upper_bound(Ru[iat], end); - } - - TinyVector shift; - Tensor newbox(ions->getLattice().R); - - RealType buffer = 2.0; //buffer 2 bohr - for (int idim = 0; idim < OHMMS_DIM; ++idim) - { - //if(ions->getLattice().BoxBConds[idim]) - //{ - // start[idim]=0.0; - // end[idim]=1.0; - // shift[idim]=0.0; - //} - //else - { - RealType buffer_r = buffer * ions->getLattice().OneOverLength[idim]; - start[idim] = std::max((RealType)0.0, (start[idim] - buffer_r)); - end[idim] = std::min((RealType)1.0, (end[idim] + buffer_r)); - shift[idim] = start[idim] * ions->getLattice().Length[idim]; - if (std::abs(end[idim] = start[idim]) < buffer) - { //handle singular case - start[idim] = std::max(0.0, start[idim] - buffer_r / 2.0); - end[idim] = std::min(1.0, end[idim] + buffer_r / 2.0); - } - - newbox(idim, idim) = (end[idim] - start[idim]) * ions->getLattice().Length[idim]; - } - } - - ParticleSet::ParticleLayout 
slattice(ions->getLattice()); - slattice.set(newbox); - - app_log() << " InitMolecularSystem::initWithVolume " << std::endl; - app_log() << " Effective Lattice shifted by " << shift << std::endl; - app_log() << newbox << std::endl; - - Ru.resize(els->getTotalNum()); - makeUniformRandom(Ru); - for (int iat = 0; iat < Ru.size(); ++iat) - els->R[iat] = slattice.toCart(Ru[iat]) + shift; - els->R.setUnit(PosUnit::Cartesian); -} - -bool InitMolecularSystem::put(std::istream& is) { return true; } - -bool InitMolecularSystem::get(std::ostream& os) const { return true; } - -void InitMolecularSystem::reset() {} -} // namespace qmcplusplus diff --git a/src/Particle/InitMolecularSystem.h b/src/Particle/InitMolecularSystem.h index 41f56d8f77..fddfc70916 100644 --- a/src/Particle/InitMolecularSystem.h +++ b/src/Particle/InitMolecularSystem.h @@ -17,49 +17,10 @@ #ifndef QMCPLUSPLUS_INITMOLECULARSYSTEM_H #define QMCPLUSPLUS_INITMOLECULARSYSTEM_H -#include "OhmmsData/OhmmsElementBase.h" -#include +#include "Particle/InitMolecularSystemT.h" namespace qmcplusplus { -class ParticleSet; -class ParticleSetPool; - -/* Engine to initialize the initial electronic structure for a molecular system - */ -class InitMolecularSystem : public OhmmsElementBase -{ -public: - InitMolecularSystem(ParticleSetPool& pset, const char* aname = "mosystem"); - - bool get(std::ostream& os) const override; - bool put(std::istream& is) override; - bool put(xmlNodePtr cur) override; - void reset() override; - - /** initialize els for an atom - */ - void initAtom(ParticleSet* ions, ParticleSet* els); - /** initialize els position for a molecule - * - * Use the valence of each ionic species on a sphere - */ - void initMolecule(ParticleSet* ions, ParticleSet* els); - /** initialize els for the systems with a mixed boundary - * - * Use the bound of the ionic systems and uniform random positions within a reduced box - */ - void initWithVolume(ParticleSet* ions, ParticleSet* els); - -private: - /** pointer to 
ParticleSetPool - * - * QMCHamiltonian needs to know which ParticleSet object - * is used as an input object for the evaluations. - * Any number of ParticleSet can be used to describe - * a QMCHamiltonian. - */ - ParticleSetPool& ptclPool; -}; +using InitMolecularSystem = InitMolecularSystemT; } // namespace qmcplusplus #endif diff --git a/src/Particle/InitMolecularSystemT.cpp b/src/Particle/InitMolecularSystemT.cpp index a4559fc288..896a6c1d22 100644 --- a/src/Particle/InitMolecularSystemT.cpp +++ b/src/Particle/InitMolecularSystemT.cpp @@ -306,9 +306,18 @@ InitMolecularSystemT::reset() { } +#ifndef QMC_COMPLEX +#ifndef MIXED_PRECISION template class InitMolecularSystemT; +#else template class InitMolecularSystemT; +#endif +#else +#ifndef MIXED_PRECISION template class InitMolecularSystemT>; +#else template class InitMolecularSystemT>; +#endif +#endif } // namespace qmcplusplus diff --git a/src/Particle/Lattice/CrystalLattice.h b/src/Particle/Lattice/CrystalLattice.h index a8d56236e5..54c341836b 100644 --- a/src/Particle/Lattice/CrystalLattice.h +++ b/src/Particle/Lattice/CrystalLattice.h @@ -56,7 +56,7 @@ struct CrystalLattice : public LRBreakupParameters { /// alias to the base class using Base = LRBreakupParameters; - + static_assert(std::is_floating_point_v); ///enumeration for the dimension of the lattice enum { diff --git a/src/Particle/LongRange/KContainer.cpp b/src/Particle/LongRange/KContainer.cpp deleted file mode 100644 index 72d4c8bd17..0000000000 --- a/src/Particle/LongRange/KContainer.cpp +++ /dev/null @@ -1,274 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. 
-// -// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "KContainer.h" -#include -#include -#include "Message/Communicate.h" -#include "LRCoulombSingleton.h" -#include "Utilities/qmc_common.h" - -namespace qmcplusplus -{ -void KContainer::updateKLists(const ParticleLayout& lattice, - RealType kc, - unsigned ndim, - const PosType& twist, - bool useSphere) -{ - kcutoff = kc; - if (kcutoff <= 0.0) - { - APP_ABORT(" Illegal cutoff for KContainer"); - } - findApproxMMax(lattice, ndim); - BuildKLists(lattice, twist, useSphere); - - app_log() << " KContainer initialised with cutoff " << kcutoff << std::endl; - app_log() << " # of K-shell = " << kshell.size() << std::endl; - app_log() << " # of K points = " << kpts.size() << std::endl; - app_log() << std::endl; -} - -void KContainer::findApproxMMax(const ParticleLayout& lattice, unsigned ndim) -{ - //Estimate the size of the parallelpiped that encompasses a sphere of kcutoff. - //mmax is stored as integer translations of the reciprocal cell vectors. - //Does not require an orthorhombic cell. - /* Old method. 
- //2pi is not included in lattice.b - Matrix mmat; - mmat.resize(3,3); - for(int j=0;j<3;j++) - for(int i=0;i<3;i++){ - mmat[i][j] = 0.0; - for(int k=0;k<3;k++) - mmat[i][j] = mmat[i][j] + 4.0*M_PI*M_PI*lattice.b(k)[i]*lattice.b(j)[k]; - } - - TinyVector x,temp; - RealType tempr; - for(int idim=0;idim<3;idim++){ - int i = ((idim)%3); - int j = ((idim+1)%3); - int k = ((idim+2)%3); - - x[i] = 1.0; - x[j] = (mmat[j][k]*mmat[k][i] - mmat[k][k]*mmat[i][j]); - x[j]/= (mmat[j][j]*mmat[k][k] - mmat[j][k]*mmat[j][k]); - x[k] = -(mmat[k][i] + mmat[j][k]*x[j])/mmat[k][k]; - - for(i=0;i<3;i++){ - temp[i] = 0.0; - for(j=0;j<3;j++) - temp[i] += mmat[i][j]*x[j]; - } - - tempr = dot(x,temp); - mmax[idim] = static_cast(sqrt(4.0*kcut2/tempr)) + 1; - } - */ - // see rmm, Electronic Structure, p. 85 for details - for (int i = 0; i < DIM; i++) - mmax[i] = static_cast(std::floor(std::sqrt(dot(lattice.a(i), lattice.a(i))) * kcutoff / (2 * M_PI))) + 1; - - mmax[DIM] = mmax[0]; - for (int i = 1; i < DIM; ++i) - mmax[DIM] = std::max(mmax[i], mmax[DIM]); - - //overwrite the non-periodic directon to be zero - if (LRCoulombSingleton::isQuasi2D()) - { - app_log() << " No kspace sum perpendicular to slab " << std::endl; - mmax[2] = 0; - } - if (ndim < 3) - { - app_log() << " No kspace sum along z " << std::endl; - mmax[2] = 0; - } - if (ndim < 2) - mmax[1] = 0; -} - -void KContainer::BuildKLists(const ParticleLayout& lattice, const PosType& twist, bool useSphere) -{ - TinyVector TempActualMax; - TinyVector kvec; - TinyVector kvec_cart; - RealType modk2; - std::vector> kpts_tmp; - std::vector kpts_cart_tmp; - std::vector ksq_tmp; - // reserve the space for memory efficiency - if (useSphere) - { - const RealType kcut2 = kcutoff * kcutoff; - //Loop over guesses for valid k-points. 
- for (int i = -mmax[0]; i <= mmax[0]; i++) - { - kvec[0] = i; - for (int j = -mmax[1]; j <= mmax[1]; j++) - { - kvec[1] = j; - for (int k = -mmax[2]; k <= mmax[2]; k++) - { - kvec[2] = k; - //Do not include k=0 in evaluations. - if (i == 0 && j == 0 && k == 0) - continue; - //Convert kvec to Cartesian - kvec_cart = lattice.k_cart(kvec + twist); - //Find modk - modk2 = dot(kvec_cart, kvec_cart); - if (modk2 > kcut2) - continue; //Inside cutoff? - //This k-point should be added to the list - kpts_tmp.push_back(kvec); - kpts_cart_tmp.push_back(kvec_cart); - ksq_tmp.push_back(modk2); - //Update record of the allowed maximum translation. - for (int idim = 0; idim < 3; idim++) - if (std::abs(kvec[idim]) > TempActualMax[idim]) - TempActualMax[idim] = std::abs(kvec[idim]); - } - } - } - } - else - { - // Loop over all k-points in the parallelpiped and add them to kcontainer - // note layout is for interfacing with fft, so for each dimension, the - // positive indexes come first then the negative indexes backwards - // e.g. 0, 1, .... mmax, -mmax+1, -mmax+2, ... 
-1 - const int idimsize = mmax[0] * 2; - const int jdimsize = mmax[1] * 2; - const int kdimsize = mmax[2] * 2; - for (int i = 0; i < idimsize; i++) - { - kvec[0] = i; - if (kvec[0] > mmax[0]) - kvec[0] -= idimsize; - for (int j = 0; j < jdimsize; j++) - { - kvec[1] = j; - if (kvec[1] > mmax[1]) - kvec[1] -= jdimsize; - for (int k = 0; k < kdimsize; k++) - { - kvec[2] = k; - if (kvec[2] > mmax[2]) - kvec[2] -= kdimsize; - // get cartesian location and modk2 - kvec_cart = lattice.k_cart(kvec); - modk2 = dot(kvec_cart, kvec_cart); - // add k-point to lists - kpts_tmp.push_back(kvec); - kpts_cart_tmp.push_back(kvec_cart); - ksq_tmp.push_back(modk2); - } - } - } - // set allowed maximum translation - TempActualMax[0] = mmax[0]; - TempActualMax[1] = mmax[1]; - TempActualMax[2] = mmax[2]; - } - - //Update a record of the number of k vectors - numk = kpts_tmp.size(); - std::map*> kpts_sorted; - //create the map: use simple integer with resolution of 0.00000001 in ksq - for (int ik = 0; ik < numk; ik++) - { - //This is a workaround for ewald bug (Issue #2105). Basically, 1e-7 is the resolution of |k|^2 for doubles, - //so we jack up the tolerance to match that. 
- const int64_t k_ind = static_cast(ksq_tmp[ik] * 10000000); - auto it(kpts_sorted.find(k_ind)); - if (it == kpts_sorted.end()) - { - std::vector* newSet = new std::vector; - kpts_sorted[k_ind] = newSet; - newSet->push_back(ik); - } - else - { - (*it).second->push_back(ik); - } - } - std::map*>::iterator it(kpts_sorted.begin()); - kpts.resize(numk); - kpts_cart.resize(numk); - kpts_cart_soa_.resize(numk); - ksq.resize(numk); - kshell.resize(kpts_sorted.size() + 1, 0); - int ok = 0, ish = 0; - while (it != kpts_sorted.end()) - { - std::vector::iterator vit((*it).second->begin()); - while (vit != (*it).second->end()) - { - int ik = (*vit); - kpts[ok] = kpts_tmp[ik]; - kpts_cart[ok] = kpts_cart_tmp[ik]; - kpts_cart_soa_(ok) = kpts_cart_tmp[ik]; - ksq[ok] = ksq_tmp[ik]; - ++vit; - ++ok; - } - kshell[ish + 1] = kshell[ish] + (*it).second->size(); - ++it; - ++ish; - } - kpts_cart_soa_.updateTo(); - it = kpts_sorted.begin(); - std::map*>::iterator e_it(kpts_sorted.end()); - while (it != e_it) - { - delete it->second; - it++; - } - //Finished searching k-points. Copy list of maximum translations. - mmax[DIM] = 0; - for (int idim = 0; idim < DIM; idim++) - { - mmax[idim] = TempActualMax[idim]; - mmax[DIM] = std::max(mmax[idim], mmax[DIM]); - //if(mmax[idim] > mmax[DIM]) mmax[DIM] = mmax[idim]; - } - //Now fill the array that returns the index of -k when given the index of k. - minusk.resize(numk); - - //Assigns a unique hash value to each kpoint. 
- auto getHashOfVec = [](const auto& inpv, int hashparam) -> int64_t { - int64_t hash = 0; // this will cause integral promotion below - for (int i = 0; i < inpv.Size; ++i) - hash += inpv[i] + hash * hashparam; - return hash; - }; - - // Create a map from the hash value for each k vector to the index - std::map hashToIndex; - for (int ki = 0; ki < numk; ki++) - { - hashToIndex[getHashOfVec(kpts[ki], numk)] = ki; - } - // Use the map to find the index of -k from the index of k - for (int ki = 0; ki < numk; ki++) - { - minusk[ki] = hashToIndex[getHashOfVec(-1 * kpts[ki], numk)]; - } -} - -} // namespace qmcplusplus diff --git a/src/Particle/LongRange/KContainer.h b/src/Particle/LongRange/KContainer.h index eee91affc7..c181806107 100644 --- a/src/Particle/LongRange/KContainer.h +++ b/src/Particle/LongRange/KContainer.h @@ -16,83 +16,11 @@ #define QMCPLUSPLUS_KCONTAINER_H #include "Configuration.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "OMPTarget/OffloadAlignedAllocators.hpp" +#include "KContainerT.h" namespace qmcplusplus { -/** Container for k-points - * - * It generates a set of k-points that are unit-translations of the reciprocal-space - * cell. K-points are generated within a spherical cutoff set by the supercell - */ -class KContainer : public QMCTraits -{ -private: - /// The cutoff up to which k-vectors are generated. - RealType kcutoff; - -public: - //Typedef for the lattice-type - using ParticleLayout = PtclOnLatticeTraits::ParticleLayout; - - ///number of k-points - int numk; - - /** maximum integer translations of reciprocal cell within kc. - * - * Last index is max. 
of first dimension+1 - */ - TinyVector mmax; - - /** K-vector in reduced coordinates - */ - std::vector> kpts; - /** K-vector in Cartesian coordinates - */ - std::vector kpts_cart; - /** squre of kpts in Cartesian coordniates - */ - std::vector ksq; - /** Given a k index, return index to -k - */ - std::vector minusk; - /** kpts which belong to the ith-shell [kshell[i], kshell[i+1]) */ - std::vector kshell; - - /** k points sorted by the |k| excluding |k|=0 - * - * The first for |k| - * The second for a map to the full index. The size of the second is the degeneracy. - */ - //std::map*> kpts_sorted; - - /** update k-vectors - * @param sc supercell - * @param kc cutoff radius in the K - * @param twist shifts the center of the grid of k-vectors - * @param useSphere if true, use the |K| - */ - void updateKLists(const ParticleLayout& lattice, - RealType kc, - unsigned ndim, - const PosType& twist = PosType(), - bool useSphere = true); - - const auto& get_kpts_cart_soa() const { return kpts_cart_soa_; } - -private: - /** compute approximate parallelpiped that surrounds kc - * @param lattice supercell - */ - void findApproxMMax(const ParticleLayout& lattice, unsigned ndim); - /** construct the container for k-vectors */ - void BuildKLists(const ParticleLayout& lattice, const PosType& twist, bool useSphere); - - /** K-vector in Cartesian coordinates in SoA layout - */ - VectorSoaContainer> kpts_cart_soa_; -}; +using KContainer = KContainerT; } // namespace qmcplusplus diff --git a/src/Particle/LongRange/StructFact.cpp b/src/Particle/LongRange/StructFact.cpp deleted file mode 100644 index b5f23aab9f..0000000000 --- a/src/Particle/LongRange/StructFact.cpp +++ /dev/null @@ -1,216 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. 
-// -// File developed by: Bryan Clark, bclark@Princeton.edu, Princeton University -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "StructFact.h" -#include "CPU/math.hpp" -#include "CPU/e2iphi.h" -#include "CPU/SIMD/vmath.hpp" -#include "CPU/BLAS.hpp" -#include "Utilities/qmc_common.h" -#include "OMPTarget/OMPTargetMath.hpp" -#include "RealSpacePositionsOMPTarget.h" -#include "LRCoulombSingleton.h" - -namespace qmcplusplus -{ -//Constructor - pass arguments to k_lists_' constructor -StructFact::StructFact(const ParticleLayout& lattice, const KContainer& k_lists) - : SuperCellEnum(SUPERCELL_BULK), - k_lists_(k_lists), - StorePerParticle(false), - update_all_timer_(createGlobalTimer("StructFact::update_all_part", timer_level_fine)) -{ - if (LRCoulombSingleton::isQuasi2D()) - { - app_log() << " Setting StructFact::SuperCellEnum=SUPERCELL_SLAB " << std::endl; - SuperCellEnum = SUPERCELL_SLAB; - } -} - -//Destructor -StructFact::~StructFact() = default; - -void StructFact::resize(int nkpts, int num_species, int num_ptcls) -{ - rhok_r.resize(num_species, nkpts); - rhok_i.resize(num_species, nkpts); - if (StorePerParticle) - { - eikr_r.resize(num_ptcls, nkpts); - eikr_i.resize(num_ptcls, nkpts); - } -} - - -void StructFact::updateAllPart(const ParticleSet& P) -{ - ScopedTimer local(update_all_timer_); - computeRhok(P); -} - -void StructFact::mw_updateAllPart(const RefVectorWithLeader& sk_list, - const RefVectorWithLeader& p_list, - SKMultiWalkerMem& mw_mem) -{ - auto& sk_leader = sk_list.getLeader(); - auto& p_leader = p_list.getLeader(); - ScopedTimer 
local(sk_leader.update_all_timer_); - if (p_leader.getCoordinates().getKind() != DynamicCoordinateKind::DC_POS_OFFLOAD || sk_leader.StorePerParticle) - for (int iw = 0; iw < sk_list.size(); iw++) - sk_list[iw].computeRhok(p_list[iw]); - else - { - const size_t nw = p_list.size(); - const size_t num_species = p_leader.groups(); - const auto& kpts_cart = sk_leader.k_lists_.get_kpts_cart_soa(); - const size_t nk = sk_leader.k_lists_.numk; - const size_t nk_padded = kpts_cart.capacity(); - - auto& coordinates_leader = static_cast(p_leader.getCoordinates()); - auto& mw_rsoa_dev_ptrs = coordinates_leader.getMultiWalkerRSoADevicePtrs(); - const size_t np_padded = p_leader.getCoordinates().getAllParticlePos().capacity(); - - constexpr size_t cplx_stride = 2; - mw_mem.nw_rhok.resize(nw * num_species * cplx_stride, nk_padded); - - // make the compute over nk by blocks - constexpr size_t kblock_size = 512; - const size_t num_kblocks = (nk + kblock_size) / kblock_size; - - auto* mw_rsoa_ptr = mw_rsoa_dev_ptrs.data(); - auto* kpts_cart_ptr = kpts_cart.data(); - auto* mw_rhok_ptr = mw_mem.nw_rhok.data(); - auto* group_offsets = p_leader.get_group_offsets().data(); - - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) map(always, from : mw_rhok_ptr[:mw_mem.nw_rhok.size()])") - for (int iw = 0; iw < nw; iw++) - for (int ib = 0; ib < num_kblocks; ib++) - { - const size_t offset = ib * kblock_size; - const size_t this_block_size = omptarget::min(kblock_size, nk - offset); - const auto* rsoa_ptr = mw_rsoa_ptr[iw]; - - PRAGMA_OFFLOAD("omp parallel for") - for (int ik = 0; ik < this_block_size; ik++) - for (int is = 0; is < num_species; is++) - { - RealType rhok_r(0), rhok_i(0); - - for (int ip = group_offsets[is]; ip < group_offsets[is + 1]; ip++) - { - RealType s, c, phase(0); - for (int idim = 0; idim < DIM; idim++) - phase += kpts_cart_ptr[ik + offset + nk_padded * idim] * rsoa_ptr[ip + idim * np_padded]; - omptarget::sincos(phase, &s, &c); - rhok_r += c; - rhok_i += s; - } - 
- mw_rhok_ptr[(iw * num_species + is) * cplx_stride * nk_padded + offset + ik] = rhok_r; - mw_rhok_ptr[(iw * num_species + is) * cplx_stride * nk_padded + nk_padded + offset + ik] = rhok_i; - } - } - - for (int iw = 0; iw < nw; iw++) - for (int is = 0; is < num_species; is++) - { - std::copy_n(mw_mem.nw_rhok[(iw * num_species + is) * cplx_stride], nk, sk_list[iw].rhok_r[is]); - std::copy_n(mw_mem.nw_rhok[(iw * num_species + is) * cplx_stride + 1], nk, sk_list[iw].rhok_i[is]); - } - } -} - - -/** evaluate rok per species, eikr per particle - */ -void StructFact::computeRhok(const ParticleSet& P) -{ - const size_t num_ptcls = P.getTotalNum(); - const size_t num_species = P.groups(); - const size_t nk = k_lists_.numk; - resize(nk, num_species, num_ptcls); - - rhok_r = 0.0; - rhok_i = 0.0; - if (StorePerParticle) - { - // save per particle and species value - for (int i = 0; i < num_ptcls; ++i) - { - const auto& pos = P.R[i]; - auto* restrict eikr_r_ptr = eikr_r[i]; - auto* restrict eikr_i_ptr = eikr_i[i]; - auto* restrict rhok_r_ptr = rhok_r[P.getGroupID(i)]; - auto* restrict rhok_i_ptr = rhok_i[P.getGroupID(i)]; -#pragma omp simd - for (int ki = 0; ki < nk; ki++) - { - qmcplusplus::sincos(dot(k_lists_.kpts_cart[ki], pos), &eikr_i_ptr[ki], &eikr_r_ptr[ki]); - rhok_r_ptr[ki] += eikr_r_ptr[ki]; - rhok_i_ptr[ki] += eikr_i_ptr[ki]; - } - } - } - else - { - // save per species value - for (int i = 0; i < num_ptcls; ++i) - { - const auto& pos = P.R[i]; - auto* restrict rhok_r_ptr = rhok_r[P.getGroupID(i)]; - auto* restrict rhok_i_ptr = rhok_i[P.getGroupID(i)]; -#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER) -#pragma omp simd - for (int ki = 0; ki < nk; ki++) - { - RealType s, c; - qmcplusplus::sincos(dot(k_lists_.kpts_cart[ki], pos), &s, &c); - rhok_r_ptr[ki] += c; - rhok_i_ptr[ki] += s; - } -#else - // make the compute over nk by blocks - constexpr size_t kblock_size = 512; - const size_t num_kblocks = (nk + kblock_size) / kblock_size; - RealType 
phiV[kblock_size], eikr_r_temp[kblock_size], eikr_i_temp[kblock_size]; - - for (int ib = 0; ib < num_kblocks; ib++) - { - const size_t offset = ib * kblock_size; - const size_t this_block_size = std::min(kblock_size, nk - offset); - for (int ki = 0; ki < this_block_size; ki++) - phiV[ki] = dot(k_lists_.kpts_cart[ki + offset], pos); - eval_e2iphi(this_block_size, phiV, eikr_r_temp, eikr_i_temp); - for (int ki = 0; ki < this_block_size; ki++) - { - rhok_r_ptr[ki + offset] += eikr_r_temp[ki]; - rhok_i_ptr[ki + offset] += eikr_i_temp[ki]; - } - } -#endif - } - } -} - -void StructFact::turnOnStorePerParticle(const ParticleSet& P) -{ - if (!StorePerParticle) - { - StorePerParticle = true; - computeRhok(P); - } -} - -} // namespace qmcplusplus diff --git a/src/Particle/LongRange/StructFact.h b/src/Particle/LongRange/StructFact.h index cfa29e9255..79fe0a2e99 100644 --- a/src/Particle/LongRange/StructFact.h +++ b/src/Particle/LongRange/StructFact.h @@ -14,113 +14,12 @@ #ifndef QMCPLUSPLUS_STRUCTFACT_H #define QMCPLUSPLUS_STRUCTFACT_H -#include "OhmmsPETE/OhmmsVector.h" -#include "OhmmsPETE/OhmmsMatrix.h" #include "Configuration.h" -#include -#include -#include -#include +#include "StructFactT.h" namespace qmcplusplus { -class ParticleSet; -class KContainer; -struct SKMultiWalkerMem; - -/** @ingroup longrange - *\brief Calculates the structure-factor for a particle set - * - * Structure factor per species - * Rhok[alpha][k] \f$ \equiv \rho_{k}^{\alpha} = \sum_{i} e^{i{\bf k}\cdot{\bf r_i}}\f$ - * Structure factor per particle - * eikr[i][k] - */ -class StructFact : public QMCTraits -{ -public: - //Typedef for the lattice-type - using ParticleLayout = PtclOnLatticeTraits::ParticleLayout; - - /** enumeration for the methods to handle mixed bconds - * - * Allow overwriting lattice::SuperCellEnum to use D-dim k-point sets with mixed BC - */ - int SuperCellEnum; - ///2-D container for the phase - Matrix rhok_r, rhok_i; - Matrix eikr_r, eikr_i; - /** Constructor - copy ParticleSet 
and init. k-shells - * @param lattice long range box - * @param kc cutoff for k - * - * At least in the batched version Structure factor is _NOT_ valid - * after construction. - */ - StructFact(const ParticleLayout& lattice, const KContainer& k_lists); - /// desructor - ~StructFact(); - - /** Update Rhok if all particles moved - */ - void updateAllPart(const ParticleSet& P); - - /** Update RhoK for all particles for multiple walkers particles. - * - * In batched context until this is called StructFact is invalid and will cause a crash if any Hamiltonian using StructFact - * indirectly through ParticleSet is evaluated. - */ - static void mw_updateAllPart(const RefVectorWithLeader& sk_list, - const RefVectorWithLeader& p_list, - SKMultiWalkerMem& mw_mem); - - /** @brief switch on the storage per particle - * if StorePerParticle was false, this function allocates memory and precompute data - * if StorePerParticle was true, this function is no-op - */ - void turnOnStorePerParticle(const ParticleSet& P); - - /// accessor of StorePerParticle - bool isStorePerParticle() const { return StorePerParticle; } - - /// accessor of k_lists_ - const KContainer& getKLists() const { return k_lists_; } - -private: - /// Compute all rhok elements from the start - void computeRhok(const ParticleSet& P); - /** resize the internal data - * @param nkpts - * @param num_species number of species - * @param num_ptcls number of particles - */ - void resize(int nkpts, int num_species, int num_ptcls); - - /// K-Vector List. - const KContainer& k_lists_; - /** Whether intermediate data is stored per particle. default false - * storing data per particle needs significant amount of memory but some calculation may request it. 
- * storing data per particle specie is more cost-effective - */ - bool StorePerParticle; - /// timer for updateAllPart - NewTimer& update_all_timer_; -}; - -///multi walker shared memory buffer -struct SKMultiWalkerMem : public Resource -{ - using RealType = StructFact::RealType; - - ///dist displ for temporary and old pairs - Matrix> nw_rhok; - - SKMultiWalkerMem() : Resource("SKMultiWalkerMem") {} - - SKMultiWalkerMem(const SKMultiWalkerMem&) : SKMultiWalkerMem() {} - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } -}; +using StructFact = StructFactT; } // namespace qmcplusplus diff --git a/src/Particle/LongRange/StructFactT.h b/src/Particle/LongRange/StructFactT.h index e61ed50beb..b6f3a9269e 100644 --- a/src/Particle/LongRange/StructFactT.h +++ b/src/Particle/LongRange/StructFactT.h @@ -19,18 +19,17 @@ #include "OhmmsPETE/OhmmsMatrix.h" #include "OhmmsPETE/OhmmsVector.h" #include "Particle/ParticleSetTraits.h" -#include -#include -#include -#include +#include "KContainer.h" +#include "NewTimer.h" +#include "OMPTarget/OffloadAlignedAllocators.hpp" +#include "Resource.h" +#include "type_traits/template_types.hpp" namespace qmcplusplus { template class ParticleSetT; template -class KContainerT; -template struct SKMultiWalkerMemT; /** @ingroup longrange diff --git a/src/Particle/LongRange/tests/test_lrhandler.cpp b/src/Particle/LongRange/tests/test_lrhandler.cpp index f3634028c8..9f0d8a67db 100644 --- a/src/Particle/LongRange/tests/test_lrhandler.cpp +++ b/src/Particle/LongRange/tests/test_lrhandler.cpp @@ -15,6 +15,7 @@ #include "Lattice/CrystalLattice.h" #include "Particle/ParticleSet.h" #include "LongRange/LRHandlerBase.h" +#include "Particle/SimulationCell.h" namespace qmcplusplus { diff --git a/src/Particle/MCCoords.cpp b/src/Particle/MCCoords.cpp deleted file mode 100644 index c2849c71ff..0000000000 --- a/src/Particle/MCCoords.cpp +++ /dev/null @@ -1,51 +0,0 @@ 
-////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2022 QMCPACK developers. -// -// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory -////////////////////////////////////////////////////////////////////////////////////// - -#include "MCCoords.hpp" - -namespace qmcplusplus -{ -void MCCoords::getSubset(const std::size_t offset, - const std::size_t size, - MCCoords& out) const -{ - std::copy_n(positions.begin() + offset, size, out.positions.begin()); -} - -MCCoords& MCCoords::operator+=(const MCCoords& rhs) -{ - assert(positions.size() == rhs.positions.size()); - std::transform(positions.begin(), positions.end(), rhs.positions.begin(), positions.begin(), - [](const QMCTraits::PosType& x, const QMCTraits::PosType& y) { return x + y; }); - return *this; -} - -void MCCoords::getSubset(const std::size_t offset, - const std::size_t size, - MCCoords& out) const -{ - std::copy_n(positions.begin() + offset, size, out.positions.begin()); - std::copy_n(spins.begin() + offset, size, out.spins.begin()); -} - -MCCoords& MCCoords::operator+=(const MCCoords& rhs) -{ - assert(positions.size() == rhs.positions.size()); - std::transform(positions.begin(), positions.end(), rhs.positions.begin(), positions.begin(), - [](const QMCTraits::PosType& x, const QMCTraits::PosType& y) { return x + y; }); - std::transform(spins.begin(), spins.end(), rhs.spins.begin(), spins.begin(), - [](const QMCTraits::FullPrecRealType& x, const QMCTraits::FullPrecRealType& y) { return x + y; }); - return *this; -} - -template struct MCCoords; -template struct MCCoords; -} // namespace qmcplusplus diff --git a/src/Particle/MCCoords.hpp b/src/Particle/MCCoords.hpp index 0c623c0888..f6cee15aa5 100644 --- 
a/src/Particle/MCCoords.hpp +++ b/src/Particle/MCCoords.hpp @@ -14,55 +14,13 @@ #define QMCPLUSPLUS_MCCOORDS_HPP #include "Configuration.h" -#include "type_traits/complex_help.hpp" -#include - -#include +#include "MCCoordsT.hpp" namespace qmcplusplus { -enum class CoordsType -{ - POS, - POS_SPIN -}; - template -struct MCCoords; - -template<> -struct MCCoords -{ - MCCoords(const std::size_t size) : positions(size) {} - - MCCoords& operator+=(const MCCoords& rhs); - - /** get subset of MCCoords - * [param,out] out - */ - void getSubset(const std::size_t offset, const std::size_t size, MCCoords& out) const; - - std::vector positions; -}; - -template<> -struct MCCoords -{ - MCCoords(const std::size_t size) : positions(size), spins(size) {} - - MCCoords& operator+=(const MCCoords& rhs); - - /** get subset of MCCoords - * [param,out] out - */ - void getSubset(const std::size_t offset, const std::size_t size, MCCoords& out) const; - - std::vector positions; - std::vector spins; -}; +using MCCoords = MCCoordsT; -extern template struct MCCoords; -extern template struct MCCoords; } // namespace qmcplusplus #endif diff --git a/src/Particle/MCCoordsT.hpp b/src/Particle/MCCoordsT.hpp index 50b419178f..1ca99aba3f 100644 --- a/src/Particle/MCCoordsT.hpp +++ b/src/Particle/MCCoordsT.hpp @@ -14,7 +14,6 @@ #ifndef QMCPLUSPLUS_MCCOORDST_HPP #define QMCPLUSPLUS_MCCOORDST_HPP -#include "MCCoords.hpp" #include "ParticleSetTraits.h" #include "type_traits/complex_help.hpp" @@ -23,11 +22,11 @@ namespace qmcplusplus { -// enum class CoordsType -// { -// POS, -// POS_SPIN -// }; +enum class CoordsType +{ + POS, + POS_SPIN +}; template struct MCCoordsT; diff --git a/src/Particle/MCWalkerConfiguration.cpp b/src/Particle/MCWalkerConfiguration.cpp deleted file mode 100644 index be957e6dff..0000000000 --- a/src/Particle/MCWalkerConfiguration.cpp +++ /dev/null @@ -1,252 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed 
under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Jordan E. Vincent, University of Illinois at Urbana-Champaign -// Bryan Clark, bclark@Princeton.edu, Princeton University -// Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Cynthia Gu, zg1@ornl.gov, Oak Ridge National Laboratory -// Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "MCWalkerConfiguration.h" -#include "ParticleBase/RandomSeqGenerator.h" -#include "Message/Communicate.h" -#include "Message/CommOperators.h" -#include "Utilities/IteratorUtility.h" -#include "LongRange/StructFact.h" -#include "Particle/HDFWalkerOutput.h" -#include "Particle/MCSample.h" -#include "Particle/Reptile.h" -#include "hdf/hdf_hyperslab.h" -#include "hdf/HDFVersion.h" -#include - -namespace qmcplusplus -{ -MCWalkerConfiguration::MCWalkerConfiguration(const SimulationCell& simulation_cell, const DynamicCoordinateKind kind) - : ParticleSet(simulation_cell, kind), ReadyForPbyP(false), UpdateMode(Update_Walker), reptile(0), Polymer(0) -{} - -MCWalkerConfiguration::MCWalkerConfiguration(const MCWalkerConfiguration& mcw) - : ParticleSet(mcw), ReadyForPbyP(false), UpdateMode(Update_Walker), Polymer(0) -{ - samples.clearEnsemble(); - samples.setMaxSamples(mcw.getMaxSamples()); - setWalkerOffsets(mcw.getWalkerOffsets()); - Properties = mcw.Properties; -} - -MCWalkerConfiguration::~MCWalkerConfiguration() = default; - 
-void MCWalkerConfiguration::createWalkers(int n) -{ - const int old_nw = getActiveWalkers(); - WalkerConfigurations::createWalkers(n, TotalNum); - // no pre-existing walkers, need to initialized based on particleset. - if (old_nw == 0) - for (auto& awalker : walker_list_) - { - awalker->R = R; - awalker->spins = spins; - } - resizeWalkerHistories(); -} - - -void MCWalkerConfiguration::resize(int numWalkers, int numPtcls) -{ - if (TotalNum && walker_list_.size()) - app_warning() << "MCWalkerConfiguration::resize cleans up the walker list." << std::endl; - const int old_nw = getActiveWalkers(); - ParticleSet::resize(unsigned(numPtcls)); - WalkerConfigurations::resize(numWalkers, TotalNum); - // no pre-existing walkers, need to initialized based on particleset. - if (old_nw == 0) - for (auto& awalker : walker_list_) - { - awalker->R = R; - awalker->spins = spins; - } -} - -/** Make Metropolis move to the walkers and save in a temporary array. - * @param it the iterator of the first walker to work on - * @param tauinv inverse of the time step - * - * R + D + X - */ -void MCWalkerConfiguration::sample(iterator it, RealType tauinv) -{ - throw std::runtime_error("MCWalkerConfiguration::sample obsolete"); - // makeGaussRandom(R); - // R *= tauinv; - // R += (*it)->R + (*it)->Drift; -} - -/** reset the Property container of all the walkers - */ -void MCWalkerConfiguration::resetWalkerProperty(int ncopy) -{ - int m(PropertyList.size()); - app_log() << " Resetting Properties of the walkers " << ncopy << " x " << m << std::endl; - try - { - Properties.resize(ncopy, m); - } - catch (std::domain_error& de) - { - app_error() << de.what() << '\n' - << "This is likely because some object has attempted to add walker properties\n" - << " in excess of WALKER_MAX_PROPERTIES.\n" - << "build with cmake ... 
-DWALKER_MAX_PROPERTIES=at_least_properties_required" << std::endl; - APP_ABORT("Fatal Exception"); - } - - for (auto& walker : walker_list_) - { - walker->resizeProperty(ncopy, m); - walker->Weight = 1.0; - } - resizeWalkerHistories(); -} - -void MCWalkerConfiguration::resizeWalkerHistories() -{ - //using std::vector > is too costly. - int np = PropertyHistory.size(); - if (np) - for (int iw = 0; iw < walker_list_.size(); ++iw) - walker_list_[iw]->PropertyHistory = PropertyHistory; - np = PHindex.size(); - if (np) - for (int iw = 0; iw < walker_list_.size(); ++iw) - walker_list_[iw]->PHindex = PHindex; - ; -} - -/** allocate the SampleStack - * @param n number of samples per thread - */ -void MCWalkerConfiguration::setNumSamples(int n) -{ - samples.clearEnsemble(); - samples.setMaxSamples(n); -} - -/** save the current walkers to SampleStack - */ -void MCWalkerConfiguration::saveEnsemble() { saveEnsemble(walker_list_.begin(), walker_list_.end()); } - -/** save the [first,last) walkers to SampleStack - */ -void MCWalkerConfiguration::saveEnsemble(iterator first, iterator last) -{ - for (; first != last; first++) - { - samples.appendSample(MCSample(**first)); - } -} -/** load a single sample from SampleStack - */ -void MCWalkerConfiguration::loadSample(ParticleSet& pset, size_t iw) const { samples.loadSample(pset, iw); } - -/** load SampleStack to walker_list_ - */ -void MCWalkerConfiguration::loadEnsemble() -{ - using WP = WalkerProperties::Indexes; - int nsamples = std::min(samples.getMaxSamples(), samples.getNumSamples()); - if (samples.empty() || nsamples == 0) - return; - Walker_t::PropertyContainer_t prop(1, PropertyList.size(), 1, WP::MAXPROPERTIES); - walker_list_.resize(nsamples); - for (int i = 0; i < nsamples; ++i) - { - auto awalker = std::make_unique(TotalNum); - awalker->Properties.copy(prop); - samples.getSample(i).convertToWalker(*awalker); - walker_list_[i] = std::move(awalker); - } - resizeWalkerHistories(); - samples.clearEnsemble(); -} - -bool 
MCWalkerConfiguration::dumpEnsemble(std::vector& others, - HDFWalkerOutput& out, - int np, - int nBlock) -{ - WalkerConfigurations wctemp; - for (auto* mcwc : others) - { - const auto& astack(mcwc->getSampleStack()); - const size_t sample_size = std::min(mcwc->getMaxSamples(), mcwc->numSamples()); - for (int j = 0; j < sample_size; ++j) - { - const auto& sample = astack.getSample(j); - const size_t num_ptcls = sample.getNumPtcls(); - auto awalker = std::make_unique(num_ptcls); - sample.convertToWalker(*awalker); - wctemp.push_back(std::move(awalker)); - } - } - const int w = wctemp.getActiveWalkers(); - if (w == 0) - return false; - - // The following code assumes the same amount of active walkers on all the MPI ranks - std::vector nwoff(np + 1, 0); - for (int ip = 0; ip < np; ++ip) - nwoff[ip + 1] = nwoff[ip] + w; - wctemp.setWalkerOffsets(nwoff); - out.dump(wctemp, nBlock); - return true; -} - -int MCWalkerConfiguration::getMaxSamples() const { return samples.getMaxSamples(); } - -void MCWalkerConfiguration::loadEnsemble(std::vector& others, bool doclean) -{ - using WP = WalkerProperties::Indexes; - std::vector off(others.size() + 1, 0); - for (int i = 0; i < others.size(); ++i) - { - off[i + 1] = off[i] + std::min(others[i]->getMaxSamples(), others[i]->numSamples()); - } - int nw_tot = off.back(); - if (nw_tot) - { - Walker_t::PropertyContainer_t prop(1, PropertyList.size(), 1, WP::MAXPROPERTIES); - while (walker_list_.size()) - pop_back(); - walker_list_.resize(nw_tot); - for (int i = 0; i < others.size(); ++i) - { - SampleStack& astack(others[i]->getSampleStack()); - for (int j = 0, iw = off[i]; iw < off[i + 1]; ++j, ++iw) - { - auto awalker = std::make_unique(TotalNum); - awalker->Properties.copy(prop); - astack.getSample(j).convertToWalker(*awalker); - walker_list_[iw] = std::move(awalker); - } - if (doclean) - others[i]->clearEnsemble(); - } - } - if (doclean) - resizeWalkerHistories(); -} - -void MCWalkerConfiguration::clearEnsemble() { 
samples.clearEnsemble(); } - -} // namespace qmcplusplus diff --git a/src/Particle/MCWalkerConfiguration.h b/src/Particle/MCWalkerConfiguration.h index c32db3f0fb..8e3daf2fab 100644 --- a/src/Particle/MCWalkerConfiguration.h +++ b/src/Particle/MCWalkerConfiguration.h @@ -22,158 +22,13 @@ */ #ifndef QMCPLUSPLUS_MCWALKERCONFIGURATION_H #define QMCPLUSPLUS_MCWALKERCONFIGURATION_H -#include "Particle/ParticleSet.h" -#include "Particle/WalkerConfigurations.h" -#include "Particle/Walker.h" -#include "Particle/SampleStack.h" -#include "Utilities/IteratorUtility.h" -namespace qmcplusplus -{ -//Forward declaration -class MultiChain; -class HDFWalkerOutput; -class Reptile; +#include "Configuration.h" +#include "Particle/MCWalkerConfigurationT.h" -/** A set of walkers that are to be advanced by Metropolis Monte Carlo. - * - *As a derived class from ParticleSet, MCWalkerConfiguration interacts with - *QMCHamiltonian and TrialWaveFunction as a ParticleSet, while QMCDrivers - *use it as multiple walkers whose configurations are advanced according - to MC algorithms. - * - Each walker is represented by Walker and - *MCWalkerConfiguration contains a list of - *the walkers. This class enables two possible moves: - *
    - *
  • move the entire active walkers, similarly to molecu. Suitable for - *small and big systems with a small time step. - *
  • move a particle for each walker. Suitable for large systems. - - *
- */ -class MCWalkerConfiguration : public ParticleSet, public WalkerConfigurations +namespace qmcplusplus { -public: - /**enumeration for update*/ - enum - { - Update_All = 0, ///move all the active walkers - Update_Walker, ///move a walker by walker - Update_Particle ///move a particle by particle - }; - - using Walker_t = WalkerConfigurations::Walker_t; - ///container type of the Properties of a Walker - using PropertyContainer_t = Walker_t::PropertyContainer_t; - ///container type of Walkers - using WalkerList_t = std::vector>; - /// FIX: a type alias of iterator for an object should not be for just one of many objects it holds. - using iterator = WalkerList_t::iterator; - ///const_iterator of Walker container - using const_iterator = WalkerList_t::const_iterator; - - using ReptileList_t = UPtrVector; - - ///default constructor - MCWalkerConfiguration(const SimulationCell& simulation_cell, - const DynamicCoordinateKind kind = DynamicCoordinateKind::DC_POS); - - ///default constructor: copy only ParticleSet - MCWalkerConfiguration(const MCWalkerConfiguration& mcw); - ~MCWalkerConfiguration(); - /** create numWalkers Walkers - * - * Append Walkers to WalkerList. 
- */ - void createWalkers(int numWalkers); - ///clean up the walker list and make a new list - void resize(int numWalkers, int numPtcls); - - ///clean up the walker list - using WalkerConfigurations::clear; - ///resize Walker::PropertyHistory and Walker::PHindex: - void resizeWalkerHistories(); - - ///make random moves for all the walkers - //void sample(iterator first, iterator last, value_type tauinv); - ///make a random move for a walker - void sample(iterator it, RealType tauinv); - - ///return the number of particles per walker - inline int getParticleNum() const { return R.size(); } - /**@}*/ - - /** set LocalEnergy - * @param e current average Local Energy - */ - inline void setLocalEnergy(RealType e) { LocalEnergy = e; } - - /** return LocalEnergy - */ - inline RealType getLocalEnergy() const { return LocalEnergy; } - - inline MultiChain* getPolymer() { return Polymer; } - - inline void setPolymer(MultiChain* chain) { Polymer = chain; } - - void resetWalkerProperty(int ncopy = 1); - - inline bool updatePbyP() const { return ReadyForPbyP; } - - //@{save/load/clear function for optimization - // - int numSamples() const { return samples.getNumSamples(); } - ///set the number of max samples - void setNumSamples(int n); - ///save the position of current walkers to SampleStack - void saveEnsemble(); - ///save the position of current walkers - void saveEnsemble(iterator first, iterator last); - /// load a single sample from SampleStack - void loadSample(ParticleSet& pset, size_t iw) const; - /// load SampleStack data to the current list of walker configurations - void loadEnsemble(); - /// load the SampleStacks of others to the current list of walker configurations - void loadEnsemble(std::vector& others, bool doclean = true); - /** dump Samples to a file - * @param others MCWalkerConfigurations whose samples will be collected - * @param out engine to write the samples to state_0/walkers - * @param np number of processors - * @return true with non-zero samples - 
* - * CAUTION: The current implementation assumes the same amount of active walkers on all the MPI ranks. - */ - static bool dumpEnsemble(std::vector& others, HDFWalkerOutput& out, int np, int nBlock); - ///clear the ensemble - void clearEnsemble(); - - const SampleStack& getSampleStack() const { return samples; } - SampleStack& getSampleStack() { return samples; } - - /// Transitional forwarding methods - int getMaxSamples() const; - //@} - -protected: - ///true if the buffer is ready for particle-by-particle updates - bool ReadyForPbyP; - ///update-mode index - int UpdateMode; - - RealType LocalEnergy; - -public: - ///a collection of reptiles contained in MCWalkerConfiguration. - ReptileList_t ReptileList; - Reptile* reptile; - - friend class MCPopulation; - -private: - MultiChain* Polymer; +using MCWalkerConfiguration = MCWalkerConfigurationT; - SampleStack samples; -}; } // namespace qmcplusplus #endif diff --git a/src/Particle/MCWalkerConfigurationT.cpp b/src/Particle/MCWalkerConfigurationT.cpp index 1f3fcaa1c0..7867a9195e 100644 --- a/src/Particle/MCWalkerConfigurationT.cpp +++ b/src/Particle/MCWalkerConfigurationT.cpp @@ -59,7 +59,7 @@ MCWalkerConfigurationT::MCWalkerConfigurationT( { samples.clearEnsemble(); samples.setMaxSamples(mcw.getMaxSamples()); - setWalkerOffsets(mcw.getWalkerOffsets()); + this->setWalkerOffsets(mcw.getWalkerOffsets()); this->Properties = mcw.Properties; } @@ -70,11 +70,11 @@ template void MCWalkerConfigurationT::createWalkers(int n) { - const int old_nw = getActiveWalkers(); - WalkerConfigurations::createWalkers(n, this->TotalNum); + const int old_nw = this->getActiveWalkers(); + WalkerConfigurationsT::createWalkers(n, this->TotalNum); // no pre-existing walkers, need to initialized based on particleset. 
if (old_nw == 0) - for (auto& awalker : walker_list_) { + for (auto& awalker : this->walker_list_) { awalker->R = this->R; awalker->spins = this->spins; } @@ -85,16 +85,16 @@ template void MCWalkerConfigurationT::resize(int numWalkers, int numPtcls) { - if (this->TotalNum && walker_list_.size()) + if (this->TotalNum && this->walker_list_.size()) app_warning() << "MCWalkerConfiguration::resize cleans up the walker list." << std::endl; - const int old_nw = getActiveWalkers(); + const int old_nw = this->getActiveWalkers(); ParticleSetT::resize(unsigned(numPtcls)); - WalkerConfigurations::resize(numWalkers, this->TotalNum); + WalkerConfigurationsT::resize(numWalkers, this->TotalNum); // no pre-existing walkers, need to initialized based on particleset. if (old_nw == 0) - for (auto& awalker : walker_list_) { + for (auto& awalker : this->walker_list_) { awalker->R = this->R; awalker->spins = this->spins; } @@ -139,7 +139,7 @@ MCWalkerConfigurationT::resetWalkerProperty(int ncopy) APP_ABORT("Fatal Exception"); } - for (auto& walker : walker_list_) { + for (auto& walker : this->walker_list_) { walker->resizeProperty(ncopy, m); walker->Weight = 1.0; } @@ -153,12 +153,12 @@ MCWalkerConfigurationT::resizeWalkerHistories() // using std::vector > is too costly. 
int np = this->PropertyHistory.size(); if (np) - for (int iw = 0; iw < walker_list_.size(); ++iw) - walker_list_[iw]->PropertyHistory = this->PropertyHistory; + for (int iw = 0; iw < this->walker_list_.size(); ++iw) + this->walker_list_[iw]->PropertyHistory = this->PropertyHistory; np = this->PHindex.size(); if (np) - for (int iw = 0; iw < walker_list_.size(); ++iw) - walker_list_[iw]->PHindex = this->PHindex; + for (int iw = 0; iw < this->walker_list_.size(); ++iw) + this->walker_list_[iw]->PHindex = this->PHindex; ; } @@ -179,7 +179,7 @@ template void MCWalkerConfigurationT::saveEnsemble() { - saveEnsemble(walker_list_.begin(), walker_list_.end()); + saveEnsemble(this->walker_list_.begin(), this->walker_list_.end()); } /** save the [first,last) walkers to SampleStack @@ -211,14 +211,14 @@ MCWalkerConfigurationT::loadEnsemble() int nsamples = std::min(samples.getMaxSamples(), samples.getNumSamples()); if (samples.empty() || nsamples == 0) return; - Walker_t::PropertyContainer_t prop( + typename Walker_t::PropertyContainer_t prop( 1, this->PropertyList.size(), 1, WP::MAXPROPERTIES); - walker_list_.resize(nsamples); + this->walker_list_.resize(nsamples); for (int i = 0; i < nsamples; ++i) { auto awalker = std::make_unique(this->TotalNum); awalker->Properties.copy(prop); samples.getSample(i).convertToWalker(*awalker); - walker_list_[i] = std::move(awalker); + this->walker_list_[i] = std::move(awalker); } resizeWalkerHistories(); samples.clearEnsemble(); @@ -230,7 +230,7 @@ MCWalkerConfigurationT::dumpEnsemble( std::vector*>& others, HDFWalkerOutput& out, int np, int nBlock) { - WalkerConfigurations wctemp; + WalkerConfigurationsT wctemp; for (auto* mcwc : others) { const auto& astack(mcwc->getSampleStack()); const size_t sample_size = @@ -277,18 +277,18 @@ MCWalkerConfigurationT::loadEnsemble( } int nw_tot = off.back(); if (nw_tot) { - Walker_t::PropertyContainer_t prop( + typename Walker_t::PropertyContainer_t prop( 1, this->PropertyList.size(), 1, 
WP::MAXPROPERTIES); - while (walker_list_.size()) - pop_back(); - walker_list_.resize(nw_tot); + while (this->walker_list_.size()) + this->pop_back(); + this->walker_list_.resize(nw_tot); for (int i = 0; i < others.size(); ++i) { SampleStackT& astack(others[i]->getSampleStack()); for (int j = 0, iw = off[i]; iw < off[i + 1]; ++j, ++iw) { auto awalker = std::make_unique(this->TotalNum); awalker->Properties.copy(prop); astack.getSample(j).convertToWalker(*awalker); - walker_list_[iw] = std::move(awalker); + this->walker_list_[iw] = std::move(awalker); } if (doclean) others[i]->clearEnsemble(); @@ -305,9 +305,18 @@ MCWalkerConfigurationT::clearEnsemble() samples.clearEnsemble(); } +#ifndef QMC_COMPLEX +#ifndef MIXED_PRECISION template class MCWalkerConfigurationT; +#else template class MCWalkerConfigurationT; +#endif +#else +#ifndef MIXED_PRECISION template class MCWalkerConfigurationT>; +#else template class MCWalkerConfigurationT>; +#endif +#endif } // namespace qmcplusplus diff --git a/src/Particle/MCWalkerConfigurationT.h b/src/Particle/MCWalkerConfigurationT.h index 49a159e51d..4de261c12a 100644 --- a/src/Particle/MCWalkerConfigurationT.h +++ b/src/Particle/MCWalkerConfigurationT.h @@ -29,7 +29,7 @@ #include "Particle/ParticleSetT.h" #include "Particle/SampleStackT.h" #include "Particle/Walker.h" -#include "Particle/WalkerConfigurations.h" +#include "Particle/WalkerConfigurationsT.h" #include "Utilities/IteratorUtility.h" namespace qmcplusplus @@ -60,7 +60,7 @@ class ReptileT; template class MCWalkerConfigurationT : public ParticleSetT, - public WalkerConfigurations + public WalkerConfigurationsT { public: /**enumeration for update*/ @@ -71,16 +71,16 @@ class MCWalkerConfigurationT : Update_Particle /// move a particle by particle }; - using Walker_t = WalkerConfigurations::Walker_t; + using Walker_t = typename WalkerConfigurationsT::Walker_t; /// container type of the Properties of a Walker - using PropertyContainer_t = Walker_t::PropertyContainer_t; + using 
PropertyContainer_t = typename Walker_t::PropertyContainer_t; /// container type of Walkers using WalkerList_t = std::vector>; /// FIX: a type alias of iterator for an object should not be for just one /// of many objects it holds. - using iterator = WalkerList_t::iterator; + using iterator = typename WalkerList_t::iterator; /// const_iterator of Walker container - using const_iterator = WalkerList_t::const_iterator; + using const_iterator = typename WalkerList_t::const_iterator; using ReptileList_t = UPtrVector>; @@ -104,7 +104,7 @@ class MCWalkerConfigurationT : resize(int numWalkers, int numPtcls); /// clean up the walker list - using WalkerConfigurations::clear; + using WalkerConfigurationsT::clear; /// resize Walker::PropertyHistory and Walker::PHindex: void resizeWalkerHistories(); diff --git a/src/Particle/PSdispatcher.cpp b/src/Particle/PSdispatcher.cpp index 44dbb23965..d86e8fd8a6 100644 --- a/src/Particle/PSdispatcher.cpp +++ b/src/Particle/PSdispatcher.cpp @@ -64,7 +64,7 @@ void PSdispatcher::flex_accept_rejectMove(const RefVectorWithLeader bool forward_mode) const { if (use_batch_) - ParticleSet::mw_accept_rejectMove(p_list, iat, isAccepted, forward_mode); + ParticleSet::mw_accept_rejectMoveT(p_list, iat, isAccepted, forward_mode); else for (size_t iw = 0; iw < p_list.size(); iw++) p_list[iw].accept_rejectMove(iat, isAccepted[iw], forward_mode); diff --git a/src/Particle/PSdispatcher.h b/src/Particle/PSdispatcher.h index 50b03c13a6..36e714adb4 100644 --- a/src/Particle/PSdispatcher.h +++ b/src/Particle/PSdispatcher.h @@ -13,6 +13,7 @@ #ifndef QMCPLUSPLUS_PSDISPATCH_H #define QMCPLUSPLUS_PSDISPATCH_H +#include "MCCoords.hpp" #include "ParticleSet.h" namespace qmcplusplus diff --git a/src/Particle/ParticleIO/XMLParticleIO.cpp b/src/Particle/ParticleIO/XMLParticleIO.cpp index e5c29cbc59..ee0cfba8f2 100644 --- a/src/Particle/ParticleIO/XMLParticleIO.cpp +++ b/src/Particle/ParticleIO/XMLParticleIO.cpp @@ -878,12 +878,13 @@ void 
XMLParticleParserT::getPtclAttrib(xmlNodePtr cur, int in_offset, int cop } } +#ifndef QMC_COMPLEX template class XMLParticleParserT; template class XMLParticleParserT; +#else template class XMLParticleParserT>; template class XMLParticleParserT>; - - +#endif XMLSaveParticle::XMLSaveParticle(Particle_t& pin) : ref_(pin) {} diff --git a/src/Particle/ParticleSet.BC.cpp b/src/Particle/ParticleSet.BC.cpp deleted file mode 100644 index 2c1469f556..0000000000 --- a/src/Particle/ParticleSet.BC.cpp +++ /dev/null @@ -1,194 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -/**@file ParticleSet.BC.cpp - * @brief definition of functions controlling Boundary Conditions - */ -#include "Particle/ParticleSet.h" -#include "Particle/FastParticleOperators.h" -#include "Concurrency/OpenMP.h" -#include "LongRange/StructFact.h" - -namespace qmcplusplus -{ -/** Creating StructureFactor - * - * Currently testing only 1 component for PBCs. - */ -void ParticleSet::createSK() -{ - if (structure_factor_) - throw std::runtime_error("Report bug! structure_factor_ has already been created. 
Unexpected call sequence."); - - auto& Lattice = getLattice(); - auto& LRBox = getLRBox(); - if (Lattice.explicitly_defined) - convert2Cart(R); //make sure that R is in Cartesian coordinates - - if (Lattice.SuperCellEnum != SUPERCELL_OPEN) - { - app_log() << "\n Creating Structure Factor for periodic systems " << LRBox.LR_kc << std::endl; - structure_factor_ = std::make_unique(LRBox, simulation_cell_.getKLists()); - } - - //set the mass array - int beforemass = my_species_.numAttributes(); - int massind = my_species_.addAttribute("mass"); - if (beforemass == massind) - { - app_log() << " ParticleSet::createSK setting mass of " << getName() << " to 1.0" << std::endl; - for (int ig = 0; ig < my_species_.getTotalNum(); ++ig) - my_species_(massind, ig) = 1.0; - } - for (int iat = 0; iat < GroupID.size(); iat++) - Mass[iat] = my_species_(massind, GroupID[iat]); - - coordinates_->setAllParticlePos(R); -} - -void ParticleSet::turnOnPerParticleSK() -{ - if (structure_factor_) - structure_factor_->turnOnStorePerParticle(*this); - else - throw std::runtime_error("ParticleSet::turnOnPerParticleSK trying to turn on per particle storage in " - "structure_factor_ but structure_factor_ has not been created."); -} - -bool ParticleSet::getPerParticleSKState() const -{ - bool isPerParticleOn = false; - if (structure_factor_) - isPerParticleOn = structure_factor_->isStorePerParticle(); - return isPerParticleOn; -} - -void ParticleSet::convert(const ParticlePos& pin, ParticlePos& pout) -{ - if (pin.getUnit() == pout.getUnit()) - { - pout = pin; - return; - } - if (pin.getUnit() == PosUnit::Lattice) - //convert to CartesianUnit - { - ConvertPosUnit::apply(pin, getLattice().R, pout, 0, pin.size()); - } - else - //convert to getLattice()Unit - { - ConvertPosUnit::apply(pin, getLattice().G, pout, 0, pin.size()); - } -} - -void ParticleSet::convert2Unit(const ParticlePos& pin, ParticlePos& pout) -{ - pout.setUnit(PosUnit::Lattice); - if (pin.getUnit() == PosUnit::Lattice) - pout = pin; - 
else - ConvertPosUnit::apply(pin, getLattice().G, pout, 0, pin.size()); -} - -void ParticleSet::convert2Cart(const ParticlePos& pin, ParticlePos& pout) -{ - pout.setUnit(PosUnit::Cartesian); - if (pin.getUnit() == PosUnit::Cartesian) - pout = pin; - else - ConvertPosUnit::apply(pin, getLattice().R, pout, 0, pin.size()); -} - -void ParticleSet::convert2Unit(ParticlePos& pinout) -{ - if (pinout.getUnit() == PosUnit::Lattice) - return; - else - { - pinout.setUnit(PosUnit::Lattice); - ConvertPosUnit::apply(pinout, getLattice().G, 0, pinout.size()); - } -} - -void ParticleSet::convert2Cart(ParticlePos& pinout) -{ - if (pinout.getUnit() == PosUnit::Cartesian) - return; - else - { - pinout.setUnit(PosUnit::Cartesian); - ConvertPosUnit::apply(pinout, getLattice().R, 0, pinout.size()); - } -} - -void ParticleSet::applyBC(const ParticlePos& pin, ParticlePos& pout) { applyBC(pin, pout, 0, pin.size()); } - -void ParticleSet::applyBC(const ParticlePos& pin, ParticlePos& pout, int first, int last) -{ - if (pin.getUnit() == PosUnit::Cartesian) - { - if (pout.getUnit() == PosUnit::Cartesian) - ApplyBConds::Cart2Cart(pin, getLattice().G, getLattice().R, pout, first, last); - else if (pout.getUnit() == PosUnit::Lattice) - ApplyBConds::Cart2Unit(pin, getLattice().G, pout, first, last); - else - throw std::runtime_error("Unknown unit conversion"); - } - else if (pin.getUnit() == PosUnit::Lattice) - { - if (pout.getUnit() == PosUnit::Cartesian) - ApplyBConds::Unit2Cart(pin, getLattice().R, pout, first, last); - else if (pout.getUnit() == PosUnit::Lattice) - ApplyBConds::Unit2Unit(pin, pout, first, last); - else - throw std::runtime_error("Unknown unit conversion"); - } - else - throw std::runtime_error("Unknown unit conversion"); -} - -void ParticleSet::applyBC(ParticlePos& pos) -{ - if (pos.getUnit() == PosUnit::Lattice) - { - ApplyBConds::Unit2Unit(pos, 0, TotalNum); - } - else - { - ApplyBConds::Cart2Cart(pos, getLattice().G, getLattice().R, 0, TotalNum); - } -} - -void 
ParticleSet::applyMinimumImage(ParticlePos& pinout) -{ - if (getLattice().SuperCellEnum == SUPERCELL_OPEN) - return; - for (int i = 0; i < pinout.size(); ++i) - getLattice().applyMinimumImage(pinout[i]); -} - -void ParticleSet::convert2UnitInBox(const ParticlePos& pin, ParticlePos& pout) -{ - pout.setUnit(PosUnit::Lattice); - convert2Unit(pin, pout); // convert to crystalline unit - put2box(pout); -} - -void ParticleSet::convert2CartInBox(const ParticlePos& pin, ParticlePos& pout) -{ - convert2UnitInBox(pin, pout); // convert to crystalline unit - convert2Cart(pout); -} -} // namespace qmcplusplus diff --git a/src/Particle/ParticleSet.cpp b/src/Particle/ParticleSet.cpp deleted file mode 100644 index d297c7444f..0000000000 --- a/src/Particle/ParticleSet.cpp +++ /dev/null @@ -1,1018 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2020 QMCPACK developers. -// -// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign -// Luke Shulenburger, lshulen@sandia.gov, Sandia National Laboratories -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include -#include -#include "ParticleSet.h" -#include "Particle/DynamicCoordinatesBuilder.h" -#include "Particle/DistanceTable.h" -#include "Particle/createDistanceTable.h" -#include "LongRange/StructFact.h" -#include "Utilities/IteratorUtility.h" -#include "Utilities/RandomGenerator.h" -#include "ParticleBase/RandomSeqGeneratorGlobal.h" -#include "ResourceCollection.h" - -namespace qmcplusplus -{ -using WP = WalkerProperties::Indexes; - -enum PSetTimers -{ - PS_newpos, - PS_donePbyP, - PS_accept, - PS_loadWalker, - PS_update, - PS_dt_move, - PS_mw_copy -}; - -static const TimerNameList_t generatePSetTimerNames(std::string& obj_name) -{ - return {{PS_newpos, "ParticleSet:" + obj_name + "::computeNewPosDT"}, - {PS_donePbyP, "ParticleSet:" + obj_name + "::donePbyP"}, - {PS_accept, "ParticleSet:" + obj_name + "::acceptMove"}, - {PS_loadWalker, "ParticleSet:" + obj_name + "::loadWalker"}, - {PS_update, "ParticleSet:" + obj_name + "::update"}, - {PS_dt_move, "ParticleSet:" + obj_name + "::dt_move"}, - {PS_mw_copy, "ParticleSet:" + obj_name + "::mw_copy"}}; -} - -ParticleSet::ParticleSet(const SimulationCell& simulation_cell, const DynamicCoordinateKind kind) - : quantum_domain(classical), - Properties(0, 0, 1, WP::MAXPROPERTIES), - simulation_cell_(simulation_cell), - same_mass_(true), - is_spinor_(false), - active_ptcl_(-1), - active_spin_val_(0.0), - myTimers(getGlobalTimerManager(), generatePSetTimerNames(myName), timer_level_medium), - myTwist(0.0), - ParentName("0"), - TotalNum(0), - group_offsets_(std::make_shared>>()), - coordinates_(createDynamicCoordinates(kind)) -{ - initPropertyList(); -} - -ParticleSet::ParticleSet(const 
ParticleSet& p) - : Properties(p.Properties), - simulation_cell_(p.simulation_cell_), - same_mass_(true), - is_spinor_(false), - active_ptcl_(-1), - active_spin_val_(0.0), - my_species_(p.getSpeciesSet()), - myTimers(getGlobalTimerManager(), generatePSetTimerNames(myName), timer_level_medium), - myTwist(0.0), - ParentName(p.parentName()), - group_offsets_(p.group_offsets_), - coordinates_(p.coordinates_->makeClone()) -{ - setQuantumDomain(p.quantum_domain); - - resize(p.getTotalNum()); - R.InUnit = p.R.InUnit; - R = p.R; - spins = p.spins; - GroupID = p.GroupID; - is_spinor_ = p.is_spinor_; - - //need explicit copy: - Mass = p.Mass; - Z = p.Z; - //std::ostringstream o; - //o<setName(o.str()); - //app_log() << " Copying a particle set " << p.getName() << " to " << this->getName() << " groups=" << groups() << std::endl; - myName = p.getName(); - PropertyList.Names = p.PropertyList.Names; - PropertyList.Values = p.PropertyList.Values; - PropertyHistory = p.PropertyHistory; - Collectables = p.Collectables; - //construct the distance tables with the same order - for (int i = 0; i < p.DistTables.size(); ++i) - addTable(p.DistTables[i]->get_origin(), p.DistTables[i]->getModes()); - - if (p.structure_factor_) - structure_factor_ = std::make_unique(*p.structure_factor_); - myTwist = p.myTwist; - - G = p.G; - L = p.L; -} - -ParticleSet::~ParticleSet() = default; - -void ParticleSet::create(const std::vector& agroup) -{ - auto& group_offsets(*group_offsets_); - group_offsets.resize(agroup.size() + 1); - group_offsets[0] = 0; - for (int is = 0; is < agroup.size(); is++) - group_offsets[is + 1] = group_offsets[is] + agroup[is]; - group_offsets.updateTo(); - const size_t nsum = group_offsets[agroup.size()]; - resize(nsum); - TotalNum = nsum; - int loc = 0; - for (int i = 0; i < agroup.size(); i++) - for (int j = 0; j < agroup[i]; j++, loc++) - GroupID[loc] = i; -} - -void ParticleSet::setQuantumDomain(quantum_domains qdomain) -{ - if (quantumDomainValid(qdomain)) - 
quantum_domain = qdomain; - else - throw std::runtime_error("ParticleSet::setQuantumDomain\n input quantum domain is not valid for particles"); -} - -void ParticleSet::resetGroups() -{ - const int nspecies = my_species_.getTotalNum(); - // Usually an empty ParticleSet indicates an error in the input file, - // but in some cases it is useful. Allow an empty ParticleSet if it - // has the special name "empty". - if (nspecies == 0 && getName() != "empty") - { - throw std::runtime_error("ParticleSet::resetGroups() Failed. No species exisits"); - } - int natt = my_species_.numAttributes(); - int qind = my_species_.addAttribute("charge"); - if (natt == qind) - { - app_log() << " Missing charge attribute of the SpeciesSet " << myName << " particleset" << std::endl; - app_log() << " Assume neutral particles Z=0.0 " << std::endl; - for (int ig = 0; ig < nspecies; ig++) - my_species_(qind, ig) = 0.0; - } - for (int iat = 0; iat < Z.size(); iat++) - Z[iat] = my_species_(qind, GroupID[iat]); - natt = my_species_.numAttributes(); - int massind = my_species_.addAttribute("mass"); - if (massind == natt) - { - for (int ig = 0; ig < nspecies; ig++) - my_species_(massind, ig) = 1.0; - } - same_mass_ = true; - double m0 = my_species_(massind, 0); - for (int ig = 1; ig < nspecies; ig++) - same_mass_ &= (my_species_(massind, ig) == m0); - if (same_mass_) - app_log() << " All the species have the same mass " << m0 << std::endl; - else - app_log() << " Distinctive masses for each species " << std::endl; - for (int iat = 0; iat < Mass.size(); iat++) - Mass[iat] = my_species_(massind, GroupID[iat]); - - int membersize = my_species_.addAttribute("membersize"); - for (int ig = 0; ig < nspecies; ++ig) - my_species_(membersize, ig) = groupsize(ig); - - for (int iat = 0; iat < GroupID.size(); iat++) - assert(GroupID[iat] < nspecies); -} - -void ParticleSet::randomizeFromSource(ParticleSet& src) -{ - SpeciesSet& srcSpSet(src.getSpeciesSet()); - SpeciesSet& spSet(getSpeciesSet()); - int 
srcChargeIndx = srcSpSet.addAttribute("charge"); - int srcMemberIndx = srcSpSet.addAttribute("membersize"); - int ChargeIndex = spSet.addAttribute("charge"); - int MemberIndx = spSet.addAttribute("membersize"); - int Nsrc = src.getTotalNum(); - int Nptcl = getTotalNum(); - int NumSpecies = spSet.TotalNum; - int NumSrcSpecies = srcSpSet.TotalNum; - //Store information about charges and number of each species - std::vector Zat, Zspec, NofSpecies, NofSrcSpecies, CurElec; - Zat.resize(Nsrc); - Zspec.resize(NumSrcSpecies); - NofSpecies.resize(NumSpecies); - CurElec.resize(NumSpecies); - NofSrcSpecies.resize(NumSrcSpecies); - for (int spec = 0; spec < NumSrcSpecies; spec++) - { - Zspec[spec] = (int)round(srcSpSet(srcChargeIndx, spec)); - NofSrcSpecies[spec] = (int)round(srcSpSet(srcMemberIndx, spec)); - } - for (int spec = 0; spec < NumSpecies; spec++) - { - NofSpecies[spec] = (int)round(spSet(MemberIndx, spec)); - CurElec[spec] = first(spec); - } - int totQ = 0; - for (int iat = 0; iat < Nsrc; iat++) - totQ += Zat[iat] = Zspec[src.GroupID[iat]]; - app_log() << " Total ion charge = " << totQ << std::endl; - totQ -= Nptcl; - app_log() << " Total system charge = " << totQ << std::endl; - // Now, loop over ions, attaching electrons to them to neutralize - // charge - int spToken = 0; - // This is decremented when we run out of electrons in each species - int spLeft = NumSpecies; - std::vector gaussRand(Nptcl); - makeGaussRandom(gaussRand); - for (int iat = 0; iat < Nsrc; iat++) - { - // Loop over electrons to add, selecting round-robin from the - // electron species - int z = Zat[iat]; - while (z > 0 && spLeft) - { - int sp = spToken++ % NumSpecies; - if (NofSpecies[sp]) - { - NofSpecies[sp]--; - z--; - int elec = CurElec[sp]++; - app_log() << " Assigning " << (sp ? 
"down" : "up ") << " electron " << elec << " to ion " << iat - << " with charge " << z << std::endl; - double radius = 0.5 * std::sqrt((double)Zat[iat]); - R[elec] = src.R[iat] + radius * gaussRand[elec]; - } - else - spLeft--; - } - } - // Assign remaining electrons - int ion = 0; - for (int sp = 0; sp < NumSpecies; sp++) - { - for (int ie = 0; ie < NofSpecies[sp]; ie++) - { - int iat = ion++ % Nsrc; - double radius = std::sqrt((double)Zat[iat]); - int elec = CurElec[sp]++; - R[elec] = src.R[iat] + radius * gaussRand[elec]; - } - } -} - -void ParticleSet::print(std::ostream& os, const size_t maxParticlesToPrint) const -{ - os << " ParticleSet '" << getName() << "' contains " << TotalNum << " particles : "; - if (auto& group_offsets(*group_offsets_); group_offsets.size() > 0) - for (int i = 0; i < group_offsets.size() - 1; i++) - os << " " << my_species_.speciesName[i] << "(" << group_offsets[i + 1] - group_offsets[i] << ")"; - os << std::endl << std::endl; - - const size_t numToPrint = maxParticlesToPrint == 0 ? TotalNum : std::min(TotalNum, maxParticlesToPrint); - - for (int i = 0; i < numToPrint; i++) - { - os << " " << my_species_.speciesName[GroupID[i]] << R[i] << std::endl; - } - if (numToPrint < TotalNum) - { - os << " (... 
and " << (TotalNum - numToPrint) << " more particle positions ...)" << std::endl; - } - os << std::endl; - - for (const std::string& description : distTableDescriptions) - os << description; - os << std::endl; -} - -bool ParticleSet::get(std::ostream& is) const { return true; } -bool ParticleSet::put(std::istream& is) { return true; } -void ParticleSet::reset() { app_log() << "<<<< going to set properties >>>> " << std::endl; } - -///read the particleset -bool ParticleSet::put(xmlNodePtr cur) { return true; } - -int ParticleSet::addTable(const ParticleSet& psrc, DTModes modes) -{ - if (myName == "none" || psrc.getName() == "none") - throw std::runtime_error("ParticleSet::addTable needs proper names for both source and target particle sets."); - - int tid; - std::map::iterator tit(myDistTableMap.find(psrc.getName())); - if (tit == myDistTableMap.end()) - { - std::ostringstream description; - tid = DistTables.size(); - if (myName == psrc.getName()) - DistTables.push_back(createDistanceTable(*this, description)); - else - DistTables.push_back(createDistanceTable(psrc, *this, description)); - distTableDescriptions.push_back(description.str()); - myDistTableMap[psrc.getName()] = tid; - app_debug() << " ... ParticleSet::addTable Create Table #" << tid << " " << DistTables[tid]->getName() - << std::endl; - } - else - { - tid = (*tit).second; - app_debug() << " ... 
ParticleSet::addTable Reuse Table #" << tid << " " << DistTables[tid]->getName() << std::endl; - } - - DistTables[tid]->setModes(DistTables[tid]->getModes() | modes); - - app_log().flush(); - return tid; -} - -const DistanceTableAA& ParticleSet::getDistTableAA(int table_ID) const -{ - return dynamic_cast(*DistTables[table_ID]); -} - -const DistanceTableAB& ParticleSet::getDistTableAB(int table_ID) const -{ - return dynamic_cast(*DistTables[table_ID]); -} - -void ParticleSet::update(bool skipSK) -{ - ScopedTimer update_scope(myTimers[PS_update]); - - coordinates_->setAllParticlePos(R); - for (int i = 0; i < DistTables.size(); i++) - DistTables[i]->evaluate(*this); - if (!skipSK && structure_factor_) - structure_factor_->updateAllPart(*this); - - active_ptcl_ = -1; -} - -void ParticleSet::mw_update(const RefVectorWithLeader& p_list, bool skipSK) -{ - auto& p_leader = p_list.getLeader(); - ScopedTimer update_scope(p_leader.myTimers[PS_update]); - - for (ParticleSet& pset : p_list) - pset.coordinates_->setAllParticlePos(pset.R); - - auto& dts = p_leader.DistTables; - for (int i = 0; i < dts.size(); ++i) - { - const auto dt_list(extractDTRefList(p_list, i)); - dts[i]->mw_evaluate(dt_list, p_list); - } - - if (!skipSK && p_leader.structure_factor_) - for (int iw = 0; iw < p_list.size(); iw++) - p_list[iw].structure_factor_->updateAllPart(p_list[iw]); -} - -void ParticleSet::makeMove(Index_t iat, const SingleParticlePos& displ, bool maybe_accept) -{ - active_ptcl_ = iat; - active_pos_ = R[iat] + displ; - active_spin_val_ = spins[iat]; - computeNewPosDistTables(iat, active_pos_, maybe_accept); -} - -void ParticleSet::makeMoveWithSpin(Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl) -{ - makeMove(iat, displ); - active_spin_val_ += sdispl; -} - -template -void ParticleSet::mw_makeMove(const RefVectorWithLeader& p_list, Index_t iat, const MCCoords& displs) -{ - mw_makeMove(p_list, iat, displs.positions); - if constexpr (CT == CoordsType::POS_SPIN) - 
mw_makeSpinMove(p_list, iat, displs.spins); -} - -void ParticleSet::mw_makeMove(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& displs) -{ - std::vector new_positions; - new_positions.reserve(displs.size()); - - for (int iw = 0; iw < p_list.size(); iw++) - { - p_list[iw].active_ptcl_ = iat; - p_list[iw].active_pos_ = p_list[iw].R[iat] + displs[iw]; - new_positions.push_back(p_list[iw].active_pos_); - } - - mw_computeNewPosDistTables(p_list, iat, new_positions); -} - -void ParticleSet::mw_makeSpinMove(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& sdispls) -{ - for (int iw = 0; iw < p_list.size(); iw++) - p_list[iw].active_spin_val_ = p_list[iw].spins[iat] + sdispls[iw]; -} - -bool ParticleSet::makeMoveAndCheck(Index_t iat, const SingleParticlePos& displ) -{ - active_ptcl_ = iat; - active_pos_ = R[iat] + displ; - active_spin_val_ = spins[iat]; - bool is_valid = true; - auto& Lattice = simulation_cell_.getLattice(); - if (Lattice.explicitly_defined) - { - if (Lattice.outOfBound(Lattice.toUnit(displ))) - is_valid = false; - else - { - SingleParticlePos newRedPos = Lattice.toUnit(active_pos_); - if (!Lattice.isValid(newRedPos)) - is_valid = false; - } - } - computeNewPosDistTables(iat, active_pos_, true); - return is_valid; -} - -bool ParticleSet::makeMoveAndCheckWithSpin(Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl) -{ - bool is_valid = makeMoveAndCheck(iat, displ); - active_spin_val_ += sdispl; - return is_valid; -} - -void ParticleSet::computeNewPosDistTables(Index_t iat, const SingleParticlePos& newpos, bool maybe_accept) -{ - ScopedTimer compute_newpos_scope(myTimers[PS_newpos]); - - for (int i = 0; i < DistTables.size(); ++i) - DistTables[i]->move(*this, newpos, iat, maybe_accept); -} - -void ParticleSet::mw_computeNewPosDistTables(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& new_positions, - bool maybe_accept) -{ - ParticleSet& p_leader = p_list.getLeader(); - 
ScopedTimer compute_newpos_scope(p_leader.myTimers[PS_newpos]); - - { - ScopedTimer copy_scope(p_leader.myTimers[PS_mw_copy]); - const auto coords_list(extractCoordsRefList(p_list)); - p_leader.coordinates_->mw_copyActivePos(coords_list, iat, new_positions); - } - - { - ScopedTimer dt_scope(p_leader.myTimers[PS_dt_move]); - const int dist_tables_size = p_leader.DistTables.size(); - for (int i = 0; i < dist_tables_size; ++i) - { - const auto dt_list(extractDTRefList(p_list, i)); - p_leader.DistTables[i]->mw_move(dt_list, p_list, new_positions, iat, maybe_accept); - } - - // DistTables mw_move calls are asynchronous. Wait for them before return. - PRAGMA_OFFLOAD("omp taskwait") - } -} - - -bool ParticleSet::makeMoveAllParticles(const Walker_t& awalker, const ParticlePos& deltaR, RealType dt) -{ - active_ptcl_ = -1; - auto& Lattice = simulation_cell_.getLattice(); - if (Lattice.explicitly_defined) - { - for (int iat = 0; iat < deltaR.size(); ++iat) - { - SingleParticlePos displ(dt * deltaR[iat]); - if (Lattice.outOfBound(Lattice.toUnit(displ))) - return false; - SingleParticlePos newpos(awalker.R[iat] + displ); - if (!Lattice.isValid(Lattice.toUnit(newpos))) - return false; - R[iat] = newpos; - } - } - else - { - for (int iat = 0; iat < deltaR.size(); ++iat) - R[iat] = awalker.R[iat] + dt * deltaR[iat]; - } - coordinates_->setAllParticlePos(R); - for (int i = 0; i < DistTables.size(); i++) - DistTables[i]->evaluate(*this); - if (structure_factor_) - structure_factor_->updateAllPart(*this); - //every move is valid - return true; -} - -bool ParticleSet::makeMoveAllParticles(const Walker_t& awalker, - const ParticlePos& deltaR, - const std::vector& dt) -{ - active_ptcl_ = -1; - auto& Lattice = simulation_cell_.getLattice(); - if (Lattice.explicitly_defined) - { - for (int iat = 0; iat < deltaR.size(); ++iat) - { - SingleParticlePos displ(dt[iat] * deltaR[iat]); - if (Lattice.outOfBound(Lattice.toUnit(displ))) - return false; - SingleParticlePos newpos(awalker.R[iat] + 
displ); - if (!Lattice.isValid(Lattice.toUnit(newpos))) - return false; - R[iat] = newpos; - } - } - else - { - for (int iat = 0; iat < deltaR.size(); ++iat) - R[iat] = awalker.R[iat] + dt[iat] * deltaR[iat]; - } - coordinates_->setAllParticlePos(R); - for (int i = 0; i < DistTables.size(); i++) - DistTables[i]->evaluate(*this); - if (structure_factor_) - structure_factor_->updateAllPart(*this); - //every move is valid - return true; -} - -/** move a walker by dt*deltaR + drift - * @param awalker initial walker configuration - * @param drift drift vector - * @param deltaR random displacement - * @param dt timestep - * @return true, if all the particle moves are legal under the boundary conditions - */ -bool ParticleSet::makeMoveAllParticlesWithDrift(const Walker_t& awalker, - const ParticlePos& drift, - const ParticlePos& deltaR, - RealType dt) -{ - active_ptcl_ = -1; - auto& Lattice = simulation_cell_.getLattice(); - if (Lattice.explicitly_defined) - { - for (int iat = 0; iat < deltaR.size(); ++iat) - { - SingleParticlePos displ(dt * deltaR[iat] + drift[iat]); - if (Lattice.outOfBound(Lattice.toUnit(displ))) - return false; - SingleParticlePos newpos(awalker.R[iat] + displ); - if (!Lattice.isValid(Lattice.toUnit(newpos))) - return false; - R[iat] = newpos; - } - } - else - { - for (int iat = 0; iat < deltaR.size(); ++iat) - R[iat] = awalker.R[iat] + dt * deltaR[iat] + drift[iat]; - } - coordinates_->setAllParticlePos(R); - for (int i = 0; i < DistTables.size(); i++) - DistTables[i]->evaluate(*this); - if (structure_factor_) - structure_factor_->updateAllPart(*this); - //every move is valid - return true; -} - -bool ParticleSet::makeMoveAllParticlesWithDrift(const Walker_t& awalker, - const ParticlePos& drift, - const ParticlePos& deltaR, - const std::vector& dt) -{ - active_ptcl_ = -1; - auto& Lattice = simulation_cell_.getLattice(); - if (Lattice.explicitly_defined) - { - for (int iat = 0; iat < deltaR.size(); ++iat) - { - SingleParticlePos displ(dt[iat] * 
deltaR[iat] + drift[iat]); - if (Lattice.outOfBound(Lattice.toUnit(displ))) - return false; - SingleParticlePos newpos(awalker.R[iat] + displ); - if (!Lattice.isValid(Lattice.toUnit(newpos))) - return false; - R[iat] = newpos; - } - } - else - { - for (int iat = 0; iat < deltaR.size(); ++iat) - R[iat] = awalker.R[iat] + dt[iat] * deltaR[iat] + drift[iat]; - } - coordinates_->setAllParticlePos(R); - - for (int i = 0; i < DistTables.size(); i++) - DistTables[i]->evaluate(*this); - if (structure_factor_) - structure_factor_->updateAllPart(*this); - //every move is valid - return true; -} - -/** update the particle attribute by the proposed move - * - * When the active_ptcl_ is equal to iat, overwrite the position and update the - * content of the distance tables. - */ -void ParticleSet::acceptMove(Index_t iat) -{ -#ifndef NDEBUG - if (iat != active_ptcl_) - throw std::runtime_error("Bug detected by acceptMove! Request electron is not active!"); -#endif - ScopedTimer update_scope(myTimers[PS_accept]); - //Update position + distance-table - coordinates_->setOneParticlePos(active_pos_, iat); - for (int i = 0; i < DistTables.size(); i++) - DistTables[i]->update(iat); - - R[iat] = active_pos_; - spins[iat] = active_spin_val_; - active_ptcl_ = -1; -} - -void ParticleSet::acceptMoveForwardMode(Index_t iat) -{ - assert(iat == active_ptcl_); - ScopedTimer update_scope(myTimers[PS_accept]); - //Update position + distance-table - coordinates_->setOneParticlePos(active_pos_, iat); - for (int i = 0; i < DistTables.size(); i++) - DistTables[i]->updatePartial(iat, true); - - R[iat] = active_pos_; - spins[iat] = active_spin_val_; - active_ptcl_ = -1; -} - -void ParticleSet::accept_rejectMove(Index_t iat, bool accepted, bool forward_mode) -{ - if (forward_mode) - if (accepted) - acceptMoveForwardMode(iat); - else - rejectMoveForwardMode(iat); - else if (accepted) - acceptMove(iat); - else - rejectMove(iat); -} - -void ParticleSet::rejectMove(Index_t iat) -{ -#ifndef NDEBUG - if (iat 
!= active_ptcl_) - throw std::runtime_error("Bug detected by rejectMove! Request electron is not active!"); -#endif - active_ptcl_ = -1; -} - -void ParticleSet::rejectMoveForwardMode(Index_t iat) -{ - assert(iat == active_ptcl_); - //Update distance-table - for (int i = 0; i < DistTables.size(); i++) - DistTables[i]->updatePartial(iat, false); - active_ptcl_ = -1; -} - -template -void ParticleSet::mw_accept_rejectMove(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& isAccepted, - bool forward_mode) -{ - if constexpr (CT == CoordsType::POS_SPIN) - mw_accept_rejectSpinMove(p_list, iat, isAccepted); - mw_accept_rejectMove(p_list, iat, isAccepted, forward_mode); -} - - -void ParticleSet::mw_accept_rejectMove(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& isAccepted, - bool forward_mode) -{ - if (forward_mode) - { - ParticleSet& p_leader = p_list.getLeader(); - ScopedTimer update_scope(p_leader.myTimers[PS_accept]); - - const auto coords_list(extractCoordsRefList(p_list)); - std::vector new_positions; - new_positions.reserve(p_list.size()); - for (const ParticleSet& pset : p_list) - new_positions.push_back(pset.active_pos_); - p_leader.coordinates_->mw_acceptParticlePos(coords_list, iat, new_positions, isAccepted); - - auto& dts = p_leader.DistTables; - for (int i = 0; i < dts.size(); ++i) - { - const auto dt_list(extractDTRefList(p_list, i)); - dts[i]->mw_updatePartial(dt_list, iat, isAccepted); - } - - for (int iw = 0; iw < p_list.size(); iw++) - { - assert(iat == p_list[iw].active_ptcl_); - if (isAccepted[iw]) - p_list[iw].R[iat] = p_list[iw].active_pos_; - p_list[iw].active_ptcl_ = -1; - assert(p_list[iw].R[iat] == p_list[iw].coordinates_->getAllParticlePos()[iat]); - } - } - else - { - // loop over single walker acceptMove/rejectMove doesn't work safely. 
- // need to code carefully for both coordinate and distance table updates - // disable non-forward mode cases - if (!forward_mode) - throw std::runtime_error("BUG calling mw_accept_rejectMove in non-forward mode"); - } -} - -void ParticleSet::mw_accept_rejectSpinMove(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& isAccepted) -{ - for (int iw = 0; iw < p_list.size(); iw++) - { - assert(iat == p_list[iw].active_ptcl_); - if (isAccepted[iw]) - p_list[iw].spins[iat] = p_list[iw].active_spin_val_; - } -} - -void ParticleSet::donePbyP(bool skipSK) -{ - ScopedTimer donePbyP_scope(myTimers[PS_donePbyP]); - coordinates_->donePbyP(); - if (!skipSK && structure_factor_) - structure_factor_->updateAllPart(*this); - for (size_t i = 0; i < DistTables.size(); ++i) - DistTables[i]->finalizePbyP(*this); - active_ptcl_ = -1; -} - -void ParticleSet::mw_donePbyP(const RefVectorWithLeader& p_list, bool skipSK) -{ - ParticleSet& p_leader = p_list.getLeader(); - ScopedTimer donePbyP_scope(p_leader.myTimers[PS_donePbyP]); - - for (ParticleSet& pset : p_list) - { - pset.coordinates_->donePbyP(); - pset.active_ptcl_ = -1; - } - - if (!skipSK && p_leader.structure_factor_) - { - auto sk_list = extractSKRefList(p_list); - StructFact::mw_updateAllPart(sk_list, p_list, p_leader.mw_structure_factor_data_handle_); - } - - auto& dts = p_leader.DistTables; - for (int i = 0; i < dts.size(); ++i) - { - const auto dt_list(extractDTRefList(p_list, i)); - dts[i]->mw_finalizePbyP(dt_list, p_list); - } -} - -void ParticleSet::makeVirtualMoves(const SingleParticlePos& newpos) -{ - active_ptcl_ = -1; - active_pos_ = newpos; - for (size_t i = 0; i < DistTables.size(); ++i) - DistTables[i]->move(*this, newpos, active_ptcl_, false); -} - -void ParticleSet::loadWalker(Walker_t& awalker, bool pbyp) -{ - ScopedTimer update_scope(myTimers[PS_loadWalker]); - R = awalker.R; - spins = awalker.spins; - coordinates_->setAllParticlePos(R); -#if !defined(SOA_MEMORY_OPTIMIZED) - G = awalker.G; - L 
= awalker.L; -#endif - if (pbyp) - { - // in certain cases, full tables must be ready - for (int i = 0; i < DistTables.size(); i++) - if (DistTables[i]->getModes() & DTModes::NEED_FULL_TABLE_ANYTIME) - DistTables[i]->evaluate(*this); - } - - active_ptcl_ = -1; -} - -void ParticleSet::mw_loadWalker(const RefVectorWithLeader& p_list, - const RefVector& walkers, - const std::vector& recompute, - bool pbyp) -{ - auto& p_leader = p_list.getLeader(); - ScopedTimer load_scope(p_leader.myTimers[PS_loadWalker]); - - auto loadWalkerConfig = [](ParticleSet& pset, Walker_t& awalker) { - pset.R = awalker.R; - pset.spins = awalker.spins; - pset.coordinates_->setAllParticlePos(pset.R); - }; - for (int iw = 0; iw < p_list.size(); ++iw) - if (recompute[iw]) - loadWalkerConfig(p_list[iw], walkers[iw]); - - if (pbyp) - { - auto& dts = p_leader.DistTables; - for (int i = 0; i < dts.size(); ++i) - { - const auto dt_list(extractDTRefList(p_list, i)); - dts[i]->mw_recompute(dt_list, p_list, recompute); - } - } -} - -void ParticleSet::saveWalker(Walker_t& awalker) -{ - awalker.R = R; - awalker.spins = spins; -#if !defined(SOA_MEMORY_OPTIMIZED) - awalker.G = G; - awalker.L = L; -#endif -} - -void ParticleSet::mw_saveWalker(const RefVectorWithLeader& psets, const RefVector& walkers) -{ - for (int iw = 0; iw < psets.size(); ++iw) - psets[iw].saveWalker(walkers[iw]); -} - - -void ParticleSet::initPropertyList() -{ - PropertyList.clear(); - //Need to add the default Properties according to the enumeration - PropertyList.add("LogPsi"); - PropertyList.add("SignPsi"); - PropertyList.add("UmbrellaWeight"); - PropertyList.add("R2Accepted"); - PropertyList.add("R2Proposed"); - PropertyList.add("DriftScale"); - PropertyList.add("AltEnergy"); - PropertyList.add("LocalEnergy"); - PropertyList.add("LocalPotential"); - - // There is no point in checking this, its quickly not consistent as other objects update property list. 
- // if (PropertyList.size() != WP::NUMPROPERTIES) - // { - // app_error() << "The number of default properties for walkers is not consistent." << std::endl; - // app_error() << "NUMPROPERTIES " << WP::NUMPROPERTIES << " size of PropertyList " << PropertyList.size() << std::endl; - // throw std::runtime_error("ParticleSet::initPropertyList"); - // } -} - -int ParticleSet::addPropertyHistory(int leng) -{ - int newL = PropertyHistory.size(); - PropertyHistory.push_back(std::vector(leng, 0.0)); - PHindex.push_back(0); - return newL; -} - -// void ParticleSet::resetPropertyHistory( ) -// { -// for(int i=0;icreateResource(collection); - for (int i = 0; i < DistTables.size(); i++) - DistTables[i]->createResource(collection); - if (structure_factor_) - collection.addResource(std::make_unique()); -} - -void ParticleSet::acquireResource(ResourceCollection& collection, const RefVectorWithLeader& p_list) -{ - auto& ps_leader = p_list.getLeader(); - ps_leader.coordinates_->acquireResource(collection, extractCoordsRefList(p_list)); - for (int i = 0; i < ps_leader.DistTables.size(); i++) - ps_leader.DistTables[i]->acquireResource(collection, extractDTRefList(p_list, i)); - - if (ps_leader.structure_factor_) - p_list.getLeader().mw_structure_factor_data_handle_ = collection.lendResource(); -} - -void ParticleSet::releaseResource(ResourceCollection& collection, const RefVectorWithLeader& p_list) -{ - auto& ps_leader = p_list.getLeader(); - ps_leader.coordinates_->releaseResource(collection, extractCoordsRefList(p_list)); - for (int i = 0; i < ps_leader.DistTables.size(); i++) - ps_leader.DistTables[i]->releaseResource(collection, extractDTRefList(p_list, i)); - - if (ps_leader.structure_factor_) - collection.takebackResource(p_list.getLeader().mw_structure_factor_data_handle_); -} - -RefVectorWithLeader ParticleSet::extractDTRefList(const RefVectorWithLeader& p_list, int id) -{ - RefVectorWithLeader dt_list(*p_list.getLeader().DistTables[id]); - dt_list.reserve(p_list.size()); - 
for (ParticleSet& p : p_list) - dt_list.push_back(*p.DistTables[id]); - return dt_list; -} - -RefVectorWithLeader ParticleSet::extractCoordsRefList( - const RefVectorWithLeader& p_list) -{ - RefVectorWithLeader coords_list(*p_list.getLeader().coordinates_); - coords_list.reserve(p_list.size()); - for (ParticleSet& p : p_list) - coords_list.push_back(*p.coordinates_); - return coords_list; -} - -RefVectorWithLeader ParticleSet::extractSKRefList(const RefVectorWithLeader& p_list) -{ - RefVectorWithLeader sk_list(*p_list.getLeader().structure_factor_); - sk_list.reserve(p_list.size()); - for (ParticleSet& p : p_list) - sk_list.push_back(*p.structure_factor_); - return sk_list; -} - -//explicit instantiations -template void ParticleSet::mw_makeMove(const RefVectorWithLeader& p_list, - Index_t iat, - const MCCoords& displs); -template void ParticleSet::mw_makeMove(const RefVectorWithLeader& p_list, - Index_t iat, - const MCCoords& displs); -template void ParticleSet::mw_accept_rejectMove(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& isAccepted, - bool forward_mode); -template void ParticleSet::mw_accept_rejectMove(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& isAccepted, - bool forward_mode); -} // namespace qmcplusplus diff --git a/src/Particle/ParticleSet.h b/src/Particle/ParticleSet.h index 2c23502190..de86f32a21 100644 --- a/src/Particle/ParticleSet.h +++ b/src/Particle/ParticleSet.h @@ -19,682 +19,12 @@ #ifndef QMCPLUSPLUS_PARTICLESET_H #define QMCPLUSPLUS_PARTICLESET_H -#include -#include -#include "ParticleTags.h" -#include "DynamicCoordinates.h" -#include "Walker.h" -#include "ResourceHandle.h" -#include "SpeciesSet.h" -#include "Pools/PooledData.h" -#include "OhmmsPETE/OhmmsArray.h" -#include "Utilities/TimerManager.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "type_traits/template_types.hpp" -#include "SimulationCell.h" -#include "MCCoords.hpp" -#include "DTModes.h" +#include "Particle/ParticleSetT.h" 
+#include "Particle/SimulationCell.h" namespace qmcplusplus { -///forward declaration of DistanceTable -class DistanceTable; -class DistanceTableAA; -class DistanceTableAB; -class ResourceCollection; -class StructFact; -struct SKMultiWalkerMem; - -/** Specialized paritlce class for atomistic simulations - * - * Derived from QMCTraits, ParticleBase and OhmmsElementBase. - * The ParticleLayout class represents a supercell with/without periodic boundary - * conditions. The ParticleLayout class also takes care of spatial decompositions - * for efficient evaluations for the interactions with a finite cutoff. - */ -class ParticleSet : public QMCTraits, public OhmmsElementBase, public PtclOnLatticeTraits -{ -public: - /// walker type - using Walker_t = Walker; - /// container type to store the property - using PropertyContainer_t = Walker_t::PropertyContainer_t; - /// buffer type for a serialized buffer - using Buffer_t = PooledData; - - enum quantum_domains - { - no_quantum_domain = 0, - classical, - quantum - }; - - ///quantum_domain of the particles, default = classical - quantum_domains quantum_domain; - - //@{ public data members - ///Species ID - ParticleIndex GroupID; - ///Position - ParticlePos R; - ///internal spin variables for dynamical spin calculations - ParticleScalar spins; - ///gradients of the particles - ParticleGradient G; - ///laplacians of the particles - ParticleLaplacian L; - ///mass of each particle - ParticleScalar Mass; - ///charge of each particle - ParticleScalar Z; - - ///the index of the active bead for particle-by-particle moves - Index_t activeBead; - ///the direction reptile traveling - Index_t direction; - - ///Particle density in G-space for MPC interaction - std::vector> DensityReducedGvecs; - std::vector Density_G; - Array Density_r; - - /// DFT potential - std::vector> VHXCReducedGvecs; - std::vector VHXC_G[2]; - Array VHXC_r[2]; - - /** name-value map of Walker Properties - * - * PropertyMap is used to keep the name-value mapping of 
- * Walker_t::Properties. PropertyList::Values are not - * necessarily updated during the simulations. - */ - PropertySetType PropertyList; - - /** properties of the current walker - * - * The internal order is identical to PropertyList, which holds - * the matching names. - */ - PropertyContainer_t Properties; - - /** observables in addition to those registered in Properties/PropertyList - * - * Such observables as density, gofr, sk are not stored per walker but - * collected during QMC iterations. - */ - Buffer_t Collectables; - - ///Property history vector - std::vector> PropertyHistory; - std::vector PHindex; - ///@} - - ///current MC step - int current_step; - - ///default constructor - ParticleSet(const SimulationCell& simulation_cell, const DynamicCoordinateKind kind = DynamicCoordinateKind::DC_POS); - - ///copy constructor - ParticleSet(const ParticleSet& p); - - ///default destructor - ~ParticleSet() override; - - /** create grouped particles - * @param agroup number of particles per group - */ - void create(const std::vector& agroup); - - /** print particle coordinates to a std::ostream - * @param os output stream - * @param maxParticlesToPrint maximal number of particles to print. Pass 0 to print all. - */ - void print(std::ostream& os, const size_t maxParticlesToPrint = 0) const; - - ///dummy. For satisfying OhmmsElementBase. - bool get(std::ostream& os) const override; - ///dummy. For satisfying OhmmsElementBase. - bool put(std::istream&) override; - ///dummy. For satisfying OhmmsElementBase. 
- void reset() override; - - ///initialize ParticleSet from xmlNode - bool put(xmlNodePtr cur) override; - - ///specify quantum_domain of particles - void setQuantumDomain(quantum_domains qdomain); - - void set_quantum() { quantum_domain = quantum; } - - inline bool is_classical() const { return quantum_domain == classical; } - - inline bool is_quantum() const { return quantum_domain == quantum; } - - ///check whether quantum domain is valid for particles - inline bool quantumDomainValid(quantum_domains qdomain) const { return qdomain != no_quantum_domain; } - - ///check whether quantum domain is valid for particles - inline bool quantumDomainValid() const { return quantumDomainValid(quantum_domain); } - - /** add a distance table - * @param psrc source particle set - * @param modes bitmask DistanceTable::DTModes - * - * if this->myName == psrc.getName(), AA type. Otherwise, AB type. - */ - int addTable(const ParticleSet& psrc, DTModes modes = DTModes::ALL_OFF); - - ///get a distance table by table_ID - inline auto& getDistTable(int table_ID) const { return *DistTables[table_ID]; } - ///get a distance table by table_ID and dyanmic_cast to DistanceTableAA - const DistanceTableAA& getDistTableAA(int table_ID) const; - ///get a distance table by table_ID and dyanmic_cast to DistanceTableAB - const DistanceTableAB& getDistTableAB(int table_ID) const; - - /** reset all the collectable quantities during a MC iteration - */ - inline void resetCollectables() { std::fill(Collectables.begin(), Collectables.end(), 0.0); } - - /** update the internal data - *@param skip SK update if skipSK is true - */ - void update(bool skipSK = false); - - /// batched version of update - static void mw_update(const RefVectorWithLeader& p_list, bool skipSK = false); - - /** create Structure Factor with PBCs - */ - void createSK(); - - bool hasSK() const { return bool(structure_factor_); } - - /** return Structure Factor - */ - const StructFact& getSK() const - { - assert(structure_factor_); - 
return *structure_factor_; - }; - - /** Turn on per particle storage in Structure Factor - */ - void turnOnPerParticleSK(); - - /** Get state (on/off) of per particle storage in Structure Factor - */ - bool getPerParticleSKState() const; - - ///retrun the SpeciesSet of this particle set - inline SpeciesSet& getSpeciesSet() { return my_species_; } - ///retrun the const SpeciesSet of this particle set - inline const SpeciesSet& getSpeciesSet() const { return my_species_; } - - ///return parent's name - inline const std::string& parentName() const { return ParentName; } - inline void setName(const std::string& aname) - { - myName = aname; - if (ParentName == "0") - { - ParentName = aname; - } - } - - inline const DynamicCoordinates& getCoordinates() const { return *coordinates_; } - - void resetGroups(); - - const auto& getSimulationCell() const { return simulation_cell_; } - const auto& getLattice() const { return simulation_cell_.getLattice(); } - auto& getPrimitiveLattice() const { return const_cast(simulation_cell_.getPrimLattice()); } - const auto& getLRBox() const { return simulation_cell_.getLRBox(); } - - inline bool isSameMass() const { return same_mass_; } - inline bool isSpinor() const { return is_spinor_; } - inline void setSpinor(bool is_spinor) { is_spinor_ = is_spinor; } - - /// return active particle id - inline Index_t getActivePtcl() const { return active_ptcl_; } - inline const PosType& getActivePos() const { return active_pos_; } - inline Scalar_t getActiveSpinVal() const { return active_spin_val_; } - - /// return the active position if the particle is active or the return current position if not - inline const PosType& activeR(int iat) const - { - // When active_ptcl_ == iat, a move has been proposed. - return (active_ptcl_ == iat) ? 
active_pos_ : R[iat]; - } - - /// return the active spin value if the particle is active or return the current spin value if not - inline const Scalar_t& activeSpin(int iat) const - { - // When active_ptcl_ == iat, a move has been proposed. - return (active_ptcl_ == iat) ? active_spin_val_ : spins[iat]; - } - - /** move the iat-th particle to active_pos_ - * @param iat the index of the particle to be moved - * @param displ the displacement of the iat-th particle position - * @param maybe_accept if false, the caller guarantees that the proposed move will not be accepted. - * - * Update active_ptcl_ index and active_pos_ position (R[iat]+displ) for a proposed move. - * Evaluate the related distance table data DistanceTable::Temp. - * If maybe_accept = false, certain operations for accepting moves will be skipped for optimal performance. - */ - void makeMove(Index_t iat, const SingleParticlePos& displ, bool maybe_accept = true); - /// makeMove, but now includes an update to the spin variable - void makeMoveWithSpin(Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl); - - /// batched version of makeMove - template - static void mw_makeMove(const RefVectorWithLeader& p_list, int iat, const MCCoords& displs); - - static void mw_makeMove(const RefVectorWithLeader& p_list, - int iat, - const std::vector& displs); - - /// batched version makeMove for spin variable only - static void mw_makeSpinMove(const RefVectorWithLeader& p_list, - int iat, - const std::vector& sdispls); - - /** move the iat-th particle to active_pos_ - * @param iat the index of the particle to be moved - * @param displ random displacement of the iat-th particle - * @return true, if the move is valid - * - * Update active_ptcl_ index and active_pos_ position (R[iat]+displ) for a proposed move. - * Evaluate the related distance table data DistanceTable::Temp. - * - * When a Lattice is defined, passing two checks makes a move valid. 
- * outOfBound(displ): invalid move, if displ is larger than half, currently, of the box in any direction - * isValid(Lattice.toUnit(active_pos_)): invalid move, if active_pos_ goes out of the Lattice in any direction marked with open BC. - * Note: active_pos_ and distances tables are always evaluated no matter the move is valid or not. - */ - bool makeMoveAndCheck(Index_t iat, const SingleParticlePos& displ); - /// makeMoveAndCheck, but now includes an update to the spin variable - bool makeMoveAndCheckWithSpin(Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl); - - /** Handles virtual moves for all the particles to a single newpos. - * - * The state active_ptcl_ remains -1 and rejectMove is not needed. - * acceptMove can not be used. - * See QMCHamiltonians::MomentumEstimator as an example - */ - void makeVirtualMoves(const SingleParticlePos& newpos); - - /** move all the particles of a walker - * @param awalker the walker to operate - * @param deltaR proposed displacement - * @param dt factor of deltaR - * @return true if all the moves are legal. - * - * If big displacements or illegal positions are detected, return false. - * If all good, R = awalker.R + dt* deltaR - */ - bool makeMoveAllParticles(const Walker_t& awalker, const ParticlePos& deltaR, RealType dt); - - bool makeMoveAllParticles(const Walker_t& awalker, const ParticlePos& deltaR, const std::vector& dt); - - /** move all the particles including the drift - * - * Otherwise, everything is the same as makeMove for a walker - */ - bool makeMoveAllParticlesWithDrift(const Walker_t& awalker, - const ParticlePos& drift, - const ParticlePos& deltaR, - RealType dt); - - bool makeMoveAllParticlesWithDrift(const Walker_t& awalker, - const ParticlePos& drift, - const ParticlePos& deltaR, - const std::vector& dt); - - /** accept or reject a proposed move - * Two operation modes: - * The using and updating distance tables via `ParticleSet` operate in two modes, regular and forward modes. 
- * - * Regular mode - * The regular mode can only be used when the distance tables for particle pairs are fully up-to-date. - * This is the case after calling `ParticleSet::update()` in a unit test or after p-by-p moves in a QMC driver. - * In this mode, the distance tables remain up-to-date after calling `ParticleSet::acceptMove` - * and calling `ParticleSet::rejectMove` is not mandatory. - * - * Forward mode - * The forward mode assumes that distance table is not fully up-to-date until every particle is accepted - * or rejected to move once in order. This is the mode used in the p-by-p part of drivers. - * In this mode, calling `ParticleSet::accept_rejectMove` is required to handle accept/reject rather than - * calling individual `ParticleSet::acceptMove` and `ParticleSet::reject`. - * `ParticleSet::accept_rejectMove(iel)` ensures the distance tables (jel < iel) part is fully up-to-date - * regardless a move is accepted or rejected. For this reason, the rejecting operation inside - * `ParticleSet::accept_rejectMove` involves writing the distances with respect to the old particle position. - */ - void accept_rejectMove(Index_t iat, bool accepted, bool forward_mode = true); - - /** accept the move and update the particle attribute by the proposed move in regular mode - *@param iat the index of the particle whose position and other attributes to be updated - */ - void acceptMove(Index_t iat); - - /** reject a proposed move in regular mode - * @param iat the electron whose proposed move gets rejected. 
- */ - void rejectMove(Index_t iat); - - /// batched version of acceptMove and rejectMove fused, templated on CoordsType - template - static void mw_accept_rejectMove(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& isAccepted, - bool forward_mode = true); - - /// batched version of acceptMove and rejectMove fused - static void mw_accept_rejectMove(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& isAccepted, - bool forward_mode = true); - - /** batched version of acceptMove and reject Move fused, but only for spins - * - * note: should be called BEFORE mw_accept_rejectMove since the active_ptcl_ gets reset to -1 - * This would cause the assertion that we have the right particle index to fail if done in the - * wrong order - */ - static void mw_accept_rejectSpinMove(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& isAccepted); - - void initPropertyList(); - inline int addProperty(const std::string& pname) { return PropertyList.add(pname.c_str()); } - - int addPropertyHistory(int leng); - // void rejectedMove(); - // void resetPropertyHistory( ); - // void addPropertyHistoryPoint(int index, RealType data); - - void convert(const ParticlePos& pin, ParticlePos& pout); - void convert2Unit(const ParticlePos& pin, ParticlePos& pout); - void convert2Cart(const ParticlePos& pin, ParticlePos& pout); - void convert2Unit(ParticlePos& pout); - void convert2Cart(ParticlePos& pout); - void convert2UnitInBox(const ParticlePos& pint, ParticlePos& pout); - void convert2CartInBox(const ParticlePos& pint, ParticlePos& pout); - - void applyBC(const ParticlePos& pin, ParticlePos& pout); - void applyBC(ParticlePos& pos); - void applyBC(const ParticlePos& pin, ParticlePos& pout, int first, int last); - void applyMinimumImage(ParticlePos& pinout); - - /** load a Walker_t to the current ParticleSet - * @param awalker the reference to the walker to be loaded - * @param pbyp true if it is used by PbyP update - * - * PbyP 
requires the distance tables and Sk with awalker.R - */ - void loadWalker(Walker_t& awalker, bool pbyp); - /** batched version of loadWalker */ - static void mw_loadWalker(const RefVectorWithLeader& p_list, - const RefVector& walkers, - const std::vector& recompute, - bool pbyp); - - /** save this to awalker - * - * just the R, G, and L - * More duplicate data that makes code difficult to reason about should be removed. - */ - void saveWalker(Walker_t& awalker); - - /** batched version of saveWalker - * - * just the R, G, and L - */ - static void mw_saveWalker(const RefVectorWithLeader& psets, const RefVector& walkers); - - /** update structure factor and unmark active_ptcl_ - *@param skip SK update if skipSK is true - * - * The Coulomb interaction evaluation needs the structure factor. - * For these reason, call donePbyP after the loop of single - * electron moves before evaluating the Hamiltonian. Unmark - * active_ptcl_ is more of a safety measure probably not needed. - */ - void donePbyP(bool skipSK = false); - /// batched version of donePbyP - static void mw_donePbyP(const RefVectorWithLeader& p_list, bool skipSK = false); - - ///return the address of the values of Hamiltonian terms - inline FullPrecRealType* restrict getPropertyBase() { return Properties.data(); } - - ///return the address of the values of Hamiltonian terms - inline const FullPrecRealType* restrict getPropertyBase() const { return Properties.data(); } - - ///return the address of the i-th properties - inline FullPrecRealType* restrict getPropertyBase(int i) { return Properties[i]; } - - ///return the address of the i-th properties - inline const FullPrecRealType* restrict getPropertyBase(int i) const { return Properties[i]; } - - inline void setTwist(const SingleParticlePos& t) { myTwist = t; } - inline const SingleParticlePos& getTwist() const { return myTwist; } - - /** Initialize particles around another ParticleSet - * Used to initialize an electron ParticleSet by an ion ParticleSet - */ 
- void randomizeFromSource(ParticleSet& src); - - /** get species name of particle i - */ - inline const std::string& species_from_index(int i) { return my_species_.speciesName[GroupID[i]]; } - - inline size_t getTotalNum() const { return TotalNum; } - - inline void clear() - { - TotalNum = 0; - - R.clear(); - spins.clear(); - GroupID.clear(); - G.clear(); - L.clear(); - Mass.clear(); - Z.clear(); - - coordinates_->resize(0); - } - - ///return the number of groups - inline int groups() const { return group_offsets_->size() - 1; } - - ///return the first index of a group i - inline int first(int igroup) const { return (*group_offsets_)[igroup]; } - - ///return the last index of a group i - inline int last(int igroup) const { return (*group_offsets_)[igroup + 1]; } - - ///return the group id of a given particle in the particle set. - inline int getGroupID(int iat) const - { - assert(iat >= 0 && iat < TotalNum); - return GroupID[iat]; - } - - ///return the size of a group - inline int groupsize(int igroup) const { return (*group_offsets_)[igroup + 1] - (*group_offsets_)[igroup]; } - - ///add attributes to list for IO - template - inline void createAttributeList(ATList& AttribList) - { - R.setTypeName(ParticleTags::postype_tag); - R.setObjName(ParticleTags::position_tag); - spins.setTypeName(ParticleTags::scalartype_tag); - spins.setObjName(ParticleTags::spins_tag); - GroupID.setTypeName(ParticleTags::indextype_tag); - GroupID.setObjName(ParticleTags::ionid_tag); - //add basic attributes - AttribList.add(R); - AttribList.add(spins); - AttribList.add(GroupID); - - G.setTypeName(ParticleTags::gradtype_tag); - L.setTypeName(ParticleTags::laptype_tag); - - G.setObjName("grad"); - L.setObjName("lap"); - - AttribList.add(G); - AttribList.add(L); - - //more particle attributes - Mass.setTypeName(ParticleTags::scalartype_tag); - Mass.setObjName("mass"); - AttribList.add(Mass); - - Z.setTypeName(ParticleTags::scalartype_tag); - Z.setObjName("charge"); - AttribList.add(Z); - } - 
- inline void setMapStorageToInput(const std::vector& mapping) { map_storage_to_input_ = mapping; } - inline const std::vector& get_map_storage_to_input() const { return map_storage_to_input_; } - - inline int getNumDistTables() const { return DistTables.size(); } - - inline auto& get_group_offsets() const { return *group_offsets_; } - - /// initialize a shared resource and hand it to a collection - void createResource(ResourceCollection& collection) const; - /** acquire external resource and assocaite it with the list of ParticleSet - * Note: use RAII ResourceCollectionTeamLock whenever possible - */ - static void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& p_list); - /** release external resource - * Note: use RAII ResourceCollectionTeamLock whenever possible - */ - static void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& p_list); - - static RefVectorWithLeader extractDTRefList(const RefVectorWithLeader& p_list, int id); - static RefVectorWithLeader extractCoordsRefList(const RefVectorWithLeader& p_list); - static RefVectorWithLeader extractSKRefList(const RefVectorWithLeader& p_list); - -protected: - /// reference to global simulation cell - const SimulationCell& simulation_cell_; - - ///true if the particles have the same mass - bool same_mass_; - ///true is a dynamic spin calculation - bool is_spinor_; - /** the index of the active particle during particle-by-particle moves - * - * when a single particle move is proposed, the particle id is assigned to active_ptcl_ - * No matter the move is accepted or rejected, active_ptcl_ is marked back to -1. - * This state flag is used for picking coordinates and distances for SPO evaluation. 
- */ - Index_t active_ptcl_; - ///the proposed position of active_ptcl_ during particle-by-particle moves - SingleParticlePos active_pos_; - ///the proposed spin of active_ptcl_ during particle-by-particle moves - Scalar_t active_spin_val_; - - /** Map storage index to the input index. - * If not empty, particles were reordered by groups when being loaded from XML input. - * When other input data are affected by reordering, its builder should query this mapping. - * map_storage_to_input_[5] = 2 means the index 5(6th) particle in this ParticleSet was read from - * the index 2(3th) particle in the XML input - */ - std::vector map_storage_to_input_; - - ///SpeciesSet of particles - SpeciesSet my_species_; - - ///Structure factor - std::unique_ptr structure_factor_; - - ///multi walker structure factor data - ResourceHandle mw_structure_factor_data_handle_; - - /** map to handle distance tables - * - * myDistTableMap[source-particle-tag]= locator in the distance table - * myDistTableMap[ObjectTag] === 0 - */ - std::map myDistTableMap; - - /// distance tables that need to be updated by moving this ParticleSet - std::vector> DistTables; - - /// Descriptions from distance table creation. Same order as DistTables. - std::vector distTableDescriptions; - - TimerList_t myTimers; - - SingleParticlePos myTwist; - - std::string ParentName; - - ///total number of particles - size_t TotalNum; - - ///array to handle a group of distinct particles per species - std::shared_ptr>> group_offsets_; - - ///internal representation of R. It can be an SoA copy of R - std::unique_ptr coordinates_; - - /** compute temporal DistTables and SK for a new particle position - * - * @param iat the particle that is moved on a sphere - * @param newpos a new particle position - * @param maybe_accept if false, the caller guarantees that the proposed move will not be accepted. 
- */ - void computeNewPosDistTables(Index_t iat, const SingleParticlePos& newpos, bool maybe_accept = true); - - - /** compute temporal DistTables and SK for a new particle position for each walker in a batch - * - * @param p_list the list of wrapped ParticleSet references in a walker batch - * @param iat the particle that is moved on a sphere - * @param new_positions new particle positions - * @param maybe_accept if false, the caller guarantees that the proposed move will not be accepted. - */ - static void mw_computeNewPosDistTables(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& new_positions, - bool maybe_accept = true); - - /** actual implemenation for accepting a proposed move in forward mode - * - * @param iat the index of the particle whose position and other attributes to be updated - */ - void acceptMoveForwardMode(Index_t iat); - - /** reject a proposed move in forward mode - * @param iat the electron whose proposed move gets rejected. - */ - void rejectMoveForwardMode(Index_t iat); - - /// resize internal storage - inline void resize(size_t numPtcl) - { - TotalNum = numPtcl; - - R.resize(numPtcl); - spins.resize(numPtcl); - GroupID.resize(numPtcl); - G.resize(numPtcl); - L.resize(numPtcl); - Mass.resize(numPtcl); - Z.resize(numPtcl); - - coordinates_->resize(numPtcl); - } -}; +using ParticleSet = ParticleSetT; } // namespace qmcplusplus #endif diff --git a/src/Particle/ParticleSetPool.cpp b/src/Particle/ParticleSetPool.cpp deleted file mode 100644 index 7f4cb7f3a5..0000000000 --- a/src/Particle/ParticleSetPool.cpp +++ /dev/null @@ -1,239 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2020 QMCPACK developers. 
-// -// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -/**@file ParticleSetPool.cpp - * @brief Implements ParticleSetPool operators. - */ -#include "ParticleSetPool.h" -#include "ParticleBase/RandomSeqGenerator.h" -#include "ParticleIO/XMLParticleIO.h" -#include "ParticleIO/LatticeIO.h" -#include "Utilities/ProgressReportEngine.h" -#include "OhmmsData/AttributeSet.h" -#include "OhmmsData/Libxml2Doc.h" -#include "Particle/InitMolecularSystem.h" -#include "LongRange/LRCoulombSingleton.h" -#include -#include - -namespace qmcplusplus -{ -ParticleSetPool::ParticleSetPool(Communicate* c, const char* aname) - : MPIObjectBase(c), simulation_cell_(std::make_unique()) -{ - ClassName = "ParticleSetPool"; - myName = aname; -} - -ParticleSetPool::ParticleSetPool(ParticleSetPool&& other) noexcept - : MPIObjectBase(other.myComm), simulation_cell_(std::move(other.simulation_cell_)), myPool(std::move(other.myPool)) -{ - ClassName = other.ClassName; - myName = other.myName; -} - -ParticleSetPool::~ParticleSetPool() = default; - -ParticleSet* ParticleSetPool::getParticleSet(const std::string& pname) -{ - if (auto pit = myPool.find(pname); pit == myPool.end()) - return nullptr; - else - return pit->second.get(); -} - -MCWalkerConfiguration* ParticleSetPool::getWalkerSet(const std::string& pname) -{ - auto mc = dynamic_cast(getParticleSet(pname)); - if (mc == nullptr) - { - throw std::runtime_error("ParticleSePool::getWalkerSet missing " + pname); - } - return mc; -} - -void 
ParticleSetPool::addParticleSet(std::unique_ptr&& p) -{ - const auto pit(myPool.find(p->getName())); - if (pit == myPool.end()) - { - auto& pname = p->getName(); - LOGMSG(" Adding " << pname << " ParticleSet to the pool") - if (&p->getSimulationCell() != simulation_cell_.get()) - throw std::runtime_error("Bug detected! ParticleSetPool::addParticleSet requires p created with the simulation " - "cell from ParticleSetPool."); - myPool.emplace(pname, std::move(p)); - } - else - throw std::runtime_error(p->getName() + " exists. Cannot be added again."); -} - -bool ParticleSetPool::readSimulationCellXML(xmlNodePtr cur) -{ - ReportEngine PRE("ParticleSetPool", "putLattice"); - - bool lattice_defined = false; - try - { - LatticeParser a(simulation_cell_->lattice_); - lattice_defined = a.put(cur); - } - catch (const UniformCommunicateError& ue) - { - myComm->barrier_and_abort(ue.what()); - } - - if (lattice_defined) - { - app_log() << " Overwriting global supercell " << std::endl; - simulation_cell_->resetLRBox(); - if (outputManager.isHighActive()) - simulation_cell_->lattice_.print(app_log(), 2); - else - simulation_cell_->lattice_.print(app_summary(), 1); - } - return lattice_defined; -} - -/** process an xml element - * @param cur current xmlNodePtr - * @return true, if successful. - * - * Creating MCWalkerConfiguration for all the ParticleSet - * objects. 
- */ -bool ParticleSetPool::put(xmlNodePtr cur) -{ - ReportEngine PRE("ParticleSetPool", "put"); - std::string id("e"); - std::string role("none"); - std::string randomR("no"); - std::string randomsrc; - std::string useGPU; - std::string spinor; - OhmmsAttributeSet pAttrib; - pAttrib.add(id, "id"); - pAttrib.add(id, "name"); - pAttrib.add(role, "role"); - pAttrib.add(randomR, "random"); - pAttrib.add(randomsrc, "randomsrc"); - pAttrib.add(randomsrc, "random_source"); - pAttrib.add(spinor, "spinor", {"no", "yes"}); - pAttrib.add(useGPU, "gpu", CPUOMPTargetSelector::candidate_values); - pAttrib.put(cur); - //backward compatibility - if (id == "e" && role == "none") - role = "MC"; - ParticleSet* pTemp = getParticleSet(id); - if (pTemp == 0) - { - const bool use_offload = CPUOMPTargetSelector::selectPlatform(useGPU) == PlatformKind::OMPTARGET; - app_summary() << std::endl; - app_summary() << " Particle Set" << std::endl; - app_summary() << " ------------" << std::endl; - app_summary() << " Name: " << id << " Offload : " << (use_offload ? "yes" : "no") << std::endl; - app_summary() << std::endl; - - // select OpenMP offload implementation in ParticleSet. 
- if (use_offload) - pTemp = new MCWalkerConfiguration(*simulation_cell_, DynamicCoordinateKind::DC_POS_OFFLOAD); - else - pTemp = new MCWalkerConfiguration(*simulation_cell_, DynamicCoordinateKind::DC_POS); - - myPool.emplace(id, pTemp); - - try - { - XMLParticleParser pread(*pTemp); - pread.readXML(cur); - } - catch (const UniformCommunicateError& ue) - { - myComm->barrier_and_abort(ue.what()); - } - - //if random_source is given, create a node - if (randomR == "yes" && !randomsrc.empty()) - { - xmlNodePtr anode = xmlNewNode(NULL, (const xmlChar*)"init"); - xmlNewProp(anode, (const xmlChar*)"source", (const xmlChar*)randomsrc.c_str()); - xmlNewProp(anode, (const xmlChar*)"target", (const xmlChar*)id.c_str()); - randomize_nodes.push_back(anode); - } - pTemp->setName(id); - pTemp->setSpinor(spinor == "yes"); - app_summary() << " Particle set size: " << pTemp->getTotalNum() << " Groups : " << pTemp->groups() << std::endl; - app_summary() << std::endl; - return true; - } - else - { - app_warning() << "Particle set " << id << " is already created. Ignoring this section." << std::endl; - } - app_summary() << std::endl; - return true; -} - -void ParticleSetPool::randomize() -{ - app_log() << "ParticleSetPool::randomize " << randomize_nodes.size() << " ParticleSet" - << (randomize_nodes.size() == 1 ? "" : "s") << "." 
<< std::endl; - bool success = true; - for (int i = 0; i < randomize_nodes.size(); ++i) - { - InitMolecularSystem moinit(*this); - success &= moinit.put(randomize_nodes[i]); - xmlFreeNode(randomize_nodes[i]); - } - randomize_nodes.clear(); - if (!success) - throw std::runtime_error("ParticleSePool::randomize failed to randomize some Particlesets!"); -} - -bool ParticleSetPool::get(std::ostream& os) const -{ - os << "ParticleSetPool has: " << std::endl << std::endl; - os.setf(std::ios::scientific, std::ios::floatfield); - os.precision(14); - for (const auto& [name, pset] : myPool) - if (outputManager.isDebugActive()) - pset->print(os, 0); - else - pset->print(os, 10 /* maxParticlesToPrint */); - return true; -} - -void ParticleSetPool::output_particleset_info(Libxml2Document& doc, xmlNodePtr root) -{ - xmlNodePtr particles_info = doc.addChild(root, "particles"); - PoolType::const_iterator it(myPool.begin()), it_end(myPool.end()); - while (it != it_end) - { - xmlNodePtr particle = doc.addChild(particles_info, "particle"); - doc.addChild(particle, "name", (*it).second->getName()); - doc.addChild(particle, "size", (*it).second->getTotalNum()); - ++it; - } -} - -/** reset is used to initialize and evaluate the distance tables - */ -void ParticleSetPool::reset() -{ - for (const auto& [key, pset] : myPool) - pset->update(); -} - -} // namespace qmcplusplus diff --git a/src/Particle/ParticleSetPool.h b/src/Particle/ParticleSetPool.h index 63f252a997..08cc089678 100644 --- a/src/Particle/ParticleSetPool.h +++ b/src/Particle/ParticleSetPool.h @@ -18,107 +18,13 @@ #ifndef QMCPLUSPLUS_PARTICLESETPOOL_H #define QMCPLUSPLUS_PARTICLESETPOOL_H -#include "OhmmsData/OhmmsElementBase.h" -#include "Particle/MCWalkerConfiguration.h" -#include "Message/MPIObjectBase.h" -#include "SimulationCell.h" +#include "Configuration.h" +#include "Particle/ParticleSetPoolT.h" namespace qmcplusplus { -/** @ingroup qmcapp - * @brief Manage a collection of ParticleSet objects - * - * This object 
handles \ elements and - * functions as a builder class for ParticleSet objects. - */ -class ParticleSetPool : public MPIObjectBase -{ -public: - using PoolType = std::map>; - - /** constructor - * @param aname xml tag - */ - ParticleSetPool(Communicate* c, const char* aname = "particleset"); - ~ParticleSetPool(); - - ParticleSetPool(const ParticleSetPool&) = delete; - ParticleSetPool& operator=(const ParticleSetPool&) = delete; - ParticleSetPool(ParticleSetPool&& pset) noexcept; - ParticleSetPool& operator=(ParticleSetPool&&) = default; - - bool put(xmlNodePtr cur); - bool get(std::ostream& os) const; - void reset(); - - void output_particleset_info(Libxml2Document& doc, xmlNodePtr root); - - /** initialize the supercell shared by all the particle sets - * - * return value is never checked anywhere - * side effect simulation_cell_ UPtr is set - * to particle layout created on heap. - * This is later directly assigned to pset member variable Lattice. - */ - bool readSimulationCellXML(xmlNodePtr cur); - - ///return true, if the pool is empty - inline bool empty() const { return myPool.empty(); } - - /** add a ParticleSet* to the pool with its ownership transferred - * ParticleSet built outside the ParticleSetPool must be constructed with - * the simulation cell from this->simulation_cell_. - */ - void addParticleSet(std::unique_ptr&& p); - - /** get a named ParticleSet - * @param pname name of the ParticleSet - * @return a MCWalkerConfiguration object with pname - * - * When the named ParticleSet is not in this object, return 0. - */ - ParticleSet* getParticleSet(const std::string& pname); - - /** get a named MCWalkerConfiguration - * @param pname name of the MCWalkerConfiguration - * @return a MCWalkerConfiguration object with pname - * - * When the named MCWalkerConfiguration is not in this object, return 0. 
- */ - MCWalkerConfiguration* getWalkerSet(const std::string& pname); - - /** get the Pool object - */ - inline const PoolType& getPool() const { return myPool; } - - /// get simulation cell - const auto& getSimulationCell() const { return *simulation_cell_; } - - /// set simulation cell - void setSimulationCell(const SimulationCell& simulation_cell) { *simulation_cell_ = simulation_cell; } - /** randomize a particleset particleset/@random='yes' && particleset@random_source exists - */ - void randomize(); +using ParticleSetPool = ParticleSetPoolT; -private: - /** global simulation cell - * - * updated by - * - readSimulationCellXML() parsing element - * - setSimulationCell() - */ - std::unique_ptr simulation_cell_; - /** List of ParticleSet owned - * - * Each ParticleSet has to have a unique name which is used as a key for the map. - */ - PoolType myPool; - /** xml node for random initialization. - * - * randomize() process initializations just before starting qmc sections - */ - std::vector randomize_nodes; -}; } // namespace qmcplusplus #endif diff --git a/src/Particle/ParticleSetPoolT.cpp b/src/Particle/ParticleSetPoolT.cpp index 7100822214..010bd5de26 100644 --- a/src/Particle/ParticleSetPoolT.cpp +++ b/src/Particle/ParticleSetPoolT.cpp @@ -270,9 +270,17 @@ ParticleSetPoolT::reset() } // explicit instantiations +#ifndef QMC_COMPLEX +#ifndef MIXED_PRECISION template class ParticleSetPoolT; +#else template class ParticleSetPoolT; +#endif +#else +#ifndef MIXED_PRECISION template class ParticleSetPoolT>; +#else template class ParticleSetPoolT>; - +#endif +#endif } // namespace qmcplusplus diff --git a/src/Particle/ParticleSetT.cpp b/src/Particle/ParticleSetT.cpp index bc5f7518ab..2247ee14d2 100644 --- a/src/Particle/ParticleSetT.cpp +++ b/src/Particle/ParticleSetT.cpp @@ -33,6 +33,8 @@ #include "ResourceCollection.h" #include "Utilities/IteratorUtility.h" #include "Utilities/RandomGenerator.h" +#include "Particle/FastParticleOperators.h" +#include 
"Concurrency/OpenMP.h" #include #include @@ -1337,10 +1339,13 @@ ParticleSetT::convert2CartInBox(const ParticlePos& pin, ParticlePos& pout) } // explicit instantiations +//#ifndef QMC_COMPLEX template class ParticleSetT; template class ParticleSetT; +#ifdef QMC_COMPLEX template class ParticleSetT>; template class ParticleSetT>; +#endif template void ParticleSetT::mw_makeMove( diff --git a/src/Particle/ParticleSetT.h b/src/Particle/ParticleSetT.h index 10b627696a..c5b984cbbe 100644 --- a/src/Particle/ParticleSetT.h +++ b/src/Particle/ParticleSetT.h @@ -66,9 +66,12 @@ class ParticleSetT : public OhmmsElementBase { public: using RealType = typename ParticleSetTraits::RealType; + using ValueType = typename ParticleSetTraits::ValueType; + using GradType = typename ParticleSetTraits::GradType; using FullPrecRealType = typename ParticleSetTraits::FullPrecRealType; using ComplexType = typename ParticleSetTraits::ComplexType; using PosType = typename ParticleSetTraits::PosType; + using TensorType = typename ParticleSetTraits::TensorType; using PropertySetType = typename ParticleSetTraits::PropertySetType; @@ -94,6 +97,8 @@ class ParticleSetT : public OhmmsElementBase /// buffer type for a serialized buffer using Buffer_t = PooledData; + using SingleParticleValue = typename LatticeParticleTraits::SingleParticleValue; + enum quantum_domains { no_quantum_domain = 0, @@ -143,163 +148,116 @@ class ParticleSetT : public OhmmsElementBase * Walker_t::Properties. PropertyList::Values are not * necessarily updated during the simulations. */ - PropertySetType PropertyList; + PropertySetType PropertyList; - /** properties of the current walker + /** properties of the current walker * * The internal order is identical to PropertyList, which holds * the matching names. 
*/ - PropertyContainer_t Properties; + PropertyContainer_t Properties; - /** observables in addition to those registered in Properties/PropertyList + /** observables in addition to those registered in Properties/PropertyList * * Such observables as density, gofr, sk are not stored per walker but * collected during QMC iterations. */ - Buffer_t Collectables; + Buffer_t Collectables; - /// Property history vector - std::vector> PropertyHistory; - std::vector PHindex; - ///@} + /// Property history vector + std::vector> PropertyHistory; + std::vector PHindex; + ///@} - /// current MC step - int current_step; + /// current MC step + int current_step; - /// default constructor - ParticleSetT(const SimulationCellT& simulation_cell, - const DynamicCoordinateKind kind = DynamicCoordinateKind::DC_POS); + /// default constructor + ParticleSetT(const SimulationCellT& simulation_cell, + const DynamicCoordinateKind kind = DynamicCoordinateKind::DC_POS); - /// copy constructor - ParticleSetT(const ParticleSetT& p); + /// copy constructor + ParticleSetT(const ParticleSetT& p); - /// default destructor - ~ParticleSetT() override; + /// default destructor + ~ParticleSetT() override; - /** create grouped particles + /** create grouped particles * @param agroup number of particles per group */ - void - create(const std::vector& agroup); + void create(const std::vector& agroup); - /** print particle coordinates to a std::ostream + /** print particle coordinates to a std::ostream * @param os output stream * @param maxParticlesToPrint maximal number of particles to print. Pass 0 * to print all. */ - void - print(std::ostream& os, const size_t maxParticlesToPrint = 0) const; + void print(std::ostream& os, const size_t maxParticlesToPrint = 0) const; - /// dummy. For satisfying OhmmsElementBase. - bool - get(std::ostream& os) const override; - /// dummy. For satisfying OhmmsElementBase. - bool - put(std::istream&) override; - /// dummy. For satisfying OhmmsElementBase. 
- void - reset() override; + /// dummy. For satisfying OhmmsElementBase. + bool get(std::ostream& os) const override; + /// dummy. For satisfying OhmmsElementBase. + bool put(std::istream&) override; + /// dummy. For satisfying OhmmsElementBase. + void reset() override; - /// initialize ParticleSet from xmlNode - bool - put(xmlNodePtr cur) override; + /// initialize ParticleSet from xmlNode + bool put(xmlNodePtr cur) override; - /// specify quantum_domain of particles - void - setQuantumDomain(quantum_domains qdomain); + /// specify quantum_domain of particles + void setQuantumDomain(quantum_domains qdomain); - void - set_quantum() - { - quantum_domain = quantum; - } + void set_quantum() { quantum_domain = quantum; } - inline bool - is_classical() const - { - return quantum_domain == classical; - } + inline bool is_classical() const { return quantum_domain == classical; } - inline bool - is_quantum() const - { - return quantum_domain == quantum; - } + inline bool is_quantum() const { return quantum_domain == quantum; } - /// check whether quantum domain is valid for particles - inline bool - quantumDomainValid(quantum_domains qdomain) const - { - return qdomain != no_quantum_domain; - } + /// check whether quantum domain is valid for particles + inline bool quantumDomainValid(quantum_domains qdomain) const { return qdomain != no_quantum_domain; } - /// check whether quantum domain is valid for particles - inline bool - quantumDomainValid() const - { - return quantumDomainValid(quantum_domain); - } + /// check whether quantum domain is valid for particles + inline bool quantumDomainValid() const { return quantumDomainValid(quantum_domain); } - /** add a distance table + /** add a distance table * @param psrc source particle set * @param modes bitmask DistanceTable::DTModes * * if this->myName == psrc.getName(), AA type. Otherwise, AB type. 
*/ - int - addTable(const ParticleSetT& psrc, DTModes modes = DTModes::ALL_OFF); + int addTable(const ParticleSetT& psrc, DTModes modes = DTModes::ALL_OFF); - /// get a distance table by table_ID - inline auto& - getDistTable(int table_ID) const - { - return *DistTables[table_ID]; - } - /// get a distance table by table_ID and dyanmic_cast to DistanceTableAA - const DistanceTableAAT& - getDistTableAA(int table_ID) const; - /// get a distance table by table_ID and dyanmic_cast to DistanceTableAB - const DistanceTableABT& - getDistTableAB(int table_ID) const; + /// get a distance table by table_ID + inline auto& getDistTable(int table_ID) const { return *DistTables[table_ID]; } + /// get a distance table by table_ID and dyanmic_cast to DistanceTableAA + const DistanceTableAAT& getDistTableAA(int table_ID) const; + /// get a distance table by table_ID and dyanmic_cast to DistanceTableAB + const DistanceTableABT& getDistTableAB(int table_ID) const; - /** reset all the collectable quantities during a MC iteration + /** reset all the collectable quantities during a MC iteration */ - inline void - resetCollectables() - { - std::fill(Collectables.begin(), Collectables.end(), 0.0); - } + inline void resetCollectables() { std::fill(Collectables.begin(), Collectables.end(), 0.0); } - /** update the internal data + /** update the internal data *@param skip SK update if skipSK is true */ - void - update(bool skipSK = false); + void update(bool skipSK = false); - /// batched version of update - static void - mw_update( - const RefVectorWithLeader& p_list, bool skipSK = false); + /// batched version of update + static void mw_update(const RefVectorWithLeader& p_list, bool skipSK = false); - /** create Structure Factor with PBCs + /** create Structure Factor with PBCs */ - void - createSK(); + void createSK(); - bool - hasSK() const - { - return bool(structure_factor_); - } + bool hasSK() const { return bool(structure_factor_); } - /** return Structure Factor + /** return 
Structure Factor */ - const StructFactT& - getSK() const - { - assert(structure_factor_); - return *structure_factor_; + const StructFactT& getSK() const + { + assert(structure_factor_); + return *structure_factor_; }; /** Turn on per particle storage in Structure Factor diff --git a/src/Particle/ParticleSetTraits.h b/src/Particle/ParticleSetTraits.h index 299687aeec..7a54a08f1b 100644 --- a/src/Particle/ParticleSetTraits.h +++ b/src/Particle/ParticleSetTraits.h @@ -26,7 +26,7 @@ struct ParticleSetTraits using PosType = TinyVector; using GradType = TinyVector; // using HessType = Tensor; - // using TensorType = Tensor; + using TensorType = Tensor; // using GradHessType = TinyVector, DIM>; // using IndexVector = Vector; // using ValueVector = Vector; @@ -71,7 +71,7 @@ struct LatticeParticleTraits using Index_t = int; using Scalar_t = FullPrecRealType; using Complex_t = FullPrecComplexType; - using Tensor_t = ParticleTensorType; + using Tensor_t = Tensor; using ParticleIndex = ParticleAttrib; using ParticleScalar = ParticleAttrib; diff --git a/src/Particle/RealSpacePositions.h b/src/Particle/RealSpacePositions.h index b7fe5e44f5..e6ff6beaa7 100644 --- a/src/Particle/RealSpacePositions.h +++ b/src/Particle/RealSpacePositions.h @@ -15,51 +15,12 @@ #ifndef QMCPLUSPLUS_REALSPACE_POSITIONS_H #define QMCPLUSPLUS_REALSPACE_POSITIONS_H -#include "Particle/DynamicCoordinates.h" -#include "OhmmsSoA/VectorSoaContainer.h" +#include "Configuration.h" +#include "Particle/RealSpacePositionsT.h" namespace qmcplusplus { -/** Introduced to handle virtual moves and ratio computations, e.g. for non-local PP evaluations. 
- */ -class RealSpacePositions : public DynamicCoordinates -{ -public: - using ParticlePos = PtclOnLatticeTraits::ParticlePos; - using RealType = QMCTraits::RealType; - using PosType = QMCTraits::PosType; - - RealSpacePositions() : DynamicCoordinates(DynamicCoordinateKind::DC_POS) {} - - std::unique_ptr makeClone() override { return std::make_unique(*this); } - - void resize(size_t n) override { RSoA.resize(n); } - size_t size() const override { return RSoA.size(); } - - void setAllParticlePos(const ParticlePos& R) override - { - resize(R.size()); - RSoA.copyIn(R); - } - void setOneParticlePos(const PosType& pos, size_t iat) override { RSoA(iat) = pos; } - - void mw_acceptParticlePos(const RefVectorWithLeader& coords_list, - size_t iat, - const std::vector& new_positions, - const std::vector& isAccepted) const override - { - assert(this == &coords_list.getLeader()); - for (size_t iw = 0; iw < isAccepted.size(); iw++) - if (isAccepted[iw]) - coords_list[iw].setOneParticlePos(new_positions[iw], iat); - } - - const PosVectorSoa& getAllParticlePos() const override { return RSoA; } - PosType getOneParticlePos(size_t iat) const override { return RSoA[iat]; } +using RealSpacePositions = RealSpacePositionsT; -private: - ///particle positions in SoA layout - PosVectorSoa RSoA; -}; } // namespace qmcplusplus #endif diff --git a/src/Particle/RealSpacePositionsOMPTarget.h b/src/Particle/RealSpacePositionsOMPTarget.h index 310c71714a..9a7a34cf41 100644 --- a/src/Particle/RealSpacePositionsOMPTarget.h +++ b/src/Particle/RealSpacePositionsOMPTarget.h @@ -15,237 +15,12 @@ #ifndef QMCPLUSPLUS_REALSPACE_POSITIONS_OMPTARGET_H #define QMCPLUSPLUS_REALSPACE_POSITIONS_OMPTARGET_H -#include "Particle/DynamicCoordinates.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "OMPTarget/OMPallocator.hpp" -#include "Platforms/PinnedAllocator.h" -#include "ParticleSet.h" -#include "ResourceCollection.h" +#include "Configuration.h" +#include "Particle/RealSpacePositionsTOMPTarget.h" namespace 
qmcplusplus { -/** Introduced to handle virtual moves and ratio computations, e.g. for non-local PP evaluations. - */ -class RealSpacePositionsOMPTarget : public DynamicCoordinates -{ -public: - RealSpacePositionsOMPTarget() - : DynamicCoordinates(DynamicCoordinateKind::DC_POS_OFFLOAD), is_host_position_changed_(false) - {} - RealSpacePositionsOMPTarget(const RealSpacePositionsOMPTarget& in) - : DynamicCoordinates(DynamicCoordinateKind::DC_POS_OFFLOAD), RSoA(in.RSoA) - { - RSoA_hostview.attachReference(RSoA.size(), RSoA.capacity(), RSoA.data()); - updateH2D(); - } - - std::unique_ptr makeClone() override - { - return std::make_unique(*this); - } - - void resize(size_t n) override - { - if (RSoA.size() != n) - { - RSoA.resize(n); - RSoA_hostview.attachReference(RSoA.size(), RSoA.capacity(), RSoA.data()); - } - } - - size_t size() const override { return RSoA_hostview.size(); } - - void setAllParticlePos(const ParticlePos& R) override - { - resize(R.size()); - RSoA_hostview.copyIn(R); - updateH2D(); - is_nw_new_pos_prepared = false; - } - - void setOneParticlePos(const PosType& pos, size_t iat) override - { - RSoA_hostview(iat) = pos; - is_host_position_changed_ = true; - /* This was too slow due to overhead. 
- RealType x = pos[0]; - RealType y = pos[1]; - RealType z = pos[2]; - RealType* data = RSoA.data(); - size_t offset = RSoA.capacity(); - - PRAGMA_OFFLOAD("omp target map(to : x, y, z, iat)") - { - data[iat] = x; - data[iat + offset] = y; - data[iat + offset * 2] = z; - } - */ - } - - void mw_copyActivePos(const RefVectorWithLeader& coords_list, - size_t iat, - const std::vector& new_positions) const override - { - assert(this == &coords_list.getLeader()); - auto& coords_leader = coords_list.getCastedLeader(); - - const auto nw = coords_list.size(); - auto& mw_new_pos = coords_leader.mw_mem_handle_.getResource().mw_new_pos; - mw_new_pos.resize(nw); - - for (int iw = 0; iw < nw; iw++) - mw_new_pos(iw) = new_positions[iw]; - - auto* mw_pos_ptr = mw_new_pos.data(); - PRAGMA_OFFLOAD("omp target update to(mw_pos_ptr[:QMCTraits::DIM * mw_new_pos.capacity()])") - - coords_leader.is_nw_new_pos_prepared = true; - } - - void mw_acceptParticlePos(const RefVectorWithLeader& coords_list, - size_t iat, - const std::vector& new_positions, - const std::vector& isAccepted) const override - { - assert(this == &coords_list.getLeader()); - const size_t nw = coords_list.size(); - auto& coords_leader = coords_list.getCastedLeader(); - MultiWalkerMem& mw_mem = coords_leader.mw_mem_handle_; - auto& mw_new_pos = mw_mem.mw_new_pos; - auto& mw_rsoa_ptrs = mw_mem.mw_rsoa_ptrs; - auto& mw_accept_indices = mw_mem.mw_accept_indices; - - if (!is_nw_new_pos_prepared) - { - mw_copyActivePos(coords_list, iat, new_positions); - app_warning() << "This message only appear in unit tests. Report a bug if seen in production code." 
<< std::endl; - } - - coords_leader.is_nw_new_pos_prepared = false; - - mw_accept_indices.resize(nw); - auto* restrict id_array = mw_accept_indices.data(); - - size_t num_accepted = 0; - for (int iw = 0; iw < nw; iw++) - if (isAccepted[iw]) - { - auto& coords = coords_list.getCastedElement(iw); - id_array[num_accepted] = iw; - // save new coordinates on host copy - coords.RSoA_hostview(iat) = mw_new_pos[iw]; - num_accepted++; - } - - // early return to avoid OpenMP runtime mishandling of size 0 in transfer/compute. - if (num_accepted == 0) - return; - - //offload to GPU - auto* restrict mw_pos_ptr = mw_new_pos.data(); - auto* restrict mw_rosa_ptr = mw_rsoa_ptrs.data(); - const size_t rsoa_stride = RSoA.capacity(); - const size_t mw_pos_stride = mw_new_pos.capacity(); - - PRAGMA_OFFLOAD("omp target teams distribute parallel for \ - map(always, to : id_array[:num_accepted])") - for (int i = 0; i < num_accepted; i++) - { - const int iw = id_array[i]; - RealType* RSoA_dev_ptr = mw_rosa_ptr[iw]; - for (int id = 0; id < QMCTraits::DIM; id++) - RSoA_dev_ptr[iat + rsoa_stride * id] = mw_pos_ptr[iw + mw_pos_stride * id]; - } - } - - const PosVectorSoa& getAllParticlePos() const override { return RSoA_hostview; } - PosType getOneParticlePos(size_t iat) const override { return RSoA_hostview[iat]; } - - void donePbyP() override - { - is_nw_new_pos_prepared = false; - if (is_host_position_changed_) - { - updateH2D(); - is_host_position_changed_ = false; - } - } - - const RealType* getDevicePtr() const { return RSoA.device_data(); } - - const auto& getFusedNewPosBuffer() const { return mw_mem_handle_.getResource().mw_new_pos; } - - void createResource(ResourceCollection& collection) const override - { - auto resource_index = collection.addResource(std::make_unique()); - } - - void acquireResource(ResourceCollection& collection, - const RefVectorWithLeader& coords_list) const override - { - MultiWalkerMem& mw_mem = coords_list.getCastedLeader().mw_mem_handle_ = - 
collection.lendResource(); - - auto& mw_rsoa_ptrs(mw_mem.mw_rsoa_ptrs); - const auto nw = coords_list.size(); - mw_rsoa_ptrs.resize(nw); - for (int iw = 0; iw < nw; iw++) - { - auto& coords = coords_list.getCastedElement(iw); - mw_rsoa_ptrs[iw] = coords.RSoA.device_data(); - } - mw_rsoa_ptrs.updateTo(); - } - - void releaseResource(ResourceCollection& collection, - const RefVectorWithLeader& coords_list) const override - { - collection.takebackResource(coords_list.getCastedLeader().mw_mem_handle_); - } - - const auto& getMultiWalkerRSoADevicePtrs() const { return mw_mem_handle_.getResource().mw_rsoa_ptrs; } - -private: - ///particle positions in SoA layout - VectorSoaContainer>> RSoA; - - ///multi walker shared memory buffer - struct MultiWalkerMem : public Resource - { - ///one particle new/old positions in SoA layout - VectorSoaContainer>> mw_new_pos; - - /// accept list - Vector>> mw_accept_indices; - - /// RSoA device ptr list - Vector>> mw_rsoa_ptrs; - - MultiWalkerMem() : Resource("MultiWalkerMem") {} - - MultiWalkerMem(const MultiWalkerMem&) : MultiWalkerMem() {} - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - }; - - ResourceHandle mw_mem_handle_; - - ///host view of RSoA - PosVectorSoa RSoA_hostview; - - ///if true, host position has been changed while the device copy has not been updated. - bool is_host_position_changed_; - - ///if true, mw_new_pos has been updated with active positions. 
- bool is_nw_new_pos_prepared; +using RealSpacePositionsOMPTarget = RealSpacePositionsTOMPTarget; - void updateH2D() - { - RealType* data = RSoA.data(); - PRAGMA_OFFLOAD("omp target update to(data[0:RSoA.capacity()*QMCTraits::DIM])") - is_host_position_changed_ = false; - } -}; } // namespace qmcplusplus #endif diff --git a/src/Particle/RealSpacePositionsTOMPTarget.h b/src/Particle/RealSpacePositionsTOMPTarget.h index d3a5833506..7f0f52b16b 100644 --- a/src/Particle/RealSpacePositionsTOMPTarget.h +++ b/src/Particle/RealSpacePositionsTOMPTarget.h @@ -14,6 +14,7 @@ #ifndef QMCPLUSPLUS_REALSPACE_POSITIONST_OMPTARGET_H #define QMCPLUSPLUS_REALSPACE_POSITIONST_OMPTARGET_H +#include "Configuration.h" #include "OMPTarget/OMPallocator.hpp" #include "OhmmsSoA/VectorSoaContainer.h" #include "Particle/DynamicCoordinatesT.h" diff --git a/src/Particle/Reptile.h b/src/Particle/Reptile.h index 1738edcf22..d32143a4ca 100644 --- a/src/Particle/Reptile.h +++ b/src/Particle/Reptile.h @@ -25,256 +25,12 @@ #ifndef QMCPLUSPLUS_REPTILE_H #define QMCPLUSPLUS_REPTILE_H -#include "QMCDrivers/DriftOperators.h" -#include "QMCDrivers/WalkerProperties.h" #include "Configuration.h" -#include "Walker.h" +#include "Particle/ReptileT.h" namespace qmcplusplus { -class MCWalkerConfiguration; - -class Reptile : public QMCTraits -{ -public: - using WP = WalkerProperties::Indexes; - using Walker_t = MCWalkerConfiguration::Walker_t; - //using Buffer_t = Walker_t::Buffer_t ; - // using Walker_t = MCWalkerConfiguration::Walker_t; - using WalkerIter_t = MCWalkerConfiguration::iterator; - using ReptileConfig_t = std::vector; - - std::vector Action; - std::vector TransProb; - - RealType forwardprob; - RealType backwardprob; - RealType forwardaction; - RealType backwardaction; - - RealType tau; - - MCWalkerConfiguration& w; - WalkerIter_t repstart, repend; - IndexType direction, headindex, nbeads; - Walker_t* prophead; - - inline Reptile(MCWalkerConfiguration& W, WalkerIter_t start, WalkerIter_t end) - : w(W), 
- repstart(start), - repend(end), - direction(1), - headindex(0), - prophead(0) //, r2prop(0.0), r2accept(0.0),tau(0.0) - { - Action.resize(3); - Action[0] = w.addProperty("ActionBackward"); - Action[1] = w.addProperty("ActionForward"); - Action[2] = w.addProperty("ActionLocal"); - TransProb.resize(2); - TransProb[0] = w.addProperty("TransProbBackward"); - TransProb[1] = w.addProperty("TransProbForward"); - - nbeads = repend - repstart; - } - - ~Reptile() {} - - inline IndexType size() { return nbeads; } - - inline Walker_t& operator[](IndexType i) { return getWalker(getBeadIndex(i)); } - - inline IndexType wrapIndex(IndexType repindex) { return (repindex % nbeads + nbeads) % nbeads; } - - inline Walker_t& getWalker(IndexType i) - { - WalkerIter_t bead = repstart + wrapIndex(i); - return **bead; - } - - inline IndexType getBeadIndex(IndexType i) { return wrapIndex(headindex + direction * i); } - inline Walker_t& getBead(IndexType i) { return getWalker(getBeadIndex(i)); } - inline Walker_t& getHead() { return getWalker(getBeadIndex(0)); } - inline Walker_t& getTail() { return getWalker(getBeadIndex(nbeads - 1)); } - inline Walker_t& getNext() { return getWalker(getBeadIndex(nbeads - 2)); } - inline Walker_t& getCenter() { return getWalker(getBeadIndex((nbeads - 1) / 2)); } - //inline void setProposedHead(){ - - inline void flip() - { - // direction*=-1; - // headindex = getBeadIndex(nbeads-1); - headindex = wrapIndex(headindex - direction); - direction *= -1; - } - - inline void setDirection(IndexType dir) { direction = dir; } - - inline void setBead(Walker_t& walker, IndexType i) - { - IndexType index = getBeadIndex(i); - Walker_t& newbead(getWalker(index)); - newbead = walker; //This should be a hard copy - } - - inline void setHead(Walker_t& overwrite) - { - //overwrite last element. - headindex = getBeadIndex(nbeads - 1); //sets to position of tail. - Walker_t& newhead(getBead(0)); - newhead = overwrite; - } - //This function does two things: 1.) 
Moves the reptile forward 1 step. 2.) Returns the new head. - inline Walker_t& getNewHead() - { - //overwrite last element. - headindex = getBeadIndex(nbeads - 1); //sets to position of tail. - return getWalker(headindex); - } - - void saveAction(Walker_t& walker, IndexType d, RealType val, IndexType nPsi = 0) - { - //IndexType repdirection=circbuffer.get_direction(); - IndexType actionindex = 2; - if (direction != 0) - actionindex = (1 - d * direction) / 2; - walker.Properties(nPsi, Action[actionindex]) = val; - } - - RealType getDirectionalAction(Walker_t& walker, IndexType d, IndexType nPsi = 0) - { - //IndexType repdirection=circbuffer.get_direction(); - IndexType actionindex = 2; - if (d != 0) - actionindex = (1 - direction * d) / 2; - - return walker.Properties(nPsi, Action[actionindex]); - } - - RealType getLinkAction(Walker_t& new_walker, Walker_t& old_walker, IndexType d, IndexType nPsi = 0) - { - RealType af = getDirectionalAction(old_walker, +1, nPsi); - RealType ab = getDirectionalAction(new_walker, -1, nPsi); - RealType a0 = getDirectionalAction(old_walker, 0, nPsi) + getDirectionalAction(new_walker, 0, nPsi); - return af + ab + a0; - } - - void saveTransProb(Walker_t& walker, IndexType d, RealType val, IndexType nPsi = 0) - { - //IndexType repdirection=circbuffer.get_direction(); - IndexType transindex = (1 - d * direction) / 2; - walker.Properties(nPsi, TransProb[transindex]) = val; - } - - void saveTransProb(ParticleSet& W, IndexType d, RealType val, IndexType nPsi = 0) - { - //IndexType repdirection=circbuffer.get_direction(); - IndexType transindex = (1 - d * direction) / 2; - W.Properties(nPsi, TransProb[transindex]) = val; - } - RealType getTransProb(Walker_t& walker, IndexType d, RealType nPsi = 0) - { - //IndexType repdirection=circbuffer.get_direction(); - IndexType transindex = (1 - d * direction) / 2; - return walker.Properties(nPsi, TransProb[transindex]); - } - RealType getTransProb(ParticleSet& W, IndexType d, RealType nPsi = 0) - { - 
//IndexType repdirection=circbuffer.get_direction(); - IndexType transindex = (1 - d * direction) / 2; - return W.Properties(nPsi, TransProb[transindex]); - } - - inline void printState() - { - app_log() << "********PRINT REPTILE STATE*********\n"; - app_log() << "Direction=" << direction << " Headindex=" << headindex << " tail=" << getBeadIndex(nbeads - 1) - << "\n next=" << getBeadIndex(nbeads - 2) << " nbeads=" << nbeads << std::endl; - app_log() << "BeadIndex\tWrapIndex\tEnergy\tAction[0]\tAction[1]\tAction[2]\t\n"; - for (int i = 0; i < nbeads; i++) - { - app_log() << i << "\t" << getBeadIndex(i) << "\t" << getBead(i).Properties(WP::LOCALENERGY) << "\t" - << getBead(i).Properties(Action[0]) << "\t" << getBead(i).Properties(Action[1]) << "\t" - << getBead(i).Properties(Action[2]) << "\n"; - } - app_log() << "POSITIONS===============:\n"; - for (int i = 0; i < nbeads; i++) - { - // app_log()<length of reptile, then return the last bead. if t<0; return the first bead. - inline Walker_t::ParticlePos linearInterp(RealType t) - { - IndexType nbead = - IndexType(t / tau); //Calculate the lower bound on the timeslice. 
t is between binnum*Tau and (binnum+1)Tau - RealType beadfrac = t / tau - nbead; //the fractional coordinate between n and n+1 bead - if (nbead <= 0) - { - ParticleSet::ParticlePos result = getHead().R; - return result; - } - else if (nbead >= nbeads - 1) - { - ParticleSet::ParticlePos result = getTail().R; - return result; - } - - else - { - Walker_t::ParticlePos dR(getBead(nbead + 1).R), interpR(getBead(nbead).R); - dR = dR - getBead(nbead).R; - - interpR = getBead(nbead).R + beadfrac * dR; - return interpR; - } - } - inline ReptileConfig_t getReptileSlicePositions(RealType tau, RealType beta) - { - IndexType nbeads_new = IndexType(beta / tau); - ReptileConfig_t new_reptile_coords(0); - - for (IndexType i = 0; i < nbeads_new; i++) - new_reptile_coords.push_back(linearInterp(tau * i)); - - return new_reptile_coords; - } - - inline void setReptileSlicePositions(ReptileConfig_t& rept) - { - if (rept.size() == nbeads) - { - for (int i = 0; i < nbeads; i++) - getBead(i).R = rept[i]; - } - else - ; - } - - inline void setReptileSlicePositions(Walker_t::ParticlePos R) - { - for (int i = 0; i < nbeads; i++) - getBead(i).R = R; - } -}; - +using Reptile = ReptileT; } // namespace qmcplusplus #endif diff --git a/src/Particle/SampleStack.cpp b/src/Particle/SampleStack.cpp deleted file mode 100644 index c2720b9b8c..0000000000 --- a/src/Particle/SampleStack.cpp +++ /dev/null @@ -1,60 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2020 QMCPACK developers. 
-// -// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory -// -// File created by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory -////////////////////////////////////////////////////////////////////////////////////// - - -#include "SampleStack.h" -#include "Utilities/IteratorUtility.h" - -namespace qmcplusplus -{ - -/** allocate the SampleStack - * @param n number of samples per rank - * @param num_ranks number of ranks. Used to set global number of samples. - */ -void SampleStack::setMaxSamples(size_t n, size_t num_ranks) -{ - max_samples_ = n; - global_num_samples_ = n * num_ranks; - current_sample_count_ = std::min(current_sample_count_, max_samples_); - sample_vector_.resize(n, MCSample(0)); -} - -const MCSample& SampleStack::getSample(size_t i) const { return sample_vector_[i]; } - -void SampleStack::appendSample(MCSample&& sample) -{ - // Ignore samples in excess of the expected number of samples - if (current_sample_count_ < max_samples_) - { - sample_vector_[current_sample_count_] = std::move(sample); - current_sample_count_++; - } -} - -/** load a single sample from SampleStack - */ -void SampleStack::loadSample(ParticleSet& pset, size_t iw) const -{ - pset.R = sample_vector_[iw].R; - pset.spins = sample_vector_[iw].spins; -} - -void SampleStack::clearEnsemble() -{ - sample_vector_.clear(); - current_sample_count_ = 0; -} - -void SampleStack::resetSampleCount() { current_sample_count_ = 0; } - - -} // namespace qmcplusplus diff --git a/src/Particle/SampleStack.h b/src/Particle/SampleStack.h index 3614f53558..9565ceac02 100644 --- a/src/Particle/SampleStack.h +++ b/src/Particle/SampleStack.h @@ -18,50 +18,12 @@ #ifndef QMCPLUSPLUS_SAMPLE_STACK_H #define QMCPLUSPLUS_SAMPLE_STACK_H -#include -#include "Particle/ParticleSet.h" -#include "Particle/MCSample.h" -#include "Particle/Walker.h" -#include "Particle/WalkerConfigurations.h" +#include "Configuration.h" +#include "Particle/SampleStackT.h" namespace qmcplusplus { 
-class SampleStack -{ -public: - using PropertySetType = QMCTraits::PropertySetType; - - size_t getMaxSamples() const { return max_samples_; } - - bool empty() const { return sample_vector_.empty(); } - - const MCSample& getSample(size_t i) const; - - //@{save/load/clear function for optimization - inline size_t getNumSamples() const { return current_sample_count_; } - ///set the number of max samples per rank. - void setMaxSamples(size_t n, size_t number_of_ranks = 1); - /// Global number of samples is number of samples per rank * number of ranks - size_t getGlobalNumSamples() const { return global_num_samples_; } - /// load a single sample from SampleStack - void loadSample(ParticleSet& pset, size_t iw) const; - - void appendSample(MCSample&& sample); - - ///clear the ensemble - void clearEnsemble(); - //@} - /// Set the sample count to zero but preserve the storage - void resetSampleCount(); - -private: - size_t max_samples_{10}; - size_t current_sample_count_{0}; - size_t global_num_samples_{max_samples_}; - - std::vector sample_vector_; -}; - +using SampleStack = SampleStackT; } // namespace qmcplusplus #endif diff --git a/src/Particle/SimulationCell.cpp b/src/Particle/SimulationCell.cpp deleted file mode 100644 index cbd9b5f053..0000000000 --- a/src/Particle/SimulationCell.cpp +++ /dev/null @@ -1,65 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2021 QMCPACK developers. 
-// -// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// -// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory -////////////////////////////////////////////////////////////////////////////////////// - - -#include "SimulationCell.h" - -namespace qmcplusplus -{ - -SimulationCell::SimulationCell() = default; - -SimulationCell::SimulationCell(const Lattice& lattice) - : lattice_(lattice) -{ - resetLRBox(); -} - -void SimulationCell::resetLRBox() -{ - if (lattice_.SuperCellEnum != SUPERCELL_OPEN) - { - lattice_.SetLRCutoffs(lattice_.Rv); - LRBox_ = lattice_; - bool changed = false; - if (lattice_.SuperCellEnum == SUPERCELL_SLAB && lattice_.VacuumScale != 1.0) - { - LRBox_.R(2, 0) *= lattice_.VacuumScale; - LRBox_.R(2, 1) *= lattice_.VacuumScale; - LRBox_.R(2, 2) *= lattice_.VacuumScale; - changed = true; - } - else if (lattice_.SuperCellEnum == SUPERCELL_WIRE && lattice_.VacuumScale != 1.0) - { - LRBox_.R(1, 0) *= lattice_.VacuumScale; - LRBox_.R(1, 1) *= lattice_.VacuumScale; - LRBox_.R(1, 2) *= lattice_.VacuumScale; - LRBox_.R(2, 0) *= lattice_.VacuumScale; - LRBox_.R(2, 1) *= lattice_.VacuumScale; - LRBox_.R(2, 2) *= lattice_.VacuumScale; - changed = true; - } - LRBox_.reset(); - LRBox_.SetLRCutoffs(LRBox_.Rv); - LRBox_.printCutoffs(app_log()); - - if (changed) - { - app_summary() << " Simulation box changed by vacuum supercell conditions" << std::endl; - app_log() << "--------------------------------------- " << std::endl; - LRBox_.print(app_log()); - app_log() << "--------------------------------------- " << std::endl; - } - - k_lists_.updateKLists(LRBox_, LRBox_.LR_kc, LRBox_.ndim); - } -} -} diff --git a/src/Particle/SimulationCell.h b/src/Particle/SimulationCell.h index 87f630812f..f632846d99 100644 --- a/src/Particle/SimulationCell.h +++ b/src/Particle/SimulationCell.h @@ -14,41 +14,11 @@ #define QMCPLUSPLUS_SIMULATIONCELL_H #include "Configuration.h" -#include "LongRange/KContainer.h" +#include "SimulationCellT.h" 
namespace qmcplusplus { -class ParticleSetPool; +using SimulationCell = SimulationCellT; -class SimulationCell -{ -public: - using Lattice = PtclOnLatticeTraits::ParticleLayout; - - SimulationCell(); - SimulationCell(const Lattice& lattice); - - const Lattice& getLattice() const { return lattice_; } - const Lattice& getPrimLattice() const { return primative_lattice_; } - const Lattice& getLRBox() const { return LRBox_; } - - void resetLRBox(); - - /// access k_lists_ read only - const KContainer& getKLists() const { return k_lists_; } - -private: - ///simulation cell lattice - Lattice lattice_; - ///Primative cell lattice - Lattice primative_lattice_; - ///long-range box - Lattice LRBox_; - - /// K-Vector List. - KContainer k_lists_; - - friend class ParticleSetPool; -}; } // namespace qmcplusplus #endif diff --git a/src/Particle/SoaDistanceTableAA.h b/src/Particle/SoaDistanceTableAA.h index ec9e8315ca..d8ecec84e8 100644 --- a/src/Particle/SoaDistanceTableAA.h +++ b/src/Particle/SoaDistanceTableAA.h @@ -13,197 +13,11 @@ #ifndef QMCPLUSPLUS_DTDIMPL_AA_H #define QMCPLUSPLUS_DTDIMPL_AA_H -#include "Lattice/ParticleBConds3DSoa.h" -#include "DistanceTable.h" +#include "Particle/SoaDistanceTableAAT.h" namespace qmcplusplus { -/**@ingroup nnlist - * @brief A derived classe from DistacneTableData, specialized for dense case - */ template -struct SoaDistanceTableAA : public DTD_BConds, public DistanceTableAA -{ - /// actual memory for dist and displacements_ - aligned_vector memory_pool_; - - SoaDistanceTableAA(ParticleSet& target) - : DTD_BConds(target.getLattice()), - DistanceTableAA(target, DTModes::ALL_OFF), - num_targets_padded_(getAlignedSize(num_targets_)), -#if !defined(NDEBUG) - old_prepared_elec_id_(-1), -#endif - evaluate_timer_(createGlobalTimer(std::string("DTAA::evaluate_") + target.getName() + "_" + target.getName(), - timer_level_fine)), - move_timer_(createGlobalTimer(std::string("DTAA::move_") + target.getName() + "_" + target.getName(), - 
timer_level_fine)), - update_timer_(createGlobalTimer(std::string("DTAA::update_") + target.getName() + "_" + target.getName(), - timer_level_fine)) - { - resize(); - } - - SoaDistanceTableAA() = delete; - SoaDistanceTableAA(const SoaDistanceTableAA&) = delete; - ~SoaDistanceTableAA() override {} - - size_t compute_size(int N) const - { - const size_t num_padded = getAlignedSize(N); - const size_t Alignment = getAlignment(); - return (num_padded * (2 * N - num_padded + 1) + (Alignment - 1) * num_padded) / 2; - } - - void resize() - { - // initialize memory containers and views - const size_t total_size = compute_size(num_targets_); - memory_pool_.resize(total_size * (1 + D)); - distances_.resize(num_targets_); - displacements_.resize(num_targets_); - for (int i = 0; i < num_targets_; ++i) - { - distances_[i].attachReference(memory_pool_.data() + compute_size(i), i); - displacements_[i].attachReference(i, total_size, memory_pool_.data() + total_size + compute_size(i)); - } - - old_r_.resize(num_targets_); - old_dr_.resize(num_targets_); - temp_r_.resize(num_targets_); - temp_dr_.resize(num_targets_); - } - - inline void evaluate(ParticleSet& P) override - { - ScopedTimer local_timer(evaluate_timer_); - constexpr T BigR = std::numeric_limits::max(); - for (int iat = 1; iat < num_targets_; ++iat) - DTD_BConds::computeDistances(P.R[iat], P.getCoordinates().getAllParticlePos(), distances_[iat].data(), - displacements_[iat], 0, iat, iat); - } - - ///evaluate the temporary pair relations - inline void move(const ParticleSet& P, const PosType& rnew, const IndexType iat, bool prepare_old) override - { - ScopedTimer local_timer(move_timer_); - -#if !defined(NDEBUG) - old_prepared_elec_id_ = prepare_old ? iat : -1; -#endif - DTD_BConds::computeDistances(rnew, P.getCoordinates().getAllParticlePos(), temp_r_.data(), temp_dr_, 0, - num_targets_, iat); - // set up old_r_ and old_dr_ for moves may get accepted. 
- if (prepare_old) - { - //recompute from scratch - DTD_BConds::computeDistances(P.R[iat], P.getCoordinates().getAllParticlePos(), old_r_.data(), old_dr_, - 0, num_targets_, iat); - old_r_[iat] = std::numeric_limits::max(); //assign a big number - } - } - - int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const override - { - //ensure there are neighbors - assert(num_targets_ > 1); - RealType min_dist = std::numeric_limits::max(); - int index = -1; - if (newpos) - { - for (int jat = 0; jat < num_targets_; ++jat) - if (temp_r_[jat] < min_dist && jat != iat) - { - min_dist = temp_r_[jat]; - index = jat; - } - assert(index >= 0); - dr = temp_dr_[index]; - } - else - { - for (int jat = 0; jat < iat; ++jat) - if (distances_[iat][jat] < min_dist) - { - min_dist = distances_[iat][jat]; - index = jat; - } - for (int jat = iat + 1; jat < num_targets_; ++jat) - if (distances_[jat][iat] < min_dist) - { - min_dist = distances_[jat][iat]; - index = jat; - } - assert(index != iat && index >= 0); - if (index < iat) - dr = displacements_[iat][index]; - else - dr = displacements_[index][iat]; - } - r = min_dist; - return index; - } - - /** After accepting the iat-th particle, update the iat-th row of distances_ and displacements_. 
- * Upper triangle is not needed in the later computation and thus not updated - */ - inline void update(IndexType iat) override - { - ScopedTimer local_timer(update_timer_); - //update [0, iat) - const int nupdate = iat; - //copy row - assert(nupdate <= temp_r_.size()); - std::copy_n(temp_r_.data(), nupdate, distances_[iat].data()); - for (int idim = 0; idim < D; ++idim) - std::copy_n(temp_dr_.data(idim), nupdate, displacements_[iat].data(idim)); - //copy column - for (size_t i = iat + 1; i < num_targets_; ++i) - { - distances_[i][iat] = temp_r_[i]; - displacements_[i](iat) = -temp_dr_[i]; - } - } - - void updatePartial(IndexType jat, bool from_temp) override - { - ScopedTimer local_timer(update_timer_); - //update [0, jat) - const int nupdate = jat; - if (from_temp) - { - //copy row - assert(nupdate <= temp_r_.size()); - std::copy_n(temp_r_.data(), nupdate, distances_[jat].data()); - for (int idim = 0; idim < D; ++idim) - std::copy_n(temp_dr_.data(idim), nupdate, displacements_[jat].data(idim)); - } - else - { - assert(old_prepared_elec_id_ == jat); - //copy row - assert(nupdate <= old_r_.size()); - std::copy_n(old_r_.data(), nupdate, distances_[jat].data()); - for (int idim = 0; idim < D; ++idim) - std::copy_n(old_dr_.data(idim), nupdate, displacements_[jat].data(idim)); - } - } - -private: - ///number of targets with padding - const size_t num_targets_padded_; -#if !defined(NDEBUG) - /** set to particle id after move() with prepare_old = true. -1 means not prepared. - * It is intended only for safety checks, not for codepath selection. 
- */ - int old_prepared_elec_id_; -#endif - /// timer for evaluate() - NewTimer& evaluate_timer_; - /// timer for move() - NewTimer& move_timer_; - /// timer for update() - NewTimer& update_timer_; -}; +using SoaDistanceTableAA = SoaDistanceTableAAT; } // namespace qmcplusplus #endif diff --git a/src/Particle/SoaDistanceTableAAOMPTarget.h b/src/Particle/SoaDistanceTableAAOMPTarget.h index d5b8c5f1da..5eb91e236e 100644 --- a/src/Particle/SoaDistanceTableAAOMPTarget.h +++ b/src/Particle/SoaDistanceTableAAOMPTarget.h @@ -14,13 +14,7 @@ #ifndef QMCPLUSPLUS_DTDIMPL_AA_OMPTARGET_H #define QMCPLUSPLUS_DTDIMPL_AA_OMPTARGET_H -#include "Lattice/ParticleBConds3DSoa.h" -#include "DistanceTable.h" -#include "OMPTarget/OMPallocator.hpp" -#include "Platforms/PinnedAllocator.h" -#include "Particle/RealSpacePositionsOMPTarget.h" -#include "ResourceCollection.h" -#include "OMPTarget/OMPTargetMath.hpp" +#include "Particle/SoaDistanceTableAATOMPTarget.h" namespace qmcplusplus { @@ -28,480 +22,7 @@ namespace qmcplusplus * @brief A derived classe from DistacneTableData, specialized for dense case */ template -struct SoaDistanceTableAAOMPTarget : public DTD_BConds, public DistanceTableAA -{ - /// actual memory for dist and displacements_ - aligned_vector memory_pool_; - - /// actual memory for temp_r_ - DistRow temp_r_mem_; - /// actual memory for temp_dr_ - DisplRow temp_dr_mem_; - /// actual memory for old_r_ - DistRow old_r_mem_; - /// actual memory for old_dr_ - DisplRow old_dr_mem_; - - ///multi walker shared memory buffer - struct DTAAMultiWalkerMem : public Resource - { - ///dist displ for temporary and old pairs - Vector>> mw_new_old_dist_displ; - - /** distances from a range of indics to the source. 
- * for original particle index i (row) and source particle id j (col) - * j < i, the element data is dist(r_i - r_j) - * j > i, the element data is dist(r_(n - 1 - i) - r_(n - 1 - j)) - */ - Vector>> mw_distances_subset; - - DTAAMultiWalkerMem() : Resource("DTAAMultiWalkerMem") {} - - DTAAMultiWalkerMem(const DTAAMultiWalkerMem&) : DTAAMultiWalkerMem() {} - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - }; - - ResourceHandle mw_mem_handle_; - - SoaDistanceTableAAOMPTarget(ParticleSet& target) - : DTD_BConds(target.getLattice()), - DistanceTableAA(target, DTModes::ALL_OFF), - num_targets_padded_(getAlignedSize(num_targets_)), -#if !defined(NDEBUG) - old_prepared_elec_id_(-1), -#endif - offload_timer_(createGlobalTimer(std::string("DTAAOMPTarget::offload_") + name_, timer_level_fine)), - evaluate_timer_(createGlobalTimer(std::string("DTAAOMPTarget::evaluate_") + name_, timer_level_fine)), - move_timer_(createGlobalTimer(std::string("DTAAOMPTarget::move_") + name_, timer_level_fine)), - update_timer_(createGlobalTimer(std::string("DTAAOMPTarget::update_") + name_, timer_level_fine)) - - { - auto* coordinates_soa = dynamic_cast(&target.getCoordinates()); - if (!coordinates_soa) - throw std::runtime_error("Source particle set doesn't have OpenMP offload. 
Contact developers!"); - resize(); - PRAGMA_OFFLOAD("omp target enter data map(to : this[:1])") - } - - SoaDistanceTableAAOMPTarget() = delete; - SoaDistanceTableAAOMPTarget(const SoaDistanceTableAAOMPTarget&) = delete; - ~SoaDistanceTableAAOMPTarget(){PRAGMA_OFFLOAD("omp target exit data map(delete : this[:1])")} - - size_t compute_size(int N) const - { - const size_t num_padded = getAlignedSize(N); - const size_t Alignment = getAlignment(); - return (num_padded * (2 * N - num_padded + 1) + (Alignment - 1) * num_padded) / 2; - } - - void resize() - { - // initialize memory containers and views - const size_t total_size = compute_size(num_targets_); - memory_pool_.resize(total_size * (1 + D)); - distances_.resize(num_targets_); - displacements_.resize(num_targets_); - for (int i = 0; i < num_targets_; ++i) - { - distances_[i].attachReference(memory_pool_.data() + compute_size(i), i); - displacements_[i].attachReference(i, total_size, memory_pool_.data() + total_size + compute_size(i)); - } - - old_r_mem_.resize(num_targets_); - old_dr_mem_.resize(num_targets_); - temp_r_mem_.resize(num_targets_); - temp_dr_mem_.resize(num_targets_); - } - - const RealType* getMultiWalkerTempDataPtr() const override - { - return mw_mem_handle_.getResource().mw_new_old_dist_displ.data(); - } - - void createResource(ResourceCollection& collection) const override - { - auto resource_index = collection.addResource(std::make_unique()); - } - - void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const override - { - assert(this == &dt_list.getLeader()); - auto& dt_leader = dt_list.getCastedLeader(); - dt_leader.mw_mem_handle_ = collection.lendResource(); - const size_t nw = dt_list.size(); - const size_t stride_size = num_targets_padded_ * (D + 1); - - for (int iw = 0; iw < nw; iw++) - { - auto& dt = dt_list.getCastedElement(iw); - dt.temp_r_.free(); - dt.temp_dr_.free(); - dt.old_r_.free(); - dt.old_dr_.free(); - } - - auto& mw_new_old_dist_displ = 
dt_leader.mw_mem_handle_.getResource().mw_new_old_dist_displ; - mw_new_old_dist_displ.resize(nw * 2 * stride_size); - for (int iw = 0; iw < nw; iw++) - { - auto& dt = dt_list.getCastedElement(iw); - dt.temp_r_.attachReference(mw_new_old_dist_displ.data() + stride_size * iw, num_targets_padded_); - dt.temp_dr_.attachReference(num_targets_, num_targets_padded_, - mw_new_old_dist_displ.data() + stride_size * iw + num_targets_padded_); - dt.old_r_.attachReference(mw_new_old_dist_displ.data() + stride_size * (iw + nw), num_targets_padded_); - dt.old_dr_.attachReference(num_targets_, num_targets_padded_, - mw_new_old_dist_displ.data() + stride_size * (iw + nw) + num_targets_padded_); - } - } - - void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const override - { - collection.takebackResource(dt_list.getCastedLeader().mw_mem_handle_); - const size_t nw = dt_list.size(); - for (int iw = 0; iw < nw; iw++) - { - auto& dt = dt_list.getCastedElement(iw); - dt.temp_r_.free(); - dt.temp_dr_.free(); - dt.old_r_.free(); - dt.old_dr_.free(); - } - } - - inline void evaluate(ParticleSet& P) override - { - ScopedTimer local_timer(evaluate_timer_); - - constexpr T BigR = std::numeric_limits::max(); - for (int iat = 1; iat < num_targets_; ++iat) - DTD_BConds::computeDistances(P.R[iat], P.getCoordinates().getAllParticlePos(), distances_[iat].data(), - displacements_[iat], 0, iat, iat); - } - - /** compute distances from particles in [range_begin, range_end) to all the particles. - * Although [range_begin, range_end) and be any particle [0, num_sources), it is only necessary to compute - * half of the table due to the symmetry of AA table. See note of the output data object mw_distances_subset - * To keep resident memory minimal on the device, range_end - range_begin < num_particls_stored is required. 
- */ - const RealType* mw_evalDistsInRange(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list, - size_t range_begin, - size_t range_end) const override - { - auto& dt_leader = dt_list.getCastedLeader(); - const size_t subset_size = range_end - range_begin; - if (subset_size > dt_leader.num_particls_stored) - throw std::runtime_error("not enough internal buffer"); - - ScopedTimer local_timer(dt_leader.evaluate_timer_); - - DTAAMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_; - auto& pset_leader = p_list.getLeader(); - - const size_t nw = dt_list.size(); - const auto num_sources_local = dt_leader.num_targets_; - const auto num_padded = dt_leader.num_targets_padded_; - mw_mem.mw_distances_subset.resize(nw * subset_size * num_padded); - - const int ChunkSizePerTeam = 512; - const size_t num_teams = (num_sources_local + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - - auto& coordinates_leader = static_cast(pset_leader.getCoordinates()); - - auto* rsoa_dev_list_ptr = coordinates_leader.getMultiWalkerRSoADevicePtrs().data(); - auto* dist_ranged = mw_mem.mw_distances_subset.data(); - { - ScopedTimer offload(dt_leader.offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(nw * num_teams)") - for (int iw = 0; iw < nw; ++iw) - for (int team_id = 0; team_id < num_teams; team_id++) - { - auto* source_pos_ptr = rsoa_dev_list_ptr[iw]; - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, num_sources_local); - - PRAGMA_OFFLOAD("omp parallel for") - for (int iel = first; iel < last; iel++) - { - for (int irow = 0; irow < subset_size; irow++) - { - T* dist = dist_ranged + (irow + subset_size * iw) * num_padded; - size_t id_target = irow + range_begin; +using SoaDistanceTableAAOMPTarget = SoaDistanceTableAATOMPTarget; - T dx, dy, dz; - if (id_target < iel) - { - dx = source_pos_ptr[id_target] - source_pos_ptr[iel]; - dy = source_pos_ptr[id_target + num_padded] - 
source_pos_ptr[iel + num_padded]; - dz = source_pos_ptr[id_target + num_padded * 2] - source_pos_ptr[iel + num_padded * 2]; - } - else - { - const size_t id_target_reverse = num_sources_local - 1 - id_target; - const size_t iel_reverse = num_sources_local - 1 - iel; - dx = source_pos_ptr[id_target_reverse] - source_pos_ptr[iel_reverse]; - dy = source_pos_ptr[id_target_reverse + num_padded] - source_pos_ptr[iel_reverse + num_padded]; - dz = source_pos_ptr[id_target_reverse + num_padded * 2] - source_pos_ptr[iel_reverse + num_padded * 2]; - } - - dist[iel] = DTD_BConds::computeDist(dx, dy, dz); - } - } - } - } - return mw_mem.mw_distances_subset.data(); - } - - ///evaluate the temporary pair relations - inline void move(const ParticleSet& P, const PosType& rnew, const IndexType iat, bool prepare_old) override - { - ScopedTimer local_timer(move_timer_); - -#if !defined(NDEBUG) - old_prepared_elec_id_ = prepare_old ? iat : -1; -#endif - temp_r_.attachReference(temp_r_mem_.data(), temp_r_mem_.size()); - temp_dr_.attachReference(temp_dr_mem_.size(), temp_dr_mem_.capacity(), temp_dr_mem_.data()); - - assert((prepare_old && iat >= 0 && iat < num_targets_) || !prepare_old); - DTD_BConds::computeDistances(rnew, P.getCoordinates().getAllParticlePos(), temp_r_.data(), temp_dr_, 0, - num_targets_, iat); - // set up old_r_ and old_dr_ for moves may get accepted. - if (prepare_old) - { - old_r_.attachReference(old_r_mem_.data(), old_r_mem_.size()); - old_dr_.attachReference(old_dr_mem_.size(), old_dr_mem_.capacity(), old_dr_mem_.data()); - //recompute from scratch - DTD_BConds::computeDistances(P.R[iat], P.getCoordinates().getAllParticlePos(), old_r_.data(), old_dr_, - 0, num_targets_, iat); - old_r_[iat] = std::numeric_limits::max(); //assign a big number - } - } - - /** evaluate the temporary pair relations when a move is proposed - * this implementation is asynchronous and the synchronization is managed at ParticleSet. 
- * Transferring results to host depends on DTModes::NEED_TEMP_DATA_ON_HOST. - * If the temporary pair distance are consumed on the device directly, the device to host data transfer can be - * skipped as an optimization. - */ - void mw_move(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list, - const std::vector& rnew_list, - const IndexType iat, - bool prepare_old = true) const override - { - assert(this == &dt_list.getLeader()); - auto& dt_leader = dt_list.getCastedLeader(); - DTAAMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_; - auto& pset_leader = p_list.getLeader(); - - ScopedTimer local_timer(move_timer_); - const size_t nw = dt_list.size(); - const size_t stride_size = num_targets_padded_ * (D + 1); - - auto& mw_new_old_dist_displ = mw_mem.mw_new_old_dist_displ; - - for (int iw = 0; iw < nw; iw++) - { - auto& dt = dt_list.getCastedElement(iw); -#if !defined(NDEBUG) - dt.old_prepared_elec_id_ = prepare_old ? iat : -1; -#endif - auto& coordinates_soa = static_cast(p_list[iw].getCoordinates()); - } - - const int ChunkSizePerTeam = 512; - const size_t num_teams = (num_targets_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - - auto& coordinates_leader = static_cast(pset_leader.getCoordinates()); - - const auto num_sources_local = num_targets_; - const auto num_padded = num_targets_padded_; - auto* rsoa_dev_list_ptr = coordinates_leader.getMultiWalkerRSoADevicePtrs().data(); - auto* r_dr_ptr = mw_new_old_dist_displ.data(); - auto* new_pos_ptr = coordinates_leader.getFusedNewPosBuffer().data(); - const size_t new_pos_stride = coordinates_leader.getFusedNewPosBuffer().capacity(); - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(nw * num_teams) \ - nowait depend(out: r_dr_ptr[:mw_new_old_dist_displ.size()])") - for (int iw = 0; iw < nw; ++iw) - for (int team_id = 0; team_id < num_teams; team_id++) - { - auto* source_pos_ptr = rsoa_dev_list_ptr[iw]; - const size_t first = 
ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, num_sources_local); - - { // temp - auto* r_iw_ptr = r_dr_ptr + iw * stride_size; - auto* dr_iw_ptr = r_dr_ptr + iw * stride_size + num_padded; - - T pos[D]; - for (int idim = 0; idim < D; idim++) - pos[idim] = new_pos_ptr[idim * new_pos_stride + iw]; - - PRAGMA_OFFLOAD("omp parallel for") - for (int iel = first; iel < last; iel++) - DTD_BConds::computeDistancesOffload(pos, source_pos_ptr, num_padded, r_iw_ptr, dr_iw_ptr, - num_padded, iel, iat); - } - - if (prepare_old) - { // old - auto* r_iw_ptr = r_dr_ptr + (iw + nw) * stride_size; - auto* dr_iw_ptr = r_dr_ptr + (iw + nw) * stride_size + num_padded; - - T pos[D]; - for (int idim = 0; idim < D; idim++) - pos[idim] = source_pos_ptr[idim * num_padded + iat]; - - PRAGMA_OFFLOAD("omp parallel for") - for (int iel = first; iel < last; iel++) - DTD_BConds::computeDistancesOffload(pos, source_pos_ptr, num_padded, r_iw_ptr, dr_iw_ptr, - num_padded, iel, iat); - r_iw_ptr[iat] = std::numeric_limits::max(); //assign a big number - } - } - } - - if (modes_ & DTModes::NEED_TEMP_DATA_ON_HOST) - { - PRAGMA_OFFLOAD("omp target update nowait depend(inout: r_dr_ptr[:mw_new_old_dist_displ.size()]) \ - from(r_dr_ptr[:mw_new_old_dist_displ.size()])") - } - } - - int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const override - { - //ensure there are neighbors - assert(num_targets_ > 1); - RealType min_dist = std::numeric_limits::max(); - int index = -1; - if (newpos) - { - for (int jat = 0; jat < num_targets_; ++jat) - if (temp_r_[jat] < min_dist && jat != iat) - { - min_dist = temp_r_[jat]; - index = jat; - } - assert(index >= 0); - dr = temp_dr_[index]; - } - else - { - for (int jat = 0; jat < iat; ++jat) - if (distances_[iat][jat] < min_dist) - { - min_dist = distances_[iat][jat]; - index = jat; - } - for (int jat = iat + 1; jat < num_targets_; ++jat) - if (distances_[jat][iat] < min_dist) - { - min_dist = 
distances_[jat][iat]; - index = jat; - } - assert(index != iat && index >= 0); - if (index < iat) - dr = displacements_[iat][index]; - else - dr = displacements_[index][iat]; - } - r = min_dist; - return index; - } - - /** After accepting the iat-th particle, update the iat-th row of distances_ and displacements_. - * Upper triangle is not needed in the later computation and thus not updated - */ - inline void update(IndexType iat) override - { - ScopedTimer local_timer(update_timer_); - //update [0, iat) columns - const int nupdate = iat; - //copy row - assert(nupdate <= temp_r_.size()); - std::copy_n(temp_r_.data(), nupdate, distances_[iat].data()); - for (int idim = 0; idim < D; ++idim) - std::copy_n(temp_dr_.data(idim), nupdate, displacements_[iat].data(idim)); - //copy column - for (size_t i = iat + 1; i < num_targets_; ++i) - { - distances_[i][iat] = temp_r_[i]; - displacements_[i](iat) = -temp_dr_[i]; - } - } - - void updatePartial(IndexType jat, bool from_temp) override - { - ScopedTimer local_timer(update_timer_); - - //update [0, jat) - const int nupdate = jat; - if (from_temp) - { - //copy row - assert(nupdate <= temp_r_.size()); - std::copy_n(temp_r_.data(), nupdate, distances_[jat].data()); - for (int idim = 0; idim < D; ++idim) - std::copy_n(temp_dr_.data(idim), nupdate, displacements_[jat].data(idim)); - } - else - { - assert(old_prepared_elec_id_ == jat); - //copy row - assert(nupdate <= old_r_.size()); - std::copy_n(old_r_.data(), nupdate, distances_[jat].data()); - for (int idim = 0; idim < D; ++idim) - std::copy_n(old_dr_.data(idim), nupdate, displacements_[jat].data(idim)); - } - } - - void mw_updatePartial(const RefVectorWithLeader& dt_list, - IndexType jat, - const std::vector& from_temp) override - { - // if temp data on host is not updated by mw_move during p-by-p moves, there is no need to update distance table - if (!(modes_ & DTModes::NEED_TEMP_DATA_ON_HOST)) - return; - - for (int iw = 0; iw < dt_list.size(); iw++) - 
dt_list[iw].updatePartial(jat, from_temp[iw]); - } - - void mw_finalizePbyP(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list) const override - { - // if the distance table is not updated by mw_move during p-by-p, needs to recompute the whole table - // before being used by Hamiltonian if requested - if (!(modes_ & DTModes::NEED_TEMP_DATA_ON_HOST) && (modes_ & DTModes::NEED_FULL_TABLE_ON_HOST_AFTER_DONEPBYP)) - mw_evaluate(dt_list, p_list); - } - - size_t get_num_particls_stored() const override { return num_particls_stored; } - -private: - ///number of targets with padding - const size_t num_targets_padded_; -#if !defined(NDEBUG) - /** set to particle id after move() with prepare_old = true. -1 means not prepared. - * It is intended only for safety checks, not for codepath selection. - */ - int old_prepared_elec_id_; -#endif - /// timer for offload portion - NewTimer& offload_timer_; - /// timer for evaluate() - NewTimer& evaluate_timer_; - /// timer for move() - NewTimer& move_timer_; - /// timer for update() - NewTimer& update_timer_; - /// the particle count of the internal stored distances. 
- const size_t num_particls_stored = 64; -}; } // namespace qmcplusplus #endif diff --git a/src/Particle/SoaDistanceTableAATOMPTarget.h b/src/Particle/SoaDistanceTableAATOMPTarget.h index 85afcfa5c7..dededc19e6 100644 --- a/src/Particle/SoaDistanceTableAATOMPTarget.h +++ b/src/Particle/SoaDistanceTableAATOMPTarget.h @@ -88,7 +88,7 @@ struct SoaDistanceTableAATOMPTarget : SoaDistanceTableAATOMPTarget(ParticleSetT& target) : DTD_BConds(target.getLattice()), DistanceTableAAT(target, DTModes::ALL_OFF), - num_targets_padded_(getAlignedSize(this->num_targets_)), + num_targets_padded_(getAlignedSize(this->num_targets_)), #if !defined(NDEBUG) old_prepared_elec_id_(-1), #endif @@ -123,11 +123,9 @@ struct SoaDistanceTableAATOMPTarget : size_t compute_size(int N) const { - const size_t num_padded = getAlignedSize(N); - const size_t Alignment = getAlignment(); - return (num_padded * (2 * N - num_padded + 1) + - (Alignment - 1) * num_padded) / - 2; + const size_t num_padded = getAlignedSize(N); + const size_t Alignment = getAlignment(); + return (num_padded * (2 * N - num_padded + 1) + (Alignment - 1) * num_padded) / 2; } void diff --git a/src/Particle/SoaDistanceTableAB.h b/src/Particle/SoaDistanceTableAB.h index f9b3c79cd7..c2eaf71446 100644 --- a/src/Particle/SoaDistanceTableAB.h +++ b/src/Particle/SoaDistanceTableAB.h @@ -13,136 +13,11 @@ #ifndef QMCPLUSPLUS_DTDIMPL_AB_H #define QMCPLUSPLUS_DTDIMPL_AB_H -#include "Lattice/ParticleBConds3DSoa.h" -#include "Utilities/FairDivide.h" -#include "Concurrency/OpenMP.h" +#include "Particle/SoaDistanceTableABT.h" namespace qmcplusplus { -/**@ingroup nnlist - * @brief A derived classe from DistacneTableData, specialized for AB using a transposed form - */ template -struct SoaDistanceTableAB : public DTD_BConds, public DistanceTableAB -{ - SoaDistanceTableAB(const ParticleSet& source, ParticleSet& target) - : DTD_BConds(source.getLattice()), - DistanceTableAB(source, target, DTModes::ALL_OFF), - 
evaluate_timer_(createGlobalTimer(std::string("DTAB::evaluate_") + target.getName() + "_" + source.getName(), - timer_level_fine)), - move_timer_(createGlobalTimer(std::string("DTAB::move_") + target.getName() + "_" + source.getName(), - timer_level_fine)), - update_timer_(createGlobalTimer(std::string("DTAB::update_") + target.getName() + "_" + source.getName(), - timer_level_fine)) - { - resize(); - } - - void resize() - { - if (num_sources_ * num_targets_ == 0) - return; - - // initialize memory containers and views - const int num_sources_padded = getAlignedSize(num_sources_); - distances_.resize(num_targets_); - displacements_.resize(num_targets_); - for (int i = 0; i < num_targets_; ++i) - { - distances_[i].resize(num_sources_padded); - displacements_[i].resize(num_sources_padded); - } - - // The padding of temp_r_ and temp_dr_ is necessary for the memory copy in the update function - // temp_r_ is padded explicitly while temp_dr_ is padded internally - temp_r_.resize(num_sources_padded); - temp_dr_.resize(num_sources_); - } - - SoaDistanceTableAB() = delete; - SoaDistanceTableAB(const SoaDistanceTableAB&) = delete; - - /** evaluate the full table */ - inline void evaluate(ParticleSet& P) override - { - ScopedTimer local_timer(evaluate_timer_); -#pragma omp parallel - { - int first, last; - FairDivideAligned(num_sources_, getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - //be aware of the sign of Displacement - for (int iat = 0; iat < num_targets_; ++iat) - DTD_BConds::computeDistances(P.R[iat], origin_.getCoordinates().getAllParticlePos(), - distances_[iat].data(), displacements_[iat], first, last); - } - } - - ///evaluate the temporary pair relations - inline void move(const ParticleSet& P, const PosType& rnew, const IndexType iat, bool prepare_old) override - { - ScopedTimer local_timer(move_timer_); - DTD_BConds::computeDistances(rnew, origin_.getCoordinates().getAllParticlePos(), temp_r_.data(), temp_dr_, - 0, num_sources_); 
- // If the full table is not ready all the time, overwrite the current value. - // If this step is missing, DT values can be undefined in case a move is rejected. - if (!(modes_ & DTModes::NEED_FULL_TABLE_ANYTIME) && prepare_old) - DTD_BConds::computeDistances(P.R[iat], origin_.getCoordinates().getAllParticlePos(), - distances_[iat].data(), displacements_[iat], 0, num_sources_); - } - - ///update the stripe for jat-th particle - inline void update(IndexType iat) override - { - ScopedTimer local_timer(update_timer_); - std::copy_n(temp_r_.data(), num_sources_, distances_[iat].data()); - for (int idim = 0; idim < D; ++idim) - std::copy_n(temp_dr_.data(idim), num_sources_, displacements_[iat].data(idim)); - } - - int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const override - { - RealType min_dist = std::numeric_limits::max(); - int index = -1; - if (newpos) - { - for (int jat = 0; jat < num_sources_; ++jat) - if (temp_r_[jat] < min_dist) - { - min_dist = temp_r_[jat]; - index = jat; - } - if (index >= 0) - { - r = min_dist; - dr = temp_dr_[index]; - } - } - else - { - for (int jat = 0; jat < num_sources_; ++jat) - if (distances_[iat][jat] < min_dist) - { - min_dist = distances_[iat][jat]; - index = jat; - } - if (index >= 0) - { - r = min_dist; - dr = displacements_[iat][index]; - } - } - assert(index >= 0 && index < num_sources_); - return index; - } - -private: - /// timer for evaluate() - NewTimer& evaluate_timer_; - /// timer for move() - NewTimer& move_timer_; - /// timer for update() - NewTimer& update_timer_; -}; +using SoaDistanceTableAB = SoaDistanceTableABT; } // namespace qmcplusplus #endif diff --git a/src/Particle/SoaDistanceTableABOMPTarget.h b/src/Particle/SoaDistanceTableABOMPTarget.h index 05dac1eaf1..c31b673a47 100644 --- a/src/Particle/SoaDistanceTableABOMPTarget.h +++ b/src/Particle/SoaDistanceTableABOMPTarget.h @@ -14,403 +14,15 @@ #ifndef QMCPLUSPLUS_DTDIMPL_AB_OMPTARGET_H #define QMCPLUSPLUS_DTDIMPL_AB_OMPTARGET_H 
-#include "Lattice/ParticleBConds3DSoa.h" -#include "DistanceTable.h" -#include "OMPTarget/OMPallocator.hpp" -#include "Platforms/PinnedAllocator.h" -#include "Particle/RealSpacePositionsOMPTarget.h" -#include "ResourceCollection.h" -#include "OMPTarget/OMPTargetMath.hpp" +#include "Particle/SoaDistanceTableABTOMPTarget.h" namespace qmcplusplus { /**@ingroup nnlist - * @brief A derived classe from DistacneTableData, specialized for AB using a transposed form + * @brief A derived classe from DistacneTableData, specialized for dense case */ template -class SoaDistanceTableABOMPTarget : public DTD_BConds, public DistanceTableAB -{ -private: - template - using OffloadPinnedVector = Vector>>; - - ///accelerator output buffer for r and dr - OffloadPinnedVector r_dr_memorypool_; - ///accelerator input array for a list of target particle positions, num_targets_ x D - OffloadPinnedVector target_pos; - - ///multi walker shared memory buffer - struct DTABMultiWalkerMem : public Resource - { - ///accelerator output array for multiple walkers, [1+D][num_targets_][num_padded] (distances, displacements) - OffloadPinnedVector mw_r_dr; - ///accelerator input buffer for multiple data set - OffloadPinnedVector offload_input; - - DTABMultiWalkerMem() : Resource("DTABMultiWalkerMem") {} - - DTABMultiWalkerMem(const DTABMultiWalkerMem&) : DTABMultiWalkerMem() {} - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - }; - - ResourceHandle mw_mem_handle_; - - void resize() - { - if (num_sources_ * num_targets_ == 0) - return; - if (distances_.size()) - return; - - // initialize memory containers and views - const size_t num_padded = getAlignedSize(num_sources_); - const size_t stride_size = getPerTargetPctlStrideSize(); - r_dr_memorypool_.resize(stride_size * num_targets_); - - distances_.resize(num_targets_); - displacements_.resize(num_targets_); - for (int i = 0; i < num_targets_; ++i) - { - distances_[i].attachReference(r_dr_memorypool_.data() + i * 
stride_size, num_sources_); - displacements_[i].attachReference(num_sources_, num_padded, - r_dr_memorypool_.data() + i * stride_size + num_padded); - } - } - - static void associateResource(const RefVectorWithLeader& dt_list) - { - auto& dt_leader = dt_list.getCastedLeader(); - - // initialize memory containers and views - size_t count_targets = 0; - for (size_t iw = 0; iw < dt_list.size(); iw++) - { - auto& dt = dt_list.getCastedElement(iw); - count_targets += dt.targets(); - dt.r_dr_memorypool_.free(); - } - - const size_t num_sources = dt_leader.num_sources_; - const size_t num_padded = getAlignedSize(dt_leader.num_sources_); - const size_t stride_size = num_padded * (D + 1); - const size_t total_targets = count_targets; - auto& mw_r_dr = dt_leader.mw_mem_handle_.getResource().mw_r_dr; - mw_r_dr.resize(total_targets * stride_size); - - count_targets = 0; - for (size_t iw = 0; iw < dt_list.size(); iw++) - { - auto& dt = dt_list.getCastedElement(iw); - assert(num_sources == dt.num_sources_); - - dt.distances_.resize(dt.targets()); - dt.displacements_.resize(dt.targets()); - - for (int i = 0; i < dt.targets(); ++i) - { - dt.distances_[i].attachReference(mw_r_dr.data() + (i + count_targets) * stride_size, num_sources); - dt.displacements_[i].attachReference(num_sources, num_padded, - mw_r_dr.data() + (i + count_targets) * stride_size + num_padded); - } - count_targets += dt.targets(); - } - } - -public: - SoaDistanceTableABOMPTarget(const ParticleSet& source, ParticleSet& target) - : DTD_BConds(source.getLattice()), - DistanceTableAB(source, target, DTModes::ALL_OFF), - offload_timer_(createGlobalTimer(std::string("DTABOMPTarget::offload_") + name_, timer_level_fine)), - evaluate_timer_(createGlobalTimer(std::string("DTABOMPTarget::evaluate_") + name_, timer_level_fine)), - move_timer_(createGlobalTimer(std::string("DTABOMPTarget::move_") + name_, timer_level_fine)), - update_timer_(createGlobalTimer(std::string("DTABOMPTarget::update_") + name_, timer_level_fine)) 
- - { - auto* coordinates_soa = dynamic_cast(&source.getCoordinates()); - if (!coordinates_soa) - throw std::runtime_error("Source particle set doesn't have OpenMP offload. Contact developers!"); - PRAGMA_OFFLOAD("omp target enter data map(to : this[:1])") - - // The padding of temp_r_ and temp_dr_ is necessary for the memory copy in the update function - // temp_r_ is padded explicitly while temp_dr_ is padded internally - const int num_padded = getAlignedSize(num_sources_); - temp_r_.resize(num_padded); - temp_dr_.resize(num_sources_); - } - - SoaDistanceTableABOMPTarget() = delete; - SoaDistanceTableABOMPTarget(const SoaDistanceTableABOMPTarget&) = delete; - - ~SoaDistanceTableABOMPTarget() { PRAGMA_OFFLOAD("omp target exit data map(delete : this[:1])") } - - void createResource(ResourceCollection& collection) const override - { - auto resource_index = collection.addResource(std::make_unique()); - } - - void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const override - { - auto& dt_leader = dt_list.getCastedLeader(); - dt_leader.mw_mem_handle_ = collection.lendResource(); - associateResource(dt_list); - } - - void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const override - { - collection.takebackResource(dt_list.getCastedLeader().mw_mem_handle_); - for (size_t iw = 0; iw < dt_list.size(); iw++) - { - auto& dt = dt_list.getCastedElement(iw); - dt.distances_.clear(); - dt.displacements_.clear(); - } - } - - const T* getMultiWalkerDataPtr() const override { return mw_mem_handle_.getResource().mw_r_dr.data(); } - - size_t getPerTargetPctlStrideSize() const override { return getAlignedSize(num_sources_) * (D + 1); } - - /** evaluate the full table */ - inline void evaluate(ParticleSet& P) override - { - resize(); - - ScopedTimer local_timer(evaluate_timer_); - // be aware of the sign of Displacement - const int num_targets_local = num_targets_; - const int num_sources_local = 
num_sources_; - const int num_padded = getAlignedSize(num_sources_); - - target_pos.resize(num_targets_ * D); - for (size_t iat = 0; iat < num_targets_; iat++) - for (size_t idim = 0; idim < D; idim++) - target_pos[iat * D + idim] = P.R[iat][idim]; - - auto* target_pos_ptr = target_pos.data(); - auto* source_pos_ptr = origin_.getCoordinates().getAllParticlePos().data(); - auto* r_dr_ptr = distances_[0].data(); - assert(distances_[0].data() + num_padded == displacements_[0].data()); - - // To maximize thread usage, the loop over electrons is chunked. Each chunk is sent to an OpenMP offload thread team. - const int ChunkSizePerTeam = 512; - const size_t num_teams = (num_sources_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - const size_t stride_size = getPerTargetPctlStrideSize(); - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(num_targets_*num_teams) \ - map(to: source_pos_ptr[:num_padded*D]) \ - map(always, to: target_pos_ptr[:num_targets_*D]) \ - map(always, from: r_dr_ptr[:num_targets_*stride_size])") - for (int iat = 0; iat < num_targets_local; ++iat) - for (int team_id = 0; team_id < num_teams; team_id++) - { - const int first = ChunkSizePerTeam * team_id; - const int last = omptarget::min(first + ChunkSizePerTeam, num_sources_local); - - T pos[D]; - for (int idim = 0; idim < D; idim++) - pos[idim] = target_pos_ptr[iat * D + idim]; - - auto* r_iat_ptr = r_dr_ptr + iat * stride_size; - auto* dr_iat_ptr = r_iat_ptr + num_padded; - - PRAGMA_OFFLOAD("omp parallel for") - for (int iel = first; iel < last; iel++) - DTD_BConds::computeDistancesOffload(pos, source_pos_ptr, num_padded, r_iat_ptr, dr_iat_ptr, - num_padded, iel); - } - } - } - - inline void mw_evaluate(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list) const override - { - assert(this == &dt_list.getLeader()); - auto& dt_leader = dt_list.getCastedLeader(); - - ScopedTimer local_timer(evaluate_timer_); - - const size_t nw 
= dt_list.size(); - DTABMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_; - auto& mw_r_dr = mw_mem.mw_r_dr; - - size_t count_targets = 0; - for (ParticleSet& p : p_list) - count_targets += p.getTotalNum(); - const size_t total_targets = count_targets; - - const int num_padded = getAlignedSize(num_sources_); - -#ifndef NDEBUG - const int stride_size = getPerTargetPctlStrideSize(); - count_targets = 0; - for (size_t iw = 0; iw < dt_list.size(); iw++) - { - auto& dt = dt_list.getCastedElement(iw); - - for (int i = 0; i < dt.targets(); ++i) - { - assert(dt.distances_[i].data() == mw_r_dr.data() + (i + count_targets) * stride_size); - assert(dt.displacements_[i].data() == mw_r_dr.data() + (i + count_targets) * stride_size + num_padded); - } - count_targets += dt.targets(); - } -#endif - - // This is horrible optimization putting different data types in a single buffer but allows a single H2D transfer - const size_t realtype_size = sizeof(RealType); - const size_t int_size = sizeof(int); - const size_t ptr_size = sizeof(RealType*); - auto& offload_input = mw_mem.offload_input; - offload_input.resize(total_targets * D * realtype_size + total_targets * int_size + nw * ptr_size); - auto source_ptrs = reinterpret_cast(offload_input.data()); - auto target_positions = reinterpret_cast(offload_input.data() + ptr_size * nw); - auto walker_id_ptr = - reinterpret_cast(offload_input.data() + ptr_size * nw + total_targets * D * realtype_size); - - count_targets = 0; - for (size_t iw = 0; iw < nw; iw++) - { - auto& dt = dt_list.getCastedElement(iw); - ParticleSet& pset(p_list[iw]); - - assert(dt.targets() == pset.getTotalNum()); - assert(num_sources_ == dt.num_sources_); - - auto& RSoA_OMPTarget = static_cast(dt.origin_.getCoordinates()); - source_ptrs[iw] = const_cast(RSoA_OMPTarget.getDevicePtr()); - - for (size_t iat = 0; iat < pset.getTotalNum(); ++iat, ++count_targets) - { - walker_id_ptr[count_targets] = iw; - for (size_t idim = 0; idim < D; idim++) - 
target_positions[count_targets * D + idim] = pset.R[iat][idim]; - } - } - - // To maximize thread usage, the loop over electrons is chunked. Each chunk is sent to an OpenMP offload thread team. - const int ChunkSizePerTeam = 512; - const size_t num_teams = (num_sources_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - - auto* r_dr_ptr = mw_r_dr.data(); - auto* input_ptr = offload_input.data(); - const int num_sources_local = num_sources_; - - { - ScopedTimer offload(dt_leader.offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(total_targets*num_teams) \ - map(always, to: input_ptr[:offload_input.size()]) \ - depend(out:r_dr_ptr[:mw_r_dr.size()]) nowait") - for (int iat = 0; iat < total_targets; ++iat) - for (int team_id = 0; team_id < num_teams; team_id++) - { - auto* target_pos_ptr = reinterpret_cast(input_ptr + ptr_size * nw); - const int walker_id = - reinterpret_cast(input_ptr + ptr_size * nw + total_targets * D * realtype_size)[iat]; - auto* source_pos_ptr = reinterpret_cast(input_ptr)[walker_id]; - auto* r_iat_ptr = r_dr_ptr + iat * num_padded * (D + 1); - auto* dr_iat_ptr = r_dr_ptr + iat * num_padded * (D + 1) + num_padded; - - const int first = ChunkSizePerTeam * team_id; - const int last = omptarget::min(first + ChunkSizePerTeam, num_sources_local); - - T pos[D]; - for (int idim = 0; idim < D; idim++) - pos[idim] = target_pos_ptr[iat * D + idim]; - - PRAGMA_OFFLOAD("omp parallel for") - for (int iel = first; iel < last; iel++) - DTD_BConds::computeDistancesOffload(pos, source_pos_ptr, num_padded, r_iat_ptr, dr_iat_ptr, - num_padded, iel); - } - - if (!(modes_ & DTModes::MW_EVALUATE_RESULT_NO_TRANSFER_TO_HOST)) - { - PRAGMA_OFFLOAD( - "omp target update from(r_dr_ptr[:mw_r_dr.size()]) depend(inout:r_dr_ptr[:mw_r_dr.size()]) nowait") - } - // wait for computing and (optional) transferring back to host. 
- // It can potentially be moved to ParticleSet to fuse multiple similar taskwait - PRAGMA_OFFLOAD("omp taskwait") - } - } - - inline void mw_recompute(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list, - const std::vector& recompute) const override - { - mw_evaluate(dt_list, p_list); - } - - ///evaluate the temporary pair relations - inline void move(const ParticleSet& P, const PosType& rnew, const IndexType iat, bool prepare_old) override - { - ScopedTimer local_timer(move_timer_); - DTD_BConds::computeDistances(rnew, origin_.getCoordinates().getAllParticlePos(), temp_r_.data(), temp_dr_, - 0, num_sources_); - // If the full table is not ready all the time, overwrite the current value. - // If this step is missing, DT values can be undefined in case a move is rejected. - if (!(modes_ & DTModes::NEED_FULL_TABLE_ANYTIME) && prepare_old) - DTD_BConds::computeDistances(P.R[iat], origin_.getCoordinates().getAllParticlePos(), - distances_[iat].data(), displacements_[iat], 0, num_sources_); - } - - ///update the stripe for jat-th particle - inline void update(IndexType iat) override - { - ScopedTimer local_timer(update_timer_); - std::copy_n(temp_r_.data(), num_sources_, distances_[iat].data()); - for (int idim = 0; idim < D; ++idim) - std::copy_n(temp_dr_.data(idim), num_sources_, displacements_[iat].data(idim)); - } - - int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const override - { - RealType min_dist = std::numeric_limits::max(); - int index = -1; - if (newpos) - { - for (int jat = 0; jat < num_sources_; ++jat) - if (temp_r_[jat] < min_dist) - { - min_dist = temp_r_[jat]; - index = jat; - } - if (index >= 0) - { - r = min_dist; - dr = temp_dr_[index]; - } - } - else - { - for (int jat = 0; jat < num_sources_; ++jat) - if (distances_[iat][jat] < min_dist) - { - min_dist = distances_[iat][jat]; - index = jat; - } - if (index >= 0) - { - r = min_dist; - dr = displacements_[iat][index]; - } - } - assert(index >= 0 && 
index < num_sources_); - return index; - } +using SoaDistanceTableABOMPTarget = SoaDistanceTableABTOMPTarget; -private: - /// timer for offload portion - NewTimer& offload_timer_; - /// timer for evaluate() - NewTimer& evaluate_timer_; - /// timer for move() - NewTimer& move_timer_; - /// timer for update() - NewTimer& update_timer_; -}; } // namespace qmcplusplus #endif diff --git a/src/Particle/VirtualParticleSet.cpp b/src/Particle/VirtualParticleSet.cpp deleted file mode 100644 index b7f03870f3..0000000000 --- a/src/Particle/VirtualParticleSet.cpp +++ /dev/null @@ -1,243 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2021 QMCPACK developers. -// -// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -/** @file VirtualParticleSet.cpp - * A proxy class to the quantum ParticleSet - */ - -#include "Configuration.h" -#include "VirtualParticleSet.h" -#include "Particle/DistanceTable.h" -#include "Particle/createDistanceTable.h" -#include "QMCHamiltonians/NLPPJob.h" -#include "ResourceCollection.h" - -namespace qmcplusplus -{ - -struct VPMultiWalkerMem : public Resource -{ - /// multi walker reference particle - Vector> mw_refPctls; - - VPMultiWalkerMem() : Resource("VPMultiWalkerMem") {} - - VPMultiWalkerMem(const VPMultiWalkerMem&) : VPMultiWalkerMem() {} - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } -}; - -VirtualParticleSet::VirtualParticleSet(const ParticleSet& p, int nptcl, size_t dt_count_limit) - : 
ParticleSet(p.getSimulationCell()) -{ - setName("virtual"); - - //initialize local data structure - setSpinor(p.isSpinor()); - TotalNum = nptcl; - R.resize(nptcl); - if (isSpinor()) - spins.resize(nptcl); - coordinates_->resize(nptcl); - - //create distancetables - assert(dt_count_limit <= p.getNumDistTables()); - if (dt_count_limit == 0) - dt_count_limit = p.getNumDistTables(); - for (int i = 0; i < dt_count_limit; ++i) - if (p.getDistTable(i).getModes() & DTModes::NEED_VP_FULL_TABLE_ON_HOST) - addTable(p.getDistTable(i).get_origin()); - else - addTable(p.getDistTable(i).get_origin(), DTModes::MW_EVALUATE_RESULT_NO_TRANSFER_TO_HOST); -} - -VirtualParticleSet::~VirtualParticleSet() = default; - -Vector>& VirtualParticleSet::getMultiWalkerRefPctls() -{ - return mw_mem_handle_.getResource().mw_refPctls; -} - -const Vector>& VirtualParticleSet::getMultiWalkerRefPctls() const -{ - return mw_mem_handle_.getResource().mw_refPctls; -} - -void VirtualParticleSet::createResource(ResourceCollection& collection) const -{ - collection.addResource(std::make_unique()); - ParticleSet::createResource(collection); -} - -void VirtualParticleSet::acquireResource(ResourceCollection& collection, - const RefVectorWithLeader& vp_list) -{ - auto& vp_leader = vp_list.getLeader(); - vp_leader.mw_mem_handle_ = collection.lendResource(); - - auto p_list = RefVectorWithLeaderParticleSet(vp_list); - ParticleSet::acquireResource(collection, p_list); -} - -void VirtualParticleSet::releaseResource(ResourceCollection& collection, - const RefVectorWithLeader& vp_list) -{ - collection.takebackResource(vp_list.getLeader().mw_mem_handle_); - auto p_list = RefVectorWithLeaderParticleSet(vp_list); - ParticleSet::releaseResource(collection, p_list); -} - -/// move virtual particles to new postions and update distance tables -void VirtualParticleSet::makeMoves(const ParticleSet& refp, - int jel, - const std::vector& deltaV, - bool sphere, - int iat) -{ - if (sphere && iat < 0) - throw std::runtime_error( - 
"VirtualParticleSet::makeMoves is invoked incorrectly, the flag sphere=true requires iat specified!"); - onSphere = sphere; - refPS = refp; - refPtcl = jel; - refSourcePtcl = iat; - assert(R.size() == deltaV.size()); - for (size_t ivp = 0; ivp < R.size(); ivp++) - R[ivp] = refp.R[jel] + deltaV[ivp]; - if (refp.isSpinor()) - for (size_t ivp = 0; ivp < R.size(); ivp++) - spins[ivp] = refp.spins[jel]; //no spin deltas in this API - update(); -} - -/// move virtual particles to new postions and update distance tables -void VirtualParticleSet::makeMovesWithSpin(const ParticleSet& refp, - int jel, - const std::vector& deltaV, - const std::vector& deltaS, - bool sphere, - int iat) -{ - assert(refp.isSpinor()); - if (sphere && iat < 0) - throw std::runtime_error( - "VirtualParticleSet::makeMovesWithSpin is invoked incorrectly, the flag sphere=true requires iat specified!"); - onSphere = sphere; - refPS = refp; - refPtcl = jel; - refSourcePtcl = iat; - assert(R.size() == deltaV.size()); - assert(spins.size() == deltaS.size()); - for (size_t ivp = 0; ivp < R.size(); ivp++) - { - R[ivp] = refp.R[jel] + deltaV[ivp]; - spins[ivp] = refp.spins[jel] + deltaS[ivp]; - } - update(); -} - -void VirtualParticleSet::mw_makeMoves(const RefVectorWithLeader& vp_list, - const RefVectorWithLeader& refp_list, - const RefVector>& deltaV_list, - const RefVector>& joblist, - bool sphere) -{ - auto& vp_leader = vp_list.getLeader(); - vp_leader.onSphere = sphere; - vp_leader.refPS = refp_list.getLeader(); - - const size_t nVPs = countVPs(vp_list); - auto& mw_refPctls = vp_leader.getMultiWalkerRefPctls(); - mw_refPctls.resize(nVPs); - - RefVectorWithLeader p_list(vp_leader); - p_list.reserve(vp_list.size()); - - size_t ivp = 0; - for (int iw = 0; iw < vp_list.size(); iw++) - { - VirtualParticleSet& vp(vp_list[iw]); - const std::vector& deltaV(deltaV_list[iw]); - const NLPPJob& job(joblist[iw]); - - vp.onSphere = sphere; - vp.refPS = refp_list[iw]; - vp.refPtcl = job.electron_id; - vp.refSourcePtcl 
= job.ion_id; - assert(vp.R.size() == deltaV.size()); - for (size_t k = 0; k < vp.R.size(); k++, ivp++) - { - vp.R[k] = refp_list[iw].R[vp.refPtcl] + deltaV[k]; - if (vp_leader.isSpinor()) - vp.spins[k] = refp_list[iw].spins[vp.refPtcl]; //no spin deltas in this API - mw_refPctls[ivp] = vp.refPtcl; - } - p_list.push_back(vp); - } - assert(ivp == nVPs); - - mw_refPctls.updateTo(); - ParticleSet::mw_update(p_list); -} - -void VirtualParticleSet::mw_makeMovesWithSpin(const RefVectorWithLeader& vp_list, - const RefVectorWithLeader& refp_list, - const RefVector>& deltaV_list, - const RefVector>& deltaS_list, - const RefVector>& joblist, - bool sphere) -{ - auto& vp_leader = vp_list.getLeader(); - if (!vp_leader.isSpinor()) - throw std::runtime_error( - "VirtualParticleSet::mw_makeMovesWithSpin should not be called if particle sets aren't spionor types"); - vp_leader.onSphere = sphere; - vp_leader.refPS = refp_list.getLeader(); - - const size_t nVPs = countVPs(vp_list); - auto& mw_refPctls = vp_leader.getMultiWalkerRefPctls(); - mw_refPctls.resize(nVPs); - - RefVectorWithLeader p_list(vp_leader); - p_list.reserve(vp_list.size()); - - size_t ivp = 0; - for (int iw = 0; iw < vp_list.size(); iw++) - { - VirtualParticleSet& vp(vp_list[iw]); - const std::vector& deltaV(deltaV_list[iw]); - const std::vector& deltaS(deltaS_list[iw]); - const NLPPJob& job(joblist[iw]); - - vp.onSphere = sphere; - vp.refPS = refp_list[iw]; - vp.refPtcl = job.electron_id; - vp.refSourcePtcl = job.ion_id; - assert(vp.R.size() == deltaV.size()); - assert(vp.spins.size() == deltaS.size()); - assert(vp.R.size() == vp.spins.size()); - for (size_t k = 0; k < vp.R.size(); k++, ivp++) - { - vp.R[k] = refp_list[iw].R[vp.refPtcl] + deltaV[k]; - vp.spins[k] = refp_list[iw].spins[vp.refPtcl] + deltaS[k]; - mw_refPctls[ivp] = vp.refPtcl; - } - p_list.push_back(vp); - } - assert(ivp == nVPs); - - mw_refPctls.updateTo(); - ParticleSet::mw_update(p_list); -} - -} // namespace qmcplusplus diff --git 
a/src/Particle/VirtualParticleSet.h b/src/Particle/VirtualParticleSet.h index 670f5902d2..552534efb0 100644 --- a/src/Particle/VirtualParticleSet.h +++ b/src/Particle/VirtualParticleSet.h @@ -18,136 +18,11 @@ #define QMCPLUSPLUS_VIRTUAL_PARTICLESET_H #include "Configuration.h" -#include "Particle/ParticleSet.h" -#include -#include "OMPTarget/OffloadAlignedAllocators.hpp" +#include "VirtualParticleSetT.h" namespace qmcplusplus { -// forward declaration. -class NonLocalECPComponent; -template -struct NLPPJob; -struct VPMultiWalkerMem; +using VirtualParticleSet = VirtualParticleSetT; -/** A ParticleSet that handles virtual moves of a selected particle of a given physical ParticleSet - * Virtual moves are defined as moves being proposed but will never be accepted. - * VirtualParticleSet is introduced to avoid changing any internal states of the physical ParticleSet. - * For this reason, the physical ParticleSet is always marked const. - * It is heavily used by non-local PP evaluations. - */ -class VirtualParticleSet : public ParticleSet -{ -private: - /// true, if virtual particles are on a sphere for NLPP - bool onSphere; - /// multi walker resource - ResourceHandle mw_mem_handle_; - - Vector>& getMultiWalkerRefPctls(); - - /// ParticleSet this object refers to after makeMoves - std::optional> refPS; - -public: - /// Reference particle - int refPtcl; - /// Reference source particle, used when onSphere=true - int refSourcePtcl; - - /// ParticleSet this object refers to - const ParticleSet& getRefPS() const { return refPS.value(); } - - inline bool isOnSphere() const { return onSphere; } - - const Vector>& getMultiWalkerRefPctls() const; - - /** constructor - * @param p ParticleSet whose virtual moves are handled by this object - * @param nptcl number of virtual particles - * @param dt_count_limit distance tables corresepond to [0, dt_count_limit) of the reference particle set are created - */ - VirtualParticleSet(const ParticleSet& p, int nptcl, size_t dt_count_limit = 
0); - - ~VirtualParticleSet(); - - /// initialize a shared resource and hand it to a collection - void createResource(ResourceCollection& collection) const; - /** acquire external resource and assocaite it with the list of ParticleSet - * Note: use RAII ResourceCollectionTeamLock whenever possible - */ - static void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& vp_list); - /** release external resource - * Note: use RAII ResourceCollectionTeamLock whenever possible - */ - static void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& vp_list); - - /** move virtual particles to new postions and update distance tables - * @param refp reference particle set - * @param jel reference particle that all the VP moves from - * @param deltaV Position delta for virtual moves. - * @param sphere set true if VP are on a sphere around the reference source particle - * @param iat reference source particle - */ - void makeMoves(const ParticleSet& refp, - int jel, - const std::vector& deltaV, - bool sphere = false, - int iat = -1); - - /** move virtual particles to new postions and update distance tables - * @param refp reference particle set - * @param jel reference particle that all the VP moves from - * @param deltaV Position delta for virtual moves. - * @param deltaS Spin delta for virtual moves. 
- * @param sphere set true if VP are on a sphere around the reference source particle - * @param iat reference source particle - */ - void makeMovesWithSpin(const ParticleSet& refp, - int jel, - const std::vector& deltaV, - const std::vector& deltaS, - bool sphere = false, - int iat = -1); - - static void mw_makeMoves(const RefVectorWithLeader& vp_list, - const RefVectorWithLeader& p_list, - const RefVector>& deltaV_list, - const RefVector>& joblist, - bool sphere); - - static void mw_makeMovesWithSpin(const RefVectorWithLeader& vp_list, - const RefVectorWithLeader& p_list, - const RefVector>& deltaV_list, - const RefVector>& deltaS_list, - const RefVector>& joblist, - bool sphere); - - static RefVectorWithLeader RefVectorWithLeaderParticleSet( - const RefVectorWithLeader& vp_list) - { - RefVectorWithLeader ref_list(vp_list.getLeader()); - ref_list.reserve(ref_list.size()); - for (VirtualParticleSet& vp : vp_list) - ref_list.push_back(vp); - return ref_list; - } - - static size_t countVPs(const RefVectorWithLeader& vp_list) - { - size_t nVPs = 0; - for (const VirtualParticleSet& vp : vp_list) - nVPs += vp.getTotalNum(); - return nVPs; - } - - static size_t countVPs(const RefVectorWithLeader& vp_list) - { - size_t nVPs = 0; - for (const VirtualParticleSet& vp : vp_list) - nVPs += vp.getTotalNum(); - return nVPs; - } -}; } // namespace qmcplusplus #endif diff --git a/src/Particle/VirtualParticleSetT.cpp b/src/Particle/VirtualParticleSetT.cpp index 1f896405fc..e208c7f8db 100644 --- a/src/Particle/VirtualParticleSetT.cpp +++ b/src/Particle/VirtualParticleSetT.cpp @@ -265,8 +265,11 @@ VirtualParticleSetT::mw_makeMovesWithSpin( ParticleSetT::mw_update(p_list); } +#ifndef QMC_COMPLEX template class VirtualParticleSetT; template class VirtualParticleSetT; +#else template class VirtualParticleSetT>; template class VirtualParticleSetT>; +#endif } // namespace qmcplusplus diff --git a/src/Particle/VirtualParticleSetT.h b/src/Particle/VirtualParticleSetT.h index 
83e4d5aa57..97f8b62e36 100644 --- a/src/Particle/VirtualParticleSetT.h +++ b/src/Particle/VirtualParticleSetT.h @@ -16,7 +16,7 @@ #define QMCPLUSPLUS_VIRTUAL_PARTICLESETT_H #include "OMPTarget/OffloadAlignedAllocators.hpp" -#include "Particle/ParticleSetT.h" +#include "Particle/ParticleSet.h" #include namespace qmcplusplus diff --git a/src/Particle/WalkerConfigurations.cpp b/src/Particle/WalkerConfigurations.cpp deleted file mode 100644 index a3959d1610..0000000000 --- a/src/Particle/WalkerConfigurations.cpp +++ /dev/null @@ -1,149 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2020 QMCPACK developers. -// -// File developed by: Jordan E. Vincent, University of Illinois at Urbana-Champaign -// Bryan Clark, bclark@Princeton.edu, Princeton University -// Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Cynthia Gu, zg1@ornl.gov, Oak Ridge National Laboratory -// Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory -////////////////////////////////////////////////////////////////////////////////////// - - -#include "WalkerConfigurations.h" -#include -#include "Utilities/IteratorUtility.h" - -namespace qmcplusplus -{ -WalkerConfigurations::WalkerConfigurations() = default; - -///default destructor -WalkerConfigurations::~WalkerConfigurations() { destroyWalkers(walker_list_.begin(), walker_list_.end()); } - - -void WalkerConfigurations::createWalkers(int n, size_t numPtcls) -{ - if (walker_list_.empty()) - { - while (n) - { - walker_list_.push_back(std::make_unique(numPtcls)); - --n; - } - } - else - { - if (walker_list_.size() >= n) - { - int iw = walker_list_.size(); //copy from the back - for (int i = 0; i < n; ++i) - { - walker_list_.push_back(std::make_unique(*walker_list_[--iw])); - } - } - else - { - int nc = n / walker_list_.size(); - int nw0 = walker_list_.size(); - for (int iw = 0; iw < nw0; ++iw) - { - for (int ic = 0; ic < nc; ++ic) - walker_list_.push_back(std::make_unique(*walker_list_[iw])); - } - n -= nc * nw0; - while (n > 0) - { - walker_list_.push_back(std::make_unique(*walker_list_[--nw0])); - --n; - } - } - } -} - - -void WalkerConfigurations::resize(int numWalkers, size_t numPtcls) -{ - int dn = numWalkers - walker_list_.size(); - if (dn > 0) - createWalkers(dn, numPtcls); - if (dn < 0) - { - int nw = -dn; - if (nw < walker_list_.size()) - { - walker_list_.erase(walker_list_.begin(), walker_list_.begin() - dn); - } - } -} - -///returns the next valid iterator -WalkerConfigurations::iterator WalkerConfigurations::destroyWalkers(iterator first, iterator last) -{ - return walker_list_.erase(first, last); -} - -void WalkerConfigurations::createWalkers(iterator first, iterator last) -{ - destroyWalkers(walker_list_.begin(), walker_list_.end()); - while (first != last) - { - walker_list_.push_back(std::make_unique(**first)); - 
++first; - } -} - -void WalkerConfigurations::destroyWalkers(int nw) -{ - if (nw > walker_list_.size()) - { - app_warning() << " Cannot remove walkers. Current Walkers = " << walker_list_.size() << std::endl; - return; - } - nw = walker_list_.size() - nw; - int iw = nw; - walker_list_.erase(walker_list_.begin() + nw, walker_list_.end()); -} - -void WalkerConfigurations::copyWalkers(iterator first, iterator last, iterator it) -{ - while (first != last) - { - (*it++)->makeCopy(**first++); - } -} - -/** Make Metropolis move to the walkers and save in a temporary array. - * @param it the iterator of the first walker to work on - * @param tauinv inverse of the time step - * - * R + D + X - */ -void WalkerConfigurations::reset() -{ - for (auto& walker : walker_list_) - { - walker->Weight = 1.0; - walker->Multiplicity = 1.0; - } -} - -void WalkerConfigurations::putConfigurations(Walker_t::RealType* target, QMCTraits::FullPrecRealType* weights) const -{ - for (const auto& walker : walker_list_) - { - std::copy(get_first_address(walker->R), get_last_address(walker->R), target); - target += get_last_address(walker->R) - get_first_address(walker->R); - *weights = walker->Weight; - ++weights; - } -} - -} // namespace qmcplusplus diff --git a/src/Particle/WalkerConfigurations.h b/src/Particle/WalkerConfigurations.h index 1d150a2232..da0fe17853 100644 --- a/src/Particle/WalkerConfigurations.h +++ b/src/Particle/WalkerConfigurations.h @@ -23,168 +23,10 @@ #ifndef QMCPLUSPLUS_WALKERCONFIGURATIONS_H #define QMCPLUSPLUS_WALKERCONFIGURATIONS_H #include "Configuration.h" -#include "Particle/Walker.h" -#include "Utilities/IteratorUtility.h" +#include "Particle/WalkerConfigurationsT.h" namespace qmcplusplus { -/** Monte Carlo Data of an ensemble - * - * The quantities are shared by all the nodes in a group - * - NumSamples number of samples - * - Weight total weight of a sample - * - Energy average energy of a sample - * - Variance variance - * - LivingFraction fraction of walkers alive 
each step. - */ -template -struct MCDataType -{ - T NumSamples; - T RNSamples; - T Weight; - T Energy; - T AlternateEnergy; - T Variance; - T R2Accepted; - T R2Proposed; - T LivingFraction; -}; - -/** A set of light weight walkers that are carried between driver sections and restart - */ -class WalkerConfigurations -{ -public: - /// walker type - using Walker_t = Walker; - using FullPrecRealType = QMCTraits::FullPrecRealType; - ///container type of Walkers - using walker_list__t = std::vector>; - /// FIX: a type alias of iterator for an object should not be for just one of many objects it holds. - using iterator = walker_list__t::iterator; - ///const_iterator of Walker container - using const_iterator = walker_list__t::const_iterator; - - MCDataType EnsembleProperty; - - WalkerConfigurations(); - ~WalkerConfigurations(); - WalkerConfigurations(const WalkerConfigurations&) = delete; - WalkerConfigurations& operator=(const WalkerConfigurations&) = delete; - WalkerConfigurations(WalkerConfigurations&&) = default; - WalkerConfigurations& operator=(WalkerConfigurations&&) = default; - - /** create numWalkers Walkers - * - * Append Walkers to walker_list_. - */ - void createWalkers(int numWalkers, size_t numPtcls); - /** create walkers - * @param first walker iterator - * @param last walker iterator - */ - void createWalkers(iterator first, iterator last); - /** copy walkers - * @param first input walker iterator - * @param last input walker iterator - * @param start first target iterator - * - * No memory allocation is allowed. 
- */ - void copyWalkers(iterator first, iterator last, iterator start); - - /** destroy Walkers from itstart to itend - *@param first starting iterator of the walkers - *@param last ending iterator of the walkers - */ - iterator destroyWalkers(iterator first, iterator last); - - /** destroy Walkers - *@param nw number of walkers to be destroyed - */ - void destroyWalkers(int nw); - - ///clean up the walker list and make a new list - void resize(int numWalkers, size_t numPtcls); - - ///return the number of active walkers - inline size_t getActiveWalkers() const { return walker_list_.size(); } - ///return the total number of active walkers among a MPI group - inline size_t getGlobalNumWalkers() const { return walker_offsets_.empty() ? 0 : walker_offsets_.back(); } - ///return the total number of active walkers among a MPI group - - inline void setWalkerOffsets(const std::vector& o) { walker_offsets_ = o; } - inline const std::vector& getWalkerOffsets() const { return walker_offsets_; } - - /// return the first iterator - inline iterator begin() { return walker_list_.begin(); } - /// return the last iterator, [begin(), end()) - inline iterator end() { return walker_list_.end(); } - - /// return the first const_iterator - inline const_iterator begin() const { return walker_list_.begin(); } - - /// return the last const_iterator [begin(), end()) - inline const_iterator end() const { return walker_list_.end(); } - /**@}*/ - - /** clear the walker_list_ without destroying them - * - * Provide std::vector::clear interface - */ - inline void clear() { walker_list_.clear(); } - - /** insert elements - * @param it locator where the inserting begins - * @param first starting iterator - * @param last ending iterator - * - * Provide std::vector::insert interface - */ - template - inline void insert(iterator it, INPUT_ITER first, INPUT_ITER last) - { - walker_list_.insert(it, first, last); - } - - /** add Walker_t* at the end - * @param awalker pointer to a walker - * - * Provide 
std::vector::push_back interface - */ - inline void push_back(std::unique_ptr awalker) { walker_list_.push_back(std::move(awalker)); } - - /** delete the last Walker_t* - * - * Provide std::vector::pop_back interface - */ - inline void pop_back() { walker_list_.pop_back(); } - - inline Walker_t* operator[](int i) { return walker_list_[i].get(); } - - inline const Walker_t* operator[](int i) const { return walker_list_[i].get(); } - - /** reset the Walkers - */ - void reset(); - - ///save the particle positions of all the walkers into target - void putConfigurations(Walker_t::RealType* target, QMCTraits::FullPrecRealType* weights) const; - -protected: - ///a collection of walkers - walker_list__t walker_list_; - -private: - /** starting index of the walkers in a processor group - * - * walker_offsets_[0]=0 and walker_offsets_[walker_offsets_.size()-1]=total number of walkers in a group - * walker_offsets_[processorid+1]-walker_offsets_[processorid] is equal to the number of walkers on a processor, - * i.e., W.getActiveWalkers(). - * walker_offsets_ is added to handle parallel I/O with hdf5 - */ - std::vector walker_offsets_; -}; +using WalkerConfigurations = WalkerConfigurationsT; } // namespace qmcplusplus #endif diff --git a/src/Particle/WalkerConfigurationsT.cpp b/src/Particle/WalkerConfigurationsT.cpp new file mode 100644 index 0000000000..bf5642e2c3 --- /dev/null +++ b/src/Particle/WalkerConfigurationsT.cpp @@ -0,0 +1,170 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. +// +// File developed by: Jordan E. 
Vincent, University of Illinois at +// Urbana-Champaign +// Bryan Clark, bclark@Princeton.edu, Princeton University +// Ken Esler, kpesler@gmail.com, University of Illinois at +// Urbana-Champaign Jeremy McMinnis, jmcminis@gmail.com, +// University of Illinois at Urbana-Champaign Jeongnim Kim, +// jeongnim.kim@gmail.com, University of Illinois at +// Urbana-Champaign Cynthia Gu, zg1@ornl.gov, Oak Ridge +// National Laboratory Ye Luo, yeluo@anl.gov, Argonne +// National Laboratory Mark A. Berrill, berrillma@ornl.gov, +// Oak Ridge National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#include "WalkerConfigurationsT.h" + +#include "Utilities/IteratorUtility.h" +#include "Platforms/Host/OutputManager.h" + +#include + +namespace qmcplusplus +{ +template +WalkerConfigurationsT::WalkerConfigurationsT() = default; + +/// default destructor +template +WalkerConfigurationsT::~WalkerConfigurationsT() +{ + destroyWalkers(walker_list_.begin(), walker_list_.end()); +} + +template +void +WalkerConfigurationsT::createWalkers(int n, size_t numPtcls) +{ + if (walker_list_.empty()) { + while (n) { + walker_list_.push_back(std::make_unique(numPtcls)); + --n; + } + } + else { + if (walker_list_.size() >= n) { + int iw = walker_list_.size(); // copy from the back + for (int i = 0; i < n; ++i) { + walker_list_.push_back( + std::make_unique(*walker_list_[--iw])); + } + } + else { + int nc = n / walker_list_.size(); + int nw0 = walker_list_.size(); + for (int iw = 0; iw < nw0; ++iw) { + for (int ic = 0; ic < nc; ++ic) + walker_list_.push_back( + std::make_unique(*walker_list_[iw])); + } + n -= nc * nw0; + while (n > 0) { + walker_list_.push_back( + std::make_unique(*walker_list_[--nw0])); + --n; + } + } + } +} + +template +void +WalkerConfigurationsT::resize(int numWalkers, size_t numPtcls) +{ + int dn = numWalkers - walker_list_.size(); + if (dn > 0) + 
createWalkers(dn, numPtcls); + if (dn < 0) { + int nw = -dn; + if (nw < walker_list_.size()) { + walker_list_.erase(walker_list_.begin(), walker_list_.begin() - dn); + } + } +} + +/// returns the next valid iterator +template +typename WalkerConfigurationsT::iterator +WalkerConfigurationsT::destroyWalkers(iterator first, iterator last) +{ + return walker_list_.erase(first, last); +} + +template +void +WalkerConfigurationsT::createWalkers(iterator first, iterator last) +{ + destroyWalkers(walker_list_.begin(), walker_list_.end()); + while (first != last) { + walker_list_.push_back(std::make_unique(**first)); + ++first; + } +} + +template +void +WalkerConfigurationsT::destroyWalkers(int nw) +{ + if (nw > walker_list_.size()) { + app_warning() << " Cannot remove walkers. Current Walkers = " + << walker_list_.size() << std::endl; + return; + } + nw = walker_list_.size() - nw; + int iw = nw; + walker_list_.erase(walker_list_.begin() + nw, walker_list_.end()); +} + +template +void +WalkerConfigurationsT::copyWalkers( + iterator first, iterator last, iterator it) +{ + while (first != last) { + (*it++)->makeCopy(**first++); + } +} + +/** Make Metropolis move to the walkers and save in a temporary array. 
+ * @param it the iterator of the first walker to work on + * @param tauinv inverse of the time step + * + * R + D + X + */ +template +void +WalkerConfigurationsT::reset() +{ + for (auto& walker : walker_list_) { + walker->Weight = 1.0; + walker->Multiplicity = 1.0; + } +} + +template +void +WalkerConfigurationsT::putConfigurations( + RealType* target, FullPrecRealType* weights) const +{ + for (const auto& walker : walker_list_) { + std::copy( + get_first_address(walker->R), get_last_address(walker->R), target); + target += get_last_address(walker->R) - get_first_address(walker->R); + *weights = walker->Weight; + ++weights; + } +} + +template class WalkerConfigurationsT; +template class WalkerConfigurationsT; +template class WalkerConfigurationsT>; +template class WalkerConfigurationsT>; + +} // namespace qmcplusplus diff --git a/src/Particle/WalkerConfigurationsT.h b/src/Particle/WalkerConfigurationsT.h new file mode 100644 index 0000000000..7b9cae36d3 --- /dev/null +++ b/src/Particle/WalkerConfigurationsT.h @@ -0,0 +1,258 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. +// +// File developed by: Jordan E. Vincent, University of Illinois at +// Urbana-Champaign +// Ken Esler, kpesler@gmail.com, University of Illinois at +// Urbana-Champaign Jeremy McMinnis, jmcminis@gmail.com, +// University of Illinois at Urbana-Champaign Jeongnim Kim, +// jeongnim.kim@gmail.com, University of Illinois at +// Urbana-Champaign Cynthia Gu, zg1@ornl.gov, Oak Ridge +// National Laboratory Raymond Clay III, +// j.k.rofling@gmail.com, Lawrence Livermore National +// Laboratory Ye Luo, yeluo@anl.gov, Argonne National +// Laboratory Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge +// National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_WALKERCONFIGURATIONST_H +#define QMCPLUSPLUS_WALKERCONFIGURATIONST_H +#include "Particle/ParticleSetTraits.h" +#include "Particle/Walker.h" +#include "Utilities/IteratorUtility.h" + +namespace qmcplusplus +{ +/** Monte Carlo Data of an ensemble + * + * The quantities are shared by all the nodes in a group + * - NumSamples number of samples + * - Weight total weight of a sample + * - Energy average energy of a sample + * - Variance variance + * - LivingFraction fraction of walkers alive each step. + */ +template +struct MCDataType +{ + T NumSamples; + T RNSamples; + T Weight; + T Energy; + T AlternateEnergy; + T Variance; + T R2Accepted; + T R2Proposed; + T LivingFraction; +}; + +/** A set of light weight walkers that are carried between driver sections and + * restart + */ +template +class WalkerConfigurationsT +{ +public: + /// walker type + using Walker_t = Walker, LatticeParticleTraits>; + using RealType = typename Walker_t::RealType; + using FullPrecRealType = typename ParticleSetTraits::FullPrecRealType; + /// container type of Walkers + using walker_list__t = std::vector>; + /// FIX: a type alias of iterator for an object should not be for just one + /// of many objects it holds. 
+ using iterator = typename walker_list__t::iterator; + /// const_iterator of Walker container + using const_iterator = typename walker_list__t::const_iterator; + + MCDataType EnsembleProperty; + + WalkerConfigurationsT(); + ~WalkerConfigurationsT(); + WalkerConfigurationsT(const WalkerConfigurationsT&) = delete; + WalkerConfigurationsT& + operator=(const WalkerConfigurationsT&) = delete; + WalkerConfigurationsT(WalkerConfigurationsT&&) = default; + WalkerConfigurationsT& + operator=(WalkerConfigurationsT&&) = default; + + /** create numWalkers Walkers + * + * Append Walkers to walker_list_. + */ + void + createWalkers(int numWalkers, size_t numPtcls); + /** create walkers + * @param first walker iterator + * @param last walker iterator + */ + void + createWalkers(iterator first, iterator last); + /** copy walkers + * @param first input walker iterator + * @param last input walker iterator + * @param start first target iterator + * + * No memory allocation is allowed. + */ + void + copyWalkers(iterator first, iterator last, iterator start); + + /** destroy Walkers from itstart to itend + *@param first starting iterator of the walkers + *@param last ending iterator of the walkers + */ + iterator + destroyWalkers(iterator first, iterator last); + + /** destroy Walkers + *@param nw number of walkers to be destroyed + */ + void + destroyWalkers(int nw); + + /// clean up the walker list and make a new list + void + resize(int numWalkers, size_t numPtcls); + + /// return the number of active walkers + inline size_t + getActiveWalkers() const + { + return walker_list_.size(); + } + /// return the total number of active walkers among a MPI group + inline size_t + getGlobalNumWalkers() const + { + return walker_offsets_.empty() ? 
0 : walker_offsets_.back(); + } + /// return the total number of active walkers among a MPI group + + inline void + setWalkerOffsets(const std::vector& o) + { + walker_offsets_ = o; + } + inline const std::vector& + getWalkerOffsets() const + { + return walker_offsets_; + } + + /// return the first iterator + inline iterator + begin() + { + return walker_list_.begin(); + } + /// return the last iterator, [begin(), end()) + inline iterator + end() + { + return walker_list_.end(); + } + + /// return the first const_iterator + inline const_iterator + begin() const + { + return walker_list_.begin(); + } + + /// return the last const_iterator [begin(), end()) + inline const_iterator + end() const + { + return walker_list_.end(); + } + /**@}*/ + + /** clear the walker_list_ without destroying them + * + * Provide std::vector::clear interface + */ + inline void + clear() + { + walker_list_.clear(); + } + + /** insert elements + * @param it locator where the inserting begins + * @param first starting iterator + * @param last ending iterator + * + * Provide std::vector::insert interface + */ + template + inline void + insert(iterator it, INPUT_ITER first, INPUT_ITER last) + { + walker_list_.insert(it, first, last); + } + + /** add Walker_t* at the end + * @param awalker pointer to a walker + * + * Provide std::vector::push_back interface + */ + inline void + push_back(std::unique_ptr awalker) + { + walker_list_.push_back(std::move(awalker)); + } + + /** delete the last Walker_t* + * + * Provide std::vector::pop_back interface + */ + inline void + pop_back() + { + walker_list_.pop_back(); + } + + inline Walker_t* + operator[](int i) + { + return walker_list_[i].get(); + } + + inline const Walker_t* + operator[](int i) const + { + return walker_list_[i].get(); + } + + /** reset the Walkers + */ + void + reset(); + + /// save the particle positions of all the walkers into target + void + putConfigurations(RealType* target, FullPrecRealType* weights) const; + +protected: + /// 
a collection of walkers + walker_list__t walker_list_; + +private: + /** starting index of the walkers in a processor group + * + * walker_offsets_[0]=0 and walker_offsets_[walker_offsets_.size()-1]=total + * number of walkers in a group + * walker_offsets_[processorid+1]-walker_offsets_[processorid] is equal to + * the number of walkers on a processor, i.e., W.getActiveWalkers(). + * walker_offsets_ is added to handle parallel I/O with hdf5 + */ + std::vector walker_offsets_; +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/createDistanceTable.h b/src/Particle/createDistanceTable.h index 45d305f670..6b8b5dc97f 100644 --- a/src/Particle/createDistanceTable.h +++ b/src/Particle/createDistanceTable.h @@ -14,61 +14,6 @@ #ifndef QMCPLUSPLUS_DISTANCETABLE_H #define QMCPLUSPLUS_DISTANCETABLE_H -#include "Particle/ParticleSet.h" +#include "Particle/createDistanceTableT.h" -namespace qmcplusplus -{ -/** Class to manage multiple DistanceTable objects. - * - * \date 2008-09-19 - * static data members are removed. DistanceTable::add functions - * are kept for compatibility only. New codes should use a member function - * of ParticleSet to add a distance table - * int ParticleSet::addTable(const ParticleSet& source) - * - * \deprecated There is only one instance of the data memebers of - * DistanceTable in an application and the data are shared by many objects. - * Note that static data members and functions are used - * (based on singleton and factory patterns). - *\todo DistanceTable should work as a factory, as well, to instantiate DistanceTable - * subject to different boundary conditions. - * Lattice/CrystalLattice.h and Lattice/CrystalLattice.cpp can be owned by DistanceTable - * to generically control the crystalline structure. 
- */ - -///free function to create a distable table of s-s -std::unique_ptr createDistanceTableAA(ParticleSet& s, std::ostream& description); -std::unique_ptr createDistanceTableAAOMPTarget(ParticleSet& s, std::ostream& description); - -inline std::unique_ptr createDistanceTable(ParticleSet& s, std::ostream& description) -{ - // during P-by-P move, the cost of single particle evaluation of distance tables - // is determined by the number of source particles. - // Thus the implementation selection is determined by the source particle set. - if (s.getCoordinates().getKind() == DynamicCoordinateKind::DC_POS_OFFLOAD) - return createDistanceTableAAOMPTarget(s, description); - else - return createDistanceTableAA(s, description); -} - -///free function create a distable table of s-t -std::unique_ptr createDistanceTableAB(const ParticleSet& s, ParticleSet& t, std::ostream& description); -std::unique_ptr createDistanceTableABOMPTarget(const ParticleSet& s, - ParticleSet& t, - std::ostream& description); - -inline std::unique_ptr createDistanceTable(const ParticleSet& s, - ParticleSet& t, - std::ostream& description) -{ - // during P-by-P move, the cost of single particle evaluation of distance tables - // is determined by the number of source particles. - // Thus the implementation selection is determined by the source particle set. - if (s.getCoordinates().getKind() == DynamicCoordinateKind::DC_POS_OFFLOAD) - return createDistanceTableABOMPTarget(s, t, description); - else - return createDistanceTableAB(s, t, description); -} - -} // namespace qmcplusplus #endif diff --git a/src/Particle/createDistanceTableAA.cpp b/src/Particle/createDistanceTableAA.cpp deleted file mode 100644 index fccbdd9ff5..0000000000 --- a/src/Particle/createDistanceTableAA.cpp +++ /dev/null @@ -1,98 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. 
-// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "Particle/createDistanceTable.h" -#include "Particle/DistanceTable.h" -#include "Particle/SoaDistanceTableAA.h" - -namespace qmcplusplus -{ -/** Adding SymmetricDTD to the list, e.g., el-el distance table - *\param s source/target particle set - *\return index of the distance table with the name - */ -std::unique_ptr createDistanceTableAA(ParticleSet& s, std::ostream& description) -{ - using RealType = OHMMS_PRECISION; - enum - { - DIM = OHMMS_DIM - }; - const int sc = s.getLattice().SuperCellEnum; - std::unique_ptr dt; - std::ostringstream o; - o << " Distance table for similar particles (A-A):" << std::endl; - o << " source/target: " << s.getName() << std::endl; - o << " Using structure-of-arrays (SoA) data layout" << std::endl; - - if (sc == SUPERCELL_BULK) - { - if (s.getLattice().DiagonalOnly) - { - o << " Distance computations use orthorhombic periodic cell in 3D." << std::endl; - dt = std::make_unique>(s); - } - else - { - if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) - { - o << " Distance computations use general periodic cell in 3D with corner image checks." << std::endl; - dt = std::make_unique>(s); - } - else - { - o << " Distance computations use general periodic cell in 3D without corner image checks." 
<< std::endl; - dt = std::make_unique>(s); - } - } - } - else if (sc == SUPERCELL_SLAB) - { - if (s.getLattice().DiagonalOnly) - { - o << " Distance computations use orthorhombic code for periodic cell in 2D." << std::endl; - dt = std::make_unique>(s); - } - else - { - if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) - { - o << " Distance computations use general periodic cell in 2D with corner image checks." << std::endl; - dt = std::make_unique>(s); - } - else - { - o << " Distance computations use general periodic cell in 2D without corner image checks." << std::endl; - dt = std::make_unique>(s); - } - } - } - else if (sc == SUPERCELL_WIRE) - { - o << " Distance computations use periodic cell in one dimension." << std::endl; - dt = std::make_unique>(s); - } - else //open boundary condition - { - o << " Distance computations use open boundary conditions in 3D." << std::endl; - dt = std::make_unique>(s); - } - - description << o.str() << std::endl; - return dt; -} - -} //namespace qmcplusplus diff --git a/src/Particle/createDistanceTableAAOMPTarget.cpp b/src/Particle/createDistanceTableAAOMPTarget.cpp deleted file mode 100644 index 14680e07d3..0000000000 --- a/src/Particle/createDistanceTableAAOMPTarget.cpp +++ /dev/null @@ -1,98 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "Particle/createDistanceTable.h" -#include "Particle/DistanceTable.h" -#include "Particle/SoaDistanceTableAAOMPTarget.h" - -namespace qmcplusplus -{ -/** Adding SymmetricDTD to the list, e.g., el-el distance table - *\param s source/target particle set - *\return index of the distance table with the name - */ -std::unique_ptr createDistanceTableAAOMPTarget(ParticleSet& s, std::ostream& description) -{ - using RealType = OHMMS_PRECISION; - enum - { - DIM = OHMMS_DIM - }; - const int sc = s.getLattice().SuperCellEnum; - std::unique_ptr dt; - std::ostringstream o; - o << " Distance table for similar particles (A-A):" << std::endl; - o << " source/target: " << s.getName() << std::endl; - o << " Using structure-of-arrays (SoA) data layout and OpenMP offload" << std::endl; - - if (sc == SUPERCELL_BULK) - { - if (s.getLattice().DiagonalOnly) - { - o << " Distance computations use orthorhombic periodic cell in 3D." << std::endl; - dt = std::make_unique>(s); - } - else - { - if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) - { - o << " Distance computations use general periodic cell in 3D with corner image checks." << std::endl; - dt = std::make_unique>(s); - } - else - { - o << " Distance computations use general periodic cell in 3D without corner image checks." << std::endl; - dt = std::make_unique>(s); - } - } - } - else if (sc == SUPERCELL_SLAB) - { - if (s.getLattice().DiagonalOnly) - { - o << " Distance computations use orthorhombic code for periodic cell in 2D." 
<< std::endl; - dt = std::make_unique>(s); - } - else - { - if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) - { - o << " Distance computations use general periodic cell in 2D with corner image checks." << std::endl; - dt = std::make_unique>(s); - } - else - { - o << " Distance computations use general periodic cell in 2D without corner image checks." << std::endl; - dt = std::make_unique>(s); - } - } - } - else if (sc == SUPERCELL_WIRE) - { - o << " Distance computations use periodic cell in one dimension." << std::endl; - dt = std::make_unique>(s); - } - else //open boundary condition - { - o << " Distance computations use open boundary conditions in 3D." << std::endl; - dt = std::make_unique>(s); - } - - description << o.str() << std::endl; - return dt; -} - -} //namespace qmcplusplus diff --git a/src/Particle/createDistanceTableAB.cpp b/src/Particle/createDistanceTableAB.cpp deleted file mode 100644 index 9bb7a595f7..0000000000 --- a/src/Particle/createDistanceTableAB.cpp +++ /dev/null @@ -1,99 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "Particle/createDistanceTable.h" -#include "Particle/DistanceTable.h" -#include "Particle/SoaDistanceTableAB.h" - -namespace qmcplusplus -{ -/** Adding AsymmetricDTD to the list, e.g., el-el distance table - *\param s source/target particle set - *\return index of the distance table with the name - */ -std::unique_ptr createDistanceTableAB(const ParticleSet& s, ParticleSet& t, std::ostream& description) -{ - using RealType = ParticleSet::RealType; - enum - { - DIM = OHMMS_DIM - }; - const int sc = t.getLattice().SuperCellEnum; - std::unique_ptr dt; - std::ostringstream o; - o << " Distance table for dissimilar particles (A-B):" << std::endl; - o << " source: " << s.getName() << " target: " << t.getName() << std::endl; - o << " Using structure-of-arrays (SoA) data layout" << std::endl; - - if (sc == SUPERCELL_BULK) - { - if (s.getLattice().DiagonalOnly) - { - o << " Distance computations use orthorhombic periodic cell in 3D." << std::endl; - dt = std::make_unique>(s, t); - } - else - { - if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) - { - o << " Distance computations use general periodic cell in 3D with corner image checks." << std::endl; - dt = std::make_unique>(s, t); - } - else - { - o << " Distance computations use general periodic cell in 3D without corner image checks." << std::endl; - dt = std::make_unique>(s, t); - } - } - } - else if (sc == SUPERCELL_SLAB) - { - if (s.getLattice().DiagonalOnly) - { - o << " Distance computations use orthorhombic code for periodic cell in 2D." 
<< std::endl; - dt = std::make_unique>(s, t); - } - else - { - if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) - { - o << " Distance computations use general periodic cell in 2D with corner image checks." << std::endl; - dt = std::make_unique>(s, t); - } - else - { - o << " Distance computations use general periodic cell in 2D without corner image checks." << std::endl; - dt = std::make_unique>(s, t); - } - } - } - else if (sc == SUPERCELL_WIRE) - { - o << " Distance computations use periodic cell in one dimension." << std::endl; - dt = std::make_unique>(s, t); - } - else //open boundary condition - { - o << " Distance computations use open boundary conditions in 3D." << std::endl; - dt = std::make_unique>(s, t); - } - - description << o.str() << std::endl; - return dt; -} - - -} //namespace qmcplusplus diff --git a/src/Particle/createDistanceTableABOMPTarget.cpp b/src/Particle/createDistanceTableABOMPTarget.cpp deleted file mode 100644 index 5da851837f..0000000000 --- a/src/Particle/createDistanceTableABOMPTarget.cpp +++ /dev/null @@ -1,101 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "Particle/createDistanceTable.h" -#include "Particle/DistanceTable.h" -#include "Particle/SoaDistanceTableABOMPTarget.h" - -namespace qmcplusplus -{ -/** Adding AsymmetricDTD to the list, e.g., el-el distance table - *\param s source/target particle set - *\return index of the distance table with the name - */ -std::unique_ptr createDistanceTableABOMPTarget(const ParticleSet& s, - ParticleSet& t, - std::ostream& description) -{ - using RealType = ParticleSet::RealType; - enum - { - DIM = OHMMS_DIM - }; - const int sc = t.getLattice().SuperCellEnum; - std::unique_ptr dt; - std::ostringstream o; - o << " Distance table for dissimilar particles (A-B):" << std::endl; - o << " source: " << s.getName() << " target: " << t.getName() << std::endl; - o << " Using structure-of-arrays (SoA) data layout and OpenMP offload" << std::endl; - - if (sc == SUPERCELL_BULK) - { - if (s.getLattice().DiagonalOnly) - { - o << " Distance computations use orthorhombic periodic cell in 3D." << std::endl; - dt = std::make_unique>(s, t); - } - else - { - if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) - { - o << " Distance computations use general periodic cell in 3D with corner image checks." << std::endl; - dt = std::make_unique>(s, t); - } - else - { - o << " Distance computations use general periodic cell in 3D without corner image checks." << std::endl; - dt = std::make_unique>(s, t); - } - } - } - else if (sc == SUPERCELL_SLAB) - { - if (s.getLattice().DiagonalOnly) - { - o << " Distance computations use orthorhombic code for periodic cell in 2D." 
<< std::endl; - dt = std::make_unique>(s, t); - } - else - { - if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) - { - o << " Distance computations use general periodic cell in 2D with corner image checks." << std::endl; - dt = std::make_unique>(s, t); - } - else - { - o << " Distance computations use general periodic cell in 2D without corner image checks." << std::endl; - dt = std::make_unique>(s, t); - } - } - } - else if (sc == SUPERCELL_WIRE) - { - o << " Distance computations use periodic cell in one dimension." << std::endl; - dt = std::make_unique>(s, t); - } - else //open boundary condition - { - o << " Distance computations use open boundary conditions in 3D." << std::endl; - dt = std::make_unique>(s, t); - } - - description << o.str() << std::endl; - return dt; -} - - -} //namespace qmcplusplus diff --git a/src/Particle/createDistanceTableT.h b/src/Particle/createDistanceTableT.h index 64b81aae1e..1c022e7c5b 100644 --- a/src/Particle/createDistanceTableT.h +++ b/src/Particle/createDistanceTableT.h @@ -16,7 +16,7 @@ #ifndef QMCPLUSPLUS_DISTANCETABLET_H #define QMCPLUSPLUS_DISTANCETABLET_H -#include "Particle/ParticleSetT.h" +#include "Particle/ParticleSet.h" namespace qmcplusplus { diff --git a/src/Particle/tests/CMakeLists.txt b/src/Particle/tests/CMakeLists.txt index 4442e8ed22..bf3ebe04c9 100644 --- a/src/Particle/tests/CMakeLists.txt +++ b/src/Particle/tests/CMakeLists.txt @@ -34,7 +34,8 @@ set(UTEST_NAME deterministic-unit_${UTEST_EXE}) add_executable( ${UTEST_EXE} test_distance_table.cpp - test_SoaDistanceTableAA.cpp) + test_SoaDistanceTableAA.cpp + ) target_link_libraries(${UTEST_EXE} catch_main qmcparticle) if(USE_OBJECT_TARGET) target_link_libraries(${UTEST_EXE} qmcutil qmcparticle_omptarget) diff --git a/src/Particle/tests/test_SoaDistanceTableAA.cpp b/src/Particle/tests/test_SoaDistanceTableAA.cpp index a49bafcfec..320ff68b84 100644 --- a/src/Particle/tests/test_SoaDistanceTableAA.cpp +++ 
b/src/Particle/tests/test_SoaDistanceTableAA.cpp @@ -29,7 +29,7 @@ TEST_CASE("SoaDistanceTableAA compute_size", "[distance_table]") elec.create({6, 4}); // using open BC - SoaDistanceTableAA dt_ee(elec); + SoaDistanceTableAAT dt_ee(elec); const size_t Alignment = getAlignment(); diff --git a/src/Particle/tests/test_particle_pool.cpp b/src/Particle/tests/test_particle_pool.cpp index 71ed80f361..48a0aabc6d 100644 --- a/src/Particle/tests/test_particle_pool.cpp +++ b/src/Particle/tests/test_particle_pool.cpp @@ -16,7 +16,7 @@ #include "Message/Communicate.h" #include "OhmmsData/Libxml2Doc.h" #include "Particle/ParticleSetPool.h" - +#include "Particle/MCWalkerConfiguration.h" #include #include diff --git a/src/QMCDrivers/DMC/DMCFactoryNew.h b/src/QMCDrivers/DMC/DMCFactoryNew.h index 524f3f3a2f..913ea60680 100644 --- a/src/QMCDrivers/DMC/DMCFactoryNew.h +++ b/src/QMCDrivers/DMC/DMCFactoryNew.h @@ -15,10 +15,10 @@ #include "QMCDrivers/QMCDriverInterface.h" #include "QMCWaveFunctions/WaveFunctionPool.h" #include "Message/Communicate.h" +#include "Particle/ParticleSetPool.h" namespace qmcplusplus { -class ParticleSetPool; class HamiltonianPool; class MCPopulation; class ProjectData; diff --git a/src/QMCDrivers/MCPopulation.h b/src/QMCDrivers/MCPopulation.h index b19f043aa7..b66a86bca0 100644 --- a/src/QMCDrivers/MCPopulation.h +++ b/src/QMCDrivers/MCPopulation.h @@ -24,12 +24,7 @@ #include "QMCDrivers/WalkerElementsRef.h" #include "OhmmsPETE/OhmmsVector.h" #include "Utilities/FairDivide.h" - -// forward declaration -namespace optimize -{ -struct VariableSet; -} +#include "QMCWaveFunctions/VariableSet.h" namespace qmcplusplus { @@ -38,13 +33,12 @@ class QMCHamiltonian; class MCPopulation { public: - using MCPWalker = Walker; + using MCPWalker = MCWalkerConfiguration::Walker_t; using WFBuffer = MCPWalker::WFBuffer_t; using RealType = QMCTraits::RealType; using Properties = MCPWalker::PropertyContainer_t; using IndexType = QMCTraits::IndexType; using FullPrecRealType = 
QMCTraits::FullPrecRealType; - using opt_variables_type = optimize::VariableSet; private: // Potential thread safety issue diff --git a/src/QMCDrivers/QMCDriver.h b/src/QMCDrivers/QMCDriver.h index 6f1f6bfdd3..2981558a59 100644 --- a/src/QMCDrivers/QMCDriver.h +++ b/src/QMCDrivers/QMCDriver.h @@ -35,6 +35,8 @@ #include "QMCDrivers/QMCDriverInterface.h" #include "QMCDrivers/GreenFunctionModifiers/DriftModifierBase.h" #include "QMCDrivers/SimpleFixedNodeBranch.h" +#include "Particle/MCWalkerConfiguration.h" + class Communicate; namespace qmcplusplus @@ -59,7 +61,6 @@ namespace qmcplusplus */ //forward declarations: Do not include headers if not needed -class MCWalkerConfiguration; class HDFWalkerOutput; class TraceManager; diff --git a/src/QMCDrivers/QMCDriverFactory.h b/src/QMCDrivers/QMCDriverFactory.h index c1dd191b8f..71560c8245 100644 --- a/src/QMCDrivers/QMCDriverFactory.h +++ b/src/QMCDrivers/QMCDriverFactory.h @@ -26,13 +26,13 @@ #include "QMCDrivers/MCPopulation.h" #include "Particle/ParticleSetPool.h" #include "Estimators/EstimatorManagerInput.h" +#include "Particle/MCWalkerConfiguration.h" class Communicate; namespace qmcplusplus { //forward declaration -class MCWalkerConfiguration; class QMCDriverInterface; class WaveFunctionPool; class HamiltonianPool; diff --git a/src/QMCDrivers/RMC/RMCFactory.h b/src/QMCDrivers/RMC/RMCFactory.h index fa52cfc6e5..90a12417de 100644 --- a/src/QMCDrivers/RMC/RMCFactory.h +++ b/src/QMCDrivers/RMC/RMCFactory.h @@ -14,10 +14,10 @@ #ifndef QMCPLUSPLUS_RMC_FACTORY_H #define QMCPLUSPLUS_RMC_FACTORY_H #include "QMCDrivers/QMCDriver.h" +#include "Particle/ParticleSetPool.h" namespace qmcplusplus { -class ParticleSetPool; class HamiltonianPool; class RMCFactory diff --git a/src/QMCDrivers/VMC/VMCBatched.h b/src/QMCDrivers/VMC/VMCBatched.h index 589e6ee6da..c404510167 100644 --- a/src/QMCDrivers/VMC/VMCBatched.h +++ b/src/QMCDrivers/VMC/VMCBatched.h @@ -18,7 +18,7 @@ #include "QMCDrivers/MCPopulation.h" #include 
"QMCDrivers/ContextForSteps.h" #include "QMCDrivers/GreenFunctionModifiers/DriftModifierBase.h" - +#include "Particle/SampleStack.h" #include "Utilities/Timer.h" namespace qmcplusplus diff --git a/src/QMCDrivers/VMC/VMCFactory.h b/src/QMCDrivers/VMC/VMCFactory.h index 42a277bf63..eca789f202 100644 --- a/src/QMCDrivers/VMC/VMCFactory.h +++ b/src/QMCDrivers/VMC/VMCFactory.h @@ -14,10 +14,10 @@ #ifndef QMCPLUSPLUS_VMC_FACTORY_H #define QMCPLUSPLUS_VMC_FACTORY_H #include "QMCDrivers/QMCDriver.h" +#include "Particle/ParticleSetPool.h" namespace qmcplusplus { -class ParticleSetPool; class HamiltonianPool; class VMCFactory diff --git a/src/QMCDrivers/VMC/VMCFactoryNew.h b/src/QMCDrivers/VMC/VMCFactoryNew.h index 2f79751df7..e1247a5d5a 100644 --- a/src/QMCDrivers/VMC/VMCFactoryNew.h +++ b/src/QMCDrivers/VMC/VMCFactoryNew.h @@ -18,11 +18,11 @@ #include "QMCDrivers/QMCDriverInterface.h" #include "QMCWaveFunctions/WaveFunctionPool.h" #include "Message/Communicate.h" - +#include "Particle/ParticleSetPool.h" +#include "Particle/SampleStack.h" namespace qmcplusplus { -class ParticleSetPool; class HamiltonianPool; class MCPopulation; class ProjectData; diff --git a/src/QMCDrivers/WFOpt/QMCCostFunctionBatched.h b/src/QMCDrivers/WFOpt/QMCCostFunctionBatched.h index b9440a6bfb..305cb180fc 100644 --- a/src/QMCDrivers/WFOpt/QMCCostFunctionBatched.h +++ b/src/QMCDrivers/WFOpt/QMCCostFunctionBatched.h @@ -18,6 +18,7 @@ #include "QMCDrivers/WFOpt/QMCCostFunctionBase.h" #include "QMCDrivers/CloneManager.h" #include "QMCWaveFunctions/OrbitalSetTraits.h" +#include "SampleStack.h" namespace qmcplusplus { diff --git a/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimize.cpp b/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimize.cpp index d7e0dae882..e45b2cebff 100644 --- a/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimize.cpp +++ b/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimize.cpp @@ -28,6 +28,7 @@ #include "CPU/Blasf.h" #include "Numerics/MatrixOperators.h" #include 
"Message/UniformCommunicateError.h" +#include "Particle/SampleStack.h" #include #ifdef HAVE_LMY_ENGINE #include "formic/utils/matrix.h" diff --git a/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.cpp b/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.cpp index 7aaac1e60b..fa4c35012c 100644 --- a/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.cpp +++ b/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.cpp @@ -30,6 +30,8 @@ #include "Numerics/MatrixOperators.h" #include "EstimatorInputDelegates.h" #include "Message/UniformCommunicateError.h" +#include "Particle/SampleStack.h" + #include #ifdef HAVE_LMY_ENGINE #include "formic/utils/matrix.h" diff --git a/src/QMCDrivers/WFOpt/QMCWFOptFactoryNew.h b/src/QMCDrivers/WFOpt/QMCWFOptFactoryNew.h index d78ffb5ff4..010ef554ad 100644 --- a/src/QMCDrivers/WFOpt/QMCWFOptFactoryNew.h +++ b/src/QMCDrivers/WFOpt/QMCWFOptFactoryNew.h @@ -13,6 +13,7 @@ #define QMCPLUSPLUS_WFOPTFACTORYNEW_H #include "QMCDrivers/QMCDriverInterface.h" +#include "Particle/SampleStack.h" class Communicate; @@ -22,7 +23,6 @@ class MCPopulation; class WaveFunctionPool; class QMCHamiltonian; class TrialWaveFunction; -class SampleStack; class QMCFixedSampleLinearOptimizeBatched; class ProjectData; diff --git a/src/QMCDrivers/WalkerElementsRef.h b/src/QMCDrivers/WalkerElementsRef.h index 1a11de623c..d5d35a6bcc 100644 --- a/src/QMCDrivers/WalkerElementsRef.h +++ b/src/QMCDrivers/WalkerElementsRef.h @@ -1,6 +1,6 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2020 QMCPACK developers. 
// @@ -13,6 +13,7 @@ #define QMCPLUSPLUS_WALKERELEMENTSREF_H #include "Configuration.h" +#include "Particle/ParticleSetTraits.h" #include "Particle/Walker.h" namespace qmcplusplus @@ -22,28 +23,35 @@ class TrialWaveFunction; /** type for returning the walker and its elements from MCPopulation * - * have no expectations for the validity of the references in this structure past - * the context it was returned in. It should not be returned by a call in a + * have no expectations for the validity of the references in this structure + * past the context it was returned in. It should not be returned by a call in a * crowd or threaded context. - * + * * @ye-luo's "fat" walker * - * We need this if we want to "copyFrom" the whole fat walker when it comes off the line - * i.e. mpi. Insuring the "fat" walker is valid at the earliest possible point seems - * less likely to end in tears then just calling copyFrom random other places (hopefully) - * in time, in order to not access an invalid walker element. + * We need this if we want to "copyFrom" the whole fat walker when it comes off + * the line i.e. mpi. Insuring the "fat" walker is valid at the earliest + * possible point seems less likely to end in tears then just calling copyFrom + * random other places (hopefully) in time, in order to not access an invalid + * walker element. 
*/ struct WalkerElementsRef { - /** to allow use of emplace back - */ - WalkerElementsRef(Walker& walker_in, ParticleSet& pset_in, TrialWaveFunction& twf_in) : walker(walker_in), pset(pset_in), twf(twf_in) {} -; - Walker& walker; - ParticleSet& pset; - TrialWaveFunction& twf; + using WalkerType = Walker, + LatticeParticleTraits>; + /** to allow use of emplace back + */ + WalkerElementsRef(WalkerType& walker_in, ParticleSet& pset_in, + TrialWaveFunction& twf_in) : + walker(walker_in), + pset(pset_in), + twf(twf_in){}; + + WalkerType& walker; + ParticleSet& pset; + TrialWaveFunction& twf; }; -} +} // namespace qmcplusplus #endif diff --git a/src/QMCDrivers/tests/SetupPools.h b/src/QMCDrivers/tests/SetupPools.h index ed43c7f1e1..88b5e12205 100644 --- a/src/QMCDrivers/tests/SetupPools.h +++ b/src/QMCDrivers/tests/SetupPools.h @@ -15,11 +15,10 @@ #include "Message/Communicate.h" #include "type_traits/template_types.hpp" #include "OhmmsData/Libxml2Doc.h" +#include "Particle/ParticleSetPool.h" namespace qmcplusplus { - -class ParticleSetPool; class WaveFunctionPool; class HamiltonianPool; diff --git a/src/QMCDrivers/tests/WalkerConsumer.h b/src/QMCDrivers/tests/WalkerConsumer.h index 689a2be280..91bc9675bf 100644 --- a/src/QMCDrivers/tests/WalkerConsumer.h +++ b/src/QMCDrivers/tests/WalkerConsumer.h @@ -1,6 +1,6 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2019 QMCPACK developers. 
// @@ -12,10 +12,12 @@ #ifndef QMCPLUSPLUS_WALKERCONSUMER_H #define QMCPLUSPLUS_WALKERCONSUMER_H -#include - +#include "Configuration.h" +#include "Particle/ParticleSetTraits.h" #include "Particle/Walker.h" +#include + namespace qmcplusplus { class ResourceCollection; @@ -29,32 +31,38 @@ namespace testing class WalkerConsumer { public: - std::vector>> walkers; - std::vector> walker_elecs_; - std::vector> walker_twfs_; - std::vector> walker_hamiltonians_; - - void initializeResources(const ResourceCollection& twf_resource) {} - - void addWalker(Walker& walker, - ParticleSet& elecs, - TrialWaveFunction& twf, - QMCHamiltonian& hamiltonian) - { - walkers.push_back(walker); - walker_elecs_.push_back(elecs); - walker_twfs_.push_back(twf); - walker_hamiltonians_.push_back(hamiltonian); - } - - void clearWalkers() - { - // We're clearing the refs to the objects not the referred to objects. - walkers.clear(); - walker_elecs_.clear(); - walker_twfs_.clear(); - walker_hamiltonians_.clear(); - } + using WalkerType = Walker, + LatticeParticleTraits>; + + std::vector> walkers; + std::vector> walker_elecs_; + std::vector> walker_twfs_; + std::vector> walker_hamiltonians_; + + void + initializeResources(const ResourceCollection& twf_resource) + { + } + + void + addWalker(WalkerType& walker, ParticleSet& elecs, TrialWaveFunction& twf, + QMCHamiltonian& hamiltonian) + { + walkers.push_back(walker); + walker_elecs_.push_back(elecs); + walker_twfs_.push_back(twf); + walker_hamiltonians_.push_back(hamiltonian); + } + + void + clearWalkers() + { + // We're clearing the refs to the objects not the referred to objects. 
+ walkers.clear(); + walker_elecs_.clear(); + walker_twfs_.clear(); + walker_hamiltonians_.clear(); + } }; } // namespace testing diff --git a/src/QMCDrivers/tests/test_Crowd.cpp b/src/QMCDrivers/tests/test_Crowd.cpp index 96d5d98a1c..ea854406ce 100644 --- a/src/QMCDrivers/tests/test_Crowd.cpp +++ b/src/QMCDrivers/tests/test_Crowd.cpp @@ -29,7 +29,7 @@ namespace testing class CrowdWithWalkers { public: - using MCPWalker = Walker; + using MCPWalker = Crowd::MCPWalker; EstimatorManagerNew em; UPtr crowd_ptr; diff --git a/src/QMCDrivers/tests/test_DMCBatched.cpp b/src/QMCDrivers/tests/test_DMCBatched.cpp index 1a069efb3b..e846141452 100644 --- a/src/QMCDrivers/tests/test_DMCBatched.cpp +++ b/src/QMCDrivers/tests/test_DMCBatched.cpp @@ -20,6 +20,7 @@ #include "Concurrency/Info.hpp" #include "Concurrency/UtilityFunctions.hpp" #include "Platforms/Host/OutputManager.h" +#include "SampleStack.h" namespace qmcplusplus { diff --git a/src/QMCDrivers/tests/test_SFNBranch.cpp b/src/QMCDrivers/tests/test_SFNBranch.cpp index d2ccdc1d5d..5291e7d36f 100644 --- a/src/QMCDrivers/tests/test_SFNBranch.cpp +++ b/src/QMCDrivers/tests/test_SFNBranch.cpp @@ -26,7 +26,7 @@ namespace qmcplusplus { -using MCPWalker = Walker; +using MCPWalker = MCPopulation::MCPWalker; using RealType = double; namespace testing diff --git a/src/QMCHamiltonians/ECPotentialBuilder.h b/src/QMCHamiltonians/ECPotentialBuilder.h index aa0353dde3..e3b8ab8119 100644 --- a/src/QMCHamiltonians/ECPotentialBuilder.h +++ b/src/QMCHamiltonians/ECPotentialBuilder.h @@ -19,10 +19,10 @@ #include "QMCHamiltonians/NonLocalECPotential.h" #include "QMCHamiltonians/SOECPotential.h" #include "QMCHamiltonians/L2Potential.h" +#include "Particle/ParticleSet.h" namespace qmcplusplus { class QMCHamiltonian; -class ParticleSet; class TrialWaveFunction; struct ECPotentialBuilder : public MPIObjectBase, public QMCTraits diff --git a/src/QMCHamiltonians/HamiltonianPool.h b/src/QMCHamiltonians/HamiltonianPool.h index f3651b162e..5bbae1fd91 
100644 --- a/src/QMCHamiltonians/HamiltonianPool.h +++ b/src/QMCHamiltonians/HamiltonianPool.h @@ -20,15 +20,16 @@ #include "QMCHamiltonians/HamiltonianFactory.h" #include "OhmmsData/OhmmsElementBase.h" #include "Message/MPIObjectBase.h" +#include "Particle/ParticleSet.h" +#include "Particle/ParticleSetPool.h" +#include "Particle/MCWalkerConfiguration.h" + #include struct Libxml2Document; namespace qmcplusplus { -class ParticleSet; -class MCWalkerConfiguration; -class ParticleSetPool; class WaveFunctionPool; /** @ingroup qmcapp diff --git a/src/QMCHamiltonians/NonLocalECPotential.h b/src/QMCHamiltonians/NonLocalECPotential.h index 40e5948712..52f7798174 100644 --- a/src/QMCHamiltonians/NonLocalECPotential.h +++ b/src/QMCHamiltonians/NonLocalECPotential.h @@ -22,6 +22,7 @@ #include "QMCHamiltonians/ForceBase.h" #include "QMCHamiltonians/NonLocalECPComponent.h" #include "Particle/NeighborLists.h" +#include "Particle/DistanceTable.h" namespace qmcplusplus { template diff --git a/src/QMCHamiltonians/OperatorBase.h b/src/QMCHamiltonians/OperatorBase.h index cd4641dd01..41c8cda129 100644 --- a/src/QMCHamiltonians/OperatorBase.h +++ b/src/QMCHamiltonians/OperatorBase.h @@ -34,12 +34,13 @@ #endif #include "QMCHamiltonians/Listener.hpp" #include "QMCWaveFunctions/OrbitalSetTraits.h" +#include "Particle/MCWalkerConfiguration.h" + #include #include // std::unique_ptr namespace qmcplusplus { -class MCWalkerConfiguration; /**@defgroup hamiltonian Hamiltonian group * @brief QMCHamiltonian and its component, OperatorBase diff --git a/src/QMCHamiltonians/QMCHamiltonian.h b/src/QMCHamiltonians/QMCHamiltonian.h index 11dda4ac9c..2b193e72dc 100644 --- a/src/QMCHamiltonians/QMCHamiltonian.h +++ b/src/QMCHamiltonians/QMCHamiltonian.h @@ -35,10 +35,10 @@ #include "Estimators/TraceManager.h" #endif #include "QMCWaveFunctions/OrbitalSetTraits.h" +#include "Particle/MCWalkerConfiguration.h" namespace qmcplusplus { -class MCWalkerConfiguration; class HamiltonianFactory; class 
NonLocalECPotential; diff --git a/src/QMCWaveFunctions/BasisSetBaseT.h b/src/QMCWaveFunctions/BasisSetBaseT.h index e6c8bd9e99..569abf9173 100644 --- a/src/QMCWaveFunctions/BasisSetBaseT.h +++ b/src/QMCWaveFunctions/BasisSetBaseT.h @@ -22,7 +22,7 @@ #define QMCPLUSPLUS_BASISSETBASET_H #include "OMPTarget/OffloadAlignedAllocators.hpp" -#include "Particle/ParticleSetT.h" +#include "Particle/ParticleSet.h" #include "QMCWaveFunctions/OrbitalSetTraits.h" namespace qmcplusplus @@ -216,6 +216,22 @@ struct SoaBasisSetBaseT std::vector& is_s_orbital) const { } + + /** initialize a shared resource and hand it to collection + */ + virtual void createResource(ResourceCollection& collection) const {} + + /** acquire a shared resource from collection + */ + virtual void acquireResource(ResourceCollection& collection, + const RefVectorWithLeader& bset_list) const + {} + + /** return a shared resource to collection + */ + virtual void releaseResource(ResourceCollection& collection, + const RefVectorWithLeader& bset_list) const + {} }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.cpp b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.cpp deleted file mode 100644 index 0c5cf0d2c9..0000000000 --- a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.cpp +++ /dev/null @@ -1,233 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Paul R. C. Kent, kentpr@ornl.gov, Oak Ridge National Laboratory -// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -/** @file BsplineReaderBase.cpp - * - * Implement super function - */ -#include "EinsplineSetBuilder.h" -#include "BsplineReaderBase.h" -#include "OhmmsData/AttributeSet.h" -#include "Message/CommOperators.h" - -#include -#include - -namespace qmcplusplus -{ -BsplineReaderBase::BsplineReaderBase(EinsplineSetBuilder* e) - : mybuilder(e), MeshSize(0), checkNorm(true), saveSplineCoefs(false), rotate(true) -{ - myComm = mybuilder->getCommunicator(); -} - -void BsplineReaderBase::get_psi_g(int ti, int spin, int ib, Vector>& cG) -{ - int ncg = 0; - if (myComm->rank() == 0) - { - std::string path = psi_g_path(ti, spin, ib); - mybuilder->H5File.read(cG, path); - ncg = cG.size(); - } - myComm->bcast(ncg); - if (ncg != mybuilder->MaxNumGvecs) - { - APP_ABORT("Failed : ncg != MaxNumGvecs"); - } - myComm->bcast(cG); -} - -BsplineReaderBase::~BsplineReaderBase() {} - -inline std::string make_bandinfo_filename(const std::string& root, - int spin, - int twist, - const Tensor& tilematrix, - int gid) -{ - std::ostringstream oo; - oo << root << ".tile_" << tilematrix(0, 0) << tilematrix(0, 1) << tilematrix(0, 2) << tilematrix(1, 0) - << tilematrix(1, 1) << tilematrix(1, 2) << tilematrix(2, 0) << tilematrix(2, 1) << tilematrix(2, 2) << ".spin_" - << spin << ".tw_" << twist; - if (gid >= 0) - oo << ".g" << gid; - return oo.str(); -} - - -inline std::string make_bandgroup_name(const std::string& root, - int spin, - int twist, - const Tensor& tilematrix, - int first, - int last) -{ - std::ostringstream oo; - oo << root << ".tile_" << tilematrix(0, 0) << tilematrix(0, 1) << tilematrix(0, 2) << tilematrix(1, 0) - << tilematrix(1, 1) << tilematrix(1, 2) << tilematrix(2, 0) << 
tilematrix(2, 1) << tilematrix(2, 2) << ".spin_" - << spin << ".tw_" << twist << ".l" << first << "u" << last; - return oo.str(); -} - -void BsplineReaderBase::setCommon(xmlNodePtr cur) -{ - // check orbital normalization by default - std::string checkOrbNorm("yes"); - std::string saveCoefs("no"); - OhmmsAttributeSet a; - a.add(checkOrbNorm, "check_orb_norm"); - a.add(saveCoefs, "save_coefs"); - a.put(cur); - - // allow user to turn off norm check with a warning - if (checkOrbNorm == "no") - { - app_log() << "WARNING: disable orbital normalization check!" << std::endl; - checkNorm = false; - } - saveSplineCoefs = saveCoefs == "yes"; -} - -std::unique_ptr BsplineReaderBase::create_spline_set(int spin, xmlNodePtr cur) -{ - int ns(0); - std::string spo_object_name; - OhmmsAttributeSet a; - a.add(ns, "size"); - a.add(spo_object_name, "name"); - a.add(spo_object_name, "id"); - a.put(cur); - - if (ns == 0) - APP_ABORT_TRACE(__FILE__, __LINE__, "parameter/@size missing"); - - if (spo2band.empty()) - spo2band.resize(mybuilder->states.size()); - - std::vector& fullband = (*(mybuilder->FullBands[spin])); - - if (spo2band[spin].empty()) - { - spo2band[spin].reserve(fullband.size()); - if (!mybuilder->states[spin]) - mybuilder->states[spin] = std::make_unique(); - mybuilder->clear_states(spin); - initialize_spo2band(spin, fullband, *mybuilder->states[spin], spo2band[spin]); - } - - BandInfoGroup vals; - vals.TwistIndex = fullband[0].TwistIndex; - vals.GroupID = 0; - vals.myName = make_bandgroup_name(mybuilder->getName(), spin, mybuilder->twist_num_, mybuilder->TileMatrix, 0, ns); - vals.selectBands(fullband, 0, ns, false); - - return create_spline_set(spo_object_name, spin, vals); -} - -std::unique_ptr BsplineReaderBase::create_spline_set(int spin, xmlNodePtr cur, SPOSetInputInfo& input_info) -{ - std::string spo_object_name; - OhmmsAttributeSet a; - a.add(spo_object_name, "name"); - a.add(spo_object_name, "id"); - a.put(cur); - - if (spo2band.empty()) - 
spo2band.resize(mybuilder->states.size()); - - std::vector& fullband = (*(mybuilder->FullBands[spin])); - - if (spo2band[spin].empty()) - { - spo2band[spin].reserve(fullband.size()); - if (!mybuilder->states[spin]) - mybuilder->states[spin] = std::make_unique(); - mybuilder->clear_states(spin); - initialize_spo2band(spin, fullband, *mybuilder->states[spin], spo2band[spin]); - } - - BandInfoGroup vals; - vals.TwistIndex = fullband[0].TwistIndex; - vals.GroupID = 0; - vals.myName = make_bandgroup_name(mybuilder->getName(), spin, mybuilder->twist_num_, mybuilder->TileMatrix, - input_info.min_index(), input_info.max_index()); - vals.selectBands(fullband, spo2band[spin][input_info.min_index()], input_info.max_index() - input_info.min_index(), - false); - - return create_spline_set(spo_object_name, spin, vals); -} - -/** build index tables to map a state to band with k-point folidng - * @param bigspace full BandInfo constructed by EinsplineSetBuilder - * @param sposet SPOSetInfo owned by someone, most likely EinsplinseSetBuilder - * @param spo2band spo2band[i] is the index in bigspace - * - * At gamma or arbitrary kpoints with complex wavefunctions, spo2band[i]==i - */ -void BsplineReaderBase::initialize_spo2band(int spin, - const std::vector& bigspace, - SPOSetInfo& sposet, - std::vector& spo2band) -{ - spo2band.reserve(bigspace.size()); - int ns = 0; - for (int i = 0; i < bigspace.size(); ++i) - { - spo2band.push_back(i); - SPOInfo a(ns, bigspace[i].Energy); - sposet.add(a); - ns++; - if (bigspace[i].MakeTwoCopies) - { - spo2band.push_back(i); - SPOInfo b(ns, bigspace[i].Energy); - sposet.add(b); - ns++; - } - } - - //write to a file - const Communicate* comm = myComm; - if (comm->rank()) - return; - - std::filesystem::path aname = make_bandinfo_filename(mybuilder->getName(), spin, mybuilder->twist_num_, - mybuilder->TileMatrix, comm->getGroupID()); - aname += ".bandinfo.dat"; - - std::ofstream o(aname.c_str()); - std::array s; - ns = 0; - using PosType = 
QMCTraits::PosType; - o << "# Band State TwistIndex BandIndex Energy Kx Ky Kz K1 K2 K3 KmK " - << std::endl; - for (int i = 0; i < bigspace.size(); ++i) - { - int ti = bigspace[i].TwistIndex; - int bi = bigspace[i].BandIndex; - double e = bigspace[i].Energy; - int nd = (bigspace[i].MakeTwoCopies) ? 2 : 1; - PosType k = mybuilder->PrimCell.k_cart(mybuilder->primcell_kpoints[ti]); - int s_size = std::snprintf(s.data(), s.size(), "%8d %8d %8d %8d %12.6f %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f %6d\n", - i, ns, ti, bi, e, k[0], k[1], k[2], mybuilder->primcell_kpoints[ti][0], - mybuilder->primcell_kpoints[ti][1], mybuilder->primcell_kpoints[ti][2], nd); - if (s_size < 0) - throw std::runtime_error("Error generating bandinfo"); - o << s.data(); - ns += nd; - } -} -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h index 7046296bb6..9804bf6336 100644 --- a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h +++ b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h @@ -20,180 +20,12 @@ #ifndef QMCPLUSPLUS_BSPLINE_READER_BASE_H #define QMCPLUSPLUS_BSPLINE_READER_BASE_H -#include "mpi/collectives.h" -#include "mpi/point2point.h" -#include -#include "QMCWaveFunctions/BsplineFactory/EinsplineSetBuilder.h" +#include "Configuration.h" +#include "QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h" namespace qmcplusplus { -struct SPOSetInputInfo; - -/** - * Each SplineC2X needs a reader derived from BsplineReaderBase. - * This base class handles common chores - * - check_twists : read gvectors, set twists for folded bands if needed, and set the phase for the special K - * - set_grid : create the basic grid and boundary conditions for einspline - * Note that template is abused but it works. 
- */ -struct BsplineReaderBase -{ - ///pointer to the EinsplineSetBuilder - EinsplineSetBuilder* mybuilder; - ///communicator - Communicate* myComm; - ///mesh size - TinyVector MeshSize; - ///check the norm of orbitals - bool checkNorm; - ///save spline coefficients to storage - bool saveSplineCoefs; - ///apply orbital rotations - bool rotate; - ///map from spo index to band index - std::vector> spo2band; - - BsplineReaderBase(EinsplineSetBuilder* e); - - virtual ~BsplineReaderBase(); - - /** read gvectors and set the mesh, and prepare for einspline - */ - template - inline bool set_grid(const TinyVector& halfg, GT* xyz_grid, BCT* xyz_bc) - { - //This sets MeshSize from the input file - bool havePsig = mybuilder->ReadGvectors_ESHDF(); - - //If this MeshSize is not initialized, use the meshsize set by the input based on FFT grid and meshfactor - if (MeshSize[0] == 0) - MeshSize = mybuilder->MeshSize; - - app_log() << " Using meshsize=" << MeshSize << "\n vs input meshsize=" << mybuilder->MeshSize << std::endl; - - for (int j = 0; j < 3; ++j) - { - xyz_grid[j].start = 0.0; - xyz_grid[j].end = 1.0; - xyz_grid[j].num = MeshSize[j]; - - if (halfg[j]) - { - xyz_bc[j].lCode = ANTIPERIODIC; - xyz_bc[j].rCode = ANTIPERIODIC; - } - else - { - xyz_bc[j].lCode = PERIODIC; - xyz_bc[j].rCode = PERIODIC; - } - - xyz_bc[j].lVal = 0.0; - xyz_bc[j].rVal = 0.0; - } - return havePsig; - } - - /** initialize twist-related data for N orbitals - */ - template - inline void check_twists(SPE* bspline, const BandInfoGroup& bandgroup) - { - //init(orbitalSet,bspline); - bspline->PrimLattice = mybuilder->PrimCell; - bspline->GGt = dot(transpose(bspline->PrimLattice.G), bspline->PrimLattice.G); - - int N = bandgroup.getNumDistinctOrbitals(); - int numOrbs = bandgroup.getNumSPOs(); - - bspline->setOrbitalSetSize(numOrbs); - bspline->resizeStorage(N, N); - - bspline->first_spo = bandgroup.getFirstSPO(); - bspline->last_spo = bandgroup.getLastSPO(); - - int num = 0; - const std::vector& cur_bands 
= bandgroup.myBands; - for (int iorb = 0; iorb < N; iorb++) - { - int ti = cur_bands[iorb].TwistIndex; - bspline->kPoints[iorb] = mybuilder->PrimCell.k_cart(-mybuilder->primcell_kpoints[ti]); - bspline->MakeTwoCopies[iorb] = (num < (numOrbs - 1)) && cur_bands[iorb].MakeTwoCopies; - num += bspline->MakeTwoCopies[iorb] ? 2 : 1; - } - - app_log() << "NumDistinctOrbitals " << N << " numOrbs = " << numOrbs << std::endl; - - bspline->HalfG = 0; - TinyVector bconds = mybuilder->TargetPtcl.getLattice().BoxBConds; - if (!bspline->isComplex()) - { - //no k-point folding, single special k point (G, L ...) - TinyVector twist0 = mybuilder->primcell_kpoints[bandgroup.TwistIndex]; - for (int i = 0; i < 3; i++) - if (bconds[i] && ((std::abs(std::abs(twist0[i]) - 0.5) < 1.0e-8))) - bspline->HalfG[i] = 1; - else - bspline->HalfG[i] = 0; - app_log() << " TwistIndex = " << cur_bands[0].TwistIndex << " TwistAngle " << twist0 << std::endl; - app_log() << " HalfG = " << bspline->HalfG << std::endl; - } - app_log().flush(); - } - - /** return the path name in hdf5 - */ - inline std::string psi_g_path(int ti, int spin, int ib) - { - std::ostringstream path; - path << "/electrons/kpoint_" << ti << "/spin_" << spin << "/state_" << ib << "/psi_g"; - return path.str(); - } - - /** return the path name in hdf5 - */ - inline std::string psi_r_path(int ti, int spin, int ib) - { - std::ostringstream path; - path << "/electrons/kpoint_" << ti << "/spin_" << spin << "/state_" << ib << "/psi_r"; - return path.str(); - } - - /** read/bcast psi_g - * @param ti twist index - * @param spin spin index - * @param ib band index - * @param cG psi_g as stored in hdf5 - */ - void get_psi_g(int ti, int spin, int ib, Vector>& cG); - - /** create the actual spline sets - */ - virtual std::unique_ptr create_spline_set(const std::string& my_name, - int spin, - const BandInfoGroup& bandgroup) = 0; - - /** setting common parameters - */ - void setCommon(xmlNodePtr cur); - - /** create the spline after one of the kind 
is created */ - std::unique_ptr create_spline_set(int spin, xmlNodePtr cur, SPOSetInputInfo& input_info); - - /** create the spline set */ - std::unique_ptr create_spline_set(int spin, xmlNodePtr cur); - - /** Set the checkNorm variable */ - inline void setCheckNorm(bool new_checknorm) { checkNorm = new_checknorm; }; - - /** Set the orbital rotation flag. Rotations are applied to balance the real/imaginary components. */ - inline void setRotate(bool new_rotate) { rotate = new_rotate; }; - - void initialize_spo2band(int spin, - const std::vector& bigspace, - SPOSetInfo& sposet, - std::vector& band2spo); -}; +using BsplineReaderBase = BsplineReaderBaseT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.cpp b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.cpp index bf6c0c7fff..83cdfbd190 100644 --- a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.cpp @@ -15,7 +15,7 @@ // at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// -/** @file BsplineReaderBase.cpp +/** @file BsplineReaderBaseT.cpp * * Implement super function */ diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineSet.h b/src/QMCWaveFunctions/BsplineFactory/BsplineSet.h index 6a5e880b0d..b219d91101 100644 --- a/src/QMCWaveFunctions/BsplineFactory/BsplineSet.h +++ b/src/QMCWaveFunctions/BsplineFactory/BsplineSet.h @@ -21,230 +21,12 @@ #ifndef QMCPLUSPLUS_BSPLINESET_H #define QMCPLUSPLUS_BSPLINESET_H -#include "QMCWaveFunctions/SPOSet.h" -#include "spline/einspline_engine.hpp" -#include "spline/einspline_util.hpp" +#include "Configuration.h" +#include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h" namespace qmcplusplus { -/** BsplineSet is the base class for SplineC2C, SplineC2R, SplineR2R. - * Its derived template classes manage the storage and evaluation at given precision. 
- * BsplineSet also implements a few fallback routines in case optimized implementation is not necessary in the derived class. - */ -class BsplineSet : public SPOSet -{ -protected: - static const int D = DIM; - ///Index of this adoptor, when multiple adoptors are used for NUMA or distributed cases - size_t MyIndex; - ///first index of the SPOs this Spline handles - size_t first_spo; - ///last index of the SPOs this Spline handles - size_t last_spo; - ///sign bits at the G/2 boundaries - TinyVector HalfG; - ///flags to unpack sin/cos - std::vector MakeTwoCopies; - /** kpoints for each unique orbitals. - * Note: for historic reason, this sign is opposite to what was used in DFT when orbitals were generated. - * Changing the sign requires updating all the evaluation code. - */ - std::vector kPoints; - ///remap splines to orbitals - aligned_vector BandIndexMap; - ///band offsets used for communication - std::vector offset; - -public: - BsplineSet(const std::string& my_name) : SPOSet(my_name), MyIndex(0), first_spo(0), last_spo(0) {} - - virtual bool isComplex() const = 0; - virtual std::string getKeyword() const = 0; - - auto& getHalfG() const { return HalfG; } - - inline void init_base(int n) - { - kPoints.resize(n); - MakeTwoCopies.resize(n); - BandIndexMap.resize(n); - for (int i = 0; i < n; i++) - BandIndexMap[i] = i; - } - - ///remap kpoints to group general kpoints & special kpoints - int remap_kpoints() - { - std::vector k_copy(kPoints); - const int nk = kPoints.size(); - int nCB = 0; - //two pass - for (int i = 0; i < nk; ++i) - { - if (MakeTwoCopies[i]) - { - kPoints[nCB] = k_copy[i]; - BandIndexMap[nCB++] = i; - } - } - int nRealBands = nCB; - for (int i = 0; i < nk; ++i) - { - if (!MakeTwoCopies[i]) - { - kPoints[nRealBands] = k_copy[i]; - BandIndexMap[nRealBands++] = i; - } - } - return nCB; //return the number of complex bands - } - - // propagate SPOSet virtual functions - using SPOSet::evaluateDetRatios; - using SPOSet::evaluateValue; - using 
SPOSet::evaluateVGH; - using SPOSet::evaluateVGHGH; - using SPOSet::evaluateVGL; - using SPOSet::finalizeConstruction; - using SPOSet::mw_evaluateDetRatios; - using SPOSet::mw_evaluateVGL; - using SPOSet::mw_evaluateVGLandDetRatioGrads; - - using SPOSet::acquireResource; - using SPOSet::createResource; - using SPOSet::releaseResource; - - std::unique_ptr makeClone() const override = 0; - - void setOrbitalSetSize(int norbs) override { OrbitalSetSize = norbs; } - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override - { - using value_type = ValueMatrix::value_type; - using grad_type = GradMatrix::value_type; - for (int iat = first, i = 0; iat < last; ++iat, ++i) - { - ValueVector v(logdet[i], logdet.cols()); - GradVector g(dlogdet[i], dlogdet.cols()); - ValueVector l(d2logdet[i], d2logdet.cols()); - evaluateVGL(P, iat, v, g, l); - } - } - - void mw_evaluate_notranspose(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int first, - int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const override - { - assert(this == &spo_list.getLeader()); - using value_type = ValueMatrix::value_type; - using grad_type = GradMatrix::value_type; - - const size_t nw = spo_list.size(); - std::vector mw_psi_v; - std::vector mw_dpsi_v; - std::vector mw_d2psi_v; - RefVector psi_v_list; - RefVector dpsi_v_list; - RefVector d2psi_v_list; - mw_psi_v.reserve(nw); - mw_dpsi_v.reserve(nw); - mw_d2psi_v.reserve(nw); - psi_v_list.reserve(nw); - dpsi_v_list.reserve(nw); - d2psi_v_list.reserve(nw); - - for (int iat = first, i = 0; iat < last; ++iat, ++i) - { - mw_psi_v.clear(); - mw_dpsi_v.clear(); - mw_d2psi_v.clear(); - psi_v_list.clear(); - dpsi_v_list.clear(); - d2psi_v_list.clear(); - - for (int iw = 0; iw < nw; iw++) - { - mw_psi_v.emplace_back(logdet_list[iw].get()[i], logdet_list[iw].get().cols()); - 
mw_dpsi_v.emplace_back(dlogdet_list[iw].get()[i], dlogdet_list[iw].get().cols()); - mw_d2psi_v.emplace_back(d2logdet_list[iw].get()[i], d2logdet_list[iw].get().cols()); - psi_v_list.push_back(mw_psi_v.back()); - dpsi_v_list.push_back(mw_dpsi_v.back()); - d2psi_v_list.push_back(mw_d2psi_v.back()); - } - - mw_evaluateVGL(spo_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); - } - } - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) override - { - for (int iat = first, i = 0; iat < last; ++iat, ++i) - { - ValueVector v(logdet[i], logdet.cols()); - GradVector g(dlogdet[i], dlogdet.cols()); - HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols()); - evaluateVGH(P, iat, v, g, h); - } - } - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) override - { - for (int iat = first, i = 0; iat < last; ++iat, ++i) - { - ValueVector v(logdet[i], logdet.cols()); - GradVector g(dlogdet[i], dlogdet.cols()); - HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols()); - GGGVector gh(grad_grad_grad_logdet[i], grad_grad_grad_logdet.cols()); - evaluateVGHGH(P, iat, v, g, h, gh); - } - } - - void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi) override - { - //Do nothing, since Einsplines don't explicitly depend on ion positions. - } - - void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, - GradMatrix& grad_lapl_phi) override - { - //Do nothing, since Einsplines don't explicitly depend on ion positions. 
- } - - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; -}; +using BsplineSet = BsplineSetT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/EinsplineSetBuilder.h b/src/QMCWaveFunctions/BsplineFactory/EinsplineSetBuilder.h index f7794dbba4..d44e0b20cc 100644 --- a/src/QMCWaveFunctions/BsplineFactory/EinsplineSetBuilder.h +++ b/src/QMCWaveFunctions/BsplineFactory/EinsplineSetBuilder.h @@ -23,269 +23,15 @@ #ifndef QMCPLUSPLUS_EINSPLINE_SET_BUILDER_H #define QMCPLUSPLUS_EINSPLINE_SET_BUILDER_H -#include "QMCWaveFunctions/SPOSetBuilder.h" -#include "QMCWaveFunctions/BandInfo.h" -#include -#include - -#define PW_COEFF_NORM_TOLERANCE 1e-6 - -class Communicate; +#include "Configuration.h" +#include "QMCWaveFunctions/EinsplineSetBuilderT.h" namespace qmcplusplus { -///forward declaration of BsplineReaderBase -struct BsplineReaderBase; - -// Helper needed for TwistMap -struct Int3less -{ - bool operator()(const TinyVector& a, const TinyVector& b) const - { - if (a[0] > b[0]) - return false; - if (a[0] < b[0]) - return true; - if (a[1] > b[1]) - return false; - if (a[1] < b[1]) - return true; - if (a[2] > b[2]) - return false; - if (a[2] < b[2]) - return true; - return false; - } -}; -struct Int4less -{ - bool operator()(const TinyVector& a, const TinyVector& b) const - { - for (int i = 0; i < 4; i++) - { - if (a[i] > b[i]) - return false; - if (a[i] < b[i]) - return true; - } - return false; - } -}; - - -/** construct a name for spline SPO set - */ -struct H5OrbSet -{ - ///index for the spin set - int SpinSet; - ///number of orbitals that belong to this set - int NumOrbs; - ///name of the HDF5 file - std::filesystem::path FileName; - /** true if a < b - * - * The ordering - * - name - * - spin set - * - number of orbitals - */ - bool operator()(const H5OrbSet& a, const H5OrbSet& b) const - { - if (a.FileName == b.FileName) - { - if (a.SpinSet == b.SpinSet) - return a.NumOrbs < b.NumOrbs; - else - return 
a.SpinSet < b.SpinSet; - } - else - return a.FileName < b.FileName; - } - - H5OrbSet(std::filesystem::path name, int spinSet, int numOrbs) - : SpinSet(spinSet), NumOrbs(numOrbs), FileName(std::move(name)) - {} - H5OrbSet() = default; -}; - /** EinsplineSet builder */ -class EinsplineSetBuilder : public SPOSetBuilder -{ -public: - using PSetMap = std::map>; - using UnitCellType = CrystalLattice; - - ///reference to the particleset pool - const PSetMap& ParticleSets; - ///quantum particle set - ParticleSet& TargetPtcl; - ///ionic system - ParticleSet* SourcePtcl; - - /** Helper vector for sorting bands - */ - std::vector>> FullBands; - - /// reader to use BsplineReaderBase - std::unique_ptr MixedSplineReader; - - ///This is true if we have the orbital derivatives w.r.t. the ion positions - bool HaveOrbDerivs; - ///root XML node with href, sort, tilematrix, twistnum, source, precision,truncate,version - xmlNodePtr XMLRoot; - - std::map SPOSetMap; - - ///constructor - EinsplineSetBuilder(ParticleSet& p, const PSetMap& psets, Communicate* comm, xmlNodePtr cur); - - ///destructor - ~EinsplineSetBuilder() override; - - /** initialize the Antisymmetric wave function for electrons - * @param cur the current xml node - */ - std::unique_ptr createSPOSetFromXML(xmlNodePtr cur) override; - - /** initialize with the existing SPOSet */ - std::unique_ptr createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input_info) override; - - ////////////////////////////////////// - // HDF5-related data and functions // - ////////////////////////////////////// - hdf_archive H5File; - std::filesystem::path H5FileName; - // HDF5 orbital file version - typedef enum - { - QMCPACK, - ESHDF - } FormatType; - FormatType Format; - TinyVector Version; - std::string parameterGroup, ionsGroup, eigenstatesGroup; - std::vector Occ; - bool ReadOrbitalInfo(bool skipChecks = false); - bool ReadOrbitalInfo_ESHDF(bool skipChecks = false); - void BroadcastOrbitalInfo(); - bool CheckLattice(); - - /** read gvectors 
for each twist - * @return true, if psi_g is found - */ - bool ReadGvectors_ESHDF(); - - Tensor Lattice, RecipLattice, LatticeInv, SuperLattice, GGt; - UnitCellType SuperCell, PrimCell, PrimCellInv; - int NumBands, NumElectrons, NumSpins, NumTwists; - int MaxNumGvecs; - double MeshFactor; - RealType MatchingTol; - TinyVector MeshSize; - std::vector>> Gvecs; - - Vector IonTypes; - Vector> IonPos; - // mapping the ions in the supercell to the primitive cell - std::vector Super2Prim; - - ///////////////////////////// - // Twist angle information // - ///////////////////////////// - // The "true" twist number after analyzing twistnum, twist XML input and h5 - int twist_num_; - // primitive cell k-points from DFT calculations - std::vector> primcell_kpoints; - // primitive cell to supercell tiling matrix - Tensor TileMatrix; - // This vector stores which twist indices will be used by this clone - std::vector> UseTwists; - std::vector IncludeTwists, DistinctTwists; - /// if false, splines are conceptually complex valued - bool use_real_splines_; - int NumDistinctOrbitals; - // This is true if the corresponding twist in DistinctTwists should - // should be used to generate two distinct orbitals from the real and - // imaginary parts. 
- std::vector MakeTwoCopies; - // This maps a 3-integer twist index into the twist number in the file - std::map, int, Int3less> TwistMap; - - bool TwistPair(PosType a, PosType b) const; - void TileIons(); - void OccupyBands(int spin, int sortBands, int numOrbs, bool skipChecks = false); - void OccupyBands_ESHDF(int spin, int sortBands, int numOrbs); - - //////////////////////////////// - // Atomic orbital information // - //////////////////////////////// - struct CenterInfo - { - std::vector lmax, spline_npoints, GroupID; - std::vector spline_radius, cutoff, inner_cutoff, non_overlapping_radius; - std::vector> ion_pos; - int Ncenters; - - CenterInfo() : Ncenters(0){}; - - void resize(int ncenters) - { - Ncenters = ncenters; - lmax.resize(ncenters, -1); - spline_npoints.resize(ncenters, -1); - GroupID.resize(ncenters, 0); - spline_radius.resize(ncenters, -1.0); - inner_cutoff.resize(ncenters, -1.0); - non_overlapping_radius.resize(ncenters, -1.0); - cutoff.resize(ncenters, -1.0); - ion_pos.resize(ncenters); - } - } AtomicCentersInfo; - - // This returns the path in the HDF5 file to the group for orbital - // with twist ti and band bi - std::string OrbitalPath(int ti, int bi); - - ///////////////////////////////////////////////////////////// - // Information to avoid storing the same orbitals twice in // - // spin-restricted calculations. 
// - ///////////////////////////////////////////////////////////// - int LastSpinSet, NumOrbitalsRead; - - std::string occ_format; - int particle_hole_pairs; - bool makeRotations; - -protected: - /** broadcast SortBands - * @param N number of state - * @param root true if it is the i/o node - */ - void bcastSortBands(int splin, int N, bool root); - - /** a specific but clean code path in createSPOSetFromXML, for PBC, double, ESHDF - * @param cur the current xml node - */ - void set_metadata(int numOrbs, - int twist_num_inp, - const TinyVector& twist_inp, - bool skipChecks = false); - - /** analyze twists of orbitals in h5 and determinine twist_num_ - * @param twist_num_inp twistnum XML input - * @param twist_inp twst XML input - */ - void AnalyzeTwists2(const int twist_num_inp, const TinyVector& twist_inp); - - /// twistnum_inp == -9999 to indicate no given input after parsing XML - static constexpr int TWISTNUM_NO_INPUT = -9999; - /// twist_inp[i] <= -9999 to indicate no given input after parsing XML - static constexpr double TWIST_NO_INPUT = -9999; -}; +using EinsplineSetBuilder = EinsplineSetBuilderT; } // namespace qmcplusplus - #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/EinsplineSpinorSetBuilder.h b/src/QMCWaveFunctions/BsplineFactory/EinsplineSpinorSetBuilder.h index 73b7f885c8..38be28d9a3 100644 --- a/src/QMCWaveFunctions/BsplineFactory/EinsplineSpinorSetBuilder.h +++ b/src/QMCWaveFunctions/BsplineFactory/EinsplineSpinorSetBuilder.h @@ -19,32 +19,13 @@ #ifndef QMCPLUSPLUS_EINSPLINE_SPINORSET_BUILDER_H #define QMCPLUSPLUS_EINSPLINE_SPINORSET_BUILDER_H -#include "QMCWaveFunctions/SPOSetBuilder.h" -#include "BsplineFactory/EinsplineSetBuilder.h" -class Communicate; +#include "Configuration.h" +#include "QMCWaveFunctions/EinsplineSpinorSetBuilderT.h" namespace qmcplusplus { - -class EinsplineSpinorSetBuilder : public EinsplineSetBuilder -{ - using PSetMap = std::map>; - -public: - ///constructor - EinsplineSpinorSetBuilder(ParticleSet& p, const 
PSetMap& psets, Communicate* comm, xmlNodePtr cur) - : EinsplineSetBuilder(p, psets, comm, cur){}; - - ///destructor - ~EinsplineSpinorSetBuilder() override{}; - - /** initialize the Antisymmetric wave function for electrons - * @param cur the current xml node - */ - std::unique_ptr createSPOSetFromXML(xmlNodePtr cur) override; -}; +using EinsplineSpinorSetBuilder = EinsplineSpinorSetBuilderT; } // namespace qmcplusplus - #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.cpp b/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.cpp deleted file mode 100644 index 9f92bd0ea7..0000000000 --- a/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.cpp +++ /dev/null @@ -1,21 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2021 QMCPACK developers. 
-// -// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// -// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory -////////////////////////////////////////////////////////////////////////////////////// - - -#include "HybridRepCenterOrbitals.h" - -namespace qmcplusplus -{ -template class AtomicOrbitals; -template class AtomicOrbitals; -template class HybridRepCenterOrbitals; -template class HybridRepCenterOrbitals; -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h index 462496a73b..83dde48ba1 100644 --- a/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h @@ -17,749 +17,15 @@ #ifndef QMCPLUSPLUS_HYBRIDREP_CENTER_ORBITALS_H #define QMCPLUSPLUS_HYBRIDREP_CENTER_ORBITALS_H -#include "Particle/DistanceTable.h" -#include "Particle/VirtualParticleSet.h" -#include "Numerics/SoaSphericalTensor.h" -#include "spline2/MultiBspline1D.hpp" -#include "Numerics/SmoothFunctions.hpp" -#include "hdf/hdf_archive.h" +#include "Configuration.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h" namespace qmcplusplus { -template -class HybridRepSetReader; - template -class AtomicOrbitals -{ -public: - static const int D = 3; - using AtomicSplineType = typename bspline_traits::SplineType; - using AtomicBCType = typename bspline_traits::BCType; - using AtomicSingleSplineType = UBspline_1d_d; - using PointType = TinyVector; - using value_type = ST; - - using vContainer_type = aligned_vector; - -private: - // near core cutoff - ST rmin; - // far from core cutoff, rmin_sqrt>=rmin - ST rmin_sqrt; - ST cutoff, cutoff_buffer, spline_radius, non_overlapping_radius; - int spline_npoints, BaseN; - int NumBands, Npad; - PointType center_pos; - const int lmax, lm_tot; - SoaSphericalTensor Ylm; - vContainer_type l_vals; - vContainer_type 
r_power_minus_l; - ///1D spline of radial functions of all the orbitals - std::shared_ptr> SplineInst; - - vContainer_type localV, localG, localL; - -public: - AtomicOrbitals(int Lmax) : lmax(Lmax), lm_tot((Lmax + 1) * (Lmax + 1)), Ylm(Lmax) - { - r_power_minus_l.resize(lm_tot); - l_vals.resize(lm_tot); - for (int l = 0; l <= lmax; l++) - for (int m = -l; m <= l; m++) - l_vals[l * (l + 1) + m] = l; - rmin = std::exp(std::log(std::numeric_limits::min()) / std::max(Lmax, 1)); - rmin = std::max(rmin, std::numeric_limits::epsilon()); - rmin_sqrt = std::max(rmin, std::sqrt(std::numeric_limits::epsilon())); - } - - // accessing functions, const only - ST getCutoff() const { return cutoff; } - ST getCutoffBuffer() const { return cutoff_buffer; } - ST getSplineRadius() const { return spline_radius; } - ST getNonOverlappingRadius() const { return non_overlapping_radius; } - int getSplineNpoints() const { return spline_npoints; } - int getLmax() const { return lmax; } - const PointType& getCenterPos() const { return center_pos; } - - inline void resizeStorage(size_t Nb) - { - NumBands = Nb; - Npad = getAlignedSize(Nb); - localV.resize(Npad * lm_tot); - localG.resize(Npad * lm_tot); - localL.resize(Npad * lm_tot); - create_spline(); - } - - void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } - - void gather_tables(Communicate* comm, std::vector& offset) - { - gatherv(comm, SplineInst->getSplinePtr(), Npad, offset); - } - - template - inline void set_info(const PT& R, - const VT& cutoff_in, - const VT& cutoff_buffer_in, - const VT& spline_radius_in, - const VT& non_overlapping_radius_in, - const int spline_npoints_in) - { - center_pos[0] = R[0]; - center_pos[1] = R[1]; - center_pos[2] = R[2]; - cutoff = cutoff_in; - cutoff_buffer = cutoff_buffer_in; - spline_radius = spline_radius_in; - spline_npoints = spline_npoints_in; - non_overlapping_radius = non_overlapping_radius_in; - BaseN = spline_npoints + 2; - } - - inline void create_spline() 
- { - AtomicBCType bc; - bc.lCode = FLAT; - bc.rCode = NATURAL; - Ugrid grid; - grid.start = 0.0; - grid.end = spline_radius; - grid.num = spline_npoints; - SplineInst = std::make_shared>(); - SplineInst->create(grid, bc, lm_tot * Npad); - } - - inline size_t getSplineSizeInBytes() const { return SplineInst->sizeInByte(); } - - inline void flush_zero() { SplineInst->flush_zero(); } - - inline void set_spline(AtomicSingleSplineType* spline, int lm, int ispline) - { - SplineInst->copy_spline(spline, lm * Npad + ispline, 0, BaseN); - } - - bool read_splines(hdf_archive& h5f) - { - einspline_engine bigtable(SplineInst->getSplinePtr()); - int lmax_in = 0, spline_npoints_in = 0; - ST spline_radius_in; - if (!h5f.readEntry(lmax_in, "l_max") || lmax_in != lmax) - return false; - if (!h5f.readEntry(spline_radius_in, "spline_radius") || spline_radius_in != spline_radius) - return false; - if (!h5f.readEntry(spline_npoints_in, "spline_npoints") || spline_npoints_in != spline_npoints) - return false; - return h5f.readEntry(bigtable, "radial_spline"); - } - - bool write_splines(hdf_archive& h5f) - { - bool success = true; - success = success && h5f.writeEntry(spline_radius, "spline_radius"); - success = success && h5f.writeEntry(spline_npoints, "spline_npoints"); - success = success && h5f.writeEntry(lmax, "l_max"); - success = success && h5f.writeEntry(center_pos, "position"); - einspline_engine bigtable(SplineInst->getSplinePtr()); - success = success && h5f.writeEntry(bigtable, "radial_spline"); - return success; - } - - //evaluate only V - template - inline void evaluate_v(const ST& r, const PointType& dr, VV& myV) - { - if (r > std::numeric_limits::epsilon()) - Ylm.evaluateV(dr[0] / r, dr[1] / r, dr[2] / r); - else - Ylm.evaluateV(0, 0, 1); - const ST* restrict Ylm_v = Ylm[0]; - - constexpr ST czero(0); - ST* restrict val = myV.data(); - ST* restrict local_val = localV.data(); - std::fill(myV.begin(), myV.end(), czero); - - SplineInst->evaluate(r, localV); - - for (size_t 
lm = 0; lm < lm_tot; lm++) - { -#pragma omp simd aligned(val, local_val : QMC_SIMD_ALIGNMENT) - for (size_t ib = 0; ib < myV.size(); ib++) - val[ib] += Ylm_v[lm] * local_val[ib]; - local_val += Npad; - } - } - - template - inline void evaluateValues(const DISPL& Displacements, const int center_idx, const ST& r, VM& multi_myV) - { - if (r <= std::numeric_limits::epsilon()) - Ylm.evaluateV(0, 0, 1); - const ST* restrict Ylm_v = Ylm[0]; - - const size_t m = multi_myV.cols(); - constexpr ST czero(0); - std::fill(multi_myV.begin(), multi_myV.end(), czero); - SplineInst->evaluate(r, localV); - - for (int ivp = 0; ivp < Displacements.size(); ivp++) - { - PointType dr = Displacements[ivp][center_idx]; - if (r > std::numeric_limits::epsilon()) - Ylm.evaluateV(-dr[0] / r, -dr[1] / r, -dr[2] / r); - - ST* restrict val = multi_myV[ivp]; - ST* restrict local_val = localV.data(); - for (size_t lm = 0; lm < lm_tot; lm++) - { -#pragma omp simd aligned(val, local_val : QMC_SIMD_ALIGNMENT) - for (size_t ib = 0; ib < m; ib++) - val[ib] += Ylm_v[lm] * local_val[ib]; - local_val += Npad; - } - } - } - - //evaluate VGL - template - inline void evaluate_vgl(const ST& r, const PointType& dr, VV& myV, GV& myG, VV& myL) - { - ST drx, dry, drz, rhatx, rhaty, rhatz, rinv; - if (r > rmin) - { - rinv = 1.0 / r; - } - else - { - rinv = 0; - } - drx = dr[0]; - dry = dr[1]; - drz = dr[2]; - rhatx = drx * rinv; - rhaty = dry * rinv; - rhatz = drz * rinv; - - Ylm.evaluateVGL(drx, dry, drz); - const ST* restrict Ylm_v = Ylm[0]; - const ST* restrict Ylm_gx = Ylm[1]; - const ST* restrict Ylm_gy = Ylm[2]; - const ST* restrict Ylm_gz = Ylm[3]; - - ST* restrict g0 = myG.data(0); - ST* restrict g1 = myG.data(1); - ST* restrict g2 = myG.data(2); - constexpr ST czero(0), cone(1), chalf(0.5); - std::fill(myV.begin(), myV.end(), czero); - std::fill(g0, g0 + Npad, czero); - std::fill(g1, g1 + Npad, czero); - std::fill(g2, g2 + Npad, czero); - std::fill(myL.begin(), myL.end(), czero); - ST* restrict val = 
myV.data(); - ST* restrict lapl = myL.data(); - ST* restrict local_val = localV.data(); - ST* restrict local_grad = localG.data(); - ST* restrict local_lapl = localL.data(); - - SplineInst->evaluate_vgl(r, localV, localG, localL); - - if (r > rmin_sqrt) - { - // far from core - r_power_minus_l[0] = cone; - ST r_power_temp = cone; - for (int l = 1; l <= lmax; l++) - { - r_power_temp *= rinv; - for (int m = -l, lm = l * l; m <= l; m++, lm++) - r_power_minus_l[lm] = r_power_temp; - } - - for (size_t lm = 0; lm < lm_tot; lm++) - { - const ST& l_val = l_vals[lm]; - const ST& r_power = r_power_minus_l[lm]; - const ST Ylm_rescale = Ylm_v[lm] * r_power; - const ST rhat_dot_G = (rhatx * Ylm_gx[lm] + rhaty * Ylm_gy[lm] + rhatz * Ylm_gz[lm]) * r_power; -#pragma omp simd aligned(val, g0, g1, g2, lapl, local_val, local_grad, local_lapl : QMC_SIMD_ALIGNMENT) - for (size_t ib = 0; ib < myV.size(); ib++) - { - const ST local_v = local_val[ib]; - const ST local_g = local_grad[ib]; - const ST local_l = local_lapl[ib]; - // value - const ST Vpart = l_val * rinv * local_v; - val[ib] += Ylm_rescale * local_v; - - // grad - const ST factor1 = local_g * Ylm_rescale; - const ST factor2 = local_v * r_power; - const ST factor3 = -Vpart * Ylm_rescale; - g0[ib] += factor1 * rhatx + factor2 * Ylm_gx[lm] + factor3 * rhatx; - g1[ib] += factor1 * rhaty + factor2 * Ylm_gy[lm] + factor3 * rhaty; - g2[ib] += factor1 * rhatz + factor2 * Ylm_gz[lm] + factor3 * rhatz; - - // laplacian - lapl[ib] += (local_l + (local_g * (2 - l_val) - Vpart) * rinv) * Ylm_rescale + (local_g - Vpart) * rhat_dot_G; - } - local_val += Npad; - local_grad += Npad; - local_lapl += Npad; - } - } - else if (r > rmin) - { - // the possibility of reaching here is very very low - std::cout << "Warning: an electron is very close to an ion, distance=" << r << " be careful!" 
<< std::endl; - // near core, kill divergence in the laplacian - r_power_minus_l[0] = cone; - ST r_power_temp = cone; - for (int l = 1; l <= lmax; l++) - { - r_power_temp *= rinv; - for (int m = -l, lm = l * l; m <= l; m++, lm++) - r_power_minus_l[lm] = r_power_temp; - } - - for (size_t lm = 0; lm < lm_tot; lm++) - { - const ST& l_val = l_vals[lm]; - const ST& r_power = r_power_minus_l[lm]; - const ST Ylm_rescale = Ylm_v[lm] * r_power; - const ST rhat_dot_G = (Ylm_gx[lm] * rhatx + Ylm_gy[lm] * rhaty + Ylm_gz[lm] * rhatz) * r_power * r; -#pragma omp simd aligned(val, g0, g1, g2, lapl, local_val, local_grad, local_lapl : QMC_SIMD_ALIGNMENT) - for (size_t ib = 0; ib < myV.size(); ib++) - { - const ST local_v = local_val[ib]; - const ST local_g = local_grad[ib]; - const ST local_l = local_lapl[ib]; - // value - const ST Vpart = Ylm_rescale * local_v; - val[ib] += Vpart; - - // grad - const ST factor1 = local_g * Ylm_rescale; - const ST factor2 = local_v * r_power; - const ST factor3 = -l_val * Vpart * rinv; - g0[ib] += factor1 * rhatx + factor2 * Ylm_gx[lm] + factor3 * rhatx; - g1[ib] += factor1 * rhaty + factor2 * Ylm_gy[lm] + factor3 * rhaty; - g2[ib] += factor1 * rhatz + factor2 * Ylm_gz[lm] + factor3 * rhatz; - - // laplacian - lapl[ib] += local_l * (cone - chalf * l_val) * (3 * Ylm_rescale + rhat_dot_G); - } - local_val += Npad; - local_grad += Npad; - local_lapl += Npad; - } - } - else - { - std::cout << "Warning: an electron is on top of an ion!" 
<< std::endl; - // strictly zero - -#pragma omp simd aligned(val, lapl, local_val, local_lapl : QMC_SIMD_ALIGNMENT) - for (size_t ib = 0; ib < myV.size(); ib++) - { - // value - val[ib] = Ylm_v[0] * local_val[ib]; - - // laplacian - lapl[ib] = local_lapl[ib] * static_cast(3) * Ylm_v[0]; - } - local_val += Npad; - local_grad += Npad; - local_lapl += Npad; - if (lm_tot > 0) - { - //std::cout << std::endl; - for (size_t lm = 1; lm < 4; lm++) - { -#pragma omp simd aligned(g0, g1, g2, local_grad : QMC_SIMD_ALIGNMENT) - for (size_t ib = 0; ib < myV.size(); ib++) - { - const ST local_g = local_grad[ib]; - // grad - g0[ib] += local_g * Ylm_gx[lm]; - g1[ib] += local_g * Ylm_gy[lm]; - g2[ib] += local_g * Ylm_gz[lm]; - } - local_grad += Npad; - } - } - } - } - - template - void evaluate_vgh(const ST& r, const PointType& dr, VV& myV, GV& myG, HT& myH) - { - //Needed to do tensor product here - APP_ABORT("AtomicOrbitals::evaluate_vgh"); - } -}; +using AtomicOrbitals = AtomicOrbitalsT; template -class HybridRepCenterOrbitals -{ -public: - static const int D = 3; - using PointType = typename AtomicOrbitals::PointType; - using RealType = typename DistanceTable::RealType; - using PosType = typename DistanceTable::PosType; - - enum class Region - { - INSIDE, // within the buffer shell - BUFFER, // in the buffer region - INTER // interstitial area - }; - - struct LocationSmoothingInfo - { - ///r from distance table - RealType dist_r; - ///dr from distance table - PosType dist_dr; - ///for APBC - PointType r_image; - /// region of the location - Region region; - ///smooth function value - RealType f; - ///smooth function first derivative - RealType df_dr; - ///smooth function second derivative - RealType d2f_dr2; - }; - -private: - ///atomic centers - std::vector> AtomicCenters; - ///table index - int myTableID; - ///mapping supercell to primitive cell - std::vector Super2Prim; - ///smoothing schemes - enum class smoothing_schemes - { - CONSISTENT = 0, - SMOOTHALL, - SMOOTHPARTIAL - } 
smooth_scheme; - /// smoothing function - smoothing_functions smooth_func_id; - - /// select a region (within the buffer shell, in the buffer, interstitial region) and compute the smoothing function if in the buffer. - inline void selectRegionAndComputeSmoothing(const ST& cutoff_buffer, - const ST& cutoff, - LocationSmoothingInfo& info) const - { - const RealType r = info.dist_r; - if (r < cutoff_buffer) - info.region = Region::INSIDE; - else if (r < cutoff) - { - constexpr RealType cone(1); - const RealType scale = cone / (cutoff - cutoff_buffer); - const RealType x = (r - cutoff_buffer) * scale; - info.f = smoothing(smooth_func_id, x, info.df_dr, info.d2f_dr2); - info.df_dr *= scale; - info.d2f_dr2 *= scale * scale; - info.region = Region::BUFFER; - } - else - info.region = Region::INTER; - } - -public: - HybridRepCenterOrbitals() {} - - void set_info(const ParticleSet& ions, ParticleSet& els, const std::vector& mapping) - { - myTableID = els.addTable(ions, DTModes::NEED_VP_FULL_TABLE_ON_HOST); - Super2Prim = mapping; - } - - inline void resizeStorage(size_t Nb) - { - size_t SplineCoefsBytes = 0; - - for (int ic = 0; ic < AtomicCenters.size(); ic++) - { - AtomicCenters[ic].resizeStorage(Nb); - SplineCoefsBytes += AtomicCenters[ic].getSplineSizeInBytes(); - } - - app_log() << "MEMORY " << SplineCoefsBytes / (1 << 20) << " MB allocated " - << "for the atomic radial splines in hybrid orbital representation" << std::endl; - } - - void bcast_tables(Communicate* comm) - { - for (int ic = 0; ic < AtomicCenters.size(); ic++) - AtomicCenters[ic].bcast_tables(comm); - } - - void gather_atomic_tables(Communicate* comm, std::vector& offset) - { - if (comm->size() == 1) - return; - for (int ic = 0; ic < AtomicCenters.size(); ic++) - AtomicCenters[ic].gather_tables(comm, offset); - } - - inline void flush_zero() - { - for (int ic = 0; ic < AtomicCenters.size(); ic++) - AtomicCenters[ic].flush_zero(); - } - - bool read_splines(hdf_archive& h5f) - { - bool success = true; - 
size_t ncenter; - - try - { - h5f.push("atomic_centers", false); - } - catch (...) - { - success = false; - } - success = success && h5f.readEntry(ncenter, "number_of_centers"); - if (!success) - return success; - if (ncenter != AtomicCenters.size()) - success = false; - // read splines of each center - for (int ic = 0; ic < AtomicCenters.size(); ic++) - { - std::ostringstream gname; - gname << "center_" << ic; - try - { - h5f.push(gname.str().c_str(), false); - } - catch (...) - { - success = false; - } - success = success && AtomicCenters[ic].read_splines(h5f); - h5f.pop(); - } - h5f.pop(); - return success; - } - - bool write_splines(hdf_archive& h5f) - { - bool success = true; - int ncenter = AtomicCenters.size(); - try - { - h5f.push("atomic_centers", true); - } - catch (...) - { - success = false; - } - success = success && h5f.writeEntry(ncenter, "number_of_centers"); - // write splines of each center - for (int ic = 0; ic < AtomicCenters.size(); ic++) - { - std::ostringstream gname; - gname << "center_" << ic; - try - { - h5f.push(gname.str().c_str(), true); - } - catch (...) 
- { - success = false; - } - success = success && AtomicCenters[ic].write_splines(h5f); - h5f.pop(); - } - h5f.pop(); - return success; - } - - template - inline int get_bc_sign(const PointType& r, - const PointType& r_image, - const Cell& PrimLattice, - TinyVector& HalfG) const - { - int bc_sign = 0; - PointType shift_unit = PrimLattice.toUnit(r - r_image); - for (int i = 0; i < D; i++) - { - ST img = round(shift_unit[i]); - bc_sign += HalfG[i] * (int)img; - } - return bc_sign; - } - - //evaluate only V - template - inline void evaluate_v(const ParticleSet& P, const int iat, VV& myV, LocationSmoothingInfo& info) - { - const auto& ei_dist = P.getDistTableAB(myTableID); - const int center_idx = ei_dist.get_first_neighbor(iat, info.dist_r, info.dist_dr, P.getActivePtcl() == iat); - auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; - selectRegionAndComputeSmoothing(myCenter.getCutoffBuffer(), myCenter.getCutoff(), info); - if (info.region != Region::INTER) - { - PointType dr(-info.dist_dr[0], -info.dist_dr[1], -info.dist_dr[2]); - info.r_image = myCenter.getCenterPos() + dr; - myCenter.evaluate_v(info.dist_r, dr, myV); - } - } - - /* check if the batched algorithm is safe to operate - * @param VP virtual particle set - * @return true if it is safe - * - * When the reference electron in the NLPP evaluation has a distance larger than the non overlapping radius of the reference center. - * Some qudrature points may get its SPOs evaluated from the nearest center which is not the reference center. - * The batched algorthm forces the evaluation on the reference center and introduce some error. - * In this case, the non-batched algorithm should be used. 
- */ - bool is_batched_safe(const VirtualParticleSet& VP) const - { - const int center_idx = VP.refSourcePtcl; - auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; - return VP.getRefPS().getDistTableAB(myTableID).getDistRow(VP.refPtcl)[center_idx] < - myCenter.getNonOverlappingRadius(); - } - - // C2C, C2R cases - template - inline void evaluateValuesC2X(const VirtualParticleSet& VP, VM& multi_myV, LocationSmoothingInfo& info) - { - const int center_idx = VP.refSourcePtcl; - info.dist_r = VP.getRefPS().getDistTableAB(myTableID).getDistRow(VP.refPtcl)[center_idx]; - auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; - selectRegionAndComputeSmoothing(myCenter.getCutoffBuffer(), myCenter.getCutoff(), info); - if (info.region != Region::INTER) - myCenter.evaluateValues(VP.getDistTableAB(myTableID).getDisplacements(), center_idx, info.dist_r, multi_myV); - } - - // R2R case - template - inline void evaluateValuesR2R(const VirtualParticleSet& VP, - const Cell& PrimLattice, - TinyVector& HalfG, - VM& multi_myV, - SV& bc_signs, - LocationSmoothingInfo& info) - { - const int center_idx = VP.refSourcePtcl; - info.dist_r = VP.getRefPS().getDistTableAB(myTableID).getDistRow(VP.refPtcl)[center_idx]; - auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; - selectRegionAndComputeSmoothing(myCenter.getCutoffBuffer(), myCenter.getCutoff(), info); - if (info.region != Region::INTER) - { - const auto& displ = VP.getDistTableAB(myTableID).getDisplacements(); - for (int ivp = 0; ivp < VP.getTotalNum(); ivp++) - bc_signs[ivp] = get_bc_sign(VP.R[ivp], myCenter.getCenterPos() - displ[ivp][center_idx], PrimLattice, HalfG); - myCenter.evaluateValues(displ, center_idx, info.dist_r, multi_myV); - } - } - - //evaluate only VGL - template - inline void evaluate_vgl(const ParticleSet& P, const int iat, VV& myV, GV& myG, VV& myL, LocationSmoothingInfo& info) - { - const auto& ei_dist = P.getDistTableAB(myTableID); - const int center_idx = ei_dist.get_first_neighbor(iat, info.dist_r, 
info.dist_dr, P.getActivePtcl() == iat); - auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; - selectRegionAndComputeSmoothing(myCenter.getCutoffBuffer(), myCenter.getCutoff(), info); - if (info.region != Region::INTER) - { - const PointType dr(-info.dist_dr[0], -info.dist_dr[1], -info.dist_dr[2]); - info.r_image = myCenter.getCenterPos() + dr; - myCenter.evaluate_vgl(info.dist_r, dr, myV, myG, myL); - } - } - - //evaluate only VGH - template - inline void evaluate_vgh(const ParticleSet& P, const int iat, VV& myV, GV& myG, HT& myH, LocationSmoothingInfo& info) - { - const auto& ei_dist = P.getDistTableAB(myTableID); - const int center_idx = ei_dist.get_first_neighbor(iat, info.dist_r, info.dist_dr, P.getActivePtcl() == iat); - auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; - selectRegionAndComputeSmoothing(myCenter.getCutoffBuffer(), myCenter.getCutoff(), info); - if (info.region != Region::INTER) - { - const PointType dr(-info.dist_dr[0], -info.dist_dr[1], -info.dist_dr[2]); - info.r_image = myCenter.getCenterPos() + dr; - myCenter.evaluate_vgh(info.dist_r, dr, myV, myG, myH); - } - } - - // interpolate buffer region, value only - template - inline void interpolate_buffer_v(VV& psi, const VV& psi_AO, const RealType f) const - { - constexpr RealType cone(1); - for (size_t i = 0; i < psi.size(); i++) - psi[i] = psi_AO[i] * f + psi[i] * (cone - f); - } - - // interpolate buffer region, value, gradients and laplacian - template - inline void interpolate_buffer_vgl(VV& psi, - GV& dpsi, - VV& d2psi, - const VV& psi_AO, - const GV& dpsi_AO, - const VV& d2psi_AO, - const LocationSmoothingInfo& info) const - { - constexpr RealType cone(1), ctwo(2); - const RealType rinv(1.0 / info.dist_r); - auto& dist_dr = info.dist_dr; - auto& f = info.f; - auto& df_dr = info.df_dr; - auto& d2f_dr2 = info.d2f_dr2; - if (smooth_scheme == smoothing_schemes::CONSISTENT) - for (size_t i = 0; i < psi.size(); i++) - { // psi, dpsi, d2psi are all consistent - d2psi[i] = 
d2psi_AO[i] * f + d2psi[i] * (cone - f) + df_dr * rinv * ctwo * dot(dpsi[i] - dpsi_AO[i], dist_dr) + - (psi_AO[i] - psi[i]) * (d2f_dr2 + ctwo * rinv * df_dr); - dpsi[i] = dpsi_AO[i] * f + dpsi[i] * (cone - f) + df_dr * rinv * dist_dr * (psi[i] - psi_AO[i]); - psi[i] = psi_AO[i] * f + psi[i] * (cone - f); - } - else if (smooth_scheme == smoothing_schemes::SMOOTHALL) - for (size_t i = 0; i < psi.size(); i++) - { - d2psi[i] = d2psi_AO[i] * f + d2psi[i] * (cone - f); - dpsi[i] = dpsi_AO[i] * f + dpsi[i] * (cone - f); - psi[i] = psi_AO[i] * f + psi[i] * (cone - f); - } - else if (smooth_scheme == smoothing_schemes::SMOOTHPARTIAL) - for (size_t i = 0; i < psi.size(); i++) - { // dpsi, d2psi are consistent but psi is not. - d2psi[i] = d2psi_AO[i] * f + d2psi[i] * (cone - f) + df_dr * rinv * ctwo * dot(dpsi[i] - dpsi_AO[i], dist_dr); - dpsi[i] = dpsi_AO[i] * f + dpsi[i] * (cone - f); - psi[i] = psi_AO[i] * f + psi[i] * (cone - f); - } - else - throw std::runtime_error("Unknown smooth scheme!"); - } - - template - friend class qmcplusplus::HybridRepSetReader; -}; - -extern template class AtomicOrbitals; -extern template class AtomicOrbitals; -extern template class HybridRepCenterOrbitals; -extern template class HybridRepCenterOrbitals; +using HybridRepCenterOrbitals = HybridRepCenterOrbitals; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepCplx.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepCplx.h index 79405ee8aa..aedaee7cc1 100644 --- a/src/QMCWaveFunctions/BsplineFactory/HybridRepCplx.h +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepCplx.h @@ -18,8 +18,8 @@ #ifndef QMCPLUSPLUS_HYBRIDREP_CPLX_H #define QMCPLUSPLUS_HYBRIDREP_CPLX_H -#include "QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h" -#include "CPU/SIMD/inner_product.hpp" +#include "Configuration.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepCplxT.h" namespace qmcplusplus { @@ -29,222 +29,7 @@ namespace qmcplusplus * Only works with SPLINEBASE 
class containing complex splines */ template -class HybridRepCplx : public SPLINEBASE, private HybridRepCenterOrbitals -{ -public: - using HYBRIDBASE = HybridRepCenterOrbitals; - using ST = typename SPLINEBASE::DataType; - using PointType = typename SPLINEBASE::PointType; - using SingleSplineType = typename SPLINEBASE::SingleSplineType; - using RealType = typename SPLINEBASE::RealType; - // types for evaluation results - using typename SPLINEBASE::GGGVector; - using typename SPLINEBASE::GradMatrix; - using typename SPLINEBASE::GradType; - using typename SPLINEBASE::GradVector; - using typename SPLINEBASE::HessVector; - using typename SPLINEBASE::OffloadMWVGLArray; - using typename SPLINEBASE::ValueMatrix; - using typename SPLINEBASE::ValueType; - using typename SPLINEBASE::ValueVector; - -private: - using typename HYBRIDBASE::Region; - - ValueVector psi_AO, d2psi_AO; - GradVector dpsi_AO; - Matrix> multi_myV; - typename HYBRIDBASE::LocationSmoothingInfo info; - - using SPLINEBASE::myG; - using SPLINEBASE::myH; - using SPLINEBASE::myL; - using SPLINEBASE::myV; - -public: - HybridRepCplx(const std::string& my_name) : SPLINEBASE(my_name) {} - - std::string getClassName() const final { return "Hybrid" + SPLINEBASE::getClassName(); } - std::string getKeyword() const final { return "Hybrid" + SPLINEBASE::getKeyword(); } - bool isOMPoffload() const final { return false; } - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - - inline void resizeStorage(size_t n, size_t nvals) - { - SPLINEBASE::resizeStorage(n, nvals); - HYBRIDBASE::resizeStorage(myV.size()); - } - - void bcast_tables(Communicate* comm) - { - SPLINEBASE::bcast_tables(comm); - HYBRIDBASE::bcast_tables(comm); - } - - void gather_tables(Communicate* comm) - { - SPLINEBASE::gather_tables(comm); - HYBRIDBASE::gather_atomic_tables(comm, SPLINEBASE::offset); - } - - bool read_splines(hdf_archive& h5f) { return HYBRIDBASE::read_splines(h5f) && SPLINEBASE::read_splines(h5f); } - - 
bool write_splines(hdf_archive& h5f) { return HYBRIDBASE::write_splines(h5f) && SPLINEBASE::write_splines(h5f); } - - inline void flush_zero() - { - //SPLINEBASE::flush_zero(); - HYBRIDBASE::flush_zero(); - } - - void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override - { - HYBRIDBASE::evaluate_v(P, iat, myV, info); - if (info.region == Region::INTER) - SPLINEBASE::evaluateValue(P, iat, psi); - else if (info.region == Region::INSIDE) - SPLINEBASE::assign_v(P.activeR(iat), myV, psi, 0, myV.size() / 2); - else - { - psi_AO.resize(psi.size()); - SPLINEBASE::assign_v(P.activeR(iat), myV, psi_AO, 0, myV.size() / 2); - SPLINEBASE::evaluateValue(P, iat, psi); - HYBRIDBASE::interpolate_buffer_v(psi, psi_AO, info.f); - } - } - - - void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override - { - if (VP.isOnSphere()) - { - // resize scratch space - psi_AO.resize(psi.size()); - if (multi_myV.rows() < VP.getTotalNum()) - multi_myV.resize(VP.getTotalNum(), myV.size()); - HYBRIDBASE::evaluateValuesC2X(VP, multi_myV, info); - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - if (info.region == Region::INTER) - SPLINEBASE::evaluateValue(VP, iat, psi); - else if (info.region == Region::INSIDE) - { - Vector> myV_one(multi_myV[iat], myV.size()); - SPLINEBASE::assign_v(VP.R[iat], myV_one, psi, 0, myV.size() / 2); - } - else - { - Vector> myV_one(multi_myV[iat], myV.size()); - SPLINEBASE::assign_v(VP.R[iat], myV_one, psi_AO, 0, myV.size() / 2); - SPLINEBASE::evaluateValue(VP, iat, psi); - HYBRIDBASE::interpolate_buffer_v(psi, psi_AO, info.f); - } - ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size()); - } - } - else - { - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - evaluateValue(VP, iat, psi); - ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size()); - } - } - } - - void mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const 
RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const final - { - BsplineSet::mw_evaluateDetRatios(spo_list, vp_list, psi_list, invRow_ptr_list, ratios_list); - } - - void evaluateVGL(const ParticleSet& P, const int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override - { - HYBRIDBASE::evaluate_vgl(P, iat, myV, myG, myL, info); - if (info.region == Region::INTER) - SPLINEBASE::evaluateVGL(P, iat, psi, dpsi, d2psi); - else if (info.region == Region::INSIDE) - SPLINEBASE::assign_vgl_from_l(P.activeR(iat), psi, dpsi, d2psi); - else - { - psi_AO.resize(psi.size()); - dpsi_AO.resize(psi.size()); - d2psi_AO.resize(psi.size()); - SPLINEBASE::assign_vgl_from_l(P.activeR(iat), psi_AO, dpsi_AO, d2psi_AO); - SPLINEBASE::evaluateVGL(P, iat, psi, dpsi, d2psi); - HYBRIDBASE::interpolate_buffer_vgl(psi, dpsi, d2psi, psi_AO, dpsi_AO, d2psi_AO, info); - } - } - - void mw_evaluateVGL(const RefVectorWithLeader& sa_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const final - { - BsplineSet::mw_evaluateVGL(sa_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); - } - - void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const final - { - BsplineSet::mw_evaluateVGLandDetRatioGrads(spo_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads); - } - - void evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override - { - APP_ABORT("HybridRepCplx::evaluate_vgh not implemented!"); - HYBRIDBASE::evaluate_vgh(P, iat, myV, myG, myH, info); - if (info.region == Region::INTER) - SPLINEBASE::evaluateVGH(P, iat, psi, dpsi, grad_grad_psi); - 
else - SPLINEBASE::assign_vgh(P.activeR(iat), psi, dpsi, grad_grad_psi, 0, myV.size() / 2); - } - - void evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override - { - APP_ABORT("HybridRepCplx::evaluate_vghgh not implemented!"); - } - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) final - { - // bypass SPLINEBASE::evaluate_notranspose - BsplineSet::evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet); - } - - template - friend class HybridRepSetReader; - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; -}; +using HybridRepCplx = HybridRepCplxT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepReal.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepReal.h index 3cf6a2065c..89dda48341 100644 --- a/src/QMCWaveFunctions/BsplineFactory/HybridRepReal.h +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepReal.h @@ -18,240 +18,13 @@ #ifndef QMCPLUSPLUS_HYBRIDREP_REAL_H #define QMCPLUSPLUS_HYBRIDREP_REAL_H -#include "QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h" -#include "CPU/SIMD/inner_product.hpp" +#include "Configuration.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepRealT.h" namespace qmcplusplus { -/** hybrid representation orbitals combining B-spline orbitals on a grid and atomic centered orbitals. - * @tparam SPLINEBASE B-spline orbital class. 
- * - * Only works with SPLINEBASE class containing real splines - */ template -class HybridRepReal : public SPLINEBASE, private HybridRepCenterOrbitals -{ -public: - using HYBRIDBASE = HybridRepCenterOrbitals; - using ST = typename SPLINEBASE::DataType; - using PointType = typename SPLINEBASE::PointType; - using SingleSplineType = typename SPLINEBASE::SingleSplineType; - using RealType = typename SPLINEBASE::RealType; - // types for evaluation results - using typename SPLINEBASE::GGGVector; - using typename SPLINEBASE::GradMatrix; - using typename SPLINEBASE::GradType; - using typename SPLINEBASE::GradVector; - using typename SPLINEBASE::HessVector; - using typename SPLINEBASE::OffloadMWVGLArray; - using typename SPLINEBASE::ValueMatrix; - using typename SPLINEBASE::ValueType; - using typename SPLINEBASE::ValueVector; - -private: - using typename HYBRIDBASE::Region; - - ValueVector psi_AO, d2psi_AO; - GradVector dpsi_AO; - Matrix> multi_myV; - typename HYBRIDBASE::LocationSmoothingInfo info; - - using SPLINEBASE::HalfG; - using SPLINEBASE::myG; - using SPLINEBASE::myH; - using SPLINEBASE::myL; - using SPLINEBASE::myV; - using SPLINEBASE::PrimLattice; - -public: - HybridRepReal(const std::string& my_name) : SPLINEBASE(my_name) {} - - std::string getClassName() const final { return "Hybrid" + SPLINEBASE::getClassName(); } - std::string getKeyword() const final { return "Hybrid" + SPLINEBASE::getKeyword(); } - bool isOMPoffload() const final { return false; } - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - - inline void resizeStorage(size_t n, size_t nvals) - { - SPLINEBASE::resizeStorage(n, nvals); - HYBRIDBASE::resizeStorage(myV.size()); - } - - void bcast_tables(Communicate* comm) - { - SPLINEBASE::bcast_tables(comm); - HYBRIDBASE::bcast_tables(comm); - } - - void gather_tables(Communicate* comm) - { - SPLINEBASE::gather_tables(comm); - HYBRIDBASE::gather_atomic_tables(comm, SPLINEBASE::offset); - } - - inline void flush_zero() 
- { - //SPLINEBASE::flush_zero(); - HYBRIDBASE::flush_zero(); - } - - bool read_splines(hdf_archive& h5f) { return HYBRIDBASE::read_splines(h5f) && SPLINEBASE::read_splines(h5f); } - - bool write_splines(hdf_archive& h5f) { return HYBRIDBASE::write_splines(h5f) && SPLINEBASE::write_splines(h5f); } - - void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override - { - HYBRIDBASE::evaluate_v(P, iat, myV, info); - if (info.region == Region::INTER) - SPLINEBASE::evaluateValue(P, iat, psi); - else if (info.region == Region::INSIDE) - { - int bc_sign = HYBRIDBASE::get_bc_sign(P.activeR(iat), info.r_image, PrimLattice, HalfG); - SPLINEBASE::assign_v(bc_sign, myV, psi, 0, myV.size()); - } - else - { - psi_AO.resize(psi.size()); - int bc_sign = HYBRIDBASE::get_bc_sign(P.activeR(iat), info.r_image, PrimLattice, HalfG); - SPLINEBASE::assign_v(bc_sign, myV, psi_AO, 0, myV.size()); - SPLINEBASE::evaluateValue(P, iat, psi); - HYBRIDBASE::interpolate_buffer_v(psi, psi_AO, info.f); - } - } - - void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override - { - if (VP.isOnSphere() && HYBRIDBASE::is_batched_safe(VP)) - { - // resize scratch space - psi_AO.resize(psi.size()); - if (multi_myV.rows() < VP.getTotalNum()) - multi_myV.resize(VP.getTotalNum(), myV.size()); - std::vector bc_signs(VP.getTotalNum()); - HYBRIDBASE::evaluateValuesR2R(VP, PrimLattice, HalfG, multi_myV, bc_signs, info); - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - if (info.region == Region::INTER) - SPLINEBASE::evaluateValue(VP, iat, psi); - else if (info.region == Region::INSIDE) - { - Vector> myV_one(multi_myV[iat], myV.size()); - SPLINEBASE::assign_v(bc_signs[iat], myV_one, psi, 0, myV.size()); - } - else - { - Vector> myV_one(multi_myV[iat], myV.size()); - SPLINEBASE::assign_v(bc_signs[iat], myV_one, psi_AO, 0, myV.size()); - SPLINEBASE::evaluateValue(VP, iat, psi); - HYBRIDBASE::interpolate_buffer_v(psi, 
psi_AO, info.f); - } - ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size()); - } - } - else - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - evaluateValue(VP, iat, psi); - ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size()); - } - } - - void mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const final - { - BsplineSet::mw_evaluateDetRatios(spo_list, vp_list, psi_list, invRow_ptr_list, ratios_list); - } - - void evaluateVGL(const ParticleSet& P, const int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override - { - HYBRIDBASE::evaluate_vgl(P, iat, myV, myG, myL, info); - if (info.region == Region::INTER) - SPLINEBASE::evaluateVGL(P, iat, psi, dpsi, d2psi); - else if (info.region == Region::INSIDE) - SPLINEBASE::assign_vgl_from_l(HYBRIDBASE::get_bc_sign(P.activeR(iat), info.r_image, PrimLattice, HalfG), psi, - dpsi, d2psi); - else - { - psi_AO.resize(psi.size()); - dpsi_AO.resize(psi.size()); - d2psi_AO.resize(psi.size()); - int bc_sign = HYBRIDBASE::get_bc_sign(P.activeR(iat), info.r_image, PrimLattice, HalfG); - SPLINEBASE::assign_vgl_from_l(bc_sign, psi_AO, dpsi_AO, d2psi_AO); - SPLINEBASE::evaluateVGL(P, iat, psi, dpsi, d2psi); - HYBRIDBASE::interpolate_buffer_vgl(psi, dpsi, d2psi, psi_AO, dpsi_AO, d2psi_AO, info); - } - } - - void mw_evaluateVGL(const RefVectorWithLeader& sa_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const final - { - BsplineSet::mw_evaluateVGL(sa_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); - } - - void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const final 
- { - BsplineSet::mw_evaluateVGLandDetRatioGrads(spo_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads); - } - - void evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override - { - APP_ABORT("HybridRepReal::evaluateVGH not implemented!"); - HYBRIDBASE::evaluate_vgh(P, iat, myV, myG, myH, info); - if (info.region == Region::INTER) - SPLINEBASE::assign_vgh(HYBRIDBASE::get_bc_sign(P.activeR(iat), info.r_image, PrimLattice, HalfG), psi, dpsi, - grad_grad_psi, 0, myV.size()); - else - SPLINEBASE::evaluateVGH(P, iat, psi, dpsi, grad_grad_psi); - } - - void evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override - { - APP_ABORT("HybridRepCplx::evaluateVGHGH not implemented!"); - } - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) final - { - // bypass SPLINEBASE::evaluate_notranspose - BsplineSet::evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet); - } - - template - friend class HybridRepSetReader; - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; -}; +using HybridRepReal = HybridRepRealT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReader.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReader.h deleted file mode 100644 index a54219c80c..0000000000 --- a/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReader.h +++ /dev/null @@ -1,566 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2019 QMCPACK developers. 
-// -// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// -// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory -////////////////////////////////////////////////////////////////////////////////////// - - -/** @file - * - * derived from SplineSetReader - */ - -#ifndef QMCPLUSPLUS_HYBRIDREP_READER_H -#define QMCPLUSPLUS_HYBRIDREP_READER_H - -#include "Numerics/Quadrature.h" -#include "Numerics/Bessel.h" -#include "QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h" -#include "QMCWaveFunctions/BsplineFactory/SplineSetReader.h" -#include "OhmmsData/AttributeSet.h" -#include "CPU/math.hpp" -#include "Concurrency/OpenMP.h" - -namespace qmcplusplus -{ -template -struct Gvectors -{ - using PosType = TinyVector; - using ValueType = std::complex; - - const LT& Lattice; - std::vector gvecs_cart; //Cartesian. - std::vector gmag; - const size_t NumGvecs; - - Gvectors(const std::vector>& gvecs_in, - const LT& Lattice_in, - const TinyVector& HalfG, - size_t first, - size_t last) - : Lattice(Lattice_in), NumGvecs(last - first) - { - gvecs_cart.resize(NumGvecs); - gmag.resize(NumGvecs); -#pragma omp parallel for - for (size_t ig = 0; ig < NumGvecs; ig++) - { - TinyVector gvec_shift; - gvec_shift = gvecs_in[ig + first] + HalfG * 0.5; - gvecs_cart[ig] = Lattice.k_cart(gvec_shift); - gmag[ig] = std::sqrt(dot(gvecs_cart[ig], gvecs_cart[ig])); - } - } - - template - void calc_Ylm_G(const size_t ig, YLM_ENGINE& Ylm, VVT& YlmG) const - { - PosType Ghat(0.0, 0.0, 1.0); - if (gmag[ig] > 0) - Ghat = gvecs_cart[ig] / gmag[ig]; - Ylm.evaluateV(Ghat[0], Ghat[1], Ghat[2], YlmG.data()); - } - - template - inline void calc_jlm_G(const int lmax, ST& r, const size_t ig, VVT& j_lm_G) const - { - bessel_steed_array_cpu(lmax, gmag[ig] * r, j_lm_G.data()); - for (size_t l = lmax; l > 0; l--) - for (size_t lm = l * l; lm < (l + 1) * (l + 1); lm++) - j_lm_G[lm] = j_lm_G[l]; - } - - template - inline void calc_phase_shift(const PT& RSoA, const size_t ig, VT& 
phase_shift_real, VT& phase_shift_imag) const - { - const ST* restrict px = RSoA.data(0); - const ST* restrict py = RSoA.data(1); - const ST* restrict pz = RSoA.data(2); - ST* restrict v_r = phase_shift_real.data(); - ST* restrict v_i = phase_shift_imag.data(); - const ST& gv_x = gvecs_cart[ig][0]; - const ST& gv_y = gvecs_cart[ig][1]; - const ST& gv_z = gvecs_cart[ig][2]; - -#pragma omp simd aligned(px, py, pz, v_r, v_i : QMC_SIMD_ALIGNMENT) - for (size_t iat = 0; iat < RSoA.size(); iat++) - qmcplusplus::sincos(px[iat] * gv_x + py[iat] * gv_y + pz[iat] * gv_z, v_i + iat, v_r + iat); - } - - template - ValueType evaluate_psi_r(const Vector>& cG, const PT& pos) - { - assert(cG.size() == NumGvecs); - std::complex val(0.0, 0.0); - for (size_t ig = 0; ig < NumGvecs; ig++) - { - ST s, c; - qmcplusplus::sincos(dot(gvecs_cart[ig], pos), &s, &c); - ValueType pw0(c, s); - val += cG[ig] * pw0; - } - return val; - } - - template - void evaluate_psi_r(const Vector>& cG, const PT& pos, ValueType& phi, ValueType& d2phi) - { - assert(cG.size() == NumGvecs); - d2phi = phi = 0.0; - for (size_t ig = 0; ig < NumGvecs; ig++) - { - ST s, c; - qmcplusplus::sincos(dot(gvecs_cart[ig], pos), &s, &c); - ValueType pw0(c, s); - phi += cG[ig] * pw0; - d2phi += cG[ig] * pw0 * (-dot(gvecs_cart[ig], gvecs_cart[ig])); - } - } - - double evaluate_KE(const Vector>& cG) - { - assert(cG.size() == NumGvecs); - double KE = 0; - for (size_t ig = 0; ig < NumGvecs; ig++) - KE += dot(gvecs_cart[ig], gvecs_cart[ig]) * (cG[ig].real() * cG[ig].real() + cG[ig].imag() * cG[ig].imag()); - return KE / 2.0; - } -}; - - -/** General HybridRepSetReader to handle any unitcell - */ -template -class HybridRepSetReader : public SplineSetReader -{ -public: - using BaseReader = SplineSetReader; - - using BaseReader::bspline; - using BaseReader::mybuilder; - using BaseReader::rotate_phase_i; - using BaseReader::rotate_phase_r; - using typename BaseReader::DataType; - - HybridRepSetReader(EinsplineSetBuilder* e) : 
BaseReader(e) {} - - /** initialize basic parameters of atomic orbitals */ - void initialize_hybridrep_atomic_centers() override - { - OhmmsAttributeSet a; - std::string scheme_name("Consistent"); - std::string s_function_name("LEKS2018"); - a.add(scheme_name, "smoothing_scheme"); - a.add(s_function_name, "smoothing_function"); - a.put(mybuilder->XMLRoot); - // assign smooth_scheme - if (scheme_name == "Consistent") - bspline->smooth_scheme = SA::smoothing_schemes::CONSISTENT; - else if (scheme_name == "SmoothAll") - bspline->smooth_scheme = SA::smoothing_schemes::SMOOTHALL; - else if (scheme_name == "SmoothPartial") - bspline->smooth_scheme = SA::smoothing_schemes::SMOOTHPARTIAL; - else - APP_ABORT("initialize_hybridrep_atomic_centers wrong smoothing_scheme name! Only allows Consistent, SmoothAll or " - "SmoothPartial."); - - // assign smooth_function - if (s_function_name == "LEKS2018") - bspline->smooth_func_id = smoothing_functions::LEKS2018; - else if (s_function_name == "coscos") - bspline->smooth_func_id = smoothing_functions::COSCOS; - else if (s_function_name == "linear") - bspline->smooth_func_id = smoothing_functions::LINEAR; - else - APP_ABORT( - "initialize_hybridrep_atomic_centers wrong smoothing_function name! Only allows LEKS2018, coscos or linear."); - app_log() << "Hybrid orbital representation uses " << scheme_name << " smoothing scheme and " << s_function_name - << " smoothing function." 
<< std::endl; - - bspline->set_info(*(mybuilder->SourcePtcl), mybuilder->TargetPtcl, mybuilder->Super2Prim); - auto& centers = bspline->AtomicCenters; - auto& ACInfo = mybuilder->AtomicCentersInfo; - // load atomic center info only when it is not initialized - if (centers.size() == 0) - { - bool success = true; - app_log() << "Reading atomic center info for hybrid representation" << std::endl; - for (int center_idx = 0; center_idx < ACInfo.Ncenters; center_idx++) - { - const int my_GroupID = ACInfo.GroupID[center_idx]; - if (ACInfo.cutoff[center_idx] < 0) - { - app_error() << "Hybrid orbital representation needs parameter 'cutoff_radius' for atom " << center_idx - << std::endl; - success = false; - } - - if (ACInfo.inner_cutoff[center_idx] < 0) - { - const double inner_cutoff = std::max(ACInfo.cutoff[center_idx] - 0.3, 0.0); - app_log() << "Hybrid orbital representation setting 'inner_cutoff' to " << inner_cutoff << " for group " - << my_GroupID << " as atom " << center_idx << std::endl; - // overwrite the inner_cutoff of all the atoms of the same species - for (int id = 0; id < ACInfo.Ncenters; id++) - if (my_GroupID == ACInfo.GroupID[id]) - ACInfo.inner_cutoff[id] = inner_cutoff; - } - else if (ACInfo.inner_cutoff[center_idx] > ACInfo.cutoff[center_idx]) - { - app_error() << "Hybrid orbital representation 'inner_cutoff' must be smaller than 'spline_radius' for atom " - << center_idx << std::endl; - success = false; - } - - if (ACInfo.cutoff[center_idx] > 0) - { - if (ACInfo.lmax[center_idx] < 0) - { - app_error() << "Hybrid orbital representation needs parameter 'lmax' for atom " << center_idx << std::endl; - success = false; - } - - if (ACInfo.spline_radius[center_idx] < 0 && ACInfo.spline_npoints[center_idx] < 0) - { - app_log() << "Parameters 'spline_radius' and 'spline_npoints' for group " << my_GroupID << " as atom " - << center_idx << " are not specified." 
<< std::endl; - const double delta = std::min(0.02, ACInfo.cutoff[center_idx] / 4.0); - const int n_grid_point = std::ceil((ACInfo.cutoff[center_idx] + 1e-4) / delta) + 3; - for (int id = 0; id < ACInfo.Ncenters; id++) - if (my_GroupID == ACInfo.GroupID[id]) - { - ACInfo.spline_npoints[id] = n_grid_point; - ACInfo.spline_radius[id] = (n_grid_point - 1) * delta; - } - app_log() << " Based on default grid point distance " << delta << std::endl; - app_log() << " Setting 'spline_npoints' to " << ACInfo.spline_npoints[center_idx] << std::endl; - app_log() << " Setting 'spline_radius' to " << ACInfo.spline_radius[center_idx] << std::endl; - } - else - { - if (ACInfo.spline_radius[center_idx] < 0) - { - app_error() << "Hybrid orbital representation needs parameter 'spline_radius' for atom " << center_idx - << std::endl; - success = false; - } - - if (ACInfo.spline_npoints[center_idx] < 0) - { - app_error() << "Hybrid orbital representation needs parameter 'spline_npoints' for atom " << center_idx - << std::endl; - success = false; - } - } - - // check maximally allowed cutoff_radius - double max_allowed_cutoff = ACInfo.spline_radius[center_idx] - - 2.0 * ACInfo.spline_radius[center_idx] / (ACInfo.spline_npoints[center_idx] - 1); - if (success && ACInfo.cutoff[center_idx] > max_allowed_cutoff) - { - app_error() << "Hybrid orbital representation requires cutoff_radius<=" << max_allowed_cutoff - << " calculated by spline_radius-2*spline_radius/(spline_npoints-1) for atom " << center_idx - << std::endl; - success = false; - } - } - else - { - // no atomic regions for this atom type - ACInfo.spline_radius[center_idx] = 0.0; - ACInfo.spline_npoints[center_idx] = 0; - ACInfo.lmax[center_idx] = 0; - } - } - if (!success) - BaseReader::myComm->barrier_and_abort("initialize_hybridrep_atomic_centers Failed to initialize atomic centers " - "in hybrid orbital representation!"); - - for (int center_idx = 0; center_idx < ACInfo.Ncenters; center_idx++) - { - AtomicOrbitals 
oneCenter(ACInfo.lmax[center_idx]); - oneCenter.set_info(ACInfo.ion_pos[center_idx], ACInfo.cutoff[center_idx], ACInfo.inner_cutoff[center_idx], - ACInfo.spline_radius[center_idx], ACInfo.non_overlapping_radius[center_idx], - ACInfo.spline_npoints[center_idx]); - centers.push_back(oneCenter); - } - } - } - - /** initialize construct atomic orbital radial functions from plane waves */ - inline void create_atomic_centers_Gspace(Vector>& cG, - Communicate& band_group_comm, - int iorb) override - { - band_group_comm.bcast(rotate_phase_r); - band_group_comm.bcast(rotate_phase_i); - band_group_comm.bcast(cG); - //distribute G-vectors over processor groups - const int Ngvecs = mybuilder->Gvecs[0].size(); - const int Nprocs = band_group_comm.size(); - const int Ngvecgroups = std::min(Ngvecs, Nprocs); - Communicate gvec_group_comm(band_group_comm, Ngvecgroups); - std::vector gvec_groups(Ngvecgroups + 1, 0); - FairDivideLow(Ngvecs, Ngvecgroups, gvec_groups); - const int gvec_first = gvec_groups[gvec_group_comm.getGroupID()]; - const int gvec_last = gvec_groups[gvec_group_comm.getGroupID() + 1]; - - // prepare Gvecs Ylm(G) - using UnitCellType = typename EinsplineSetBuilder::UnitCellType; - Gvectors Gvecs(mybuilder->Gvecs[0], mybuilder->PrimCell, bspline->HalfG, gvec_first, - gvec_last); - // if(band_group_comm.isGroupLeader()) std::cout << "print band=" << iorb << " KE=" << Gvecs.evaluate_KE(cG) << std::endl; - - std::vector>& centers = bspline->AtomicCenters; - app_log() << "Transforming band " << iorb << " on Rank 0" << std::endl; - // collect atomic centers by group - std::vector uniq_species; - for (int center_idx = 0; center_idx < centers.size(); center_idx++) - { - auto& ACInfo = mybuilder->AtomicCentersInfo; - const int my_GroupID = ACInfo.GroupID[center_idx]; - int found_idx = -1; - for (size_t idx = 0; idx < uniq_species.size(); idx++) - if (my_GroupID == uniq_species[idx]) - { - found_idx = idx; - break; - } - if (found_idx < 0) - 
uniq_species.push_back(my_GroupID); - } - // construct group list - std::vector> group_list(uniq_species.size()); - for (int center_idx = 0; center_idx < centers.size(); center_idx++) - { - auto& ACInfo = mybuilder->AtomicCentersInfo; - const int my_GroupID = ACInfo.GroupID[center_idx]; - for (size_t idx = 0; idx < uniq_species.size(); idx++) - if (my_GroupID == uniq_species[idx]) - { - group_list[idx].push_back(center_idx); - break; - } - } - - for (int group_idx = 0; group_idx < group_list.size(); group_idx++) - { - const auto& mygroup = group_list[group_idx]; - const double spline_radius = centers[mygroup[0]].getSplineRadius(); - const int spline_npoints = centers[mygroup[0]].getSplineNpoints(); - const int lmax = centers[mygroup[0]].getLmax(); - const double delta = spline_radius / static_cast(spline_npoints - 1); - const int lm_tot = (lmax + 1) * (lmax + 1); - const size_t natoms = mygroup.size(); - const int policy = lm_tot > natoms ? 0 : 1; - - std::vector> i_power(lm_tot); - // rotate phase is introduced here. 
- std::complex i_temp(rotate_phase_r, rotate_phase_i); - for (size_t l = 0; l <= lmax; l++) - { - for (size_t lm = l * l; lm < (l + 1) * (l + 1); lm++) - i_power[lm] = i_temp; - i_temp *= std::complex(0.0, 1.0); - } - - std::vector> all_vals(natoms); - std::vector>> vals_local(spline_npoints * omp_get_max_threads()); - VectorSoaContainer myRSoA(natoms); - for (size_t idx = 0; idx < natoms; idx++) - { - all_vals[idx].resize(spline_npoints, lm_tot * 2); - all_vals[idx] = 0.0; - myRSoA(idx) = centers[mygroup[idx]].getCenterPos(); - } - -#pragma omp parallel - { - const size_t tid = omp_get_thread_num(); - const size_t nt = omp_get_num_threads(); - - for (int ip = 0; ip < spline_npoints; ip++) - { - const size_t ip_idx = tid * spline_npoints + ip; - if (policy == 1) - { - vals_local[ip_idx].resize(lm_tot * 2); - for (size_t lm = 0; lm < lm_tot * 2; lm++) - { - auto& vals = vals_local[ip_idx][lm]; - vals.resize(natoms); - std::fill(vals.begin(), vals.end(), 0.0); - } - } - else - { - vals_local[ip_idx].resize(natoms * 2); - for (size_t iat = 0; iat < natoms * 2; iat++) - { - auto& vals = vals_local[ip_idx][iat]; - vals.resize(lm_tot); - std::fill(vals.begin(), vals.end(), 0.0); - } - } - } - - const size_t size_pw_tile = 32; - const size_t num_pw_tiles = (Gvecs.NumGvecs + size_pw_tile - 1) / size_pw_tile; - aligned_vector j_lm_G(lm_tot, 0.0); - std::vector> phase_shift_r(size_pw_tile); - std::vector> phase_shift_i(size_pw_tile); - std::vector> YlmG(size_pw_tile); - for (size_t ig = 0; ig < size_pw_tile; ig++) - { - phase_shift_r[ig].resize(natoms); - phase_shift_i[ig].resize(natoms); - YlmG[ig].resize(lm_tot); - } - SoaSphericalTensor Ylm(lmax); - -#pragma omp for - for (size_t tile_id = 0; tile_id < num_pw_tiles; tile_id++) - { - const size_t ig_first = tile_id * size_pw_tile; - const size_t ig_last = std::min((tile_id + 1) * size_pw_tile, Gvecs.NumGvecs); - for (size_t ig = ig_first; ig < ig_last; ig++) - { - const size_t ig_local = ig - ig_first; - // calculate phase 
shift for all the centers of this group - Gvecs.calc_phase_shift(myRSoA, ig, phase_shift_r[ig_local], phase_shift_i[ig_local]); - Gvecs.calc_Ylm_G(ig, Ylm, YlmG[ig_local]); - } - - for (int ip = 0; ip < spline_npoints; ip++) - { - double r = delta * static_cast(ip); - const size_t ip_idx = tid * spline_npoints + ip; - - for (size_t ig = ig_first; ig < ig_last; ig++) - { - const size_t ig_local = ig - ig_first; - // calculate spherical bessel function - Gvecs.calc_jlm_G(lmax, r, ig, j_lm_G); - for (size_t lm = 0; lm < lm_tot; lm++) - j_lm_G[lm] *= YlmG[ig_local][lm]; - - const double cG_r = cG[ig + gvec_first].real(); - const double cG_i = cG[ig + gvec_first].imag(); - if (policy == 1) - { - for (size_t lm = 0; lm < lm_tot; lm++) - { - double* restrict vals_r = vals_local[ip_idx][lm * 2].data(); - double* restrict vals_i = vals_local[ip_idx][lm * 2 + 1].data(); - const double* restrict ps_r_ptr = phase_shift_r[ig_local].data(); - const double* restrict ps_i_ptr = phase_shift_i[ig_local].data(); - double cG_j_r = cG_r * j_lm_G[lm]; - double cG_j_i = cG_i * j_lm_G[lm]; -#pragma omp simd aligned(vals_r, vals_i, ps_r_ptr, ps_i_ptr : QMC_SIMD_ALIGNMENT) - for (size_t idx = 0; idx < natoms; idx++) - { - const double ps_r = ps_r_ptr[idx]; - const double ps_i = ps_i_ptr[idx]; - vals_r[idx] += cG_j_r * ps_r - cG_j_i * ps_i; - vals_i[idx] += cG_j_i * ps_r + cG_j_r * ps_i; - } - } - } - else - { - for (size_t idx = 0; idx < natoms; idx++) - { - double* restrict vals_r = vals_local[ip_idx][idx * 2].data(); - double* restrict vals_i = vals_local[ip_idx][idx * 2 + 1].data(); - const double* restrict j_lm_G_ptr = j_lm_G.data(); - double cG_ps_r = cG_r * phase_shift_r[ig_local][idx] - cG_i * phase_shift_i[ig_local][idx]; - double cG_ps_i = cG_i * phase_shift_r[ig_local][idx] + cG_r * phase_shift_i[ig_local][idx]; -#pragma omp simd aligned(vals_r, vals_i, j_lm_G_ptr : QMC_SIMD_ALIGNMENT) - for (size_t lm = 0; lm < lm_tot; lm++) - { - const double jlm = j_lm_G_ptr[lm]; - vals_r[lm] 
+= cG_ps_r * jlm; - vals_i[lm] += cG_ps_i * jlm; - } - } - } - } - } - } - -#pragma omp for collapse(2) - for (int ip = 0; ip < spline_npoints; ip++) - for (size_t idx = 0; idx < natoms; idx++) - { - double* vals = all_vals[idx][ip]; - for (size_t tid = 0; tid < nt; tid++) - for (size_t lm = 0; lm < lm_tot; lm++) - { - double vals_th_r, vals_th_i; - const size_t ip_idx = tid * spline_npoints + ip; - if (policy == 1) - { - vals_th_r = vals_local[ip_idx][lm * 2][idx]; - vals_th_i = vals_local[ip_idx][lm * 2 + 1][idx]; - } - else - { - vals_th_r = vals_local[ip_idx][idx * 2][lm]; - vals_th_i = vals_local[ip_idx][idx * 2 + 1][lm]; - } - const double real_tmp = 4.0 * M_PI * i_power[lm].real(); - const double imag_tmp = 4.0 * M_PI * i_power[lm].imag(); - vals[lm] += vals_th_r * real_tmp - vals_th_i * imag_tmp; - vals[lm + lm_tot] += vals_th_i * real_tmp + vals_th_r * imag_tmp; - } - } - } - //app_log() << "Building band " << iorb << " at center " << center_idx << std::endl; - - for (size_t idx = 0; idx < natoms; idx++) - { - // reduce all_vals - band_group_comm.reduce_in_place(all_vals[idx].data(), all_vals[idx].size()); - if (!band_group_comm.isGroupLeader()) - continue; -#pragma omp parallel for - for (int lm = 0; lm < lm_tot; lm++) - { - auto& mycenter = centers[mygroup[idx]]; - aligned_vector splineData_r(spline_npoints); - UBspline_1d_d* atomic_spline_r = nullptr; - for (size_t ip = 0; ip < spline_npoints; ip++) - splineData_r[ip] = all_vals[idx][ip][lm]; - atomic_spline_r = einspline::create(atomic_spline_r, 0.0, spline_radius, spline_npoints, splineData_r.data(), - ((lm == 0) || (lm > 3))); - if (!bspline->isComplex()) - { - mycenter.set_spline(atomic_spline_r, lm, iorb); - einspline::destroy(atomic_spline_r); - } - else - { - aligned_vector splineData_i(spline_npoints); - UBspline_1d_d* atomic_spline_i = nullptr; - for (size_t ip = 0; ip < spline_npoints; ip++) - splineData_i[ip] = all_vals[idx][ip][lm + lm_tot]; - atomic_spline_i = 
einspline::create(atomic_spline_i, 0.0, spline_radius, spline_npoints, - splineData_i.data(), ((lm == 0) || (lm > 3))); - mycenter.set_spline(atomic_spline_r, lm, iorb * 2); - mycenter.set_spline(atomic_spline_i, lm, iorb * 2 + 1); - einspline::destroy(atomic_spline_r); - einspline::destroy(atomic_spline_i); - } - } - } - } - } -}; -} // namespace qmcplusplus -#endif diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h index affb06638c..6783bc4b6b 100644 --- a/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h @@ -18,11 +18,114 @@ #include "Numerics/Quadrature.h" #include "OhmmsData/AttributeSet.h" #include "QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h" -#include "QMCWaveFunctions/BsplineFactory/HybridRepSetReader.h" #include "QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h" namespace qmcplusplus { +template +struct Gvectors +{ + using PosType = TinyVector; + using ValueType = std::complex; + + const LT& Lattice; + std::vector gvecs_cart; //Cartesian. 
+ std::vector gmag; + const size_t NumGvecs; + + Gvectors(const std::vector>& gvecs_in, + const LT& Lattice_in, + const TinyVector& HalfG, + size_t first, + size_t last) + : Lattice(Lattice_in), NumGvecs(last - first) + { + gvecs_cart.resize(NumGvecs); + gmag.resize(NumGvecs); +#pragma omp parallel for + for (size_t ig = 0; ig < NumGvecs; ig++) + { + TinyVector gvec_shift; + gvec_shift = gvecs_in[ig + first] + HalfG * 0.5; + gvecs_cart[ig] = Lattice.k_cart(gvec_shift); + gmag[ig] = std::sqrt(dot(gvecs_cart[ig], gvecs_cart[ig])); + } + } + + template + void calc_Ylm_G(const size_t ig, YLM_ENGINE& Ylm, VVT& YlmG) const + { + PosType Ghat(0.0, 0.0, 1.0); + if (gmag[ig] > 0) + Ghat = gvecs_cart[ig] / gmag[ig]; + Ylm.evaluateV(Ghat[0], Ghat[1], Ghat[2], YlmG.data()); + } + + template + inline void calc_jlm_G(const int lmax, ST& r, const size_t ig, VVT& j_lm_G) const + { + bessel_steed_array_cpu(lmax, gmag[ig] * r, j_lm_G.data()); + for (size_t l = lmax; l > 0; l--) + for (size_t lm = l * l; lm < (l + 1) * (l + 1); lm++) + j_lm_G[lm] = j_lm_G[l]; + } + + template + inline void calc_phase_shift(const PT& RSoA, const size_t ig, VT& phase_shift_real, VT& phase_shift_imag) const + { + const ST* restrict px = RSoA.data(0); + const ST* restrict py = RSoA.data(1); + const ST* restrict pz = RSoA.data(2); + ST* restrict v_r = phase_shift_real.data(); + ST* restrict v_i = phase_shift_imag.data(); + const ST& gv_x = gvecs_cart[ig][0]; + const ST& gv_y = gvecs_cart[ig][1]; + const ST& gv_z = gvecs_cart[ig][2]; + +#pragma omp simd aligned(px, py, pz, v_r, v_i : QMC_SIMD_ALIGNMENT) + for (size_t iat = 0; iat < RSoA.size(); iat++) + qmcplusplus::sincos(px[iat] * gv_x + py[iat] * gv_y + pz[iat] * gv_z, v_i + iat, v_r + iat); + } + + template + ValueType evaluate_psi_r(const Vector>& cG, const PT& pos) + { + assert(cG.size() == NumGvecs); + std::complex val(0.0, 0.0); + for (size_t ig = 0; ig < NumGvecs; ig++) + { + ST s, c; + qmcplusplus::sincos(dot(gvecs_cart[ig], pos), &s, &c); + 
ValueType pw0(c, s); + val += cG[ig] * pw0; + } + return val; + } + + template + void evaluate_psi_r(const Vector>& cG, const PT& pos, ValueType& phi, ValueType& d2phi) + { + assert(cG.size() == NumGvecs); + d2phi = phi = 0.0; + for (size_t ig = 0; ig < NumGvecs; ig++) + { + ST s, c; + qmcplusplus::sincos(dot(gvecs_cart[ig], pos), &s, &c); + ValueType pw0(c, s); + phi += cG[ig] * pw0; + d2phi += cG[ig] * pw0 * (-dot(gvecs_cart[ig], gvecs_cart[ig])); + } + } + + double evaluate_KE(const Vector>& cG) + { + assert(cG.size() == NumGvecs); + double KE = 0; + for (size_t ig = 0; ig < NumGvecs; ig++) + KE += dot(gvecs_cart[ig], gvecs_cart[ig]) * (cG[ig].real() * cG[ig].real() + cG[ig].imag() * cG[ig].imag()); + return KE / 2.0; + } +}; /** General HybridRepSetReader to handle any unitcell */ diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp deleted file mode 100644 index ee7623188e..0000000000 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp +++ /dev/null @@ -1,811 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2019 QMCPACK developers. -// -// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. -// Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. 
-////////////////////////////////////////////////////////////////////////////////////// - - -#include -#include "Concurrency/OpenMP.h" -#include "SplineC2C.h" -#include "spline2/MultiBsplineEval.hpp" -#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" -#include "CPU/math.hpp" -#include "CPU/SIMD/inner_product.hpp" -#include "CPU/BLAS.hpp" - -namespace qmcplusplus -{ -template -SplineC2C::SplineC2C(const SplineC2C& in) = default; - -template -inline void SplineC2C::set_spline(SingleSplineType* spline_r, - SingleSplineType* spline_i, - int twist, - int ispline, - int level) -{ - SplineInst->copy_spline(spline_r, 2 * ispline); - SplineInst->copy_spline(spline_i, 2 * ispline + 1); -} - -template -bool SplineC2C::read_splines(hdf_archive& h5f) -{ - std::ostringstream o; - o << "spline_" << MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); -} - -template -bool SplineC2C::write_splines(hdf_archive& h5f) -{ - std::ostringstream o; - o << "spline_" << MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); -} - -template -void SplineC2C::storeParamsBeforeRotation() -{ - const auto spline_ptr = SplineInst->getSplinePtr(); - const auto coefs_tot_size = spline_ptr->coefs_size; - coef_copy_ = std::make_shared>(coefs_tot_size); - - std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin()); -} - -/* - ~~ Notes for rotation ~~ - spl_coefs = Raw pointer to spline coefficients - basis_set_size = Number of spline coefs per orbital - OrbitalSetSize = Number of orbitals (excluding padding) - - spl_coefs has a complicated layout depending on dimensionality of splines. - Luckily, for our purposes, we can think of spl_coefs as pointing to a - matrix of size BasisSetSize x (OrbitalSetSize + padding), with the spline - index adjacent in memory. 
The orbital index is SIMD aligned and therefore - may include padding. - - As a result, due to SIMD alignment, Nsplines may be larger than the - actual number of splined orbitals. This means that in practice rot_mat - may be smaller than the number of 'columns' in the coefs array! - - SplineR2R spl_coef layout: - ^ | sp1 | ... | spN | pad | - | |=====|=====|=====|=====| - | | c11 | ... | c1N | 0 | - basis_set_size | c21 | ... | c2N | 0 | - | | ... | ... | ... | 0 | - | | cM1 | ... | cMN | 0 | - v |=====|=====|=====|=====| - <------ Nsplines ------> - - SplineC2C spl_coef layout: - ^ | sp1_r | sp1_i | ... | spN_r | spN_i | pad | - | |=======|=======|=======|=======|=======|=======| - | | c11_r | c11_i | ... | c1N_r | c1N_i | 0 | - basis_set_size | c21_r | c21_i | ... | c2N_r | c2N_i | 0 | - | | ... | ... | ... | ... | ... | ... | - | | cM1_r | cM1_i | ... | cMN_r | cMN_i | 0 | - v |=======|=======|=======|=======|=======|=======| - <------------------ Nsplines ------------------> - - NB: For splines (typically) BasisSetSize >> OrbitalSetSize, so the spl_coefs - "matrix" is very tall and skinny. -*/ -template -void SplineC2C::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) -{ - // SplineInst is a MultiBspline. 
See src/spline2/MultiBspline.hpp - const auto spline_ptr = SplineInst->getSplinePtr(); - assert(spline_ptr != nullptr); - const auto spl_coefs = spline_ptr->coefs; - const auto Nsplines = spline_ptr->num_splines; // May include padding - const auto coefs_tot_size = spline_ptr->coefs_size; - const auto basis_set_size = coefs_tot_size / Nsplines; - assert(OrbitalSetSize == rot_mat.rows()); - assert(OrbitalSetSize == rot_mat.cols()); - - if (!use_stored_copy) - { - assert(coef_copy_ != nullptr); - std::copy_n(spl_coefs, coefs_tot_size, coef_copy_->begin()); - } - - if constexpr (std::is_same_v) - { - //if ST is double, go ahead and use blas to make things faster - //Note that Nsplines needs to be divided by 2 since spl_coefs and coef_copy_ are stored as reals. - //Also casting them as ValueType so they are complex to do the correct gemm - BLAS::gemm('N', 'N', OrbitalSetSize, basis_set_size, OrbitalSetSize, ValueType(1.0, 0.0), rot_mat.data(), - OrbitalSetSize, (ValueType*)coef_copy_->data(), Nsplines / 2, ValueType(0.0, 0.0), - (ValueType*)spl_coefs, Nsplines / 2); - } - else - { - // if ST is float, RealType is double and ValueType is std::complex for C2C - // Just use naive matrix multiplication in order to avoid losing precision on rotation matrix - for (IndexType i = 0; i < basis_set_size; i++) - for (IndexType j = 0; j < OrbitalSetSize; j++) - { - // cur_elem points to the real componend of the coefficient. - // Imag component is adjacent in memory. 
- const auto cur_elem = Nsplines * i + 2 * j; - ST newval_r{0.}; - ST newval_i{0.}; - for (IndexType k = 0; k < OrbitalSetSize; k++) - { - const auto index = Nsplines * i + 2 * k; - ST zr = (*coef_copy_)[index]; - ST zi = (*coef_copy_)[index + 1]; - ST wr = rot_mat[k][j].real(); - ST wi = rot_mat[k][j].imag(); - newval_r += zr * wr - zi * wi; - newval_i += zr * wi + zi * wr; - } - spl_coefs[cur_elem] = newval_r; - spl_coefs[cur_elem + 1] = newval_i; - } - } -} - -template -inline void SplineC2C::assign_v(const PointType& r, - const vContainer_type& myV, - ValueVector& psi, - int first, - int last) const -{ - // protect last - last = last > kPoints.size() ? kPoints.size() : last; - - const ST x = r[0], y = r[1], z = r[2]; - const ST* restrict kx = myKcart.data(0); - const ST* restrict ky = myKcart.data(1); - const ST* restrict kz = myKcart.data(2); -#pragma omp simd - for (size_t j = first; j < last; ++j) - { - ST s, c; - const ST val_r = myV[2 * j]; - const ST val_i = myV[2 * j + 1]; - qmcplusplus::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); - psi[j + first_spo] = ComplexT(val_r * c - val_i * s, val_i * c + val_r * s); - } -} - -template -void SplineC2C::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - -#pragma omp parallel - { - int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type - FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(r, myV, psi, first / 2, last / 2); - } -} - -template -void SplineC2C::evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) -{ - const bool need_resize = ratios_private.rows() < VP.getTotalNum(); - -#pragma omp parallel - { - int tid = 
omp_get_thread_num(); - // initialize thread private ratios - if (need_resize) - { - if (tid == 0) // just like #pragma omp master, but one fewer call to the runtime - ratios_private.resize(VP.getTotalNum(), omp_get_num_threads()); -#pragma omp barrier - } - int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type - FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), tid, first, last); - const int first_cplx = first / 2; - const int last_cplx = kPoints.size() < last / 2 ? kPoints.size() : last / 2; - - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - const PointType& r = VP.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(r, myV, psi, first_cplx, last_cplx); - ratios_private[iat][tid] = simd::dot(psi.data() + first_cplx, psiinv.data() + first_cplx, last_cplx - first_cplx); - } - } - - // do the reduction manually - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - ratios[iat] = ComplexT(0); - for (int tid = 0; tid < ratios_private.cols(); tid++) - ratios[iat] += ratios_private[iat][tid]; - } -} - -/** assign_vgl - */ -template -inline void SplineC2C::assign_vgl(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi, - int first, - int last) const -{ - // protect last - last = last > kPoints.size() ? 
kPoints.size() : last; - - constexpr ST zero(0); - constexpr ST two(2); - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - const ST symGG[6] = {GGt[0], GGt[1] + GGt[3], GGt[2] + GGt[6], GGt[4], GGt[5] + GGt[7], GGt[8]}; - - const ST* restrict k0 = myKcart.data(0); - const ST* restrict k1 = myKcart.data(1); - const ST* restrict k2 = myKcart.data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - -#pragma omp simd - for (size_t j = first; j < last; ++j) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const ST lcart_r = SymTrace(h00[jr], h01[jr], h02[jr], 
h11[jr], h12[jr], h22[jr], symGG); - const ST lcart_i = SymTrace(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], symGG); - const ST lap_r = lcart_r + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = lcart_i + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - const size_t psiIndex = j + first_spo; - psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); - dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); - dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); - dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - d2psi[psiIndex] = ComplexT(c * lap_r - s * lap_i, c * lap_i + s * lap_r); - } -} - -/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ -template -inline void SplineC2C::assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) -{ - constexpr ST two(2); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart.data(0); - const ST* restrict k1 = myKcart.data(1); - const ST* restrict k2 = myKcart.data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - - const size_t N = last_spo - first_spo; -#pragma omp simd - for (size_t j = 0; j < N; ++j) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g0[jr]; - const ST dY_r = g1[jr]; - const ST dZ_r = g2[jr]; - - const ST dX_i = g0[ji]; - const ST dY_i = g1[ji]; - const ST dZ_i = g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - 
val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const ST lap_r = myL[jr] + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = myL[ji] + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - - const size_t psiIndex = j + first_spo; - psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); - dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); - dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); - dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - d2psi[psiIndex] = ComplexT(c * lap_r - s * lap_i, c * lap_i + s * lap_r); - } -} - -template -void SplineC2C::evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - -#pragma omp parallel - { - int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type - FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgl(r, psi, dpsi, d2psi, first / 2, last / 2); - } -} - -template -void SplineC2C::assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const -{ - // protect last - last = last > kPoints.size() ? 
kPoints.size() : last; - - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart.data(0); - const ST* restrict k1 = myKcart.data(1); - const ST* restrict k2 = myKcart.data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - -#pragma omp simd - for (size_t j = first; j < last; ++j) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = j + first_spo; - psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); - dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); - dpsi[psiIndex][1] = ComplexT(c * 
gY_r - s * gY_i, c * gY_i + s * gY_r); - dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - - const ST h_xx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i); - const ST h_xy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i); - const ST h_xz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i); - const ST h_yx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i); - const ST h_yy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i); - const ST h_yz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i); - const ST h_zx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i); - const ST h_zy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i); - const ST h_zz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i); - - const ST h_xx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r); - const ST h_xy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r); - const ST h_xz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r); - const ST h_yx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r); - const ST h_yy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r); - 
const ST h_yz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r); - const ST h_zx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r); - const ST h_zy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r); - const ST h_zz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r); - - grad_grad_psi[psiIndex][0] = ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r); - grad_grad_psi[psiIndex][1] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); - grad_grad_psi[psiIndex][2] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); - grad_grad_psi[psiIndex][3] = ComplexT(c * h_yx_r - s * h_yx_i, c * h_yx_i + s * h_yx_r); - grad_grad_psi[psiIndex][4] = ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r); - grad_grad_psi[psiIndex][5] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); - grad_grad_psi[psiIndex][6] = ComplexT(c * h_zx_r - s * h_zx_i, c * h_zx_i + s * h_zx_r); - grad_grad_psi[psiIndex][7] = ComplexT(c * h_zy_r - s * h_zy_i, c * h_zy_i + s * h_zy_r); - grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r); - } -} - -template -void SplineC2C::evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - -#pragma omp parallel - { - int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type - FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 
2); - } -} - -template -void SplineC2C::assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first, - int last) const -{ - // protect last - last = last < 0 ? kPoints.size() : (last > kPoints.size() ? kPoints.size() : last); - - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart.data(0); - const ST* restrict k1 = myKcart.data(1); - const ST* restrict k2 = myKcart.data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - - const ST* restrict gh000 = mygH.data(0); - const ST* restrict gh001 = mygH.data(1); - const ST* restrict gh002 = mygH.data(2); - const ST* restrict gh011 = mygH.data(3); - const ST* restrict gh012 = mygH.data(4); - const ST* restrict gh022 = mygH.data(5); - const ST* restrict gh111 = mygH.data(6); - const ST* restrict gh112 = mygH.data(7); - const ST* restrict gh122 = mygH.data(8); - const ST* restrict gh222 = mygH.data(9); - -//SIMD doesn't work quite right yet. Comment out until further debugging. 
-#pragma omp simd - for (size_t j = first; j < last; ++j) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = j + first_spo; - psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); - dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); - dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); - dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. 
- const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); - const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); - const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); - const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12); - const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22); - const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22); - - const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02); - const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12); - const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22); - const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12); - const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22); - const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22); - - const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; - const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; - const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; - const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; - const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; - const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; - - const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; - const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; - const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; - const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY 
* val_i; - const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; - const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; - - grad_grad_psi[psiIndex][0] = ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r); - grad_grad_psi[psiIndex][1] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); - grad_grad_psi[psiIndex][2] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); - grad_grad_psi[psiIndex][3] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); - grad_grad_psi[psiIndex][4] = ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r); - grad_grad_psi[psiIndex][5] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); - grad_grad_psi[psiIndex][6] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); - grad_grad_psi[psiIndex][7] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); - grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r); - - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. 
- - const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], 
gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) - const ST gh_xxx_r = f3_xxx_r 
+ 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; - const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; - const ST gh_xxy_r = - f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; - const ST gh_xxy_i = - f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; - const ST gh_xxz_r = - f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; - const ST gh_xxz_i = - f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; - const ST gh_xyy_r = - f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; - const ST gh_xyy_i = - f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; - const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - - (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i; - const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - - (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r; - const ST gh_xzz_r = - f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; - const ST gh_xzz_i = - f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; - const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i; - const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r; - const ST gh_yyz_r = - f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; - const ST gh_yyz_i = - f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r; - const ST gh_yzz_r = - f3_yzz_r + 
(2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; - const ST gh_yzz_i = - f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; - const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i; - const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r; - - grad_grad_grad_psi[psiIndex][0][0] = ComplexT(c * gh_xxx_r - s * gh_xxx_i, c * gh_xxx_i + s * gh_xxx_r); - grad_grad_grad_psi[psiIndex][0][1] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); - grad_grad_grad_psi[psiIndex][0][2] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); - grad_grad_grad_psi[psiIndex][0][3] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); - grad_grad_grad_psi[psiIndex][0][4] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); - grad_grad_grad_psi[psiIndex][0][5] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][0][6] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); - grad_grad_grad_psi[psiIndex][0][7] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][0][8] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); - - grad_grad_grad_psi[psiIndex][1][0] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); - grad_grad_grad_psi[psiIndex][1][1] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); - grad_grad_grad_psi[psiIndex][1][2] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][1][3] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); - grad_grad_grad_psi[psiIndex][1][4] = ComplexT(c * gh_yyy_r - s * gh_yyy_i, c * gh_yyy_i + s * gh_yyy_r); - grad_grad_grad_psi[psiIndex][1][5] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * 
gh_yyz_i + s * gh_yyz_r); - grad_grad_grad_psi[psiIndex][1][6] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][1][7] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); - grad_grad_grad_psi[psiIndex][1][8] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); - - - grad_grad_grad_psi[psiIndex][2][0] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); - grad_grad_grad_psi[psiIndex][2][1] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][2][2] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); - grad_grad_grad_psi[psiIndex][2][3] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][2][4] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); - grad_grad_grad_psi[psiIndex][2][5] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); - grad_grad_grad_psi[psiIndex][2][6] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); - grad_grad_grad_psi[psiIndex][2][7] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); - grad_grad_grad_psi[psiIndex][2][8] = ComplexT(c * gh_zzz_r - s * gh_zzz_i, c * gh_zzz_i + s * gh_zzz_r); - } -} - -template -void SplineC2C::evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); -#pragma omp parallel - { - int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type - FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); - assign_vghgh(r, psi, dpsi, grad_grad_psi, 
grad_grad_grad_psi, first / 2, last / 2); - } -} - -template class SplineC2C; -template class SplineC2C; - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2C.h index 9410e80cfb..54528af444 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2C.h @@ -18,212 +18,13 @@ #ifndef QMCPLUSPLUS_SPLINE_C2C_H #define QMCPLUSPLUS_SPLINE_C2C_H -#include -#include "QMCWaveFunctions/BsplineFactory/BsplineSet.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "spline2/MultiBspline.hpp" -#include "Utilities/FairDivide.h" +#include "Configuration.h" +#include "QMCWaveFunctions/BsplineFactory/SplineC2CT.h" namespace qmcplusplus { -/** class to match std::complex spline with BsplineSet::ValueType (complex) SPOs - * @tparam ST precision of spline - * - * Requires temporage storage and multiplication of phase vectors - * The internal storage of complex spline coefficients uses double sized real arrays of ST type, aligned and padded. - * All the output orbitals are complex. - */ -template -class SplineC2C : public BsplineSet -{ -public: - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = ST; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - - // types for evaluation results - using ComplexT = typename BsplineSet::ValueType; - using BsplineSet::GGGVector; - using BsplineSet::GradVector; - using BsplineSet::HessVector; - using BsplineSet::ValueVector; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - using ghContainer_type = VectorSoaContainer; - -private: - ///primitive cell - CrystalLattice PrimLattice; - ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. 
Hessian - Tensor GGt; - ///multi bspline set - std::shared_ptr> SplineInst; - - ///Copy of original splines for orbital rotation - std::shared_ptr> coef_copy_; - - vContainer_type mKK; - VectorSoaContainer myKcart; - - ///thread private ratios for reduction when using nested threading, numVP x numThread - Matrix ratios_private; - -protected: - /// intermediate result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; - -public: - SplineC2C(const std::string& my_name) : BsplineSet(my_name) {} - - SplineC2C(const SplineC2C& in); - virtual std::string getClassName() const override { return "SplineC2C"; } - virtual std::string getKeyword() const override { return "SplineC2C"; } - bool isComplex() const override { return true; }; - - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - - bool isRotationSupported() const override { return true; } - - /// Store an original copy of the spline coefficients for orbital rotation - void storeParamsBeforeRotation() override; - - /* - Implements orbital rotations via [1,2]. - Should be called by RotatedSPOs::apply_rotation() - This implementation requires that NSPOs > Nelec. In other words, - if you want to run a orbopt wfn, you must include some virtual orbitals! - Some results (using older Berkeley branch) were published in [3]. 
- [1] Filippi & Fahy, JCP 112, (2000) - [2] Toulouse & Umrigar, JCP 126, (2007) - [3] Townsend et al., PRB 102, (2020) - */ - void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) override; - - inline void resizeStorage(size_t n, size_t nvals) - { - init_base(n); - size_t npad = getAlignedSize(2 * n); - myV.resize(npad); - myG.resize(npad); - myL.resize(npad); - myH.resize(npad); - mygH.resize(npad); - } - - void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } - - void gather_tables(Communicate* comm) - { - if (comm->size() == 1) - return; - const int Nbands = kPoints.size(); - const int Nbandgroups = comm->size(); - offset.resize(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, offset); - for (size_t ib = 0; ib < offset.size(); ib++) - offset[ib] *= 2; - gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, offset); - } - - template - void create_spline(GT& xyz_g, BCT& xyz_bc) - { - resize_kpoints(); - SplineInst = std::make_shared>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " - << "for the coefficients in 3D spline orbital representation" << std::endl; - } - - inline void flush_zero() { SplineInst->flush_zero(); } - - /** remap kPoints to pack the double copy */ - inline void resize_kpoints() - { - const size_t nk = kPoints.size(); - mKK.resize(nk); - myKcart.resize(nk); - for (size_t i = 0; i < nk; ++i) - { - mKK[i] = -dot(kPoints[i], kPoints[i]); - myKcart(i) = kPoints[i]; - } - } - - void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); - - bool read_splines(hdf_archive& h5f); - - bool write_splines(hdf_archive& h5f); - - void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const; - - void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override; - - void 
evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override; - - /** assign_vgl - */ - void assign_vgl(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last) - const; - - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ - void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - void evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) override; - - void assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const; - - void evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override; - - void assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first = 0, - int last = -1) const; - - void evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; -}; - -extern template class SplineC2C; -extern template class SplineC2C; +template +using SplineC2C = SplineC2CT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTarget.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTarget.cpp deleted file mode 100644 index 2db3525864..0000000000 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTarget.cpp +++ /dev/null @@ -1,1223 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. 
-// -// Copyright (c) 2020 QMCPACK developers. -// -// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// -// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory -////////////////////////////////////////////////////////////////////////////////////// - - -#include "SplineC2COMPTarget.h" -#include "spline2/MultiBsplineEval.hpp" -#include "spline2/MultiBsplineEval_OMPoffload.hpp" -#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" -#include "Platforms/OMPTarget/ompReductionComplex.hpp" -#include "ApplyPhaseC2C.hpp" -#include "Concurrency/OpenMP.h" - -namespace qmcplusplus -{ -template -SplineC2COMPTarget::SplineC2COMPTarget(const SplineC2COMPTarget& in) = default; - -template -inline void SplineC2COMPTarget::set_spline(SingleSplineType* spline_r, - SingleSplineType* spline_i, - int twist, - int ispline, - int level) -{ - SplineInst->copy_spline(spline_r, 2 * ispline); - SplineInst->copy_spline(spline_i, 2 * ispline + 1); -} - -template -bool SplineC2COMPTarget::read_splines(hdf_archive& h5f) -{ - std::ostringstream o; - o << "spline_" << MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); -} - -template -bool SplineC2COMPTarget::write_splines(hdf_archive& h5f) -{ - std::ostringstream o; - o << "spline_" << MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); -} - -template -inline void SplineC2COMPTarget::assign_v(const PointType& r, - const vContainer_type& myV, - ValueVector& psi, - int first, - int last) const -{ - // protect last - last = last > kPoints.size() ? 
kPoints.size() : last; - - const ST x = r[0], y = r[1], z = r[2]; - const ST* restrict kx = myKcart->data(0); - const ST* restrict ky = myKcart->data(1); - const ST* restrict kz = myKcart->data(2); -#pragma omp simd - for (size_t j = first; j < last; ++j) - { - ST s, c; - const ST val_r = myV[2 * j]; - const ST val_i = myV[2 * j + 1]; - omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); - psi[j + first_spo] = ComplexT(val_r * c - val_i * s, val_i * c + val_r * s); - } -} - -template -void SplineC2COMPTarget::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - -#pragma omp parallel - { - int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type - FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(r, myV, psi, first / 2, last / 2); - } -} - -template -void SplineC2COMPTarget::evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) -{ - const int nVP = VP.getTotalNum(); - psiinv_pos_copy.resize(psiinv.size() + nVP * 3); - - // stage psiinv to psiinv_pos_copy - std::copy_n(psiinv.data(), psiinv.size(), psiinv_pos_copy.data()); - - // pack particle positions - auto* restrict pos_scratch = reinterpret_cast(psiinv_pos_copy.data() + psiinv.size()); - for (int iat = 0; iat < nVP; ++iat) - { - const PointType& r = VP.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - pos_scratch[iat * 6] = r[0]; - pos_scratch[iat * 6 + 1] = r[1]; - pos_scratch[iat * 6 + 2] = r[2]; - pos_scratch[iat * 6 + 3] = ru[0]; - pos_scratch[iat * 6 + 4] = ru[1]; - pos_scratch[iat * 6 + 5] = ru[2]; - } - - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / 
ChunkSizePerTeam; - ratios_private.resize(nVP, NumTeams); - const auto padded_size = myV.size(); - offload_scratch.resize(padded_size * nVP); - const auto orb_size = psiinv.size(); - results_scratch.resize(padded_size * nVP); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* offload_scratch_ptr = offload_scratch.data(); - auto* results_scratch_ptr = results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* myKcart_ptr = myKcart->data(); - auto* psiinv_ptr = psiinv_pos_copy.data(); - auto* ratios_private_ptr = ratios_private.data(); - const size_t first_spo_local = first_spo; - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*nVP) \ - map(always, to: psiinv_ptr[0:psiinv_pos_copy.size()]) \ - map(always, from: ratios_private_ptr[0:NumTeams*nVP])") - for (int iat = 0; iat < nVP; iat++) - for (int team_id = 0; team_id < NumTeams; team_id++) - { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, padded_size); - - auto* restrict offload_scratch_iat_ptr = offload_scratch_ptr + padded_size * iat; - auto* restrict psi_iat_ptr = results_scratch_ptr + padded_size * iat; - auto* restrict pos_scratch = reinterpret_cast(psiinv_ptr + orb_size); - - int ix, iy, iz; - ST a[4], b[4], c[4]; - spline2::computeLocationAndFractional(spline_ptr, ST(pos_scratch[iat * 6 + 3]), ST(pos_scratch[iat * 6 + 4]), - ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c); - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, - offload_scratch_iat_ptr + first + index); - const size_t first_cplx = first / 2; - const size_t last_cplx = omptarget::min(last / 2, orb_size); - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 
first_cplx; index < last_cplx; index++) - C2C::assign_v(ST(pos_scratch[iat * 6]), ST(pos_scratch[iat * 6 + 1]), ST(pos_scratch[iat * 6 + 2]), - psi_iat_ptr, offload_scratch_iat_ptr, myKcart_ptr, myKcart_padded_size, first_spo_local, index); - - ComplexT sum(0); - PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)") - for (int i = first_cplx; i < last_cplx; i++) - sum += psi_iat_ptr[i] * psiinv_ptr[i]; - ratios_private_ptr[iat * NumTeams + team_id] = sum; - } - } - - // do the reduction manually - for (int iat = 0; iat < nVP; ++iat) - { - ratios[iat] = ComplexT(0); - for (int tid = 0; tid < NumTeams; tid++) - ratios[iat] += ratios_private[iat][tid]; - } -} - -template -void SplineC2COMPTarget::mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const -{ - assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.getCastedLeader>(); - auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); - auto& det_ratios_buffer_H2D = mw_mem.det_ratios_buffer_H2D; - auto& mw_ratios_private = mw_mem.mw_ratios_private; - auto& mw_offload_scratch = mw_mem.mw_offload_scratch; - auto& mw_results_scratch = mw_mem.mw_results_scratch; - const size_t nw = spo_list.size(); - const size_t orb_size = phi_leader.size(); - - size_t mw_nVP = 0; - for (const VirtualParticleSet& VP : vp_list) - mw_nVP += VP.getTotalNum(); - - const size_t packed_size = nw * sizeof(ValueType*) + mw_nVP * (6 * sizeof(ST) + sizeof(int)); - det_ratios_buffer_H2D.resize(packed_size); - - // pack invRow_ptr_list to det_ratios_buffer_H2D - Vector ptr_buffer(reinterpret_cast(det_ratios_buffer_H2D.data()), nw); - for (size_t iw = 0; iw < nw; iw++) - ptr_buffer[iw] = invRow_ptr_list[iw]; - - // pack particle positions - auto* pos_ptr = reinterpret_cast(det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*)); - auto* ref_id_ptr = - 
reinterpret_cast(det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(ST)); - size_t iVP = 0; - for (size_t iw = 0; iw < nw; iw++) - { - const VirtualParticleSet& VP = vp_list[iw]; - assert(ratios_list[iw].size() == VP.getTotalNum()); - for (size_t iat = 0; iat < VP.getTotalNum(); ++iat, ++iVP) - { - ref_id_ptr[iVP] = iw; - const PointType& r = VP.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - pos_ptr[0] = r[0]; - pos_ptr[1] = r[1]; - pos_ptr[2] = r[2]; - pos_ptr[3] = ru[0]; - pos_ptr[4] = ru[1]; - pos_ptr[5] = ru[2]; - pos_ptr += 6; - } - } - - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - mw_ratios_private.resize(mw_nVP, NumTeams); - const auto padded_size = myV.size(); - mw_offload_scratch.resize(padded_size * mw_nVP); - mw_results_scratch.resize(padded_size * mw_nVP); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* offload_scratch_ptr = mw_offload_scratch.data(); - auto* results_scratch_ptr = mw_results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* myKcart_ptr = myKcart->data(); - auto* buffer_H2D_ptr = det_ratios_buffer_H2D.data(); - auto* ratios_private_ptr = mw_ratios_private.data(); - const size_t first_spo_local = first_spo; - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*mw_nVP) \ - map(always, to: buffer_H2D_ptr[0:det_ratios_buffer_H2D.size()]) \ - map(always, from: ratios_private_ptr[0:NumTeams*mw_nVP])") - for (int iat = 0; iat < mw_nVP; iat++) - for (int team_id = 0; team_id < NumTeams; team_id++) - { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, padded_size); - - auto* restrict offload_scratch_iat_ptr = offload_scratch_ptr + padded_size * iat; - auto* restrict psi_iat_ptr = 
results_scratch_ptr + padded_size * iat; - auto* ref_id_ptr = reinterpret_cast(buffer_H2D_ptr + nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(ST)); - auto* restrict psiinv_ptr = reinterpret_cast(buffer_H2D_ptr)[ref_id_ptr[iat]]; - auto* restrict pos_scratch = reinterpret_cast(buffer_H2D_ptr + nw * sizeof(ValueType*)); - - int ix, iy, iz; - ST a[4], b[4], c[4]; - spline2::computeLocationAndFractional(spline_ptr, pos_scratch[iat * 6 + 3], pos_scratch[iat * 6 + 4], - pos_scratch[iat * 6 + 5], ix, iy, iz, a, b, c); - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, - offload_scratch_iat_ptr + first + index); - const size_t first_cplx = first / 2; - const size_t last_cplx = omptarget::min(last / 2, orb_size); - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2C::assign_v(pos_scratch[iat * 6], pos_scratch[iat * 6 + 1], pos_scratch[iat * 6 + 2], psi_iat_ptr, - offload_scratch_iat_ptr, myKcart_ptr, myKcart_padded_size, first_spo_local, index); - - ComplexT sum(0); - PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)") - for (int i = first_cplx; i < last_cplx; i++) - sum += psi_iat_ptr[i] * psiinv_ptr[i]; - ratios_private_ptr[iat * NumTeams + team_id] = sum; - } - } - - // do the reduction manually - iVP = 0; - for (size_t iw = 0; iw < nw; iw++) - { - auto& ratios = ratios_list[iw]; - for (size_t iat = 0; iat < ratios.size(); iat++, iVP++) - { - ratios[iat] = ComplexT(0); - for (int tid = 0; tid < NumTeams; ++tid) - ratios[iat] += mw_ratios_private[iVP][tid]; - } - } -} - -/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ -template -inline void SplineC2COMPTarget::assign_vgl_from_l(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) -{ - constexpr ST two(2); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = 
myKcart->data(0); - const ST* restrict k1 = myKcart->data(1); - const ST* restrict k2 = myKcart->data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - - const size_t N = last_spo - first_spo; -#pragma omp simd - for (size_t j = 0; j < N; ++j) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g0[jr]; - const ST dY_r = g1[jr]; - const ST dZ_r = g2[jr]; - - const ST dX_i = g0[ji]; - const ST dY_i = g1[ji]; - const ST dZ_i = g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const ST lap_r = myL[jr] + (*mKK)[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = myL[ji] + (*mKK)[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - - const size_t psiIndex = j + first_spo; - psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); - dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); - dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); - dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - d2psi[psiIndex] = ComplexT(c * lap_r - s * lap_i, c * lap_i + s * lap_r); - } -} - -template -void SplineC2COMPTarget::evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / 
ChunkSizePerTeam; - - const auto padded_size = myV.size(); - offload_scratch.resize(padded_size * SoAFields3D::NUM_FIELDS); - const auto orb_size = psi.size(); - // for V(1)G(3)L(1) final result - results_scratch.resize(padded_size * 5); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* offload_scratch_ptr = offload_scratch.data(); - auto* results_scratch_ptr = results_scratch.data(); - const auto x = r[0], y = r[1], z = r[2]; - const auto rux = ru[0], ruy = ru[1], ruz = ru[2]; - const auto myKcart_padded_size = myKcart->capacity(); - auto* mKK_ptr = mKK->data(); - auto* GGt_ptr = GGt_offload->data(); - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - auto* myKcart_ptr = myKcart->data(); - const size_t first_spo_local = first_spo; - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \ - map(always, from: results_scratch_ptr[0:padded_size*5])") - for (int team_id = 0; team_id < NumTeams; team_id++) - { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, padded_size); - - int ix, iy, iz; - ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; - spline2::computeLocationAndFractional(spline_ptr, rux, ruy, ruz, ix, iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c); - - const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2], - PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], - PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]}; - const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6], - GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - { - spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b, d2c, - offload_scratch_ptr + 
first + index, padded_size); - const int output_index = first + index; - offload_scratch_ptr[padded_size * SoAFields3D::LAPL + output_index] = - SymTrace(offload_scratch_ptr[padded_size * SoAFields3D::HESS00 + output_index], - offload_scratch_ptr[padded_size * SoAFields3D::HESS01 + output_index], - offload_scratch_ptr[padded_size * SoAFields3D::HESS02 + output_index], - offload_scratch_ptr[padded_size * SoAFields3D::HESS11 + output_index], - offload_scratch_ptr[padded_size * SoAFields3D::HESS12 + output_index], - offload_scratch_ptr[padded_size * SoAFields3D::HESS22 + output_index], symGGt); - } - - const size_t first_cplx = first / 2; - const size_t last_cplx = omptarget::min(last / 2, orb_size); - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2C::assign_vgl(x, y, z, results_scratch_ptr, padded_size, mKK_ptr, offload_scratch_ptr, padded_size, G, - myKcart_ptr, myKcart_padded_size, first_spo_local, index); - } - } - - for (size_t i = 0; i < orb_size; i++) - { - psi[i] = results_scratch[i]; - dpsi[i][0] = results_scratch[i + padded_size]; - dpsi[i][1] = results_scratch[i + padded_size * 2]; - dpsi[i][2] = results_scratch[i + padded_size * 3]; - d2psi[i] = results_scratch[i + padded_size * 4]; - } -} - -template -void SplineC2COMPTarget::evaluateVGLMultiPos(const Vector>& multi_pos, - Vector>& offload_scratch, - Vector>& results_scratch, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const -{ - const size_t num_pos = psi_v_list.size(); - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - const auto padded_size = myV.size(); - offload_scratch.resize(padded_size * num_pos * SoAFields3D::NUM_FIELDS); - const auto orb_size = psi_v_list[0].get().size(); - // for V(1)G(3)L(1) final result - results_scratch.resize(padded_size * num_pos * 5); - - // Ye: need to extract sizes and pointers before entering target 
region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* pos_copy_ptr = multi_pos.data(); - auto* offload_scratch_ptr = offload_scratch.data(); - auto* results_scratch_ptr = results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* mKK_ptr = mKK->data(); - auto* GGt_ptr = GGt_offload->data(); - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - auto* myKcart_ptr = myKcart->data(); - const size_t first_spo_local = first_spo; - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \ - map(always, to: pos_copy_ptr[0:num_pos*6]) \ - map(always, from: results_scratch_ptr[0:padded_size*num_pos*5])") - for (int iw = 0; iw < num_pos; iw++) - for (int team_id = 0; team_id < NumTeams; team_id++) - { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, padded_size); - - auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + padded_size * iw * SoAFields3D::NUM_FIELDS; - auto* restrict psi_iw_ptr = results_scratch_ptr + padded_size * iw * 5; - - int ix, iy, iz; - ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; - spline2::computeLocationAndFractional(spline_ptr, pos_copy_ptr[iw * 6 + 3], pos_copy_ptr[iw * 6 + 4], - pos_copy_ptr[iw * 6 + 5], ix, iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c); - - const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2], - PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], - PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]}; - const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6], - GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - { - spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b, - d2c, 
offload_scratch_iw_ptr + first + index, padded_size); - const int output_index = first + index; - offload_scratch_iw_ptr[padded_size * SoAFields3D::LAPL + output_index] = - SymTrace(offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS00 + output_index], - offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS01 + output_index], - offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS02 + output_index], - offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS11 + output_index], - offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS12 + output_index], - offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS22 + output_index], symGGt); - } - - const size_t first_cplx = first / 2; - const size_t last_cplx = omptarget::min(last / 2, orb_size); - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2C::assign_vgl(pos_copy_ptr[iw * 6], pos_copy_ptr[iw * 6 + 1], pos_copy_ptr[iw * 6 + 2], psi_iw_ptr, - padded_size, mKK_ptr, offload_scratch_iw_ptr, padded_size, G, myKcart_ptr, - myKcart_padded_size, first_spo_local, index); - } - } - - for (int iw = 0; iw < num_pos; ++iw) - { - auto* restrict results_iw_ptr = results_scratch_ptr + padded_size * iw * 5; - ValueVector& psi_v(psi_v_list[iw]); - GradVector& dpsi_v(dpsi_v_list[iw]); - ValueVector& d2psi_v(d2psi_v_list[iw]); - for (size_t i = 0; i < orb_size; i++) - { - psi_v[i] = results_iw_ptr[i]; - dpsi_v[i][0] = results_iw_ptr[i + padded_size]; - dpsi_v[i][1] = results_iw_ptr[i + padded_size * 2]; - dpsi_v[i][2] = results_iw_ptr[i + padded_size * 3]; - d2psi_v[i] = results_iw_ptr[i + padded_size * 4]; - } - } -} - -template -void SplineC2COMPTarget::mw_evaluateVGL(const RefVectorWithLeader& sa_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const -{ - assert(this == &sa_list.getLeader()); - auto& phi_leader = sa_list.getCastedLeader>(); - auto& mw_mem = 
phi_leader.mw_mem_handle_.getResource(); - auto& mw_pos_copy = mw_mem.mw_pos_copy; - auto& mw_offload_scratch = mw_mem.mw_offload_scratch; - auto& mw_results_scratch = mw_mem.mw_results_scratch; - const int nwalkers = sa_list.size(); - mw_pos_copy.resize(nwalkers * 6); - - // pack particle positions - for (int iw = 0; iw < nwalkers; ++iw) - { - const PointType& r = P_list[iw].activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - mw_pos_copy[iw * 6] = r[0]; - mw_pos_copy[iw * 6 + 1] = r[1]; - mw_pos_copy[iw * 6 + 2] = r[2]; - mw_pos_copy[iw * 6 + 3] = ru[0]; - mw_pos_copy[iw * 6 + 4] = ru[1]; - mw_pos_copy[iw * 6 + 5] = ru[2]; - } - - phi_leader.evaluateVGLMultiPos(mw_pos_copy, mw_offload_scratch, mw_results_scratch, psi_v_list, dpsi_v_list, - d2psi_v_list); -} - -template -void SplineC2COMPTarget::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const -{ - assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.getCastedLeader>(); - auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); - auto& buffer_H2D = mw_mem.buffer_H2D; - auto& rg_private = mw_mem.rg_private; - auto& mw_offload_scratch = mw_mem.mw_offload_scratch; - auto& mw_results_scratch = mw_mem.mw_results_scratch; - const int nwalkers = spo_list.size(); - buffer_H2D.resize(nwalkers, sizeof(ST) * 6 + sizeof(ValueType*)); - - // pack particle positions and invRow pointers. 
- for (int iw = 0; iw < nwalkers; ++iw) - { - const PointType& r = P_list[iw].activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - Vector pos_copy(reinterpret_cast(buffer_H2D[iw]), 6); - - pos_copy[0] = r[0]; - pos_copy[1] = r[1]; - pos_copy[2] = r[2]; - pos_copy[3] = ru[0]; - pos_copy[4] = ru[1]; - pos_copy[5] = ru[2]; - - auto& invRow_ptr = *reinterpret_cast(buffer_H2D[iw] + sizeof(ST) * 6); - invRow_ptr = invRow_ptr_list[iw]; - } - - const size_t num_pos = nwalkers; - const auto orb_size = phi_vgl_v.size(2); - const auto padded_size = myV.size(); - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - mw_offload_scratch.resize(padded_size * num_pos * SoAFields3D::NUM_FIELDS); - // for V(1)G(3)L(1) final result - mw_results_scratch.resize(padded_size * num_pos * 5); - // per team ratio and grads - rg_private.resize(num_pos, NumTeams * 4); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* buffer_H2D_ptr = buffer_H2D.data(); - auto* offload_scratch_ptr = mw_offload_scratch.data(); - auto* results_scratch_ptr = mw_results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* mKK_ptr = mKK->data(); - auto* GGt_ptr = GGt_offload->data(); - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - auto* myKcart_ptr = myKcart->data(); - auto* phi_vgl_ptr = phi_vgl_v.data(); - auto* rg_private_ptr = rg_private.data(); - const size_t buffer_H2D_stride = buffer_H2D.cols(); - const size_t first_spo_local = first_spo; - const size_t phi_vgl_stride = num_pos * orb_size; - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \ - map(always, to: buffer_H2D_ptr[:buffer_H2D.size()]) \ - map(always, from: rg_private_ptr[0:rg_private.size()])") - for (int iw = 0; iw < num_pos; iw++) - for (int team_id = 0; team_id < 
NumTeams; team_id++) - { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, padded_size); - - auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + padded_size * iw * SoAFields3D::NUM_FIELDS; - auto* restrict psi_iw_ptr = results_scratch_ptr + padded_size * iw * 5; - const auto* restrict pos_iw_ptr = reinterpret_cast(buffer_H2D_ptr + buffer_H2D_stride * iw); - const auto* restrict invRow_iw_ptr = - *reinterpret_cast(buffer_H2D_ptr + buffer_H2D_stride * iw + sizeof(ST) * 6); - - int ix, iy, iz; - ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; - spline2::computeLocationAndFractional(spline_ptr, pos_iw_ptr[3], pos_iw_ptr[4], pos_iw_ptr[5], ix, iy, iz, a, b, - c, da, db, dc, d2a, d2b, d2c); - - const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2], - PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], - PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]}; - const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6], - GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - { - spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b, - d2c, offload_scratch_iw_ptr + first + index, padded_size); - const int output_index = first + index; - offload_scratch_iw_ptr[padded_size * SoAFields3D::LAPL + output_index] = - SymTrace(offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS00 + output_index], - offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS01 + output_index], - offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS02 + output_index], - offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS11 + output_index], - offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS12 + output_index], - offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS22 + output_index], 
symGGt); - } - - const size_t first_cplx = first / 2; - const size_t last_cplx = omptarget::min(last / 2, orb_size); - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2C::assign_vgl(pos_iw_ptr[0], pos_iw_ptr[1], pos_iw_ptr[2], psi_iw_ptr, padded_size, mKK_ptr, - offload_scratch_iw_ptr, padded_size, G, myKcart_ptr, myKcart_padded_size, first_spo_local, - index); - - ValueType* restrict psi = psi_iw_ptr; - ValueType* restrict dpsi_x = psi_iw_ptr + padded_size; - ValueType* restrict dpsi_y = psi_iw_ptr + padded_size * 2; - ValueType* restrict dpsi_z = psi_iw_ptr + padded_size * 3; - ValueType* restrict d2psi = psi_iw_ptr + padded_size * 4; - - ValueType* restrict out_phi = phi_vgl_ptr + iw * orb_size; - ValueType* restrict out_dphi_x = out_phi + phi_vgl_stride; - ValueType* restrict out_dphi_y = out_dphi_x + phi_vgl_stride; - ValueType* restrict out_dphi_z = out_dphi_y + phi_vgl_stride; - ValueType* restrict out_d2phi = out_dphi_z + phi_vgl_stride; - - ValueType ratio(0), grad_x(0), grad_y(0), grad_z(0); - PRAGMA_OFFLOAD("omp parallel for reduction(+: ratio, grad_x, grad_y, grad_z)") - for (int j = first_cplx; j < last_cplx; j++) - { - const size_t psiIndex = first_spo_local + j; - - out_phi[psiIndex] = psi[psiIndex]; - out_dphi_x[psiIndex] = dpsi_x[psiIndex]; - out_dphi_y[psiIndex] = dpsi_y[psiIndex]; - out_dphi_z[psiIndex] = dpsi_z[psiIndex]; - out_d2phi[psiIndex] = d2psi[psiIndex]; - - ratio += psi[psiIndex] * invRow_iw_ptr[psiIndex]; - grad_x += dpsi_x[psiIndex] * invRow_iw_ptr[psiIndex]; - grad_y += dpsi_y[psiIndex] * invRow_iw_ptr[psiIndex]; - grad_z += dpsi_z[psiIndex] * invRow_iw_ptr[psiIndex]; - } - - rg_private_ptr[(iw * NumTeams + team_id) * 4] = ratio; - rg_private_ptr[(iw * NumTeams + team_id) * 4 + 1] = grad_x; - rg_private_ptr[(iw * NumTeams + team_id) * 4 + 2] = grad_y; - rg_private_ptr[(iw * NumTeams + team_id) * 4 + 3] = grad_z; - } - } - - for (int iw = 0; iw < num_pos; iw++) - { - ValueType ratio(0); 
- for (int team_id = 0; team_id < NumTeams; team_id++) - ratio += rg_private[iw][team_id * 4]; - ratios[iw] = ratio; - - ValueType grad_x(0), grad_y(0), grad_z(0); - for (int team_id = 0; team_id < NumTeams; team_id++) - { - grad_x += rg_private[iw][team_id * 4 + 1]; - grad_y += rg_private[iw][team_id * 4 + 2]; - grad_z += rg_private[iw][team_id * 4 + 3]; - } - grads[iw] = GradType{grad_x / ratio, grad_y / ratio, grad_z / ratio}; - } -} -template -void SplineC2COMPTarget::assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const -{ - // protect last - last = last > kPoints.size() ? kPoints.size() : last; - - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart->data(0); - const ST* restrict k1 = myKcart->data(1); - const ST* restrict k2 = myKcart->data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - -#pragma omp simd - for (size_t j = first; j < last; ++j) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = 
g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = j + first_spo; - psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); - dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); - dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); - dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - - const ST h_xx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i); - const ST h_xy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i); - const ST h_xz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i); - const ST h_yx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i); - const ST h_yy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i); - const ST h_yz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i); - const ST h_zx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i); - const ST h_zy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i); - const ST h_zz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i); - - const ST h_xx_i 
= - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r); - const ST h_xy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r); - const ST h_xz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r); - const ST h_yx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r); - const ST h_yy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r); - const ST h_yz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r); - const ST h_zx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r); - const ST h_zy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r); - const ST h_zz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r); - - grad_grad_psi[psiIndex][0] = ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r); - grad_grad_psi[psiIndex][1] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); - grad_grad_psi[psiIndex][2] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); - grad_grad_psi[psiIndex][3] = ComplexT(c * h_yx_r - s * h_yx_i, c * h_yx_i + s * h_yx_r); - grad_grad_psi[psiIndex][4] = ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r); - grad_grad_psi[psiIndex][5] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); - grad_grad_psi[psiIndex][6] = ComplexT(c * h_zx_r - s * h_zx_i, c * h_zx_i + s * h_zx_r); - grad_grad_psi[psiIndex][7] = ComplexT(c * h_zy_r - s * h_zy_i, c * h_zy_i + s * h_zy_r); - grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * 
h_zz_r); - } -} - -template -void SplineC2COMPTarget::evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - -#pragma omp parallel - { - int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type - FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2); - } -} - -template -void SplineC2COMPTarget::assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first, - int last) const -{ - // protect last - last = last < 0 ? kPoints.size() : (last > kPoints.size() ? kPoints.size() : last); - - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart->data(0); - const ST* restrict k1 = myKcart->data(1); - const ST* restrict k2 = myKcart->data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - - const ST* restrict gh000 = mygH.data(0); - const ST* restrict gh001 = mygH.data(1); - const ST* restrict gh002 = mygH.data(2); - const ST* restrict gh011 = mygH.data(3); - const ST* restrict gh012 = mygH.data(4); - const ST* restrict gh022 = 
mygH.data(5); - const ST* restrict gh111 = mygH.data(6); - const ST* restrict gh112 = mygH.data(7); - const ST* restrict gh122 = mygH.data(8); - const ST* restrict gh222 = mygH.data(9); - -//SIMD doesn't work quite right yet. Comment out until further debugging. -#pragma omp simd - for (size_t j = first; j < last; ++j) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = j + first_spo; - psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); - dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); - dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); - dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. 
- const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); - const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); - const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); - const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12); - const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22); - const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22); - - const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02); - const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12); - const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22); - const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12); - const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22); - const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22); - - const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; - const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; - const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; - const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; - const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; - const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; - - const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; - const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; - const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; - const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY 
* val_i; - const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; - const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; - - grad_grad_psi[psiIndex][0] = ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r); - grad_grad_psi[psiIndex][1] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); - grad_grad_psi[psiIndex][2] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); - grad_grad_psi[psiIndex][3] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); - grad_grad_psi[psiIndex][4] = ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r); - grad_grad_psi[psiIndex][5] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); - grad_grad_psi[psiIndex][6] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); - grad_grad_psi[psiIndex][7] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); - grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r); - - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. 
- - const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], 
gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) - const ST gh_xxx_r = f3_xxx_r 
+ 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; - const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; - const ST gh_xxy_r = - f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; - const ST gh_xxy_i = - f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; - const ST gh_xxz_r = - f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; - const ST gh_xxz_i = - f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; - const ST gh_xyy_r = - f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; - const ST gh_xyy_i = - f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; - const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - - (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i; - const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - - (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r; - const ST gh_xzz_r = - f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; - const ST gh_xzz_i = - f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; - const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i; - const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r; - const ST gh_yyz_r = - f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; - const ST gh_yyz_i = - f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r; - const ST gh_yzz_r = - f3_yzz_r + 
(2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; - const ST gh_yzz_i = - f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; - const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i; - const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r; - - grad_grad_grad_psi[psiIndex][0][0] = ComplexT(c * gh_xxx_r - s * gh_xxx_i, c * gh_xxx_i + s * gh_xxx_r); - grad_grad_grad_psi[psiIndex][0][1] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); - grad_grad_grad_psi[psiIndex][0][2] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); - grad_grad_grad_psi[psiIndex][0][3] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); - grad_grad_grad_psi[psiIndex][0][4] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); - grad_grad_grad_psi[psiIndex][0][5] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][0][6] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); - grad_grad_grad_psi[psiIndex][0][7] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][0][8] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); - - grad_grad_grad_psi[psiIndex][1][0] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); - grad_grad_grad_psi[psiIndex][1][1] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); - grad_grad_grad_psi[psiIndex][1][2] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][1][3] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); - grad_grad_grad_psi[psiIndex][1][4] = ComplexT(c * gh_yyy_r - s * gh_yyy_i, c * gh_yyy_i + s * gh_yyy_r); - grad_grad_grad_psi[psiIndex][1][5] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * 
gh_yyz_i + s * gh_yyz_r); - grad_grad_grad_psi[psiIndex][1][6] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][1][7] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); - grad_grad_grad_psi[psiIndex][1][8] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); - - - grad_grad_grad_psi[psiIndex][2][0] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); - grad_grad_grad_psi[psiIndex][2][1] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][2][2] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); - grad_grad_grad_psi[psiIndex][2][3] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][2][4] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); - grad_grad_grad_psi[psiIndex][2][5] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); - grad_grad_grad_psi[psiIndex][2][6] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); - grad_grad_grad_psi[psiIndex][2][7] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); - grad_grad_grad_psi[psiIndex][2][8] = ComplexT(c * gh_zzz_r - s * gh_zzz_i, c * gh_zzz_i + s * gh_zzz_r); - } -} - -template -void SplineC2COMPTarget::evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); -#pragma omp parallel - { - int first, last; - FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); - assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, last / 2); - } -} - -template -void 
SplineC2COMPTarget::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) -{ - // chunk the [first, last) loop into blocks to save temporary memory usage - const int block_size = 16; - - // reference vectors refer to the rows of matrices - std::vector multi_psi_v; - std::vector multi_dpsi_v; - std::vector multi_d2psi_v; - RefVector psi_v_list; - RefVector dpsi_v_list; - RefVector d2psi_v_list; - - multi_psi_v.reserve(block_size); - multi_dpsi_v.reserve(block_size); - multi_d2psi_v.reserve(block_size); - psi_v_list.reserve(block_size); - dpsi_v_list.reserve(block_size); - d2psi_v_list.reserve(block_size); - - for (int iat = first, i = 0; iat < last; iat += block_size, i += block_size) - { - const int actual_block_size = std::min(last - iat, block_size); - multi_pos_copy.resize(actual_block_size * 6); - multi_psi_v.clear(); - multi_dpsi_v.clear(); - multi_d2psi_v.clear(); - psi_v_list.clear(); - dpsi_v_list.clear(); - d2psi_v_list.clear(); - - for (int ipos = 0; ipos < actual_block_size; ++ipos) - { - // pack particle positions - const PointType& r = P.activeR(iat + ipos); - PointType ru(PrimLattice.toUnit_floor(r)); - multi_pos_copy[ipos * 6] = r[0]; - multi_pos_copy[ipos * 6 + 1] = r[1]; - multi_pos_copy[ipos * 6 + 2] = r[2]; - multi_pos_copy[ipos * 6 + 3] = ru[0]; - multi_pos_copy[ipos * 6 + 4] = ru[1]; - multi_pos_copy[ipos * 6 + 5] = ru[2]; - - multi_psi_v.emplace_back(logdet[i + ipos], logdet.cols()); - multi_dpsi_v.emplace_back(dlogdet[i + ipos], dlogdet.cols()); - multi_d2psi_v.emplace_back(d2logdet[i + ipos], d2logdet.cols()); - - psi_v_list.push_back(multi_psi_v[ipos]); - dpsi_v_list.push_back(multi_dpsi_v[ipos]); - d2psi_v_list.push_back(multi_d2psi_v[ipos]); - } - - evaluateVGLMultiPos(multi_pos_copy, offload_scratch, results_scratch, psi_v_list, dpsi_v_list, d2psi_v_list); - } -} - -template class SplineC2COMPTarget; -template class SplineC2COMPTarget; - -} // namespace 
qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTarget.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTarget.h index 774c646118..92b29539ef 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTarget.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTarget.h @@ -18,299 +18,12 @@ #ifndef QMCPLUSPLUS_SPLINE_C2C_OMPTARGET_H #define QMCPLUSPLUS_SPLINE_C2C_OMPTARGET_H -#include -#include "QMCWaveFunctions/BsplineFactory/BsplineSet.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "spline2/MultiBspline.hpp" -#include "OMPTarget/OffloadAlignedAllocators.hpp" -#include "Utilities/FairDivide.h" -#include "Utilities/TimerManager.h" -#include -#include "SplineOMPTargetMultiWalkerMem.h" +#include "SplineC2COMPTargetT.h" namespace qmcplusplus { -/** class to match std::complex spline with BsplineSet::ValueType (complex) SPOs with OpenMP offload - * @tparam ST precision of spline - * - * Requires temporage storage and multiplication of phase vectors - * The internal storage of complex spline coefficients uses double sized real arrays of ST type, aligned and padded. - * All the output orbitals are complex. 
- */ -template -class SplineC2COMPTarget : public BsplineSet -{ -public: - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = ST; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - // types for evaluation results - using ComplexT = typename BsplineSet::ValueType; - using BsplineSet::GGGVector; - using BsplineSet::GradVector; - using BsplineSet::HessVector; - using BsplineSet::ValueVector; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - using ghContainer_type = VectorSoaContainer; - - template - using OffloadVector = Vector>; - template - using OffloadPosVector = VectorSoaContainer>; - -private: - /// timer for offload portion - NewTimer& offload_timer_; - ///primitive cell - CrystalLattice PrimLattice; - ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian - Tensor GGt; - ///multi bspline set - std::shared_ptr, OffloadAllocator>> SplineInst; - - std::shared_ptr> mKK; - std::shared_ptr> myKcart; - std::shared_ptr> GGt_offload; - std::shared_ptr> PrimLattice_G_offload; - - ResourceHandle> mw_mem_handle_; - - ///team private ratios for reduction, numVP x numTeams - Matrix> ratios_private; - ///offload scratch space, dynamically resized to the maximal need - Vector> offload_scratch; - ///result scratch space, dynamically resized to the maximal need - Vector> results_scratch; - ///psiinv and position scratch space, used to avoid allocation on the fly and faster transfer - Vector> psiinv_pos_copy; - ///position scratch space, used to avoid allocation on the fly and faster transfer - Vector> multi_pos_copy; - - void evaluateVGLMultiPos(const Vector>& multi_pos_copy, - Vector>& offload_scratch, - Vector>& results_scratch, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const; - -protected: - /// intermediate 
result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; - -public: - SplineC2COMPTarget(const std::string& my_name) - : BsplineSet(my_name), - offload_timer_(createGlobalTimer("SplineC2COMPTarget::offload", timer_level_fine)), - GGt_offload(std::make_shared>(9)), - PrimLattice_G_offload(std::make_shared>(9)) - {} - - SplineC2COMPTarget(const SplineC2COMPTarget& in); - - virtual std::string getClassName() const override { return "SplineC2COMPTarget"; } - virtual std::string getKeyword() const override { return "SplineC2C"; } - bool isComplex() const override { return true; }; - virtual bool isOMPoffload() const override { return true; } - - void createResource(ResourceCollection& collection) const override - { - auto resource_index = collection.addResource(std::make_unique>()); - } - - void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const override - { - assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.getCastedLeader>(); - phi_leader.mw_mem_handle_ = collection.lendResource>(); - } - - void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const override - { - assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.getCastedLeader>(); - collection.takebackResource(phi_leader.mw_mem_handle_); - } - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - - inline void resizeStorage(size_t n, size_t nvals) - { - init_base(n); - size_t npad = getAlignedSize(2 * n); - myV.resize(npad); - myG.resize(npad); - myL.resize(npad); - myH.resize(npad); - mygH.resize(npad); - } - - void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } - - void gather_tables(Communicate* comm) - { - if (comm->size() == 1) - return; - const int Nbands = kPoints.size(); - const int Nbandgroups = comm->size(); - offset.resize(Nbandgroups + 1, 0); - 
FairDivideLow(Nbands, Nbandgroups, offset); - - for (size_t ib = 0; ib < offset.size(); ib++) - offset[ib] *= 2; - gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, offset); - } - - template - void create_spline(GT& xyz_g, BCT& xyz_bc) - { - resize_kpoints(); - SplineInst = std::make_shared, OffloadAllocator>>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - - app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " - << "for the coefficients in 3D spline orbital representation" << std::endl; - } - - /// this routine can not be called from threaded region - void finalizeConstruction() override - { - // map the SplineInst->getSplinePtr() structure to GPU - auto* MultiSpline = SplineInst->getSplinePtr(); - auto* restrict coefs = MultiSpline->coefs; - // attach pointers on the device to achieve deep copy - PRAGMA_OFFLOAD("omp target map(always, to: MultiSpline[0:1], coefs[0:MultiSpline->coefs_size])") - { - MultiSpline->coefs = coefs; - } - - // transfer static data to GPU - auto* mKK_ptr = mKK->data(); - PRAGMA_OFFLOAD("omp target update to(mKK_ptr[0:mKK->size()])") - auto* myKcart_ptr = myKcart->data(); - PRAGMA_OFFLOAD("omp target update to(myKcart_ptr[0:myKcart->capacity()*3])") - for (uint32_t i = 0; i < 9; i++) - { - (*GGt_offload)[i] = GGt[i]; - (*PrimLattice_G_offload)[i] = PrimLattice.G[i]; - } - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - PRAGMA_OFFLOAD("omp target update to(PrimLattice_G_ptr[0:9])") - auto* GGt_ptr = GGt_offload->data(); - PRAGMA_OFFLOAD("omp target update to(GGt_ptr[0:9])") - } - - inline void flush_zero() { SplineInst->flush_zero(); } - - /** remap kPoints to pack the double copy */ - inline void resize_kpoints() - { - const size_t nk = kPoints.size(); - mKK = std::make_shared>(nk); - myKcart = std::make_shared>(nk); - for (size_t i = 0; i < nk; ++i) - { - (*mKK)[i] = -dot(kPoints[i], kPoints[i]); - (*myKcart)(i) = kPoints[i]; - } - } - - void 
set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); - - bool read_splines(hdf_archive& h5f); - - bool write_splines(hdf_archive& h5f); - - void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const; - - virtual void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override; - - virtual void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override; - - virtual void mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const override; - - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ - void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - virtual void evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) override; - - virtual void mw_evaluateVGL(const RefVectorWithLeader& sa_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const override; - - virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const override; - - void assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const; - - virtual void evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override; - - void assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - 
GGGVector& grad_grad_grad_psi, - int first = 0, - int last = -1) const; - - virtual void evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - virtual void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; -}; - -extern template class SplineC2COMPTarget; -extern template class SplineC2COMPTarget; +template +using SplineC2COMPTarget = SplineC2COMPTargetT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp index dc68edbb82..ce4855d11b 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp @@ -33,26 +33,26 @@ inline void SplineC2CT::set_spline(SingleSplineType* spline_r, int ispline, int level) { - SplineInst->copy_spline(spline_r, 2 * ispline); - SplineInst->copy_spline(spline_i, 2 * ispline + 1); + SplineInst->copy_spline(spline_r, 2 * ispline); + SplineInst->copy_spline(spline_i, 2 * ispline + 1); } template bool SplineC2CT::read_splines(hdf_archive& h5f) { - std::ostringstream o; - o << "spline_" << this->MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); + std::ostringstream o; + o << "spline_" << this->MyIndex; + einspline_engine bigtable(SplineInst->getSplinePtr()); + return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); } template bool SplineC2CT::write_splines(hdf_archive& h5f) { - std::ostringstream o; - o << "spline_" << this->MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); + std::ostringstream o; + o << "spline_" << this->MyIndex; 
+ einspline_engine bigtable(SplineInst->getSplinePtr()); + return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); } template @@ -62,7 +62,7 @@ void SplineC2CT::storeParamsBeforeRotation() const auto coefs_tot_size = spline_ptr->coefs_size; coef_copy_ = std::make_shared>(coefs_tot_size); - std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin()); + std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin()); } /* @@ -190,8 +190,8 @@ inline void SplineC2CT::assign_v(const PointType& r, template void SplineC2CT::evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel { @@ -211,44 +211,46 @@ void SplineC2CT::evaluateDetRatios(const VirtualParticleSetT& VP, const ValueVector& psiinv, std::vector& ratios) { - const bool need_resize = ratios_private.rows() < VP.getTotalNum(); + const bool need_resize = ratios_private.rows() < VP.getTotalNum(); #pragma omp parallel - { - int tid = omp_get_thread_num(); - // initialize thread private ratios - if (need_resize) { - if (tid == 0) // just like #pragma omp master, but one fewer call to - // the runtime - ratios_private.resize(VP.getTotalNum(), omp_get_num_threads()); + int tid = omp_get_thread_num(); + // initialize thread private ratios + if (need_resize) + { + if (tid == 0) // just like #pragma omp master, but one fewer call to + // the runtime + ratios_private.resize(VP.getTotalNum(), omp_get_num_threads()); #pragma omp barrier + } + int first, last; + // Factor of 2 because psi is complex and the spline storage and + // evaluation uses a real type + FairDivideAligned(2 * psi.size(), getAlignment(), + omp_get_num_threads(), tid, first, last); + const int first_cplx = first / 2; + const int last_cplx = + this->kPoints.size() < last / 2 ? 
this->kPoints.size() : last / 2; + + for (int iat = 0; iat < VP.getTotalNum(); ++iat) { + const PointType& r = VP.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); + + spline2::evaluate3d( + SplineInst->getSplinePtr(), ru, myV, first, last); + assign_v(r, myV, psi, first_cplx, last_cplx); + ratios_private[iat][tid] = simd::dot(psi.data() + first_cplx, + psiinv.data() + first_cplx, last_cplx - first_cplx); + } } - int first, last; - // Factor of 2 because psi is complex and the spline storage and - // evaluation uses a real type - FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), tid, first, last); - const int first_cplx = first / 2; - const int last_cplx = this->kPoints.size() < last / 2 ? this->kPoints.size() : last / 2; - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - const PointType& r = VP.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(r, myV, psi, first_cplx, last_cplx); - ratios_private[iat][tid] = simd::dot(psi.data() + first_cplx, psiinv.data() + first_cplx, last_cplx - first_cplx); + // do the reduction manually + for (int iat = 0; iat < VP.getTotalNum(); ++iat) { + ratios[iat] = ComplexT(0); + for (int tid = 0; tid < ratios_private.cols(); tid++) + ratios[iat] += ratios_private[iat][tid]; } - } - - // do the reduction manually - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - ratios[iat] = ComplexT(0); - for (int tid = 0; tid < ratios_private.cols(); tid++) - ratios[iat] += ratios_private[iat][tid]; - } } /** assign_vgl @@ -352,7 +354,7 @@ inline void SplineC2CT::assign_vgl_from_l(const PointType& r, const ST* restrict g1 = myG.data(1); const ST* restrict g2 = myG.data(2); - const size_t N = this->last_spo - this->first_spo; + const size_t N = this->last_spo - this->first_spo; #pragma omp simd for (size_t j = 0; j < N; ++j) { @@ -405,8 +407,8 @@ void SplineC2CT::evaluateVGL(const ParticleSetT& P, GradVector& 
dpsi, ValueVector& d2psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel { @@ -546,8 +548,8 @@ void SplineC2CT::evaluateVGH(const ParticleSetT& P, GradVector& dpsi, HessVector& grad_grad_psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel { @@ -807,8 +809,8 @@ void SplineC2CT::evaluateVGHGH(const ParticleSetT& P, HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi) { - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); + const PointType& r = P.activeR(iat); + PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel { int first, last; diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2R.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2R.cpp deleted file mode 100644 index 8b6504888d..0000000000 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2R.cpp +++ /dev/null @@ -1,1189 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2019 QMCPACK developers. -// -// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@intel.com, University of Illinois at Urbana-Champaign -// Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// Anouar Benali, benali@anl.gov, Argonne National Laboratory -// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "Concurrency/OpenMP.h" -#include "SplineC2R.h" -#include "spline2/MultiBsplineEval.hpp" -#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" -#include "CPU/math.hpp" -#include "CPU/SIMD/inner_product.hpp" - -namespace qmcplusplus -{ -template -SplineC2R::SplineC2R(const SplineC2R& in) = default; - -template -inline void SplineC2R::set_spline(SingleSplineType* spline_r, - SingleSplineType* spline_i, - int twist, - int ispline, - int level) -{ - SplineInst->copy_spline(spline_r, 2 * ispline); - SplineInst->copy_spline(spline_i, 2 * ispline + 1); -} - -template -bool SplineC2R::read_splines(hdf_archive& h5f) -{ - std::ostringstream o; - o << "spline_" << MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); -} - -template -bool SplineC2R::write_splines(hdf_archive& h5f) -{ - std::ostringstream o; - o << "spline_" << MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); -} - -template -inline void SplineC2R::assign_v(const PointType& r, - const vContainer_type& myV, - ValueVector& psi, - int first, - int last) const -{ - // protect last - last = last > kPoints.size() ? 
kPoints.size() : last; - - const ST x = r[0], y = r[1], z = r[2]; - const ST* restrict kx = myKcart.data(0); - const ST* restrict ky = myKcart.data(1); - const ST* restrict kz = myKcart.data(2); - - TT* restrict psi_s = psi.data() + first_spo; - const size_t requested_orb_size = psi.size(); -#pragma omp simd - for (size_t j = first; j < std::min(nComplexBands, last); j++) - { - ST s, c; - const size_t jr = j << 1; - const size_t ji = jr + 1; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - qmcplusplus::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); - if (jr < requested_orb_size) - psi_s[jr] = val_r * c - val_i * s; - if (ji < requested_orb_size) - psi_s[ji] = val_i * c + val_r * s; - } - - psi_s += nComplexBands; -#pragma omp simd - for (size_t j = std::max(nComplexBands, first); j < last; j++) - { - ST s, c; - const ST val_r = myV[2 * j]; - const ST val_i = myV[2 * j + 1]; - qmcplusplus::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); - if (j < requested_orb_size) - psi_s[j] = val_r * c - val_i * s; - } -} - -template -void SplineC2R::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - -#pragma omp parallel - { - int first, last; - FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(r, myV, psi, first / 2, last / 2); - } -} - -template -void SplineC2R::evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) -{ - const bool need_resize = ratios_private.rows() < VP.getTotalNum(); - -#pragma omp parallel - { - int tid = omp_get_thread_num(); - // initialize thread private ratios - if (need_resize) - { - if (tid == 0) // just like #pragma omp master, but one fewer call to the runtime - ratios_private.resize(VP.getTotalNum(), 
omp_get_num_threads()); -#pragma omp barrier - } - int first, last; - FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), tid, first, last); - const int first_cplx = first / 2; - const int last_cplx = kPoints.size() < last / 2 ? kPoints.size() : last / 2; - - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - const PointType& r = VP.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(r, myV, psi, first_cplx, last_cplx); - - const int first_real = first_cplx + std::min(nComplexBands, first_cplx); - const int last_real = last_cplx + std::min(nComplexBands, last_cplx); - ratios_private[iat][tid] = simd::dot(psi.data() + first_real, psiinv.data() + first_real, last_real - first_real); - } - } - - // do the reduction manually - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - ratios[iat] = TT(0); - for (int tid = 0; tid < ratios_private.cols(); tid++) - ratios[iat] += ratios_private[iat][tid]; - } -} - -/** assign_vgl - */ -template -inline void SplineC2R::assign_vgl(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi, - int first, - int last) const -{ - // protect last - last = last > kPoints.size() ? 
kPoints.size() : last; - - constexpr ST two(2); - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - const ST symGG[6] = {GGt[0], GGt[1] + GGt[3], GGt[2] + GGt[6], GGt[4], GGt[5] + GGt[7], GGt[8]}; - - const ST* restrict k0 = myKcart.data(0); - ASSUME_ALIGNED(k0); - const ST* restrict k1 = myKcart.data(1); - ASSUME_ALIGNED(k1); - const ST* restrict k2 = myKcart.data(2); - ASSUME_ALIGNED(k2); - - const ST* restrict g0 = myG.data(0); - ASSUME_ALIGNED(g0); - const ST* restrict g1 = myG.data(1); - ASSUME_ALIGNED(g1); - const ST* restrict g2 = myG.data(2); - ASSUME_ALIGNED(g2); - const ST* restrict h00 = myH.data(0); - ASSUME_ALIGNED(h00); - const ST* restrict h01 = myH.data(1); - ASSUME_ALIGNED(h01); - const ST* restrict h02 = myH.data(2); - ASSUME_ALIGNED(h02); - const ST* restrict h11 = myH.data(3); - ASSUME_ALIGNED(h11); - const ST* restrict h12 = myH.data(4); - ASSUME_ALIGNED(h12); - const ST* restrict h22 = myH.data(5); - ASSUME_ALIGNED(h22); - - const size_t requested_orb_size = psi.size(); -#pragma omp simd - for (size_t j = first; j < std::min(nComplexBands, last); j++) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // 
\f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const ST lcart_r = SymTrace(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], symGG); - const ST lcart_i = SymTrace(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], symGG); - const ST lap_r = lcart_r + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = lcart_i + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - - const size_t psiIndex = first_spo + jr; - if (psiIndex < requested_orb_size) - { - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - d2psi[psiIndex] = c * lap_r - s * lap_i; - } - if (psiIndex + 1 < requested_orb_size) - { - psi[psiIndex + 1] = c * val_i + s * val_r; - dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; - dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; - dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - d2psi[psiIndex + 1] = c * lap_i + s * lap_r; - } - } - -#pragma omp simd - for (size_t j = std::max(nComplexBands, first); j < last; j++) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + 
{\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - if (const size_t psiIndex = first_spo + nComplexBands + j; psiIndex < requested_orb_size) - { - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - const ST lcart_r = SymTrace(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], symGG); - const ST lcart_i = SymTrace(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], symGG); - const ST lap_r = lcart_r + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = lcart_i + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - d2psi[psiIndex] = c * lap_r - s * lap_i; - } - } -} - -/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ -template -inline void SplineC2R::assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) -{ - constexpr ST two(2); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart.data(0); - ASSUME_ALIGNED(k0); - const ST* restrict k1 = myKcart.data(1); - ASSUME_ALIGNED(k1); - const ST* restrict k2 = myKcart.data(2); - ASSUME_ALIGNED(k2); - - const ST* restrict g0 = myG.data(0); - ASSUME_ALIGNED(g0); - const ST* restrict g1 = myG.data(1); - ASSUME_ALIGNED(g1); - const ST* restrict g2 = myG.data(2); - ASSUME_ALIGNED(g2); - - const size_t N = kPoints.size(); - -#pragma omp simd - for (size_t j = 0; j < nComplexBands; j++) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - 
//dot(PrimLattice.G,myG[j]) - const ST dX_r = g0[jr]; - const ST dY_r = g1[jr]; - const ST dZ_r = g2[jr]; - - const ST dX_i = g0[ji]; - const ST dY_i = g1[ji]; - const ST dZ_i = g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const ST lap_r = myL[jr] + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = myL[ji] + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - - const size_t psiIndex = first_spo + jr; - psi[psiIndex] = c * val_r - s * val_i; - psi[psiIndex + 1] = c * val_i + s * val_r; - d2psi[psiIndex] = c * lap_r - s * lap_i; - d2psi[psiIndex + 1] = c * lap_i + s * lap_r; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; - dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; - dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - } - -#pragma omp simd - for (size_t j = nComplexBands; j < N; j++) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g0[jr]; - const ST dY_r = g1[jr]; - const ST dZ_r = g2[jr]; - - const ST dX_i = g0[ji]; - const ST dY_i = g1[ji]; - const ST dZ_i = g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - const size_t psiIndex = first_spo + nComplexBands + j; - psi[psiIndex] = c * val_r 
- s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - const ST lap_r = myL[jr] + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = myL[ji] + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - d2psi[psiIndex] = c * lap_r - s * lap_i; - } -} - -template -void SplineC2R::evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - -#pragma omp parallel - { - int first, last; - FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgl(r, psi, dpsi, d2psi, first / 2, last / 2); - } -} - -template -void SplineC2R::assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const -{ - // protect last - last = last > kPoints.size() ? 
kPoints.size() : last; - - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart.data(0); - const ST* restrict k1 = myKcart.data(1); - const ST* restrict k2 = myKcart.data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - -#pragma omp simd - for (size_t j = first; j < std::min(nComplexBands, last); j++) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = first_spo + jr; - - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r 
- s * gZ_i; - - psi[psiIndex + 1] = c * val_i + s * val_r; - dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; - dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; - dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - - const ST h_xx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i); - const ST h_xy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i); - const ST h_xz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i); - const ST h_yx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i); - const ST h_yy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i); - const ST h_yz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i); - const ST h_zx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i); - const ST h_zy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i); - const ST h_zz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i); - - const ST h_xx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r); - const ST h_xy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r); - const ST h_xz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r); - const ST h_yx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r); - const ST h_yy_i = - v_m_v(h00[ji], h01[ji], 
h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r); - const ST h_yz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r); - const ST h_zx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r); - const ST h_zy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r); - const ST h_zz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r); - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i; - grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - - grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r; - grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r; - grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r; - grad_grad_psi[psiIndex + 1][3] = c * h_yx_i + s * h_yx_r; - grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r; - grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r; - grad_grad_psi[psiIndex + 1][6] = c * h_zx_i + s * h_zx_r; - grad_grad_psi[psiIndex + 1][7] = c * h_zy_i + s * h_zy_r; - grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r; - } - -#pragma omp simd - for (size_t j = std::max(nComplexBands, first); j < last; j++) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * 
kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = first_spo + nComplexBands + j; - - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - const ST h_xx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i); - const ST h_xy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i); - const ST h_xz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i); - const ST h_yx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i); - const ST h_yy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i); - const ST h_yz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i); - const ST h_zx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i); - const ST h_zy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i); - const 
ST h_zz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i); - - const ST h_xx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r); - const ST h_xy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r); - const ST h_xz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r); - const ST h_yx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r); - const ST h_yy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r); - const ST h_yz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r); - const ST h_zx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r); - const ST h_zy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r); - const ST h_zz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r); - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i; - grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - } -} - -template -void SplineC2R::evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) -{ 
- const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); -#pragma omp parallel - { - int first, last; - FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2); - } -} - -template -void SplineC2R::assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first, - int last) const -{ - // protect last - last = last < 0 ? kPoints.size() : (last > kPoints.size() ? kPoints.size() : last); - - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart.data(0); - const ST* restrict k1 = myKcart.data(1); - const ST* restrict k2 = myKcart.data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - - const ST* restrict gh000 = mygH.data(0); - const ST* restrict gh001 = mygH.data(1); - const ST* restrict gh002 = mygH.data(2); - const ST* restrict gh011 = mygH.data(3); - const ST* restrict gh012 = mygH.data(4); - const ST* restrict gh022 = mygH.data(5); - const ST* restrict gh111 = mygH.data(6); - const ST* restrict gh112 = mygH.data(7); - const ST* restrict gh122 = mygH.data(8); - const ST* restrict gh222 = mygH.data(9); - -//SIMD doesn't work quite right yet. Comment out until further debugging. 
-#pragma omp simd - for (size_t j = first; j < std::min(nComplexBands, last); j++) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = first_spo + jr; - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - psi[psiIndex + 1] = c * val_i + s * val_r; - dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; - dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; - dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. 
- const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); - const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); - const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); - const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12); - const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22); - const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22); - - const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02); - const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12); - const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22); - const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12); - const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22); - const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22); - - const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; - const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; - const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; - const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; - const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; - const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; - - const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; - const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; - const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; - const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY 
* val_i; - const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; - const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - - grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r; - grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r; - grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r; - grad_grad_psi[psiIndex + 1][3] = c * h_xy_i + s * h_xy_r; - grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r; - grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r; - grad_grad_psi[psiIndex + 1][6] = c * h_xz_i + s * h_xz_r; - grad_grad_psi[psiIndex + 1][7] = c * h_yz_i + s * h_yz_r; - grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r; - - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. 
- - const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], 
gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) - const ST gh_xxx_r = f3_xxx_r 
+ 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; - const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; - const ST gh_xxy_r = - f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; - const ST gh_xxy_i = - f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; - const ST gh_xxz_r = - f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; - const ST gh_xxz_i = - f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; - const ST gh_xyy_r = - f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; - const ST gh_xyy_i = - f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; - const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - - (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i; - const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - - (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r; - const ST gh_xzz_r = - f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; - const ST gh_xzz_i = - f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; - const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i; - const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r; - const ST gh_yyz_r = - f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; - const ST gh_yyz_i = - f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r; - const ST gh_yzz_r = - f3_yzz_r + 
(2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; - const ST gh_yzz_i = - f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; - const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i; - const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r; - - grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i; - grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i; - - grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i; - grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i; - - grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i; - 
grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i; - - grad_grad_grad_psi[psiIndex + 1][0][0] = c * gh_xxx_i + s * gh_xxx_r; - grad_grad_grad_psi[psiIndex + 1][0][1] = c * gh_xxy_i + s * gh_xxy_r; - grad_grad_grad_psi[psiIndex + 1][0][2] = c * gh_xxz_i + s * gh_xxz_r; - grad_grad_grad_psi[psiIndex + 1][0][3] = c * gh_xxy_i + s * gh_xxy_r; - grad_grad_grad_psi[psiIndex + 1][0][4] = c * gh_xyy_i + s * gh_xyy_r; - grad_grad_grad_psi[psiIndex + 1][0][5] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][0][6] = c * gh_xxz_i + s * gh_xxz_r; - grad_grad_grad_psi[psiIndex + 1][0][7] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][0][8] = c * gh_xzz_i + s * gh_xzz_r; - - grad_grad_grad_psi[psiIndex + 1][1][0] = c * gh_xxy_i + s * gh_xxy_r; - grad_grad_grad_psi[psiIndex + 1][1][1] = c * gh_xyy_i + s * gh_xyy_r; - grad_grad_grad_psi[psiIndex + 1][1][2] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][1][3] = c * gh_xyy_i + s * gh_xyy_r; - grad_grad_grad_psi[psiIndex + 1][1][4] = c * gh_yyy_i + s * gh_yyy_r; - grad_grad_grad_psi[psiIndex + 1][1][5] = c * gh_yyz_i + s * gh_yyz_r; - grad_grad_grad_psi[psiIndex + 1][1][6] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][1][7] = c * gh_yyz_i + s * gh_yyz_r; - grad_grad_grad_psi[psiIndex + 1][1][8] = c * gh_yzz_i + s * gh_yzz_r; - - grad_grad_grad_psi[psiIndex + 1][2][0] = c * gh_xxz_i + s * gh_xxz_r; - grad_grad_grad_psi[psiIndex + 1][2][1] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][2][2] = c * gh_xzz_i + s * gh_xzz_r; - grad_grad_grad_psi[psiIndex + 1][2][3] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][2][4] = c * gh_yyz_i + s * gh_yyz_r; - grad_grad_grad_psi[psiIndex + 1][2][5] = c * gh_yzz_i + s * 
gh_yzz_r; - grad_grad_grad_psi[psiIndex + 1][2][6] = c * gh_xzz_i + s * gh_xzz_r; - grad_grad_grad_psi[psiIndex + 1][2][7] = c * gh_yzz_i + s * gh_yzz_r; - grad_grad_grad_psi[psiIndex + 1][2][8] = c * gh_zzz_i + s * gh_zzz_r; - } -#pragma omp simd - for (size_t j = std::max(nComplexBands, first); j < last; j++) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = first_spo + nComplexBands + j; - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. 
- const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); - const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); - const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); - const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12); - const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22); - const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22); - - const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02); - const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12); - const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22); - const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12); - const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22); - const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22); - - const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; - const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; - const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; - const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; - const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; - const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; - - const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; - const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; - const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; - const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY 
* val_i; - const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; - const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. - - const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST 
f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], 
gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) - const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; - const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; - const ST gh_xxy_r = - f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; - const ST gh_xxy_i = - f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; - const ST gh_xxz_r = - f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; - const ST gh_xxz_i = - f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; - const ST gh_xyy_r = - f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; - const ST gh_xyy_i = - f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; - const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - - (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i; - const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - 
- (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r; - const ST gh_xzz_r = - f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; - const ST gh_xzz_i = - f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; - const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i; - const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r; - const ST gh_yyz_r = - f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; - const ST gh_yyz_i = - f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r; - const ST gh_yzz_r = - f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; - const ST gh_yzz_i = - f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; - const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i; - const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r; - //[x][xx] //These are the unique entries - grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i; - grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i; - - grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i; - 
grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i; - grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i; - - grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i; - } -} - -template -void SplineC2R::evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); -#pragma omp parallel - { - int first, last; - FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); - assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, last / 2); - } -} - -template class SplineC2R; -template class SplineC2R; - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2R.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2R.h index 
05b8c4a0b3..c98857e50f 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2R.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2R.h @@ -21,197 +21,12 @@ #ifndef QMCPLUSPLUS_SPLINE_C2R_H #define QMCPLUSPLUS_SPLINE_C2R_H -#include -#include "QMCWaveFunctions/BsplineFactory/BsplineSet.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "spline2/MultiBspline.hpp" -#include "Utilities/FairDivide.h" +#include "QMCWaveFunctions/BsplineFactory/SplineC2RT.h" namespace qmcplusplus { -/** class to match std::complex spline with BsplineSet::ValueType (real) SPOs - * @tparam ST precision of spline - * - * Requires temporage storage and multiplication of phase vectors - * The internal storage of complex spline coefficients uses double sized real arrays of ST type, aligned and padded. - * The first nComplexBands complex splines produce 2 real orbitals. - * The rest complex splines produce 1 real orbital. - * All the output orbitals are real (C2R). The maximal number of output orbitals is OrbitalSetSize. - */ -template -class SplineC2R : public BsplineSet -{ -public: - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = ST; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - // types for evaluation results - using TT = typename BsplineSet::ValueType; - using BsplineSet::GGGVector; - using BsplineSet::GradVector; - using BsplineSet::HessVector; - using BsplineSet::ValueVector; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - using ghContainer_type = VectorSoaContainer; - -private: - ///primitive cell - CrystalLattice PrimLattice; - ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. 
Hessian - Tensor GGt; - ///number of complex bands - int nComplexBands; - ///multi bspline set - std::shared_ptr> SplineInst; - - vContainer_type mKK; - VectorSoaContainer myKcart; - - ///thread private ratios for reduction when using nested threading, numVP x numThread - Matrix ratios_private; - -protected: - /// intermediate result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; - -public: - SplineC2R(const std::string& my_name) : BsplineSet(my_name), nComplexBands(0) {} - - SplineC2R(const SplineC2R& in); - virtual std::string getClassName() const override { return "SplineC2R"; } - virtual std::string getKeyword() const override { return "SplineC2R"; } - bool isComplex() const override { return true; }; - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - - inline void resizeStorage(size_t n, size_t nvals) - { - init_base(n); - size_t npad = getAlignedSize(2 * n); - myV.resize(npad); - myG.resize(npad); - myL.resize(npad); - myH.resize(npad); - mygH.resize(npad); - } - - void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } - - void gather_tables(Communicate* comm) - { - if (comm->size() == 1) - return; - const int Nbands = kPoints.size(); - const int Nbandgroups = comm->size(); - offset.resize(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, offset); - - for (size_t ib = 0; ib < offset.size(); ib++) - offset[ib] = offset[ib] * 2; - gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, offset); - } - - template - void create_spline(GT& xyz_g, BCT& xyz_bc) - { - resize_kpoints(); - SplineInst = std::make_shared>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - - app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " - << "for the coefficients in 3D spline orbital representation" << std::endl; - } - - inline void flush_zero() { SplineInst->flush_zero(); } 
- - /** remap kPoints to pack the double copy */ - inline void resize_kpoints() - { - nComplexBands = this->remap_kpoints(); - const int nk = kPoints.size(); - mKK.resize(nk); - myKcart.resize(nk); - for (size_t i = 0; i < nk; ++i) - { - mKK[i] = -dot(kPoints[i], kPoints[i]); - myKcart(i) = kPoints[i]; - } - } - - void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); - - bool read_splines(hdf_archive& h5f); - - bool write_splines(hdf_archive& h5f); - - void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const; - - void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override; - - void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override; - - /** assign_vgl - */ - void assign_vgl(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last) - const; - - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ - void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - void evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) override; - - void assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const; - - void evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override; - - void assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first = 0, - int last = -1) const; - - void evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - template - friend struct 
SplineSetReader; - friend struct BsplineReaderBase; -}; - -extern template class SplineC2R; -extern template class SplineC2R; +template +using SplineC2R = SplineC2RT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.cpp deleted file mode 100644 index 18c2a913c2..0000000000 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.cpp +++ /dev/null @@ -1,1704 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2019 QMCPACK developers. -// -// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// -// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory -////////////////////////////////////////////////////////////////////////////////////// - - -#include "SplineC2ROMPTarget.h" -#include "spline2/MultiBsplineEval.hpp" -#include "spline2/MultiBsplineEval_OMPoffload.hpp" -#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" -#include "ApplyPhaseC2R.hpp" -#include "Concurrency/OpenMP.h" - -namespace qmcplusplus -{ -template -SplineC2ROMPTarget::SplineC2ROMPTarget(const SplineC2ROMPTarget& in) = default; - -template -inline void SplineC2ROMPTarget::set_spline(SingleSplineType* spline_r, - SingleSplineType* spline_i, - int twist, - int ispline, - int level) -{ - SplineInst->copy_spline(spline_r, 2 * ispline); - SplineInst->copy_spline(spline_i, 2 * ispline + 1); -} - -template -bool SplineC2ROMPTarget::read_splines(hdf_archive& h5f) -{ - std::ostringstream o; - o << "spline_" << MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); -} - -template -bool SplineC2ROMPTarget::write_splines(hdf_archive& h5f) -{ - std::ostringstream o; - o 
<< "spline_" << MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); -} - -template -inline void SplineC2ROMPTarget::assign_v(const PointType& r, - const vContainer_type& myV, - ValueVector& psi, - int first, - int last) const -{ - // protect last - last = last > kPoints.size() ? kPoints.size() : last; - - const ST x = r[0], y = r[1], z = r[2]; - const ST* restrict kx = myKcart->data(0); - const ST* restrict ky = myKcart->data(1); - const ST* restrict kz = myKcart->data(2); - - TT* restrict psi_s = psi.data() + first_spo; -#pragma omp simd - for (size_t j = first; j < std::min(nComplexBands, last); j++) - { - ST s, c; - const size_t jr = j << 1; - const size_t ji = jr + 1; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); - psi_s[jr] = val_r * c - val_i * s; - psi_s[ji] = val_i * c + val_r * s; - } - - psi_s += nComplexBands; -#pragma omp simd - for (size_t j = std::max(nComplexBands, first); j < last; j++) - { - ST s, c; - const ST val_r = myV[2 * j]; - const ST val_i = myV[2 * j + 1]; - omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); - psi_s[j] = val_r * c - val_i * s; - } -} - -template -void SplineC2ROMPTarget::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - - if (true) - { -#pragma omp parallel - { - int first, last; - FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(r, myV, psi, first / 2, last / 2); - } - } - else - { - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - - const auto spline_padded_size = myV.size(); - const auto sposet_padded_size = 
getAlignedSize(OrbitalSetSize); - offload_scratch.resize(spline_padded_size); - results_scratch.resize(sposet_padded_size); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* offload_scratch_ptr = offload_scratch.data(); - auto* results_scratch_ptr = results_scratch.data(); - auto* psi_ptr = psi.data(); - const auto x = r[0], y = r[1], z = r[2]; - const auto rux = ru[0], ruy = ru[1], ruz = ru[2]; - const auto myKcart_padded_size = myKcart->capacity(); - auto* myKcart_ptr = myKcart->data(); - const size_t first_spo_local = first_spo; - const size_t nComplexBands_local = nComplexBands; - const auto requested_orb_size = psi.size(); - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \ - map(always, from: results_scratch_ptr[0:sposet_padded_size])") - for (int team_id = 0; team_id < NumTeams; team_id++) - { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size); - - int ix, iy, iz; - ST a[4], b[4], c[4]; - spline2::computeLocationAndFractional(spline_ptr, rux, ruy, ruz, ix, iy, iz, a, b, c); - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, - offload_scratch_ptr + first + index); - const size_t first_cplx = first / 2; - const size_t last_cplx = last / 2; - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2R::assign_v(x, y, z, results_scratch_ptr, offload_scratch_ptr, myKcart_ptr, myKcart_padded_size, - first_spo_local, nComplexBands_local, index); - } - - for (size_t i = 0; i < requested_orb_size; i++) - psi[i] = results_scratch[i]; - } - } -} - -template -void SplineC2ROMPTarget::evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, 
- std::vector& ratios) -{ - const int nVP = VP.getTotalNum(); - psiinv_pos_copy.resize(psiinv.size() + nVP * 6); - - // stage psiinv to psiinv_pos_copy - std::copy_n(psiinv.data(), psiinv.size(), psiinv_pos_copy.data()); - - // pack particle positions - auto* restrict pos_scratch = psiinv_pos_copy.data() + psiinv.size(); - for (int iat = 0; iat < nVP; ++iat) - { - const PointType& r = VP.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - pos_scratch[iat * 6] = r[0]; - pos_scratch[iat * 6 + 1] = r[1]; - pos_scratch[iat * 6 + 2] = r[2]; - pos_scratch[iat * 6 + 3] = ru[0]; - pos_scratch[iat * 6 + 4] = ru[1]; - pos_scratch[iat * 6 + 5] = ru[2]; - } - - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - ratios_private.resize(nVP, NumTeams); - const auto spline_padded_size = myV.size(); - const auto sposet_padded_size = getAlignedSize(OrbitalSetSize); - offload_scratch.resize(spline_padded_size * nVP); - results_scratch.resize(sposet_padded_size * nVP); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* offload_scratch_ptr = offload_scratch.data(); - auto* results_scratch_ptr = results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* myKcart_ptr = myKcart->data(); - auto* psiinv_ptr = psiinv_pos_copy.data(); - auto* ratios_private_ptr = ratios_private.data(); - const size_t first_spo_local = first_spo; - const size_t nComplexBands_local = nComplexBands; - const auto requested_orb_size = psiinv.size(); - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*nVP) \ - map(always, to: psiinv_ptr[0:psiinv_pos_copy.size()]) \ - map(always, from: ratios_private_ptr[0:NumTeams*nVP])") - for (int iat = 0; iat < nVP; iat++) - for (int team_id = 0; team_id < NumTeams; team_id++) - { - const size_t first = ChunkSizePerTeam * 
team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size); - - auto* restrict offload_scratch_iat_ptr = offload_scratch_ptr + spline_padded_size * iat; - auto* restrict psi_iat_ptr = results_scratch_ptr + sposet_padded_size * iat; - auto* restrict pos_scratch = psiinv_ptr + requested_orb_size; - - int ix, iy, iz; - ST a[4], b[4], c[4]; - spline2::computeLocationAndFractional(spline_ptr, ST(pos_scratch[iat * 6 + 3]), ST(pos_scratch[iat * 6 + 4]), - ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c); - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, - offload_scratch_iat_ptr + first + index); - const size_t first_cplx = first / 2; - const size_t last_cplx = last / 2; - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2R::assign_v(ST(pos_scratch[iat * 6]), ST(pos_scratch[iat * 6 + 1]), ST(pos_scratch[iat * 6 + 2]), - psi_iat_ptr, offload_scratch_iat_ptr, myKcart_ptr, myKcart_padded_size, first_spo_local, - nComplexBands_local, index); - - const size_t first_real = first_cplx + omptarget::min(nComplexBands_local, first_cplx); - const size_t last_real = - omptarget::min(last_cplx + omptarget::min(nComplexBands_local, last_cplx), requested_orb_size); - TT sum(0); - PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)") - for (int i = first_real; i < last_real; i++) - sum += psi_iat_ptr[i] * psiinv_ptr[i]; - ratios_private_ptr[iat * NumTeams + team_id] = sum; - } - } - - // do the reduction manually - for (int iat = 0; iat < nVP; ++iat) - { - ratios[iat] = TT(0); - for (int tid = 0; tid < NumTeams; tid++) - ratios[iat] += ratios_private[iat][tid]; - } -} - -template -void SplineC2ROMPTarget::mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - 
std::vector>& ratios_list) const -{ - assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.getCastedLeader>(); - auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); - auto& det_ratios_buffer_H2D = mw_mem.det_ratios_buffer_H2D; - auto& mw_ratios_private = mw_mem.mw_ratios_private; - auto& mw_offload_scratch = mw_mem.mw_offload_scratch; - auto& mw_results_scratch = mw_mem.mw_results_scratch; - const size_t nw = spo_list.size(); - const size_t requested_orb_size = phi_leader.size(); - - size_t mw_nVP = 0; - for (const VirtualParticleSet& VP : vp_list) - mw_nVP += VP.getTotalNum(); - - const size_t packed_size = nw * sizeof(ValueType*) + mw_nVP * (6 * sizeof(TT) + sizeof(int)); - det_ratios_buffer_H2D.resize(packed_size); - - // pack invRow_ptr_list to det_ratios_buffer_H2D - Vector ptr_buffer(reinterpret_cast(det_ratios_buffer_H2D.data()), nw); - for (size_t iw = 0; iw < nw; iw++) - ptr_buffer[iw] = invRow_ptr_list[iw]; - - // pack particle positions - auto* pos_ptr = reinterpret_cast(det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*)); - auto* ref_id_ptr = - reinterpret_cast(det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(TT)); - size_t iVP = 0; - for (size_t iw = 0; iw < nw; iw++) - { - const VirtualParticleSet& VP = vp_list[iw]; - assert(ratios_list[iw].size() == VP.getTotalNum()); - for (size_t iat = 0; iat < VP.getTotalNum(); ++iat, ++iVP) - { - ref_id_ptr[iVP] = iw; - const PointType& r = VP.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - pos_ptr[0] = r[0]; - pos_ptr[1] = r[1]; - pos_ptr[2] = r[2]; - pos_ptr[3] = ru[0]; - pos_ptr[4] = ru[1]; - pos_ptr[5] = ru[2]; - pos_ptr += 6; - } - } - - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - mw_ratios_private.resize(mw_nVP, NumTeams); - const auto spline_padded_size = myV.size(); - const auto sposet_padded_size = getAlignedSize(OrbitalSetSize); - 
mw_offload_scratch.resize(spline_padded_size * mw_nVP); - mw_results_scratch.resize(sposet_padded_size * mw_nVP); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* offload_scratch_ptr = mw_offload_scratch.data(); - auto* results_scratch_ptr = mw_results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* myKcart_ptr = myKcart->data(); - auto* buffer_H2D_ptr = det_ratios_buffer_H2D.data(); - auto* ratios_private_ptr = mw_ratios_private.data(); - const size_t first_spo_local = first_spo; - const size_t nComplexBands_local = nComplexBands; - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*mw_nVP) \ - map(always, to: buffer_H2D_ptr[0:det_ratios_buffer_H2D.size()]) \ - map(always, from: ratios_private_ptr[0:NumTeams*mw_nVP])") - for (int iat = 0; iat < mw_nVP; iat++) - for (int team_id = 0; team_id < NumTeams; team_id++) - { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size); - - auto* restrict offload_scratch_iat_ptr = offload_scratch_ptr + spline_padded_size * iat; - auto* restrict psi_iat_ptr = results_scratch_ptr + sposet_padded_size * iat; - auto* ref_id_ptr = reinterpret_cast(buffer_H2D_ptr + nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(TT)); - auto* restrict psiinv_ptr = reinterpret_cast(buffer_H2D_ptr)[ref_id_ptr[iat]]; - auto* restrict pos_scratch = reinterpret_cast(buffer_H2D_ptr + nw * sizeof(ValueType*)); - - int ix, iy, iz; - ST a[4], b[4], c[4]; - spline2::computeLocationAndFractional(spline_ptr, ST(pos_scratch[iat * 6 + 3]), ST(pos_scratch[iat * 6 + 4]), - ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c); - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, - 
offload_scratch_iat_ptr + first + index); - const size_t first_cplx = first / 2; - const size_t last_cplx = last / 2; - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2R::assign_v(ST(pos_scratch[iat * 6]), ST(pos_scratch[iat * 6 + 1]), ST(pos_scratch[iat * 6 + 2]), - psi_iat_ptr, offload_scratch_iat_ptr, myKcart_ptr, myKcart_padded_size, first_spo_local, - nComplexBands_local, index); - - const size_t first_real = first_cplx + omptarget::min(nComplexBands_local, first_cplx); - const size_t last_real = - omptarget::min(last_cplx + omptarget::min(nComplexBands_local, last_cplx), requested_orb_size); - TT sum(0); - PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)") - for (int i = first_real; i < last_real; i++) - sum += psi_iat_ptr[i] * psiinv_ptr[i]; - ratios_private_ptr[iat * NumTeams + team_id] = sum; - } - } - - // do the reduction manually - iVP = 0; - for (size_t iw = 0; iw < nw; iw++) - { - auto& ratios = ratios_list[iw]; - for (size_t iat = 0; iat < ratios.size(); iat++, iVP++) - { - ratios[iat] = TT(0); - for (int tid = 0; tid < NumTeams; ++tid) - ratios[iat] += mw_ratios_private[iVP][tid]; - } - } -} - -/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ -template -inline void SplineC2ROMPTarget::assign_vgl_from_l(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) -{ - constexpr ST two(2); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart->data(0); - ASSUME_ALIGNED(k0); - const ST* restrict k1 = myKcart->data(1); - ASSUME_ALIGNED(k1); - const ST* restrict k2 = myKcart->data(2); - ASSUME_ALIGNED(k2); - - const ST* restrict g0 = myG.data(0); - ASSUME_ALIGNED(g0); - const ST* restrict g1 = myG.data(1); - ASSUME_ALIGNED(g1); - const ST* restrict g2 = myG.data(2); - ASSUME_ALIGNED(g2); - - const size_t N = kPoints.size(); - -#pragma omp simd - for (size_t j = 0; j < nComplexBands; j++) - { - const size_t jr = j 
<< 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g0[jr]; - const ST dY_r = g1[jr]; - const ST dZ_r = g2[jr]; - - const ST dX_i = g0[ji]; - const ST dY_i = g1[ji]; - const ST dZ_i = g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const ST lap_r = myL[jr] + (*mKK)[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = myL[ji] + (*mKK)[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - - const size_t psiIndex = first_spo + jr; - psi[psiIndex] = c * val_r - s * val_i; - psi[psiIndex + 1] = c * val_i + s * val_r; - d2psi[psiIndex] = c * lap_r - s * lap_i; - d2psi[psiIndex + 1] = c * lap_i + s * lap_r; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; - dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; - dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - } - -#pragma omp simd - for (size_t j = nComplexBands; j < N; j++) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g0[jr]; - const ST dY_r = g1[jr]; - const ST dZ_r = g2[jr]; - - const ST dX_i = g0[ji]; - const ST dY_i = g1[ji]; - const ST dZ_i = g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = 
dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - const size_t psiIndex = first_spo + nComplexBands + j; - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - const ST lap_r = myL[jr] + (*mKK)[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = myL[ji] + (*mKK)[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - d2psi[psiIndex] = c * lap_r - s * lap_i; - } -} - -template -void SplineC2ROMPTarget::evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - - const auto spline_padded_size = myV.size(); - const auto sposet_padded_size = getAlignedSize(OrbitalSetSize); - // for V(1)G(3)H(6) intermediate result - offload_scratch.resize(spline_padded_size * SoAFields3D::NUM_FIELDS); - // for V(1)G(3)L(1) final result - results_scratch.resize(sposet_padded_size * 5); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* offload_scratch_ptr = offload_scratch.data(); - auto* results_scratch_ptr = results_scratch.data(); - const auto x = r[0], y = r[1], z = r[2]; - const auto rux = ru[0], ruy = ru[1], ruz = ru[2]; - const auto myKcart_padded_size = myKcart->capacity(); - auto* mKK_ptr = mKK->data(); - auto* GGt_ptr = GGt_offload->data(); - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - auto* myKcart_ptr = myKcart->data(); - const size_t first_spo_local = first_spo; - const size_t nComplexBands_local = nComplexBands; - const auto requested_orb_size = 
psi.size(); - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \ - map(always, from: results_scratch_ptr[0:sposet_padded_size*5])") - for (int team_id = 0; team_id < NumTeams; team_id++) - { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size); - - int ix, iy, iz; - ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; - spline2::computeLocationAndFractional(spline_ptr, rux, ruy, ruz, ix, iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c); - - const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2], - PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], - PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]}; - const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6], - GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - { - spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b, d2c, - offload_scratch_ptr + first + index, spline_padded_size); - const int output_index = first + index; - offload_scratch_ptr[spline_padded_size * SoAFields3D::LAPL + output_index] = - SymTrace(offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS00 + output_index], - offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS01 + output_index], - offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS02 + output_index], - offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS11 + output_index], - offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS12 + output_index], - offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS22 + output_index], symGGt); - } - const size_t first_cplx = first / 2; - const size_t last_cplx = last / 2; - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < 
last_cplx; index++) - C2R::assign_vgl(x, y, z, results_scratch_ptr, sposet_padded_size, mKK_ptr, offload_scratch_ptr, - spline_padded_size, G, myKcart_ptr, myKcart_padded_size, first_spo_local, nComplexBands_local, - index); - } - } - - for (size_t i = 0; i < requested_orb_size; i++) - { - psi[i] = results_scratch[i]; - dpsi[i][0] = results_scratch[i + sposet_padded_size * 1]; - dpsi[i][1] = results_scratch[i + sposet_padded_size * 2]; - dpsi[i][2] = results_scratch[i + sposet_padded_size * 3]; - d2psi[i] = results_scratch[i + sposet_padded_size * 4]; - } -} - -template -void SplineC2ROMPTarget::evaluateVGLMultiPos(const Vector>& multi_pos, - Vector>& offload_scratch, - Vector>& results_scratch, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const -{ - const size_t num_pos = psi_v_list.size(); - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - const auto spline_padded_size = myV.size(); - const auto sposet_padded_size = getAlignedSize(OrbitalSetSize); - // for V(1)G(3)H(6) intermediate result - offload_scratch.resize(spline_padded_size * num_pos * SoAFields3D::NUM_FIELDS); - // for V(1)G(3)L(1) final result - results_scratch.resize(sposet_padded_size * num_pos * 5); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* pos_copy_ptr = multi_pos.data(); - auto* offload_scratch_ptr = offload_scratch.data(); - auto* results_scratch_ptr = results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* mKK_ptr = mKK->data(); - auto* GGt_ptr = GGt_offload->data(); - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - auto* myKcart_ptr = myKcart->data(); - const size_t first_spo_local = first_spo; - const size_t nComplexBands_local = nComplexBands; - const auto requested_orb_size = psi_v_list[0].get().size(); - - { - ScopedTimer 
offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \ - map(always, to: pos_copy_ptr[0:num_pos*6]) \ - map(always, from: results_scratch_ptr[0:sposet_padded_size*num_pos*5])") - for (int iw = 0; iw < num_pos; iw++) - for (int team_id = 0; team_id < NumTeams; team_id++) - { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size); - - auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + spline_padded_size * iw * SoAFields3D::NUM_FIELDS; - auto* restrict psi_iw_ptr = results_scratch_ptr + sposet_padded_size * iw * 5; - - int ix, iy, iz; - ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; - spline2::computeLocationAndFractional(spline_ptr, pos_copy_ptr[iw * 6 + 3], pos_copy_ptr[iw * 6 + 4], - pos_copy_ptr[iw * 6 + 5], ix, iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c); - - const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2], - PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], - PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]}; - const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6], - GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - { - spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b, - d2c, offload_scratch_iw_ptr + first + index, spline_padded_size); - const int output_index = first + index; - offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::LAPL + output_index] = - SymTrace(offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS00 + output_index], - offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS01 + output_index], - offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS02 + output_index], - offload_scratch_iw_ptr[spline_padded_size * 
SoAFields3D::HESS11 + output_index], - offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS12 + output_index], - offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS22 + output_index], symGGt); - } - const size_t first_cplx = first / 2; - const size_t last_cplx = last / 2; - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2R::assign_vgl(pos_copy_ptr[iw * 6], pos_copy_ptr[iw * 6 + 1], pos_copy_ptr[iw * 6 + 2], psi_iw_ptr, - sposet_padded_size, mKK_ptr, offload_scratch_iw_ptr, spline_padded_size, G, myKcart_ptr, - myKcart_padded_size, first_spo_local, nComplexBands_local, index); - } - } - - for (int iw = 0; iw < num_pos; ++iw) - { - auto* restrict results_iw_ptr = results_scratch_ptr + sposet_padded_size * iw * 5; - ValueVector& psi_v(psi_v_list[iw]); - GradVector& dpsi_v(dpsi_v_list[iw]); - ValueVector& d2psi_v(d2psi_v_list[iw]); - for (size_t i = 0; i < requested_orb_size; i++) - { - psi_v[i] = results_iw_ptr[i]; - dpsi_v[i][0] = results_iw_ptr[i + sposet_padded_size]; - dpsi_v[i][1] = results_iw_ptr[i + sposet_padded_size * 2]; - dpsi_v[i][2] = results_iw_ptr[i + sposet_padded_size * 3]; - d2psi_v[i] = results_iw_ptr[i + sposet_padded_size * 4]; - } - } -} - -template -void SplineC2ROMPTarget::mw_evaluateVGL(const RefVectorWithLeader& sa_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const -{ - assert(this == &sa_list.getLeader()); - auto& phi_leader = sa_list.getCastedLeader>(); - auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); - auto& mw_pos_copy = mw_mem.mw_pos_copy; - auto& mw_offload_scratch = mw_mem.mw_offload_scratch; - auto& mw_results_scratch = mw_mem.mw_results_scratch; - const int nwalkers = sa_list.size(); - mw_pos_copy.resize(nwalkers * 6); - - // pack particle positions - for (int iw = 0; iw < nwalkers; ++iw) - { - const PointType& r = P_list[iw].activeR(iat); - PointType 
ru(PrimLattice.toUnit_floor(r)); - mw_pos_copy[iw * 6] = r[0]; - mw_pos_copy[iw * 6 + 1] = r[1]; - mw_pos_copy[iw * 6 + 2] = r[2]; - mw_pos_copy[iw * 6 + 3] = ru[0]; - mw_pos_copy[iw * 6 + 4] = ru[1]; - mw_pos_copy[iw * 6 + 5] = ru[2]; - } - - phi_leader.evaluateVGLMultiPos(mw_pos_copy, mw_offload_scratch, mw_results_scratch, psi_v_list, dpsi_v_list, - d2psi_v_list); -} - -template -void SplineC2ROMPTarget::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const -{ - assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.getCastedLeader>(); - auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); - auto& buffer_H2D = mw_mem.buffer_H2D; - auto& rg_private = mw_mem.rg_private; - auto& mw_offload_scratch = mw_mem.mw_offload_scratch; - auto& mw_results_scratch = mw_mem.mw_results_scratch; - const int nwalkers = spo_list.size(); - buffer_H2D.resize(nwalkers, sizeof(ST) * 6 + sizeof(ValueType*)); - - // pack particle positions and invRow pointers. 
- for (int iw = 0; iw < nwalkers; ++iw) - { - const PointType& r = P_list[iw].activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - Vector pos_copy(reinterpret_cast(buffer_H2D[iw]), 6); - - pos_copy[0] = r[0]; - pos_copy[1] = r[1]; - pos_copy[2] = r[2]; - pos_copy[3] = ru[0]; - pos_copy[4] = ru[1]; - pos_copy[5] = ru[2]; - - auto& invRow_ptr = *reinterpret_cast(buffer_H2D[iw] + sizeof(ST) * 6); - invRow_ptr = invRow_ptr_list[iw]; - } - - const size_t num_pos = nwalkers; - const auto spline_padded_size = myV.size(); - const auto sposet_padded_size = getAlignedSize(OrbitalSetSize); - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - - mw_offload_scratch.resize(spline_padded_size * num_pos * SoAFields3D::NUM_FIELDS); - // for V(1)G(3)L(1) final result - mw_results_scratch.resize(sposet_padded_size * num_pos * 5); - // per team ratio and grads - rg_private.resize(num_pos, NumTeams * 4); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* buffer_H2D_ptr = buffer_H2D.data(); - auto* offload_scratch_ptr = mw_offload_scratch.data(); - auto* results_scratch_ptr = mw_results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* mKK_ptr = mKK->data(); - auto* GGt_ptr = GGt_offload->data(); - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - auto* myKcart_ptr = myKcart->data(); - auto* phi_vgl_ptr = phi_vgl_v.data(); - auto* rg_private_ptr = rg_private.data(); - const size_t buffer_H2D_stride = buffer_H2D.cols(); - const size_t first_spo_local = first_spo; - const auto requested_orb_size = phi_vgl_v.size(2); - const size_t phi_vgl_stride = num_pos * requested_orb_size; - const size_t nComplexBands_local = nComplexBands; - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \ - map(always, to: 
buffer_H2D_ptr[:buffer_H2D.size()]) \ - map(always, from: rg_private_ptr[0:rg_private.size()])") - for (int iw = 0; iw < num_pos; iw++) - for (int team_id = 0; team_id < NumTeams; team_id++) - { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size); - - auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + spline_padded_size * iw * SoAFields3D::NUM_FIELDS; - auto* restrict psi_iw_ptr = results_scratch_ptr + sposet_padded_size * iw * 5; - const auto* restrict pos_iw_ptr = reinterpret_cast(buffer_H2D_ptr + buffer_H2D_stride * iw); - const auto* restrict invRow_iw_ptr = - *reinterpret_cast(buffer_H2D_ptr + buffer_H2D_stride * iw + sizeof(ST) * 6); - - int ix, iy, iz; - ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; - spline2::computeLocationAndFractional(spline_ptr, pos_iw_ptr[3], pos_iw_ptr[4], pos_iw_ptr[5], ix, iy, iz, a, b, - c, da, db, dc, d2a, d2b, d2c); - - const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2], - PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], - PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]}; - const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6], - GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - { - spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b, - d2c, offload_scratch_iw_ptr + first + index, spline_padded_size); - const int output_index = first + index; - offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::LAPL + output_index] = - SymTrace(offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS00 + output_index], - offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS01 + output_index], - offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS02 + output_index], - 
offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS11 + output_index], - offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS12 + output_index], - offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS22 + output_index], symGGt); - } - const size_t first_cplx = first / 2; - const size_t last_cplx = last / 2; - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2R::assign_vgl(pos_iw_ptr[0], pos_iw_ptr[1], pos_iw_ptr[2], psi_iw_ptr, sposet_padded_size, mKK_ptr, - offload_scratch_iw_ptr, spline_padded_size, G, myKcart_ptr, myKcart_padded_size, - first_spo_local, nComplexBands_local, index); - - ValueType* restrict psi = psi_iw_ptr; - ValueType* restrict dpsi_x = psi_iw_ptr + sposet_padded_size; - ValueType* restrict dpsi_y = psi_iw_ptr + sposet_padded_size * 2; - ValueType* restrict dpsi_z = psi_iw_ptr + sposet_padded_size * 3; - ValueType* restrict d2psi = psi_iw_ptr + sposet_padded_size * 4; - - ValueType* restrict out_phi = phi_vgl_ptr + iw * requested_orb_size; - ValueType* restrict out_dphi_x = out_phi + phi_vgl_stride; - ValueType* restrict out_dphi_y = out_dphi_x + phi_vgl_stride; - ValueType* restrict out_dphi_z = out_dphi_y + phi_vgl_stride; - ValueType* restrict out_d2phi = out_dphi_z + phi_vgl_stride; - - const size_t first_real = first_cplx + omptarget::min(nComplexBands_local, first_cplx); - const size_t last_real = - omptarget::min(last_cplx + omptarget::min(nComplexBands_local, last_cplx), requested_orb_size); - ValueType ratio(0), grad_x(0), grad_y(0), grad_z(0); - PRAGMA_OFFLOAD("omp parallel for reduction(+: ratio, grad_x, grad_y, grad_z)") - for (int j = first_real; j < last_real; j++) - { - out_phi[j] = psi[j]; - out_dphi_x[j] = dpsi_x[j]; - out_dphi_y[j] = dpsi_y[j]; - out_dphi_z[j] = dpsi_z[j]; - out_d2phi[j] = d2psi[j]; - - ratio += psi[j] * invRow_iw_ptr[j]; - grad_x += dpsi_x[j] * invRow_iw_ptr[j]; - grad_y += dpsi_y[j] * invRow_iw_ptr[j]; - grad_z += dpsi_z[j] * 
invRow_iw_ptr[j]; - } - - rg_private_ptr[(iw * NumTeams + team_id) * 4] = ratio; - rg_private_ptr[(iw * NumTeams + team_id) * 4 + 1] = grad_x; - rg_private_ptr[(iw * NumTeams + team_id) * 4 + 2] = grad_y; - rg_private_ptr[(iw * NumTeams + team_id) * 4 + 3] = grad_z; - } - } - - for (int iw = 0; iw < num_pos; iw++) - { - ValueType ratio(0); - for (int team_id = 0; team_id < NumTeams; team_id++) - ratio += rg_private[iw][team_id * 4]; - ratios[iw] = ratio; - - ValueType grad_x(0), grad_y(0), grad_z(0); - for (int team_id = 0; team_id < NumTeams; team_id++) - { - grad_x += rg_private[iw][team_id * 4 + 1]; - grad_y += rg_private[iw][team_id * 4 + 2]; - grad_z += rg_private[iw][team_id * 4 + 3]; - } - grads[iw] = GradType{grad_x / ratio, grad_y / ratio, grad_z / ratio}; - } -} - -template -void SplineC2ROMPTarget::assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const -{ - // protect last - last = last > kPoints.size() ? 
kPoints.size() : last; - - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart->data(0); - const ST* restrict k1 = myKcart->data(1); - const ST* restrict k2 = myKcart->data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - -#pragma omp simd - for (size_t j = first; j < std::min(nComplexBands, last); j++) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = first_spo + jr; - - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r 
- s * gZ_i; - - psi[psiIndex + 1] = c * val_i + s * val_r; - dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; - dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; - dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - - const ST h_xx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i); - const ST h_xy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i); - const ST h_xz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i); - const ST h_yx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i); - const ST h_yy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i); - const ST h_yz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i); - const ST h_zx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i); - const ST h_zy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i); - const ST h_zz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i); - - const ST h_xx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r); - const ST h_xy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r); - const ST h_xz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r); - const ST h_yx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r); - const ST h_yy_i = - v_m_v(h00[ji], h01[ji], 
h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r); - const ST h_yz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r); - const ST h_zx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r); - const ST h_zy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r); - const ST h_zz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r); - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i; - grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - - grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r; - grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r; - grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r; - grad_grad_psi[psiIndex + 1][3] = c * h_yx_i + s * h_yx_r; - grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r; - grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r; - grad_grad_psi[psiIndex + 1][6] = c * h_zx_i + s * h_zx_r; - grad_grad_psi[psiIndex + 1][7] = c * h_zy_i + s * h_zy_r; - grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r; - } - -#pragma omp simd - for (size_t j = std::max(nComplexBands, first); j < last; j++) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * 
kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = first_spo + nComplexBands + j; - - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - const ST h_xx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i); - const ST h_xy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i); - const ST h_xz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i); - const ST h_yx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i); - const ST h_yy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i); - const ST h_yz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i); - const ST h_zx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i); - const ST h_zy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i); - const 
ST h_zz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i); - - const ST h_xx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r); - const ST h_xy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r); - const ST h_xz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r); - const ST h_yx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r); - const ST h_yy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r); - const ST h_yz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r); - const ST h_zx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r); - const ST h_zy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r); - const ST h_zz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r); - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i; - grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - } -} - -template -void SplineC2ROMPTarget::evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& 
grad_grad_psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); -#pragma omp parallel - { - int first, last; - FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2); - } -} - -template -void SplineC2ROMPTarget::assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first, - int last) const -{ - // protect last - last = last < 0 ? kPoints.size() : (last > kPoints.size() ? kPoints.size() : last); - - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart->data(0); - const ST* restrict k1 = myKcart->data(1); - const ST* restrict k2 = myKcart->data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - - const ST* restrict gh000 = mygH.data(0); - const ST* restrict gh001 = mygH.data(1); - const ST* restrict gh002 = mygH.data(2); - const ST* restrict gh011 = mygH.data(3); - const ST* restrict gh012 = mygH.data(4); - const ST* restrict gh022 = mygH.data(5); - const ST* restrict gh111 = mygH.data(6); - const ST* restrict gh112 = mygH.data(7); - const ST* restrict gh122 = mygH.data(8); - const ST* restrict gh222 = mygH.data(9); - -//SIMD doesn't work quite right yet. 
Comment out until further debugging. -#pragma omp simd - for (size_t j = first; j < std::min(nComplexBands, last); j++) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = first_spo + jr; - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - psi[psiIndex + 1] = c * val_i + s * val_r; - dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; - dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; - dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. 
- const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); - const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); - const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); - const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12); - const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22); - const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22); - - const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02); - const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12); - const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22); - const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12); - const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22); - const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22); - - const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; - const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; - const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; - const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; - const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; - const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; - - const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; - const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; - const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; - const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY 
* val_i; - const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; - const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - - grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r; - grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r; - grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r; - grad_grad_psi[psiIndex + 1][3] = c * h_xy_i + s * h_xy_r; - grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r; - grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r; - grad_grad_psi[psiIndex + 1][6] = c * h_xz_i + s * h_xz_r; - grad_grad_psi[psiIndex + 1][7] = c * h_yz_i + s * h_yz_r; - grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r; - - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. 
- - const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], 
gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) - const ST gh_xxx_r = f3_xxx_r 
+ 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; - const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; - const ST gh_xxy_r = - f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; - const ST gh_xxy_i = - f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; - const ST gh_xxz_r = - f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; - const ST gh_xxz_i = - f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; - const ST gh_xyy_r = - f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; - const ST gh_xyy_i = - f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; - const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - - (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i; - const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - - (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r; - const ST gh_xzz_r = - f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; - const ST gh_xzz_i = - f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; - const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i; - const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r; - const ST gh_yyz_r = - f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; - const ST gh_yyz_i = - f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r; - const ST gh_yzz_r = - f3_yzz_r + 
(2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; - const ST gh_yzz_i = - f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; - const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i; - const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r; - - grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i; - grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i; - - grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i; - grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i; - - grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i; - 
grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i; - - grad_grad_grad_psi[psiIndex + 1][0][0] = c * gh_xxx_i + s * gh_xxx_r; - grad_grad_grad_psi[psiIndex + 1][0][1] = c * gh_xxy_i + s * gh_xxy_r; - grad_grad_grad_psi[psiIndex + 1][0][2] = c * gh_xxz_i + s * gh_xxz_r; - grad_grad_grad_psi[psiIndex + 1][0][3] = c * gh_xxy_i + s * gh_xxy_r; - grad_grad_grad_psi[psiIndex + 1][0][4] = c * gh_xyy_i + s * gh_xyy_r; - grad_grad_grad_psi[psiIndex + 1][0][5] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][0][6] = c * gh_xxz_i + s * gh_xxz_r; - grad_grad_grad_psi[psiIndex + 1][0][7] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][0][8] = c * gh_xzz_i + s * gh_xzz_r; - - grad_grad_grad_psi[psiIndex + 1][1][0] = c * gh_xxy_i + s * gh_xxy_r; - grad_grad_grad_psi[psiIndex + 1][1][1] = c * gh_xyy_i + s * gh_xyy_r; - grad_grad_grad_psi[psiIndex + 1][1][2] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][1][3] = c * gh_xyy_i + s * gh_xyy_r; - grad_grad_grad_psi[psiIndex + 1][1][4] = c * gh_yyy_i + s * gh_yyy_r; - grad_grad_grad_psi[psiIndex + 1][1][5] = c * gh_yyz_i + s * gh_yyz_r; - grad_grad_grad_psi[psiIndex + 1][1][6] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][1][7] = c * gh_yyz_i + s * gh_yyz_r; - grad_grad_grad_psi[psiIndex + 1][1][8] = c * gh_yzz_i + s * gh_yzz_r; - - grad_grad_grad_psi[psiIndex + 1][2][0] = c * gh_xxz_i + s * gh_xxz_r; - grad_grad_grad_psi[psiIndex + 1][2][1] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][2][2] = c * gh_xzz_i + s * gh_xzz_r; - grad_grad_grad_psi[psiIndex + 1][2][3] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][2][4] = c * gh_yyz_i + s * gh_yyz_r; - grad_grad_grad_psi[psiIndex + 1][2][5] = c * gh_yzz_i + s * 
gh_yzz_r; - grad_grad_grad_psi[psiIndex + 1][2][6] = c * gh_xzz_i + s * gh_xzz_r; - grad_grad_grad_psi[psiIndex + 1][2][7] = c * gh_yzz_i + s * gh_yzz_r; - grad_grad_grad_psi[psiIndex + 1][2][8] = c * gh_zzz_i + s * gh_zzz_r; - } -#pragma omp simd - for (size_t j = std::max(nComplexBands, first); j < last; j++) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = first_spo + nComplexBands + j; - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. 
- const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); - const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); - const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); - const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12); - const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22); - const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22); - - const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02); - const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12); - const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22); - const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12); - const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22); - const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22); - - const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; - const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; - const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; - const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; - const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; - const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; - - const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; - const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; - const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; - const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY 
* val_i; - const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; - const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. - - const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST 
f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], 
gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) - const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; - const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; - const ST gh_xxy_r = - f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; - const ST gh_xxy_i = - f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; - const ST gh_xxz_r = - f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; - const ST gh_xxz_i = - f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; - const ST gh_xyy_r = - f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; - const ST gh_xyy_i = - f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; - const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - - (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i; - const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - 
- (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r; - const ST gh_xzz_r = - f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; - const ST gh_xzz_i = - f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; - const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i; - const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r; - const ST gh_yyz_r = - f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; - const ST gh_yyz_i = - f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r; - const ST gh_yzz_r = - f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; - const ST gh_yzz_i = - f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; - const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i; - const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r; - //[x][xx] //These are the unique entries - grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i; - grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i; - - grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i; - 
grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i; - grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i; - - grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i; - } -} - -template -void SplineC2ROMPTarget::evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); -#pragma omp parallel - { - int first, last; - FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); - assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, last / 2); - } -} - -template -void SplineC2ROMPTarget::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) -{ - // chunk the [first, last) loop 
into blocks to save temporary memory usage - const int block_size = 16; - - // reference vectors refer to the rows of matrices - std::vector multi_psi_v; - std::vector multi_dpsi_v; - std::vector multi_d2psi_v; - RefVector psi_v_list; - RefVector dpsi_v_list; - RefVector d2psi_v_list; - - multi_psi_v.reserve(block_size); - multi_dpsi_v.reserve(block_size); - multi_d2psi_v.reserve(block_size); - psi_v_list.reserve(block_size); - dpsi_v_list.reserve(block_size); - d2psi_v_list.reserve(block_size); - - for (int iat = first, i = 0; iat < last; iat += block_size, i += block_size) - { - const int actual_block_size = std::min(last - iat, block_size); - multi_pos_copy.resize(actual_block_size * 6); - multi_psi_v.clear(); - multi_dpsi_v.clear(); - multi_d2psi_v.clear(); - psi_v_list.clear(); - dpsi_v_list.clear(); - d2psi_v_list.clear(); - - for (int ipos = 0; ipos < actual_block_size; ++ipos) - { - // pack particle positions - const PointType& r = P.activeR(iat + ipos); - PointType ru(PrimLattice.toUnit_floor(r)); - multi_pos_copy[ipos * 6] = r[0]; - multi_pos_copy[ipos * 6 + 1] = r[1]; - multi_pos_copy[ipos * 6 + 2] = r[2]; - multi_pos_copy[ipos * 6 + 3] = ru[0]; - multi_pos_copy[ipos * 6 + 4] = ru[1]; - multi_pos_copy[ipos * 6 + 5] = ru[2]; - - multi_psi_v.emplace_back(logdet[i + ipos], OrbitalSetSize); - multi_dpsi_v.emplace_back(dlogdet[i + ipos], OrbitalSetSize); - multi_d2psi_v.emplace_back(d2logdet[i + ipos], OrbitalSetSize); - - psi_v_list.push_back(multi_psi_v[ipos]); - dpsi_v_list.push_back(multi_dpsi_v[ipos]); - d2psi_v_list.push_back(multi_d2psi_v[ipos]); - } - - evaluateVGLMultiPos(multi_pos_copy, offload_scratch, results_scratch, psi_v_list, dpsi_v_list, d2psi_v_list); - } -} - -template class SplineC2ROMPTarget; -template class SplineC2ROMPTarget; - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.h index e41741e33c..25362d5f2f 100644 --- 
a/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.h @@ -18,305 +18,12 @@ #ifndef QMCPLUSPLUS_SPLINE_C2R_OMPTARGET_H #define QMCPLUSPLUS_SPLINE_C2R_OMPTARGET_H -#include -#include "QMCWaveFunctions/BsplineFactory/BsplineSet.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "spline2/MultiBspline.hpp" -#include "OMPTarget/OffloadAlignedAllocators.hpp" -#include "Utilities/FairDivide.h" -#include "Utilities/TimerManager.h" -#include -#include "SplineOMPTargetMultiWalkerMem.h" +#include "QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.h" namespace qmcplusplus { -/** class to match std::complex spline with BsplineSet::ValueType (real) SPOs with OpenMP offload - * @tparam ST precision of spline - * - * Requires temporage storage and multiplication of phase vectors - * The internal storage of complex spline coefficients uses double sized real arrays of ST type, aligned and padded. - * The first nComplexBands complex splines produce 2 real orbitals. - * The rest complex splines produce 1 real orbital. - * All the output orbitals are real (C2R). The maximal number of output orbitals is OrbitalSetSize. 
- */ -template -class SplineC2ROMPTarget : public BsplineSet -{ -public: - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = ST; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - // types for evaluation results - using TT = typename BsplineSet::ValueType; - using BsplineSet::GGGVector; - using BsplineSet::GradVector; - using BsplineSet::HessVector; - using BsplineSet::ValueVector; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - using ghContainer_type = VectorSoaContainer; - - template - using OffloadVector = Vector>; - template - using OffloadPosVector = VectorSoaContainer>; - -private: - /// timer for offload portion - NewTimer& offload_timer_; - ///primitive cell - CrystalLattice PrimLattice; - ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian - Tensor GGt; - ///number of complex bands - int nComplexBands; - ///multi bspline set - std::shared_ptr, OffloadAllocator>> SplineInst; - - std::shared_ptr> mKK; - std::shared_ptr> myKcart; - std::shared_ptr> GGt_offload; - std::shared_ptr> PrimLattice_G_offload; - - ResourceHandle> mw_mem_handle_; - - ///team private ratios for reduction, numVP x numTeams - Matrix> ratios_private; - ///offload scratch space, dynamically resized to the maximal need - Vector> offload_scratch; - ///result scratch space, dynamically resized to the maximal need - Vector> results_scratch; - ///psiinv and position scratch space, used to avoid allocation on the fly and faster transfer - Vector> psiinv_pos_copy; - ///position scratch space, used to avoid allocation on the fly and faster transfer - Vector> multi_pos_copy; - - void evaluateVGLMultiPos(const Vector>& multi_pos_copy, - Vector>& offload_scratch, - Vector>& results_scratch, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& 
d2psi_v_list) const; - -protected: - /// intermediate result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; - -public: - SplineC2ROMPTarget(const std::string& my_name) - : BsplineSet(my_name), - offload_timer_(createGlobalTimer("SplineC2ROMPTarget::offload", timer_level_fine)), - nComplexBands(0), - GGt_offload(std::make_shared>(9)), - PrimLattice_G_offload(std::make_shared>(9)) - {} - - SplineC2ROMPTarget(const SplineC2ROMPTarget& in); - - virtual std::string getClassName() const override { return "SplineC2ROMPTarget"; } - virtual std::string getKeyword() const override { return "SplineC2R"; } - bool isComplex() const override { return true; }; - virtual bool isOMPoffload() const override { return true; } - - void createResource(ResourceCollection& collection) const override - { - auto resource_index = collection.addResource(std::make_unique>()); - } - - void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const override - { - assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.getCastedLeader>(); - phi_leader.mw_mem_handle_ = collection.lendResource>(); - } - - void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const override - { - assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.getCastedLeader>(); - collection.takebackResource(phi_leader.mw_mem_handle_); - } - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - - inline void resizeStorage(size_t n, size_t nvals) - { - init_base(n); - size_t npad = getAlignedSize(2 * n); - myV.resize(npad); - myG.resize(npad); - myL.resize(npad); - myH.resize(npad); - mygH.resize(npad); - } - - void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } - - void gather_tables(Communicate* comm) - { - if (comm->size() == 1) - return; - const int Nbands = kPoints.size(); - const int 
Nbandgroups = comm->size(); - offset.resize(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, offset); - - for (size_t ib = 0; ib < offset.size(); ib++) - offset[ib] = offset[ib] * 2; - gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, offset); - } - - template - void create_spline(GT& xyz_g, BCT& xyz_bc) - { - resize_kpoints(); - SplineInst = std::make_shared, OffloadAllocator>>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - - app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " - << "for the coefficients in 3D spline orbital representation" << std::endl; - } - - /// this routine can not be called from threaded region - void finalizeConstruction() override - { - // map the SplineInst->getSplinePtr() structure to GPU - auto* MultiSpline = SplineInst->getSplinePtr(); - auto* restrict coefs = MultiSpline->coefs; - // attach pointers on the device to achieve deep copy - PRAGMA_OFFLOAD("omp target map(always, to: MultiSpline[0:1], coefs[0:MultiSpline->coefs_size])") - { - MultiSpline->coefs = coefs; - } - - // transfer static data to GPU - auto* mKK_ptr = mKK->data(); - PRAGMA_OFFLOAD("omp target update to(mKK_ptr[0:mKK->size()])") - auto* myKcart_ptr = myKcart->data(); - PRAGMA_OFFLOAD("omp target update to(myKcart_ptr[0:myKcart->capacity()*3])") - for (uint32_t i = 0; i < 9; i++) - { - (*GGt_offload)[i] = GGt[i]; - (*PrimLattice_G_offload)[i] = PrimLattice.G[i]; - } - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - PRAGMA_OFFLOAD("omp target update to(PrimLattice_G_ptr[0:9])") - auto* GGt_ptr = GGt_offload->data(); - PRAGMA_OFFLOAD("omp target update to(GGt_ptr[0:9])") - } - - inline void flush_zero() { SplineInst->flush_zero(); } - - /** remap kPoints to pack the double copy */ - inline void resize_kpoints() - { - nComplexBands = this->remap_kpoints(); - const int nk = kPoints.size(); - mKK = std::make_shared>(nk); - myKcart = std::make_shared>(nk); - for (size_t i = 0; i < nk; 
++i) - { - (*mKK)[i] = -dot(kPoints[i], kPoints[i]); - (*myKcart)(i) = kPoints[i]; - } - } - - void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); - - bool read_splines(hdf_archive& h5f); - - bool write_splines(hdf_archive& h5f); - - void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const; - - virtual void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override; - - virtual void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override; - - virtual void mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const override; - - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ - void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - virtual void evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) override; - - virtual void mw_evaluateVGL(const RefVectorWithLeader& sa_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const override; - - virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const override; - - void assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const; - - virtual void evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override; - - void 
assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first = 0, - int last = -1) const; - - virtual void evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - virtual void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; -}; - -extern template class SplineC2ROMPTarget; -extern template class SplineC2ROMPTarget; +template +using SplineC2ROMPTarget = SplineC2ROMPTargetT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp deleted file mode 100644 index 9498f54f12..0000000000 --- a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp +++ /dev/null @@ -1,570 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2019 QMCPACK developers. -// -// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "Concurrency/OpenMP.h" -#include "SplineR2R.h" -#include "spline2/MultiBsplineEval.hpp" -#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" -#include "Platforms/CPU/BLAS.hpp" -#include "CPU/SIMD/inner_product.hpp" - -namespace qmcplusplus -{ -template -SplineR2R::SplineR2R(const SplineR2R& in) = default; - -template -inline void SplineR2R::set_spline(SingleSplineType* spline_r, - SingleSplineType* spline_i, - int twist, - int ispline, - int level) -{ - SplineInst->copy_spline(spline_r, ispline); -} - -template -bool SplineR2R::read_splines(hdf_archive& h5f) -{ - std::ostringstream o; - o << "spline_" << MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); -} - -template -bool SplineR2R::write_splines(hdf_archive& h5f) -{ - std::ostringstream o; - o << "spline_" << MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); -} - -template -void SplineR2R::storeParamsBeforeRotation() -{ - const auto spline_ptr = SplineInst->getSplinePtr(); - const auto coefs_tot_size = spline_ptr->coefs_size; - coef_copy_ = std::make_shared>(coefs_tot_size); - - std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin()); -} - -/* - ~~ Notes for rotation ~~ - spl_coefs = Raw pointer to spline coefficients - basis_set_size = Number of spline coefs per orbital - OrbitalSetSize = Number of orbitals (excluding padding) - - spl_coefs has a complicated layout depending on dimensionality of splines. 
- Luckily, for our purposes, we can think of spl_coefs as pointing to a - matrix of size BasisSetSize x (OrbitalSetSize + padding), with the spline - index adjacent in memory. The orbital index is SIMD aligned and therefore - may include padding. - - As a result, due to SIMD alignment, Nsplines may be larger than the - actual number of splined orbitals. This means that in practice rot_mat - may be smaller than the number of 'columns' in the coefs array! - - SplineR2R spl_coef layout: - ^ | sp1 | ... | spN | pad | - | |=====|=====|=====|=====| - | | c11 | ... | c1N | 0 | - basis_set_size | c21 | ... | c2N | 0 | - | | ... | ... | ... | 0 | - | | cM1 | ... | cMN | 0 | - v |=====|=====|=====|=====| - <------ Nsplines ------> - - SplineC2C spl_coef layout: - ^ | sp1_r | sp1_i | ... | spN_r | spN_i | pad | - | |=======|=======|=======|=======|=======|=======| - | | c11_r | c11_i | ... | c1N_r | c1N_i | 0 | - basis_set_size | c21_r | c21_i | ... | c2N_r | c2N_i | 0 | - | | ... | ... | ... | ... | ... | ... | - | | cM1_r | cM1_i | ... | cMN_r | cMN_i | 0 | - v |=======|=======|=======|=======|=======|=======| - <------------------ Nsplines ------------------> - - NB: For splines (typically) BasisSetSize >> OrbitalSetSize, so the spl_coefs - "matrix" is very tall and skinny. -*/ -template -void SplineR2R::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) -{ - // SplineInst is a MultiBspline. 
See src/spline2/MultiBspline.hpp - const auto spline_ptr = SplineInst->getSplinePtr(); - assert(spline_ptr != nullptr); - const auto spl_coefs = spline_ptr->coefs; - const auto Nsplines = spline_ptr->num_splines; // May include padding - const auto coefs_tot_size = spline_ptr->coefs_size; - const auto BasisSetSize = coefs_tot_size / Nsplines; - const auto TrueNOrbs = rot_mat.size1(); // == Nsplines - padding - assert(OrbitalSetSize == rot_mat.rows()); - assert(OrbitalSetSize == rot_mat.cols()); - - if (!use_stored_copy) - { - assert(coef_copy_ != nullptr); - std::copy_n(spl_coefs, coefs_tot_size, coef_copy_->begin()); - } - - - if constexpr (std::is_same_v) - { - //Here, ST should be equal to ValueType, which will be double for R2R. Using BLAS to make things faster - BLAS::gemm('N', 'N', OrbitalSetSize, BasisSetSize, OrbitalSetSize, ST(1.0), rot_mat.data(), OrbitalSetSize, - coef_copy_->data(), Nsplines, ST(0.0), spl_coefs, Nsplines); - } - else - { - //Here, ST is float but ValueType is double for R2R. Due to issues with type conversions, just doing naive matrix multiplication in this case to not lose precision on rot_mat - for (IndexType i = 0; i < BasisSetSize; i++) - for (IndexType j = 0; j < OrbitalSetSize; j++) - { - const auto cur_elem = Nsplines * i + j; - FullPrecValueType newval{0.}; - for (IndexType k = 0; k < OrbitalSetSize; k++) - { - const auto index = i * Nsplines + k; - newval += (*coef_copy_)[index] * rot_mat[k][j]; - } - spl_coefs[cur_elem] = newval; - } - } -} - - -template -inline void SplineR2R::assign_v(int bc_sign, const vContainer_type& myV, ValueVector& psi, int first, int last) - const -{ - // protect last - last = last > kPoints.size() ? kPoints.size() : last; - - const ST signed_one = (bc_sign & 1) ? 
-1 : 1; -#pragma omp simd - for (size_t j = first; j < last; ++j) - psi[first_spo + j] = signed_one * myV[j]; -} - -template -void SplineR2R::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) -{ - const PointType& r = P.activeR(iat); - PointType ru; - int bc_sign = convertPos(r, ru); - -#pragma omp parallel - { - int first, last; - FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(bc_sign, myV, psi, first, last); - } -} - -template -void SplineR2R::evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) -{ - const bool need_resize = ratios_private.rows() < VP.getTotalNum(); - -#pragma omp parallel - { - int tid = omp_get_thread_num(); - // initialize thread private ratios - if (need_resize) - { - if (tid == 0) // just like #pragma omp master, but one fewer call to the runtime - ratios_private.resize(VP.getTotalNum(), omp_get_num_threads()); -#pragma omp barrier - } - int first, last; - FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), tid, first, last); - const int last_real = kPoints.size() < last ? 
kPoints.size() : last; - - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - const PointType& r = VP.activeR(iat); - PointType ru; - int bc_sign = convertPos(r, ru); - - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(bc_sign, myV, psi, first, last_real); - ratios_private[iat][tid] = simd::dot(psi.data() + first, psiinv.data() + first, last_real - first); - } - } - - // do the reduction manually - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - ratios[iat] = TT(0); - for (int tid = 0; tid < ratios_private.cols(); tid++) - ratios[iat] += ratios_private[iat][tid]; - } -} - -template -inline void SplineR2R::assign_vgl(int bc_sign, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi, - int first, - int last) const -{ - // protect last - last = last > kPoints.size() ? kPoints.size() : last; - - const ST signed_one = (bc_sign & 1) ? -1 : 1; - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST symGG[6] = {GGt[0], GGt[1] + GGt[3], GGt[2] + GGt[6], GGt[4], GGt[5] + GGt[7], GGt[8]}; - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - -#pragma omp simd - for (size_t j = first; j < last; ++j) - { - const size_t psiIndex = first_spo + j; - psi[psiIndex] = signed_one * myV[j]; - dpsi[psiIndex][0] = signed_one * (g00 * g0[j] + g01 * g1[j] + g02 * g2[j]); - dpsi[psiIndex][1] = signed_one * (g10 * g0[j] + g11 * g1[j] + g12 * g2[j]); - dpsi[psiIndex][2] = signed_one * (g20 * g0[j] + g21 * g1[j] + g22 * g2[j]); - d2psi[psiIndex] = signed_one 
* SymTrace(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], symGG); - } -} - -/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ -template -inline void SplineR2R::assign_vgl_from_l(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) -{ - const ST signed_one = (bc_sign & 1) ? -1 : 1; - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - -#pragma omp simd - for (int psiIndex = first_spo; psiIndex < last_spo; ++psiIndex) - { - const size_t j = psiIndex - first_spo; - psi[psiIndex] = signed_one * myV[j]; - dpsi[psiIndex][0] = signed_one * g0[j]; - dpsi[psiIndex][1] = signed_one * g1[j]; - dpsi[psiIndex][2] = signed_one * g2[j]; - d2psi[psiIndex] = signed_one * myL[j]; - } -} - -template -void SplineR2R::evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) -{ - const PointType& r = P.activeR(iat); - PointType ru; - int bc_sign = convertPos(r, ru); - -#pragma omp parallel - { - int first, last; - FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgl(bc_sign, psi, dpsi, d2psi, first, last); - } -} - -template -void SplineR2R::assign_vgh(int bc_sign, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const -{ - // protect last - last = last > kPoints.size() ? kPoints.size() : last; - - const ST signed_one = (bc_sign & 1) ? 
-1 : 1; - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - -#pragma omp simd - for (size_t j = first; j < last; ++j) - { - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[j] + g01 * g1[j] + g02 * g2[j]; - const ST dY_r = g10 * g0[j] + g11 * g1[j] + g12 * g2[j]; - const ST dZ_r = g20 * g0[j] + g21 * g1[j] + g22 * g2[j]; - - const size_t psiIndex = j + first_spo; - psi[psiIndex] = signed_one * myV[j]; - dpsi[psiIndex][0] = signed_one * dX_r; - dpsi[psiIndex][1] = signed_one * dY_r; - dpsi[psiIndex][2] = signed_one * dZ_r; - - const ST h_xx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g00, g01, g02); - const ST h_xy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g10, g11, g12); - const ST h_xz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g20, g21, g22); - const ST h_yx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g00, g01, g02); - const ST h_yy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g10, g11, g12); - const ST h_yz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g20, g21, g22); - const ST h_zx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g20, g21, g22, g00, g01, g02); - const ST h_zy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g20, g21, g22, g10, g11, g12); - const ST h_zz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g20, g21, 
g22, g20, g21, g22); - - grad_grad_psi[psiIndex][0] = signed_one * h_xx_r; - grad_grad_psi[psiIndex][1] = signed_one * h_xy_r; - grad_grad_psi[psiIndex][2] = signed_one * h_xz_r; - grad_grad_psi[psiIndex][3] = signed_one * h_yx_r; - grad_grad_psi[psiIndex][4] = signed_one * h_yy_r; - grad_grad_psi[psiIndex][5] = signed_one * h_yz_r; - grad_grad_psi[psiIndex][6] = signed_one * h_zx_r; - grad_grad_psi[psiIndex][7] = signed_one * h_zy_r; - grad_grad_psi[psiIndex][8] = signed_one * h_zz_r; - } -} - -template -void SplineR2R::evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) -{ - const PointType& r = P.activeR(iat); - PointType ru; - int bc_sign = convertPos(r, ru); - -#pragma omp parallel - { - int first, last; - FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgh(bc_sign, psi, dpsi, grad_grad_psi, first, last); - } -} - -template -void SplineR2R::assign_vghgh(int bc_sign, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first, - int last) const -{ - // protect last - last = last < 0 ? kPoints.size() : (last > kPoints.size() ? kPoints.size() : last); - - const ST signed_one = (bc_sign & 1) ? 
-1 : 1; - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - - const ST* restrict gh000 = mygH.data(0); - const ST* restrict gh001 = mygH.data(1); - const ST* restrict gh002 = mygH.data(2); - const ST* restrict gh011 = mygH.data(3); - const ST* restrict gh012 = mygH.data(4); - const ST* restrict gh022 = mygH.data(5); - const ST* restrict gh111 = mygH.data(6); - const ST* restrict gh112 = mygH.data(7); - const ST* restrict gh122 = mygH.data(8); - const ST* restrict gh222 = mygH.data(9); - - //SIMD doesn't work quite right yet. Comment out until further debugging. - //#pragma omp simd - for (size_t j = first; j < last; ++j) - { - const ST val_r = myV[j]; - - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[j] + g01 * g1[j] + g02 * g2[j]; - const ST dY_r = g10 * g0[j] + g11 * g1[j] + g12 * g2[j]; - const ST dZ_r = g20 * g0[j] + g21 * g1[j] + g22 * g2[j]; - - const size_t psiIndex = j + first_spo; - psi[psiIndex] = signed_one * val_r; - dpsi[psiIndex][0] = signed_one * dX_r; - dpsi[psiIndex][1] = signed_one * dY_r; - dpsi[psiIndex][2] = signed_one * dZ_r; - - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. 
- const ST f_xx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g00, g01, g02); - const ST f_xy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g10, g11, g12); - const ST f_xz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g20, g21, g22); - const ST f_yy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g10, g11, g12); - const ST f_yz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g20, g21, g22); - const ST f_zz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g20, g21, g22, g20, g21, g22); - - /* const ST h_xx_r=f_xx_r; - const ST h_xy_r=f_xy_r+(kX*dY_i+kY*dX_i)-kX*kY*val_r; - const ST h_xz_r=f_xz_r+(kX*dZ_i+kZ*dX_i)-kX*kZ*val_r; - const ST h_yy_r=f_yy_r+2*kY*dY_i-kY*kY*val_r; - const ST h_yz_r=f_yz_r+(kY*dZ_i+kZ*dY_i)-kY*kZ*val_r; - const ST h_zz_r=f_zz_r+2*kZ*dZ_i-kZ*kZ*val_r; */ - - grad_grad_psi[psiIndex][0] = f_xx_r * signed_one; - grad_grad_psi[psiIndex][1] = f_xy_r * signed_one; - grad_grad_psi[psiIndex][2] = f_xz_r * signed_one; - grad_grad_psi[psiIndex][4] = f_yy_r * signed_one; - grad_grad_psi[psiIndex][5] = f_yz_r * signed_one; - grad_grad_psi[psiIndex][8] = f_zz_r * signed_one; - - //symmetry: - grad_grad_psi[psiIndex][3] = grad_grad_psi[psiIndex][1]; - grad_grad_psi[psiIndex][6] = grad_grad_psi[psiIndex][2]; - grad_grad_psi[psiIndex][7] = grad_grad_psi[psiIndex][5]; - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. 
- - const ST f3_xxx_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) - /* const ST gh_xxx_r= f3_xxx_r + 3*kX*f_xx_i - 3*kX*kX*dX_r 
- kX*kX*kX*val_i; - const ST gh_xxy_r= f3_xxy_r +(kY*f_xx_i+2*kX*f_xy_i) - (kX*kX*dY_r+2*kX*kY*dX_r)-kX*kX*kY*val_i; - const ST gh_xxz_r= f3_xxz_r +(kZ*f_xx_i+2*kX*f_xz_i) - (kX*kX*dZ_r+2*kX*kZ*dX_r)-kX*kX*kZ*val_i; - const ST gh_xyy_r= f3_xyy_r +(2*kY*f_xy_i+kX*f_yy_i) - (2*kX*kY*dY_r+kY*kY*dX_r)-kX*kY*kY*val_i; - const ST gh_xyz_r= f3_xyz_r +(kX*f_yz_i+kY*f_xz_i+kZ*f_xy_i)-(kX*kY*dZ_r+kY*kZ*dX_r+kZ*kX*dY_r) - kX*kY*kZ*val_i; - const ST gh_xzz_r= f3_xzz_r +(2*kZ*f_xz_i+kX*f_zz_i) - (2*kX*kZ*dZ_r+kZ*kZ*dX_r)-kX*kZ*kZ*val_i; - const ST gh_yyy_r= f3_yyy_r + 3*kY*f_yy_i - 3*kY*kY*dY_r - kY*kY*kY*val_i; - const ST gh_yyz_r= f3_yyz_r +(kZ*f_yy_i+2*kY*f_yz_i) - (kY*kY*dZ_r+2*kY*kZ*dY_r)-kY*kY*kZ*val_i; - const ST gh_yzz_r= f3_yzz_r +(2*kZ*f_yz_i+kY*f_zz_i) - (2*kY*kZ*dZ_r+kZ*kZ*dY_r)-kY*kZ*kZ*val_i; - const ST gh_zzz_r= f3_zzz_r + 3*kZ*f_zz_i - 3*kZ*kZ*dZ_r - kZ*kZ*kZ*val_i;*/ - //[x][xx] //These are the unique entries - grad_grad_grad_psi[psiIndex][0][0] = signed_one * f3_xxx_r; - grad_grad_grad_psi[psiIndex][0][1] = signed_one * f3_xxy_r; - grad_grad_grad_psi[psiIndex][0][2] = signed_one * f3_xxz_r; - grad_grad_grad_psi[psiIndex][0][4] = signed_one * f3_xyy_r; - grad_grad_grad_psi[psiIndex][0][5] = signed_one * f3_xyz_r; - grad_grad_grad_psi[psiIndex][0][8] = signed_one * f3_xzz_r; - - //filling in the symmetric terms. 
Filling out the xij terms - grad_grad_grad_psi[psiIndex][0][3] = grad_grad_grad_psi[psiIndex][0][1]; - grad_grad_grad_psi[psiIndex][0][6] = grad_grad_grad_psi[psiIndex][0][2]; - grad_grad_grad_psi[psiIndex][0][7] = grad_grad_grad_psi[psiIndex][0][5]; - - //Now for everything that's a permutation of the above: - grad_grad_grad_psi[psiIndex][1][0] = grad_grad_grad_psi[psiIndex][0][1]; - grad_grad_grad_psi[psiIndex][1][1] = grad_grad_grad_psi[psiIndex][0][4]; - grad_grad_grad_psi[psiIndex][1][2] = grad_grad_grad_psi[psiIndex][0][5]; - grad_grad_grad_psi[psiIndex][1][3] = grad_grad_grad_psi[psiIndex][0][4]; - grad_grad_grad_psi[psiIndex][1][6] = grad_grad_grad_psi[psiIndex][0][5]; - - grad_grad_grad_psi[psiIndex][2][0] = grad_grad_grad_psi[psiIndex][0][2]; - grad_grad_grad_psi[psiIndex][2][1] = grad_grad_grad_psi[psiIndex][0][5]; - grad_grad_grad_psi[psiIndex][2][2] = grad_grad_grad_psi[psiIndex][0][8]; - grad_grad_grad_psi[psiIndex][2][3] = grad_grad_grad_psi[psiIndex][0][5]; - grad_grad_grad_psi[psiIndex][2][6] = grad_grad_grad_psi[psiIndex][0][8]; - - grad_grad_grad_psi[psiIndex][1][4] = signed_one * f3_yyy_r; - grad_grad_grad_psi[psiIndex][1][5] = signed_one * f3_yyz_r; - grad_grad_grad_psi[psiIndex][1][8] = signed_one * f3_yzz_r; - - grad_grad_grad_psi[psiIndex][1][7] = grad_grad_grad_psi[psiIndex][1][5]; - grad_grad_grad_psi[psiIndex][2][4] = grad_grad_grad_psi[psiIndex][1][5]; - grad_grad_grad_psi[psiIndex][2][5] = grad_grad_grad_psi[psiIndex][1][8]; - grad_grad_grad_psi[psiIndex][2][7] = grad_grad_grad_psi[psiIndex][1][8]; - - grad_grad_grad_psi[psiIndex][2][8] = signed_one * f3_zzz_r; - } -} - -template -void SplineR2R::evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) -{ - const PointType& r = P.activeR(iat); - PointType ru; - int bc_sign = convertPos(r, ru); - -#pragma omp parallel - { - int first, last; - FairDivideAligned(psi.size(), getAlignment(), 
omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); - assign_vghgh(bc_sign, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first, last); - } -} - -template class SplineR2R; -template class SplineR2R; - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.h b/src/QMCWaveFunctions/BsplineFactory/SplineR2R.h index 3de6fc33fc..ea51f936f2 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2R.h @@ -16,208 +16,12 @@ #ifndef QMCPLUSPLUS_SPLINE_R2R_H #define QMCPLUSPLUS_SPLINE_R2R_H -#include -#include "QMCWaveFunctions/BsplineFactory/BsplineSet.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "spline2/MultiBspline.hpp" -#include "Utilities/FairDivide.h" +#include "QMCWaveFunctions/BsplineFactory/SplineR2RT.h" namespace qmcplusplus { -/** class to match ST real spline with BsplineSet::ValueType (real) SPOs - * @tparam ST precision of spline - * - * Requires temporage storage and multiplication of the sign of the real part of the phase - * Internal storage ST type arrays are aligned and padded. - */ -template -class SplineR2R : public BsplineSet -{ -public: - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = ST; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - // types for evaluation results - using TT = typename BsplineSet::ValueType; - using BsplineSet::GGGVector; - using BsplineSet::GradVector; - using BsplineSet::HessVector; - using BsplineSet::ValueVector; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - using ghContainer_type = VectorSoaContainer; - -private: - bool IsGamma; - ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. 
Hessian - Tensor GGt; - ///multi bspline set - std::shared_ptr> SplineInst; - - ///Copy of original splines for orbital rotation - std::shared_ptr> coef_copy_; - - ///thread private ratios for reduction when using nested threading, numVP x numThread - Matrix ratios_private; - - -protected: - ///primitive cell - CrystalLattice PrimLattice; - /// intermediate result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; - -public: - SplineR2R(const std::string& my_name) : BsplineSet(my_name) {} - - SplineR2R(const SplineR2R& in); - virtual std::string getClassName() const override { return "SplineR2R"; } - virtual std::string getKeyword() const override { return "SplineR2R"; } - bool isComplex() const override { return false; }; - bool isRotationSupported() const override { return true; } - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - - /// Store an original copy of the spline coefficients for orbital rotation - void storeParamsBeforeRotation() override; - - /* - Implements orbital rotations via [1,2]. - Should be called by RotatedSPOs::apply_rotation() - - This implementation requires that NSPOs > Nelec. In other words, - if you want to run a orbopt wfn, you must include some virtual orbitals! - - Some results (using older Berkeley branch) were published in [3]. 
- - [1] Filippi & Fahy, JCP 112, (2000) - [2] Toulouse & Umrigar, JCP 126, (2007) - [3] Townsend et al., PRB 102, (2020) - */ - void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) override; - - inline void resizeStorage(size_t n, size_t nvals) - { - init_base(n); - const size_t npad = getAlignedSize(n); - myV.resize(npad); - myG.resize(npad); - myL.resize(npad); - myH.resize(npad); - mygH.resize(npad); - - IsGamma = ((HalfG[0] == 0) && (HalfG[1] == 0) && (HalfG[2] == 0)); - } - - void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } - - void gather_tables(Communicate* comm) - { - if (comm->size() == 1) - return; - const int Nbands = kPoints.size(); - const int Nbandgroups = comm->size(); - offset.resize(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, offset); - gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, offset); - } - - template - void create_spline(GT& xyz_g, BCT& xyz_bc) - { - GGt = dot(transpose(PrimLattice.G), PrimLattice.G); - SplineInst = std::make_shared>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - - app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " - << "for the coefficients in 3D spline orbital representation" << std::endl; - } - - inline void flush_zero() { SplineInst->flush_zero(); } - - void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); - - bool read_splines(hdf_archive& h5f); - - bool write_splines(hdf_archive& h5f); - - /** convert position in PrimLattice unit and return sign */ - inline int convertPos(const PointType& r, PointType& ru) - { - ru = PrimLattice.toUnit(r); - int bc_sign = 0; - for (int i = 0; i < D; i++) - if (-std::numeric_limits::epsilon() < ru[i] && ru[i] < 0) - ru[i] = ST(0.0); - else - { - ST img = std::floor(ru[i]); - ru[i] -= img; - bc_sign += HalfG[i] * (int)img; - } - return bc_sign; - } - - void assign_v(int bc_sign, const 
vContainer_type& myV, ValueVector& psi, int first, int last) const; - - void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override; - - void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override; - - void assign_vgl(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last) const; - - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ - void assign_vgl_from_l(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - void evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) override; - - void assign_vgh(int bc_sign, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi, int first, int last) - const; - - void evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override; - - void assign_vghgh(int bc_sign, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first = 0, - int last = -1) const; - - void evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; -}; - -extern template class SplineR2R; -extern template class SplineR2R; +template +using SplineR2R = SplineR2RT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h index 1e2a841e13..88265ffbec 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h @@ -15,8 +15,8 @@ // at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// -#ifndef 
QMCPLUSPLUS_SPLINE_R2R_H -#define QMCPLUSPLUS_SPLINE_R2R_H +#ifndef QMCPLUSPLUS_SPLINE_R2RT_H +#define QMCPLUSPLUS_SPLINE_R2RT_H #include "OhmmsSoA/VectorSoaContainer.h" #include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h" diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h b/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h index fae8a80fe1..4758342104 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h @@ -25,274 +25,14 @@ */ #ifndef QMCPLUSPLUS_SPLINESET_READER_H #define QMCPLUSPLUS_SPLINESET_READER_H -#include "mpi/collectives.h" -#include "mpi/point2point.h" -#include "Utilities/FairDivide.h" -#include "QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h" -#include "Utilities/ProgressReportEngine.h" -#include "QMCWaveFunctions/BsplineFactory/einspline_helper.hpp" -#include + +#include "Configuration.h" +#include "QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h" namespace qmcplusplus { -/** General SplineSetReader to handle any unitcell - */ -template -struct SplineSetReader : public BsplineReaderBase -{ - using splineset_t = SA; - using DataType = typename splineset_t::DataType; - using SplineType = typename splineset_t::SplineType; - - Array, 3> FFTbox; - Array splineData_r, splineData_i; - double rotate_phase_r, rotate_phase_i; - UBspline_3d_d* spline_r; - UBspline_3d_d* spline_i; - splineset_t* bspline; - fftw_plan FFTplan; - - SplineSetReader(EinsplineSetBuilder* e) - : BsplineReaderBase(e), spline_r(nullptr), spline_i(nullptr), bspline(nullptr), FFTplan(nullptr) - {} - - ~SplineSetReader() override { clear(); } - - void clear() - { - einspline::destroy(spline_r); - einspline::destroy(spline_i); - if (FFTplan != nullptr) - fftw_destroy_plan(FFTplan); - FFTplan = nullptr; - } - - // set info for Hybrid - virtual void initialize_hybridrep_atomic_centers() {} - // transform cG to radial functions - virtual void create_atomic_centers_Gspace(Vector>& cG, 
Communicate& band_group_comm, int iorb) {} - - std::unique_ptr create_spline_set(const std::string& my_name, - int spin, - const BandInfoGroup& bandgroup) override - { - ReportEngine PRE("SplineSetReader", "create_spline_set(spin,SPE*)"); - //Timer c_prep, c_unpack,c_fft, c_phase, c_spline, c_newphase, c_h5, c_init; - //double t_prep=0.0, t_unpack=0.0, t_fft=0.0, t_phase=0.0, t_spline=0.0, t_newphase=0.0, t_h5=0.0, t_init=0.0; - bspline = new splineset_t(my_name); - app_log() << " ClassName = " << bspline->getClassName() << std::endl; - if (bspline->isComplex()) - app_log() << " Using complex einspline table" << std::endl; - else - app_log() << " Using real einspline table" << std::endl; - - // set info for Hybrid - this->initialize_hybridrep_atomic_centers(); - - //baseclass handles twists - check_twists(bspline, bandgroup); - - Ugrid xyz_grid[3]; - - typename splineset_t::BCType xyz_bc[3]; - bool havePsig = set_grid(bspline->HalfG, xyz_grid, xyz_bc); - if (!havePsig) - myComm->barrier_and_abort("SplineSetReader needs psi_g. Set precision=\"double\"."); - bspline->create_spline(xyz_grid, xyz_bc); - - std::ostringstream oo; - oo << bandgroup.myName << ".g" << MeshSize[0] << "x" << MeshSize[1] << "x" << MeshSize[2] << ".h5"; - - const std::string splinefile(oo.str()); - bool root = (myComm->rank() == 0); - int foundspline = 0; - Timer now; - if (root) - { - now.restart(); - hdf_archive h5f(myComm); - foundspline = h5f.open(splinefile, H5F_ACC_RDONLY); - if (foundspline) - { - std::string aname("none"); - foundspline = h5f.readEntry(aname, "class_name"); - foundspline = (aname.find(bspline->getKeyword()) != std::string::npos); - } - if (foundspline) - { - int sizeD = 0; - foundspline = h5f.readEntry(sizeD, "sizeof"); - foundspline = (sizeD == sizeof(typename splineset_t::DataType)); - } - if (foundspline) - { - foundspline = bspline->read_splines(h5f); - if (foundspline) - app_log() << " Successfully restored coefficients from " << splinefile << ". 
The reading time is " - << now.elapsed() << " sec." << std::endl; - } - h5f.close(); - } - myComm->bcast(foundspline); - if (foundspline) - { - now.restart(); - bspline->bcast_tables(myComm); - app_log() << " SplineSetReader bcast the full table " << now.elapsed() << " sec." << std::endl; - app_log().flush(); - } - else - { - bspline->flush_zero(); - - int nx = MeshSize[0]; - int ny = MeshSize[1]; - int nz = MeshSize[2]; - if (havePsig) //perform FFT using FFTW - { - FFTbox.resize(nx, ny, nz); - FFTplan = fftw_plan_dft_3d(nx, ny, nz, reinterpret_cast(FFTbox.data()), - reinterpret_cast(FFTbox.data()), +1, FFTW_ESTIMATE); - splineData_r.resize(nx, ny, nz); - if (bspline->isComplex()) - splineData_i.resize(nx, ny, nz); - - TinyVector start(0.0); - TinyVector end(1.0); - spline_r = einspline::create(spline_r, start, end, MeshSize, bspline->HalfG); - if (bspline->isComplex()) - spline_i = einspline::create(spline_i, start, end, MeshSize, bspline->HalfG); - - now.restart(); - initialize_spline_pio_gather(spin, bandgroup); - app_log() << " SplineSetReader initialize_spline_pio " << now.elapsed() << " sec" << std::endl; - - fftw_destroy_plan(FFTplan); - FFTplan = NULL; - } - else //why, don't know - initialize_spline_psi_r(spin, bandgroup); - if (saveSplineCoefs && root) - { - now.restart(); - hdf_archive h5f; - h5f.create(splinefile); - std::string classname = bspline->getClassName(); - h5f.write(classname, "class_name"); - int sizeD = sizeof(typename splineset_t::DataType); - h5f.write(sizeD, "sizeof"); - bspline->write_splines(h5f); - h5f.close(); - app_log() << " Stored spline coefficients in " << splinefile << " for potential reuse. The writing time is " - << now.elapsed() << " sec." 
<< std::endl; - } - } - - clear(); - return std::unique_ptr{bspline}; - } - - /** fft and spline cG - * @param cG psi_g to be processed - * @param ti twist index - * @param iorb orbital index - * - * Perform FFT and spline to spline_r and spline_i - */ - inline void fft_spline(Vector>& cG, int ti) - { - unpack4fftw(cG, mybuilder->Gvecs[0], MeshSize, FFTbox); - fftw_execute(FFTplan); - if (bspline->isComplex()) - { - if (rotate) - fix_phase_rotate_c2c(FFTbox, splineData_r, splineData_i, mybuilder->primcell_kpoints[ti], rotate_phase_r, - rotate_phase_i); - else - { - split_real_components_c2c(FFTbox, splineData_r, splineData_i); - rotate_phase_r = 1.0; - rotate_phase_i = 0.0; - } - einspline::set(spline_r, splineData_r.data()); - einspline::set(spline_i, splineData_i.data()); - } - else - { - fix_phase_rotate_c2r(FFTbox, splineData_r, mybuilder->primcell_kpoints[ti], rotate_phase_r, rotate_phase_i); - einspline::set(spline_r, splineData_r.data()); - } - } - - - /** initialize the splines - */ - void initialize_spline_pio_gather(int spin, const BandInfoGroup& bandgroup) - { - //distribute bands over processor groups - int Nbands = bandgroup.getNumDistinctOrbitals(); - const int Nprocs = myComm->size(); - const int Nbandgroups = std::min(Nbands, Nprocs); - Communicate band_group_comm(*myComm, Nbandgroups); - std::vector band_groups(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, band_groups); - int iorb_first = band_groups[band_group_comm.getGroupID()]; - int iorb_last = band_groups[band_group_comm.getGroupID() + 1]; - - app_log() << "Start transforming plane waves to 3D B-Splines." 
<< std::endl; - hdf_archive h5f(&band_group_comm, false); - Vector> cG(mybuilder->Gvecs[0].size()); - const std::vector& cur_bands = bandgroup.myBands; - if (band_group_comm.isGroupLeader()) - h5f.open(mybuilder->H5FileName, H5F_ACC_RDONLY); - for (int iorb = iorb_first; iorb < iorb_last; iorb++) - { - if (band_group_comm.isGroupLeader()) - { - int iorb_h5 = bspline->BandIndexMap[iorb]; - int ti = cur_bands[iorb_h5].TwistIndex; - std::string s = psi_g_path(ti, spin, cur_bands[iorb_h5].BandIndex); - if (!h5f.readEntry(cG, s)) - { - std::ostringstream msg; - msg << "SplineSetReader Failed to read band(s) from h5 file. " - << "Attempted dataset " << s << " with " << cG.size() << " complex numbers." << std::endl; - throw std::runtime_error(msg.str()); - } - double total_norm = compute_norm(cG); - if ((checkNorm) && (std::abs(total_norm - 1.0) > PW_COEFF_NORM_TOLERANCE)) - { - std::ostringstream msg; - msg << "SplineSetReader The orbital " << iorb_h5 << " has a wrong norm " << total_norm - << ", computed from plane wave coefficients!" << std::endl - << "This may indicate a problem with the HDF5 library versions used " - << "during wavefunction conversion or read." 
<< std::endl; - throw std::runtime_error(msg.str()); - } - fft_spline(cG, ti); - bspline->set_spline(spline_r, spline_i, cur_bands[iorb_h5].TwistIndex, iorb, 0); - } - this->create_atomic_centers_Gspace(cG, band_group_comm, iorb); - } - - myComm->barrier(); - Timer now; - if (band_group_comm.isGroupLeader()) - { - now.restart(); - bspline->gather_tables(band_group_comm.getGroupLeaderComm()); - app_log() << " Time to gather the table = " << now.elapsed() << std::endl; - } - now.restart(); - bspline->bcast_tables(myComm); - app_log() << " Time to bcast the table = " << now.elapsed() << std::endl; - } +template +using SplineSetReader = SplineSetReaderT; - void initialize_spline_psi_r(int spin, const BandInfoGroup& bandgroup) - { - // old implementation buried in the history - myComm->barrier_and_abort("SplineSetReaderP initialize_spline_psi_r implementation not finished."); - } -}; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h b/src/QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h index 816561008c..3fa31272e1 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h @@ -22,6 +22,10 @@ #include "Utilities/FairDivide.h" #include "mpi/collectives.h" #include "mpi/point2point.h" +#include "QMCWaveFunctions/BsplineFactory/einspline_helper.hpp" +#include "Utilities/ProgressReportEngine.h" +#include "QMCWaveFunctions/EinsplineSetBuilderT.h" +#include "fftw3.h" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/BsplineFactory/createBsplineReader.h b/src/QMCWaveFunctions/BsplineFactory/createBsplineReader.h index e8a9a4972c..cce9148653 100644 --- a/src/QMCWaveFunctions/BsplineFactory/createBsplineReader.h +++ b/src/QMCWaveFunctions/BsplineFactory/createBsplineReader.h @@ -13,42 +13,6 @@ #ifndef QMCPLUSPLUS_CREATE_BSPLINE_READER_H #define QMCPLUSPLUS_CREATE_BSPLINE_READER_H -#include -#include +#include 
"QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h" -namespace qmcplusplus -{ -///forward declaration -struct BsplineReaderBase; -class EinsplineSetBuilder; - -/** create a reader which handles complex (double size real) splines, C2R or C2C case - * spline storage and computation precision is double - */ -std::unique_ptr createBsplineComplexDouble(EinsplineSetBuilder* e, - bool hybrid_rep, - const std::string& useGPU); - -/** create a reader which handles complex (double size real) splines, C2R or C2C case - * spline storage and computation precision is float - */ -std::unique_ptr createBsplineComplexSingle(EinsplineSetBuilder* e, - bool hybrid_rep, - const std::string& useGPU); - -/** create a reader which handles real splines, R2R case - * spline storage and computation precision is double - */ -std::unique_ptr createBsplineRealDouble(EinsplineSetBuilder* e, - bool hybrid_rep, - const std::string& useGPU); - -/** create a reader which handles real splines, R2R case - * spline storage and computation precision is float - */ -std::unique_ptr createBsplineRealSingle(EinsplineSetBuilder* e, - bool hybrid_rep, - const std::string& useGPU); - -} // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.cpp b/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.cpp index 2a20b4a5a4..6e3007323b 100644 --- a/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.cpp @@ -129,10 +129,10 @@ struct CreateComplexHelper> if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." 
<< std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else - aReader = std::make_unique>>(e); + aReader = std::make_unique>>(e); } else { @@ -192,7 +192,7 @@ std::unique_ptr> createBsplineComplexDoubleT(EinsplineSetB { return CreateComplexHelper::createDouble(e, hybrid_rep, useGPU); } - +#ifdef QMC_COMPLEX template std::unique_ptr>> createBsplineComplexDoubleT>( EinsplineSetBuilderT>* e, bool hybrid_rep, @@ -203,6 +203,7 @@ template std::unique_ptr>> createBspline bool hybrid_rep, const std::string& useGPU); +#endif template std::unique_ptr> createBsplineComplexDoubleT(EinsplineSetBuilderT* e, bool hybrid_rep, const std::string& useGPU); @@ -220,6 +221,7 @@ std::unique_ptr> createBsplineComplexSingleT(EinsplineSetB return CreateComplexHelper::createSingle(e, hybrid_rep, useGPU); } +#ifdef QMC_COMPLEX template std::unique_ptr>> createBsplineComplexSingleT>( EinsplineSetBuilderT>* e, bool hybrid_rep, @@ -229,6 +231,7 @@ template std::unique_ptr>> createBspline EinsplineSetBuilderT>* e, bool hybrid_rep, const std::string& useGPU); +#endif template std::unique_ptr> createBsplineComplexSingleT(EinsplineSetBuilderT* e, bool hybrid_rep, diff --git a/src/QMCWaveFunctions/BsplineFactory/createComplexDouble.cpp b/src/QMCWaveFunctions/BsplineFactory/createComplexDouble.cpp index 202c79f469..3f21d17818 100644 --- a/src/QMCWaveFunctions/BsplineFactory/createComplexDouble.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/createComplexDouble.cpp @@ -23,16 +23,16 @@ #include "SplineC2COMPTarget.h" #include "HybridRepCplx.h" #include -#include "einspline_helper.hpp" -#include "BsplineReaderBase.h" -#include "SplineSetReader.h" -#include "HybridRepSetReader.h" +#include "QMCWaveFunctions/BsplineFactory/einspline_helper.hpp" +#include "QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h" +#include "QMCWaveFunctions/BsplineFactory/SplineSetReader.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h" namespace qmcplusplus { -std::unique_ptr 
createBsplineComplexDouble(EinsplineSetBuilder* e, - bool hybrid_rep, - const std::string& useGPU) +std::unique_ptr createBsplineComplexDoubleT(EinsplineSetBuilder* e, + bool hybrid_rep, + const std::string& useGPU) { using RealType = OHMMS_PRECISION; std::unique_ptr aReader; @@ -45,7 +45,7 @@ std::unique_ptr createBsplineComplexDouble(EinsplineSetBuilde if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." << std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else aReader = std::make_unique>>(e); @@ -56,7 +56,7 @@ std::unique_ptr createBsplineComplexDouble(EinsplineSetBuilde if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." << std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else aReader = std::make_unique>>(e); @@ -69,7 +69,7 @@ std::unique_ptr createBsplineComplexDouble(EinsplineSetBuilde if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." << std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else aReader = std::make_unique>>(e); @@ -80,7 +80,7 @@ std::unique_ptr createBsplineComplexDouble(EinsplineSetBuilde if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." 
<< std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else aReader = std::make_unique>>(e); diff --git a/src/QMCWaveFunctions/BsplineFactory/createComplexSingle.cpp b/src/QMCWaveFunctions/BsplineFactory/createComplexSingle.cpp index 9711a404fe..021ac94bca 100644 --- a/src/QMCWaveFunctions/BsplineFactory/createComplexSingle.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/createComplexSingle.cpp @@ -23,16 +23,16 @@ #include "SplineC2COMPTarget.h" #include "HybridRepCplx.h" #include -#include "einspline_helper.hpp" -#include "BsplineReaderBase.h" -#include "SplineSetReader.h" -#include "HybridRepSetReader.h" +#include "QMCWaveFunctions/BsplineFactory/einspline_helper.hpp" +#include "QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h" +#include "QMCWaveFunctions/BsplineFactory/SplineSetReader.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h" namespace qmcplusplus { -std::unique_ptr createBsplineComplexSingle(EinsplineSetBuilder* e, - bool hybrid_rep, - const std::string& useGPU) +std::unique_ptr createBsplineComplexSingleT(EinsplineSetBuilder* e, + bool hybrid_rep, + const std::string& useGPU) { using RealType = OHMMS_PRECISION; std::unique_ptr aReader; @@ -45,7 +45,7 @@ std::unique_ptr createBsplineComplexSingle(EinsplineSetBuilde if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." << std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else aReader = std::make_unique>>(e); @@ -56,7 +56,7 @@ std::unique_ptr createBsplineComplexSingle(EinsplineSetBuilde if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." << std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else aReader = std::make_unique>>(e); @@ -69,7 +69,7 @@ std::unique_ptr createBsplineComplexSingle(EinsplineSetBuilde if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." 
<< std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else aReader = std::make_unique>>(e); @@ -80,7 +80,7 @@ std::unique_ptr createBsplineComplexSingle(EinsplineSetBuilde if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." << std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else aReader = std::make_unique>>(e); diff --git a/src/QMCWaveFunctions/BsplineFactory/createRealDouble.cpp b/src/QMCWaveFunctions/BsplineFactory/createRealDouble.cpp index 1dfd43d5f2..08089fbc6c 100644 --- a/src/QMCWaveFunctions/BsplineFactory/createRealDouble.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/createRealDouble.cpp @@ -18,10 +18,10 @@ #include "SplineR2R.h" #include "HybridRepReal.h" #include -#include "einspline_helper.hpp" -#include "BsplineReaderBase.h" -#include "SplineSetReader.h" -#include "HybridRepSetReader.h" +#include "QMCWaveFunctions/BsplineFactory/einspline_helper.hpp" +#include "QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h" +#include "QMCWaveFunctions/BsplineFactory/SplineSetReader.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h" namespace qmcplusplus { @@ -39,7 +39,7 @@ std::unique_ptr createBsplineRealDouble(EinsplineSetBuilder* if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." 
<< std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else aReader = std::make_unique>>(e); diff --git a/src/QMCWaveFunctions/BsplineFactory/createRealSingle.cpp b/src/QMCWaveFunctions/BsplineFactory/createRealSingle.cpp index 5b5a3a2924..9d6299bd65 100644 --- a/src/QMCWaveFunctions/BsplineFactory/createRealSingle.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/createRealSingle.cpp @@ -18,16 +18,16 @@ #include "SplineR2R.h" #include "HybridRepReal.h" #include -#include "einspline_helper.hpp" -#include "BsplineReaderBase.h" -#include "SplineSetReader.h" -#include "HybridRepSetReader.h" +#include "QMCWaveFunctions/BsplineFactory/einspline_helper.hpp" +#include "QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h" +#include "QMCWaveFunctions/BsplineFactory/SplineSetReader.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h" namespace qmcplusplus { -std::unique_ptr createBsplineRealSingle(EinsplineSetBuilder* e, - bool hybrid_rep, - const std::string& useGPU) +std::unique_ptr createBsplineRealSingleT(EinsplineSetBuilder* e, + bool hybrid_rep, + const std::string& useGPU) { app_summary() << " Using real valued spline SPOs with real single precision storage (R2R)." << std::endl; if (CPUOMPTargetSelector::selectPlatform(useGPU) == PlatformKind::OMPTARGET) @@ -39,7 +39,7 @@ std::unique_ptr createBsplineRealSingle(EinsplineSetBuilder* if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." 
<< std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else aReader = std::make_unique>>(e); diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt index fcc29dbd9a..b99a69ea33 100644 --- a/src/QMCWaveFunctions/CMakeLists.txt +++ b/src/QMCWaveFunctions/CMakeLists.txt @@ -23,36 +23,23 @@ add_subdirectory(detail) set(WFBASE_SRCS OptimizableFunctorBase.cpp - VariableSet.cpp VariableSetT.cpp WaveFunctionPool.cpp WaveFunctionComponent.cpp WaveFunctionComponentBuilder.cpp - SPOSetBuilder.cpp SPOSetBuilderT.cpp SPOInfo.cpp SPOSetInfo.cpp SPOSetInputInfo.cpp - SPOSet.cpp SPOSetT.cpp - CompositeSPOSet.cpp CompositeSPOSetT.cpp - HarmonicOscillator/SHOSet.cpp HarmonicOscillator/SHOSetT.cpp - HarmonicOscillator/SHOSetBuilder.cpp HarmonicOscillator/SHOSetBuilderT.cpp ExampleHeBuilder.cpp ExampleHeComponent.cpp RotatedSPOsT.cpp SpinorSetT.cpp) -if(NOT QMC_COMPLEX) - set(WFBASE_SRCS ${WFBASE_SRCS} RotatedSPOs.cpp RotatedSPOsT.cpp) -endif(NOT QMC_COMPLEX) - -if(QMC_COMPLEX) - set(WFBASE_SRCS ${WFBASE_SRCS} SpinorSet.cpp SpinorSetT.cpp) -endif(QMC_COMPLEX) ######################## # build jastrows ######################## @@ -71,9 +58,7 @@ set(JASTROW_OMPTARGET_SRCS Jastrow/TwoBodyJastrow.cpp Jastrow/BsplineFunctor.cpp) set(FERMION_SRCS ${FERMION_SRCS} - ElectronGas/FreeOrbital.cpp ElectronGas/FreeOrbitalT.cpp - ElectronGas/FreeOrbitalBuilder.cpp ElectronGas/FreeOrbitalBuilderT.cpp) # wavefunctions only availbale to 3-dim problems @@ -82,46 +67,32 @@ if(OHMMS_DIM MATCHES 3) set(JASTROW_SRCS ${JASTROW_SRCS} Jastrow/eeI_JastrowBuilder.cpp Jastrow/CountingJastrowBuilder.cpp) set(FERMION_SRCS ${FERMION_SRCS} - LCAO/LCAOrbitalSet.cpp LCAO/LCAOrbitalSetT.cpp - LCAO/LCAOrbitalBuilder.cpp LCAO/LCAOrbitalBuilderT.cpp LCAO/MultiQuinticSpline1D.cpp - LCAO/AOBasisBuilder.cpp LCAO/AOBasisBuilderT.cpp - LCAO/SoaLocalizedBasisSet.cpp LCAO/SoaLocalizedBasisSetT.cpp LCAO/LCAOSpinorBuilderT.cpp - LCAO/LCAOrbitalSetWithCorrectionT.cpp - 
LCAO/CuspCorrectionConstructionT.cpp LCAO/SoaCuspCorrectionT.cpp) - if(QMC_COMPLEX) - set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOSpinorBuilder.cpp) - else(QMC_COMPLEX) + if(NOT QMC_COMPLEX) #LCAO cusp correction is not ready for complex set(FERMION_SRCS ${FERMION_SRCS} - LCAO/LCAOrbitalSetWithCorrection.cpp - LCAO/CuspCorrectionConstruction.cpp - LCAO/SoaCuspCorrection.cpp) - endif(QMC_COMPLEX) + LCAO/LCAOrbitalSetWithCorrectionT.cpp + LCAO/CuspCorrectionConstructionT.cpp) + endif() if(HAVE_EINSPLINE) set(FERMION_SRCS ${FERMION_SRCS} EinsplineSetBuilderT.cpp - BsplineFactory/EinsplineSetBuilderCommon.cpp - BsplineFactory/EinsplineSetBuilderESHDF.fft.cpp - BsplineFactory/EinsplineSetBuilder_createSPOs.cpp BsplineFactory/createBsplineReaderT.cpp BsplineFactory/createComplexDouble.cpp BsplineFactory/createComplexSingle.cpp - BsplineFactory/HybridRepCenterOrbitals.cpp BsplineFactory/HybridRepCenterOrbitalsT.cpp BandInfo.cpp BsplineFactory/SplineC2RT.cpp BsplineFactory/SplineR2RT.cpp BsplineFactory/SplineC2CT.cpp - BsplineFactory/BsplineReaderBase.cpp BsplineFactory/BsplineReaderBaseT.cpp) set(FERMION_OMPTARGET_SRCS Fermion/DiracDeterminantBatched.cpp @@ -130,26 +101,33 @@ if(OHMMS_DIM MATCHES 3) BsplineFactory/SplineC2COMPTargetT.cpp ) if(QMC_COMPLEX) - set(FERMION_SRCS ${FERMION_SRCS} BsplineFactory/EinsplineSpinorSetBuilder.cpp BsplineFactory/SplineC2C.cpp BsplineFactory/SplineC2CT.cpp) - set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS} BsplineFactory/SplineC2COMPTarget.cpp BsplineFactory/SplineC2COMPTargetT.cpp) + set(FERMION_SRCS ${FERMION_SRCS} + EinsplineSpinorSetBuilderT.cpp) + set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS} + EinsplineSpinorSetBuilderT.cpp) + set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS}) else(QMC_COMPLEX) set(FERMION_SRCS ${FERMION_SRCS} BsplineFactory/createRealSingle.cpp - BsplineFactory/createRealDouble.cpp - BsplineFactory/SplineC2R.cpp - BsplineFactory/SplineR2R.cpp) - set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS} - 
BsplineFactory/SplineC2ROMPTarget.cpp) + BsplineFactory/createRealDouble.cpp) + set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS}) endif(QMC_COMPLEX) endif(HAVE_EINSPLINE) # plane wave SPO - set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWBasis.cpp PlaneWave/PWBasisT.cpp PlaneWave/PWOrbitalSetT.cpp PlaneWave/PWRealOrbitalSetT.cpp PlaneWave/PWParameterSet.cpp PlaneWave/PWOrbitalSetBuilder.cpp) + set(FERMION_SRCS ${FERMION_SRCS} + PlaneWave/PWBasis.cpp + PlaneWave/PWBasisT.cpp + PlaneWave/PWOrbitalSetT.cpp + PlaneWave/PWRealOrbitalSetT.cpp + PlaneWave/PWParameterSet.cpp + PlaneWave/PWOrbitalSetBuilder.cpp + ) if(QMC_COMPLEX) - set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWOrbitalSet.cpp PlaneWave/PWOrbitalSetT.cpp) + set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWOrbitalSet.cpp) else() - set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWRealOrbitalSet.cpp PlaneWave/PWRealOrbitalSetT.cpp) + set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWRealOrbitalSet.cpp) endif(QMC_COMPLEX) if(NOT QMC_COMPLEX) @@ -168,7 +146,6 @@ set(FERMION_SRCS Fermion/BackflowTransformation.cpp Fermion/DiracDeterminantWithBackflow.cpp Fermion/SlaterDetWithBackflow.cpp - SPOSetBuilderFactory.cpp SPOSetBuilderFactoryT.cpp TrialWaveFunction.cpp TWFdispatcher.cpp diff --git a/src/QMCWaveFunctions/CompositeSPOSet.cpp b/src/QMCWaveFunctions/CompositeSPOSet.cpp deleted file mode 100644 index 7110a831b1..0000000000 --- a/src/QMCWaveFunctions/CompositeSPOSet.cpp +++ /dev/null @@ -1,197 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "CompositeSPOSet.h" -#include "Utilities/IteratorUtility.h" -#include -#include "OhmmsData/AttributeSet.h" -#include "QMCWaveFunctions/SPOSetBuilderFactory.h" - -namespace qmcplusplus -{ -namespace MatrixOperators -{ -/** copy a small matrix (N, M1) to a big matrix (N, M2), M2>M1 - * @param small input matrix - * @param big outout matrix - * @param offset_c column offset - * - * @todo smater and more efficient matrix, move up for others - * The columns [0,M1) are inserted into [offset_c,offset_c+M1). - */ -template -inline void insert_columns(const MAT1& small, MAT2& big, int offset_c) -{ - const int c = small.cols(); - for (int i = 0; i < small.rows(); ++i) - std::copy(small[i], small[i] + c, big[i] + offset_c); -} -} // namespace MatrixOperators - -CompositeSPOSet::CompositeSPOSet(const std::string& my_name) : SPOSet(my_name) -{ - OrbitalSetSize = 0; - component_offsets.reserve(4); -} - -CompositeSPOSet::CompositeSPOSet(const CompositeSPOSet& other) : SPOSet(other) -{ - for (auto& element : other.components) - { - this->add(element->makeClone()); - } -} - -CompositeSPOSet::~CompositeSPOSet() = default; - -void CompositeSPOSet::add(std::unique_ptr component) -{ - if (components.empty()) - component_offsets.push_back(0); //add 0 - - int norbs = component->size(); - components.push_back(std::move(component)); - component_values.emplace_back(norbs); - component_gradients.emplace_back(norbs); - component_laplacians.emplace_back(norbs); - - OrbitalSetSize += norbs; - component_offsets.push_back(OrbitalSetSize); -} - -void CompositeSPOSet::report() -{ - app_log() << "CompositeSPOSet" << std::endl; - app_log() << " ncomponents = " << components.size() << std::endl; - app_log() << " components" << std::endl; - 
for (int i = 0; i < components.size(); ++i) - { - app_log() << " " << i << std::endl; - components[i]->basic_report(" "); - } -} - -std::unique_ptr CompositeSPOSet::makeClone() const { return std::make_unique(*this); } - -void CompositeSPOSet::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) -{ - int n = 0; - for (int c = 0; c < components.size(); ++c) - { - SPOSet& component = *components[c]; - ValueVector& values = component_values[c]; - component.evaluateValue(P, iat, values); - std::copy(values.begin(), values.end(), psi.begin() + n); - n += component.size(); - } -} - -void CompositeSPOSet::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) -{ - int n = 0; - for (int c = 0; c < components.size(); ++c) - { - SPOSet& component = *components[c]; - ValueVector& values = component_values[c]; - GradVector& gradients = component_gradients[c]; - ValueVector& laplacians = component_laplacians[c]; - component.evaluateVGL(P, iat, values, gradients, laplacians); - std::copy(values.begin(), values.end(), psi.begin() + n); - std::copy(gradients.begin(), gradients.end(), dpsi.begin() + n); - std::copy(laplacians.begin(), laplacians.end(), d2psi.begin() + n); - n += component.size(); - } -} - -void CompositeSPOSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) -{ - const int nat = last - first; - for (int c = 0; c < components.size(); ++c) - { - int norb = components[c]->size(); - ValueMatrix v(nat, norb); - GradMatrix g(nat, norb); - ValueMatrix l(nat, norb); - components[c]->evaluate_notranspose(P, first, last, v, g, l); - int n = component_offsets[c]; - MatrixOperators::insert_columns(v, logdet, n); - MatrixOperators::insert_columns(g, dlogdet, n); - MatrixOperators::insert_columns(l, d2logdet, n); - } -} - -void CompositeSPOSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - 
GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) -{ - const int nat = last - first; - for (int c = 0; c < components.size(); ++c) - { - int norb = components[c]->size(); - ValueMatrix v(nat, norb); - GradMatrix g(nat, norb); - HessMatrix h(nat, norb); - components[c]->evaluate_notranspose(P, first, last, v, g, h); - int n = component_offsets[c]; - MatrixOperators::insert_columns(v, logdet, n); - MatrixOperators::insert_columns(g, dlogdet, n); - MatrixOperators::insert_columns(h, grad_grad_logdet, n); - } -} - -void CompositeSPOSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) -{ - not_implemented("evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet,dddlogdet)"); -} - - -std::unique_ptr CompositeSPOSetBuilder::createSPOSetFromXML(xmlNodePtr cur) -{ - std::vector spolist; - putContent(spolist, cur); - if (spolist.empty()) - { - return nullptr; - } - - auto spo_now = std::make_unique(getXMLAttributeValue(cur, "name")); - for (int i = 0; i < spolist.size(); ++i) - { - const SPOSet* spo = sposet_builder_factory_.getSPOSet(spolist[i]); - if (spo) - spo_now->add(spo->makeClone()); - } - return (spo_now->size()) ? 
std::unique_ptr{std::move(spo_now)} : nullptr; -} - -std::unique_ptr CompositeSPOSetBuilder::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) -{ - return createSPOSetFromXML(cur); -} - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/CompositeSPOSet.h b/src/QMCWaveFunctions/CompositeSPOSet.h index 1c03eb356f..1663c2f0d8 100644 --- a/src/QMCWaveFunctions/CompositeSPOSet.h +++ b/src/QMCWaveFunctions/CompositeSPOSet.h @@ -15,91 +15,13 @@ #ifndef QMCPLUSPLUS_COMPOSITE_SPOSET_H #define QMCPLUSPLUS_COMPOSITE_SPOSET_H -#include "QMCWaveFunctions/SPOSet.h" -#include "QMCWaveFunctions/BasisSetBase.h" -#include "QMCWaveFunctions/SPOSetBuilder.h" -#include "QMCWaveFunctions/SPOSetBuilderFactory.h" +#include "Configuration.h" +#include "QMCWaveFunctions/CompositeSPOSetT.h" namespace qmcplusplus { -class CompositeSPOSet : public SPOSet -{ -public: - ///component SPOSets - std::vector> components; - ///temporary storage for values - std::vector component_values; - ///temporary storage for gradients - std::vector component_gradients; - ///temporary storage for laplacians - std::vector component_laplacians; - ///store the precomputed offsets - std::vector component_offsets; - - CompositeSPOSet(const std::string& my_name); - CompositeSPOSet(const CompositeSPOSet& other); - ~CompositeSPOSet() override; - - std::string getClassName() const override { return "CompositeSPOSet"; } - - ///add a sposet component to this composite sposet - void add(std::unique_ptr component); - - ///print out component info - void report(); - - //SPOSet interface methods - ///size is determined by component sposets and nothing else - inline void setOrbitalSetSize(int norbs) override {} - - std::unique_ptr makeClone() const override; - - void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override; - - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; - - ///unimplemented functions call this to abort - inline 
void not_implemented(const std::string& method) - { - APP_ABORT("CompositeSPOSet::" + method + " has not been implemented"); - } - - //methods to be implemented in the future (possibly) - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& ddlogdet) override; - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& ddlogdet, - GGGMatrix& dddlogdet) override; -}; - -struct CompositeSPOSetBuilder : public SPOSetBuilder -{ - CompositeSPOSetBuilder(Communicate* comm, const SPOSetBuilderFactory& factory) - : SPOSetBuilder("Composite", comm), sposet_builder_factory_(factory) - {} - - //SPOSetBuilder interface - std::unique_ptr createSPOSetFromXML(xmlNodePtr cur) override; - - std::unique_ptr createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) override; - - /// reference to the sposet_builder_factory - const SPOSetBuilderFactory& sposet_builder_factory_; -}; +using CompositeSPOSet = CompositeSPOSetT; +using CompositeSPOSetBuilder = CompositeSPOSetBuilderT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/CompositeSPOSetT.cpp b/src/QMCWaveFunctions/CompositeSPOSetT.cpp index 31a3f71399..51b01d756f 100644 --- a/src/QMCWaveFunctions/CompositeSPOSetT.cpp +++ b/src/QMCWaveFunctions/CompositeSPOSetT.cpp @@ -183,12 +183,6 @@ CompositeSPOSetT::evaluate_notranspose(const ParticleSetT& P, int first, "evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet,dddlogdet)"); } -// Class concrete types from ValueType -template class CompositeSPOSetT; -template class CompositeSPOSetT; -template class CompositeSPOSetT>; -template class CompositeSPOSetT>; - template std::unique_ptr> CompositeSPOSetBuilderT::createSPOSetFromXML(xmlNodePtr cur) @@ -217,9 +211,24 
@@ CompositeSPOSetBuilderT::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) return createSPOSetFromXML(cur); } +// Class concrete types from ValueType + +#ifndef QMC_COMPLEX +#ifndef MIXED_PRECISION +template class CompositeSPOSetT; template class CompositeSPOSetBuilderT; +#else +template class CompositeSPOSetT; template class CompositeSPOSetBuilderT; +#endif +#else +#ifndef MIXED_PRECISION +template class CompositeSPOSetT>; template class CompositeSPOSetBuilderT>; +#else +template class CompositeSPOSetT>; template class CompositeSPOSetBuilderT>; +#endif +#endif } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderT.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderT.cpp index 5d60e58d19..41a1ff2076 100644 --- a/src/QMCWaveFunctions/EinsplineSetBuilderT.cpp +++ b/src/QMCWaveFunctions/EinsplineSetBuilderT.cpp @@ -1586,6 +1586,7 @@ void EinsplineSetBuilderT::createBsplineReader(bool useSingle, bool hybridRep } } +#ifdef QMC_COMPLEX template<> void EinsplineSetBuilderT>::createBsplineReader(bool useSingle, bool hybridRep, @@ -1613,6 +1614,7 @@ void EinsplineSetBuilderT>::createBsplineReader(bool useSin MixedSplineReader = createBsplineComplexDoubleT(this, hybridRep, useGPU); } } +#endif template std::unique_ptr> EinsplineSetBuilderT::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input_info) @@ -1788,9 +1790,11 @@ bool EinsplineSetBuilderT::ReadGvectors_ESHDF() return hasPsig; } +//#ifndef QMC_COMPLEX template class EinsplineSetBuilderT; template class EinsplineSetBuilderT; +#ifdef QMC_COMPLEX template class EinsplineSetBuilderT>; template class EinsplineSetBuilderT>; - +#endif } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderT.h b/src/QMCWaveFunctions/EinsplineSetBuilderT.h index e38d6a4cc2..cb5643839f 100644 --- a/src/QMCWaveFunctions/EinsplineSetBuilderT.h +++ b/src/QMCWaveFunctions/EinsplineSetBuilderT.h @@ -27,13 +27,13 @@ #define QMCPLUSPLUS_EINSPLINE_SET_BUILDERT_H #include "QMCWaveFunctions/BandInfo.h" 
-#include "QMCWaveFunctions/BsplineFactory/EinsplineSetBuilder.h" +#include "QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h" #include "QMCWaveFunctions/SPOSetBuilderT.h" #include #include -// #define PW_COEFF_NORM_TOLERANCE 1e-6 +#define PW_COEFF_NORM_TOLERANCE 1e-6 class Communicate; @@ -44,81 +44,75 @@ template class BsplineReaderBaseT; // Helper needed for TwistMap -// struct Int3less -// { -// bool -// operator()(const TinyVector& a, const TinyVector& b) -// const -// { -// if (a[0] > b[0]) -// return false; -// if (a[0] < b[0]) -// return true; -// if (a[1] > b[1]) -// return false; -// if (a[1] < b[1]) -// return true; -// if (a[2] > b[2]) -// return false; -// if (a[2] < b[2]) -// return true; -// return false; -// } -// }; -// struct Int4less -// { -// bool -// operator()(const TinyVector& a, const TinyVector& b) -// const -// { -// for (int i = 0; i < 4; i++) { -// if (a[i] > b[i]) -// return false; -// if (a[i] < b[i]) -// return true; -// } -// return false; -// } -// }; +struct Int3less +{ + bool operator()(const TinyVector& a, const TinyVector& b) const + { + if (a[0] > b[0]) + return false; + if (a[0] < b[0]) + return true; + if (a[1] > b[1]) + return false; + if (a[1] < b[1]) + return true; + if (a[2] > b[2]) + return false; + if (a[2] < b[2]) + return true; + return false; + } +}; +struct Int4less +{ + bool operator()(const TinyVector& a, const TinyVector& b) const + { + for (int i = 0; i < 4; i++) + { + if (a[i] > b[i]) + return false; + if (a[i] < b[i]) + return true; + } + return false; + } +}; /** construct a name for spline SPO set */ -// struct H5OrbSet -// { -// /// index for the spin set -// int SpinSet; -// /// number of orbitals that belong to this set -// int NumOrbs; -// /// name of the HDF5 file -// std::filesystem::path FileName; -// /** true if a < b -// * -// * The ordering -// * - name -// * - spin set -// * - number of orbitals -// */ -// bool -// operator()(const H5OrbSet& a, const H5OrbSet& b) const -// { -// if (a.FileName 
== b.FileName) { -// if (a.SpinSet == b.SpinSet) -// return a.NumOrbs < b.NumOrbs; -// else -// return a.SpinSet < b.SpinSet; -// } -// else -// return a.FileName < b.FileName; -// } - -// H5OrbSet(std::filesystem::path name, int spinSet, int numOrbs) : -// SpinSet(spinSet), -// NumOrbs(numOrbs), -// FileName(std::move(name)) -// { -// } -// H5OrbSet() = default; -// }; +struct H5OrbSet +{ + /// index for the spin set + int SpinSet; + /// number of orbitals that belong to this set + int NumOrbs; + /// name of the HDF5 file + std::filesystem::path FileName; + /** true if a < b + * + * The ordering + * - name + * - spin set + * - number of orbitals + */ + bool operator()(const H5OrbSet& a, const H5OrbSet& b) const + { + if (a.FileName == b.FileName) + { + if (a.SpinSet == b.SpinSet) + return a.NumOrbs < b.NumOrbs; + else + return a.SpinSet < b.SpinSet; + } + else + return a.FileName < b.FileName; + } + + H5OrbSet(std::filesystem::path name, int spinSet, int numOrbs) + : SpinSet(spinSet), NumOrbs(numOrbs), FileName(std::move(name)) + {} + H5OrbSet() = default; +}; /** EinsplineSet builder */ diff --git a/src/QMCWaveFunctions/BsplineFactory/EinsplineSpinorSetBuilder.cpp b/src/QMCWaveFunctions/EinsplineSpinorSetBuilderT.cpp similarity index 62% rename from src/QMCWaveFunctions/BsplineFactory/EinsplineSpinorSetBuilder.cpp rename to src/QMCWaveFunctions/EinsplineSpinorSetBuilderT.cpp index e0cde5fa49..46baf6964a 100644 --- a/src/QMCWaveFunctions/BsplineFactory/EinsplineSpinorSetBuilder.cpp +++ b/src/QMCWaveFunctions/EinsplineSpinorSetBuilderT.cpp @@ -17,32 +17,34 @@ ////////////////////////////////////////////////////////////////////////////////////// -#include "EinsplineSpinorSetBuilder.h" -#include "QMCWaveFunctions/SpinorSet.h" +#include "EinsplineSpinorSetBuilderT.h" +#include "QMCWaveFunctions/SpinorSetT.h" #include "OhmmsData/AttributeSet.h" #include "Message/CommOperators.h" #include "Utilities/Timer.h" -#include "einspline_helper.hpp" -#include 
"BsplineReaderBase.h" -#include "createBsplineReader.h" +#include "QMCWaveFunctions/BsplineFactory/einspline_helper.hpp" +#include "QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h" +#include "QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h" +#include "QMCWaveFunctions/SpinorSet.h" namespace qmcplusplus { -std::unique_ptr EinsplineSpinorSetBuilder::createSPOSetFromXML(xmlNodePtr cur) +template +std::unique_ptr> EinsplineSpinorSetBuilderT::createSPOSetFromXML(xmlNodePtr cur) { int numOrbs = 0; int sortBands(1); int spinSet = 0; int spinSet2 = 1; - int twist_num_inp = TWISTNUM_NO_INPUT; - TinyVector twist_inp(TWIST_NO_INPUT); + int twist_num_inp = this->TWISTNUM_NO_INPUT; + TinyVector twist_inp(this->TWIST_NO_INPUT); //There have to be two "spin states"... one for the up channel and one for the down channel. // We force this for spinors and manually resize states and FullBands. - states.clear(); - states.resize(2); + this->states.clear(); + this->states.resize(2); - FullBands.resize(2); + this->FullBands.resize(2); SPOSet* UpOrbitalSet; std::string sourceName; @@ -56,20 +58,20 @@ std::unique_ptr EinsplineSpinorSetBuilder::createSPOSetFromXML(xmlNodePt { OhmmsAttributeSet a; TinyVector TileFactor_do_not_use; - a.add(H5FileName, "href"); + a.add(this->H5FileName, "href"); a.add(TileFactor_do_not_use, "tile", {}, TagStatus::DELETED); a.add(sortBands, "sort"); - a.add(TileMatrix, "tilematrix"); + a.add(this->TileMatrix, "tilematrix"); a.add(twist_num_inp, "twistnum"); a.add(twist_inp, "twist"); a.add(sourceName, "source"); - a.add(MeshFactor, "meshfactor"); + a.add(this->MeshFactor, "meshfactor"); a.add(hybrid_rep, "hybridrep"); a.add(spo_prec, "precision"); a.add(truncate, "truncate"); - a.add(myName, "tag"); + a.add(this->myName, "tag"); - a.put(XMLRoot); + a.put(this->XMLRoot); a.add(numOrbs, "size"); a.add(numOrbs, "norbs"); a.add(spinSet, "spindataset"); @@ -77,17 +79,17 @@ std::unique_ptr EinsplineSpinorSetBuilder::createSPOSetFromXML(xmlNodePt 
a.put(cur); } - auto pit(ParticleSets.find(sourceName)); - if (pit == ParticleSets.end()) - myComm->barrier_and_abort("Einspline needs the source particleset"); + auto pit(this->ParticleSets.find(sourceName)); + if (pit == this->ParticleSets.end()) + this->myComm->barrier_and_abort("Einspline needs the source particleset"); else - SourcePtcl = pit->second.get(); + this->SourcePtcl = pit->second.get(); /////////////////////////////////////////////// // Read occupation information from XML file // /////////////////////////////////////////////// - const std::vector last_occ(Occ); - Occ.resize(0, 0); // correspond to ground + const std::vector last_occ(this->Occ); + this->Occ.resize(0, 0); // correspond to ground bool NewOcc(false); { @@ -106,33 +108,33 @@ std::unique_ptr EinsplineSpinorSetBuilder::createSPOSetFromXML(xmlNodePt if (cname == "occupation") { std::string occ_mode("ground"); - occ_format = "energy"; - particle_hole_pairs = 0; + this->occ_format = "energy"; + this->particle_hole_pairs = 0; OhmmsAttributeSet oAttrib; oAttrib.add(occ_mode, "mode"); oAttrib.add(spinSet, "spindataset"); - oAttrib.add(occ_format, "format"); - oAttrib.add(particle_hole_pairs, "pairs"); + oAttrib.add(this->occ_format, "format"); + oAttrib.add(this->particle_hole_pairs, "pairs"); oAttrib.put(cur); if (occ_mode == "excited") - putContent(Occ, cur); + putContent(this->Occ, cur); else if (occ_mode != "ground") - myComm->barrier_and_abort("EinsplineSetBuilder::createSPOSet Only ground state occupation currently " - "supported in EinsplineSetBuilder."); + this->myComm->barrier_and_abort("EinsplineSetBuilder::createSPOSet Only ground state occupation currently " + "supported in EinsplineSetBuilder."); } cur = cur->next; } - if (Occ != last_occ) + if (this->Occ != last_occ) { NewOcc = true; } else NewOcc = false; - H5OrbSet aset(H5FileName, spinSet, numOrbs); - const auto iter = SPOSetMap.find(aset); - if ((iter != SPOSetMap.end()) && (!NewOcc)) + H5OrbSet aset(this->H5FileName, spinSet, 
numOrbs); + const auto iter = this->SPOSetMap.find(aset); + if ((iter != this->SPOSetMap.end()) && (!NewOcc)) app_warning() << "!!!!!!! Identical SPOSets are detected by EinsplineSpinorSetBuilder! " "Implicit sharing one SPOSet for spin-up and spin-down electrons has been removed. " "Each determinant creates its own SPOSet with dedicated memory for spline coefficients. " @@ -141,32 +143,32 @@ std::unique_ptr EinsplineSpinorSetBuilder::createSPOSetFromXML(xmlNodePt "and reference it by name on the determinant line." << std::endl; - if (FullBands[spinSet] == 0) - FullBands[spinSet] = std::make_unique>(); + if (this->FullBands[spinSet] == nullptr) + this->FullBands[spinSet] = std::make_unique>(); - if (FullBands[spinSet2] == 0) - FullBands[spinSet2] = std::make_unique>(); + if (this->FullBands[spinSet2] == nullptr) + this->FullBands[spinSet2] = std::make_unique>(); //This is to skip checks on ion-ID's, spin types, etc. If we've made it here, we assume we know better //than Einspline on what the data means... bool skipChecks = true; - set_metadata(numOrbs, twist_num_inp, twist_inp, skipChecks); + this->set_metadata(numOrbs, twist_num_inp, twist_inp, skipChecks); ////////////////////////////////// // Create the OrbitalSet object ////////////////////////////////// Timer mytimer; mytimer.restart(); - OccupyBands(spinSet, sortBands, numOrbs, skipChecks); + this->OccupyBands(spinSet, sortBands, numOrbs, skipChecks); if (spinSet == 0) - TileIons(); + this->TileIons(); bool use_single = (spo_prec == "single" || spo_prec == "float"); // safeguard for a removed feature if (truncate == "yes") - myComm->barrier_and_abort( + this->myComm->barrier_and_abort( "The 'truncate' feature of spline SPO has been removed. 
Please use hybrid orbital representation."); std::string useGPU("no"); @@ -176,7 +178,7 @@ std::unique_ptr EinsplineSpinorSetBuilder::createSPOSetFromXML(xmlNodePt if (MixedSplineReader == 0) { if (use_single) - MixedSplineReader = createBsplineRealSingle(this, hybrid_rep == "yes", useGPU); + MixedSplineReader = createBsplineRealSingleT(this, hybrid_rep == "yes", useGPU); else MixedSplineReader = createBsplineRealDouble(this, hybrid_rep == "yes", useGPU); } @@ -184,33 +186,39 @@ std::unique_ptr EinsplineSpinorSetBuilder::createSPOSetFromXML(xmlNodePt else #endif { - if (MixedSplineReader == 0) + if (this->MixedSplineReader == nullptr) { if (use_single) - MixedSplineReader = createBsplineComplexSingle(this, hybrid_rep == "yes", useGPU); + this->MixedSplineReader = createBsplineComplexSingleT(this, hybrid_rep == "yes", useGPU); else - MixedSplineReader = createBsplineComplexDouble(this, hybrid_rep == "yes", useGPU); + this->MixedSplineReader = createBsplineComplexDoubleT(this, hybrid_rep == "yes", useGPU); } } - MixedSplineReader->setCommon(XMLRoot); + this->MixedSplineReader->setCommon(this->XMLRoot); //Norm for spinor wavefunctions is different from SPO's by a factor of sqrt(2). Disable the unit norm check. - MixedSplineReader->setCheckNorm(false); + this->MixedSplineReader->setCheckNorm(false); //Set no rotation to the orbitals - MixedSplineReader->setRotate(false); + this->MixedSplineReader->setRotate(false); //Make the up spin set. - bcastSortBands(spinSet, NumDistinctOrbitals, myComm->rank() == 0); - auto bspline_zd_u = MixedSplineReader->create_spline_set(spinSet, spo_cur); + this->bcastSortBands(spinSet, this->NumDistinctOrbitals, this->myComm->rank() == 0); + auto bspline_zd_u = this->MixedSplineReader->create_spline_set(spinSet, spo_cur); //Make the down spin set. 
- OccupyBands(spinSet2, sortBands, numOrbs, skipChecks); - bcastSortBands(spinSet2, NumDistinctOrbitals, myComm->rank() == 0); - auto bspline_zd_d = MixedSplineReader->create_spline_set(spinSet2, spo_cur); + this->OccupyBands(spinSet2, sortBands, numOrbs, skipChecks); + this->bcastSortBands(spinSet2, this->NumDistinctOrbitals, this->myComm->rank() == 0); + auto bspline_zd_d = this->MixedSplineReader->create_spline_set(spinSet2, spo_cur); //register with spin set and we're off to the races. auto spinor_set = std::make_unique(spo_object_name); spinor_set->set_spos(std::move(bspline_zd_u), std::move(bspline_zd_d)); return spinor_set; }; + +#ifndef MIXED_PRECISION +template class EinsplineSpinorSetBuilderT>; +#else +template class EinsplineSpinorSetBuilderT>; +#endif } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/EinsplineSpinorSetBuilderT.h b/src/QMCWaveFunctions/EinsplineSpinorSetBuilderT.h new file mode 100644 index 0000000000..c55ef7fd62 --- /dev/null +++ b/src/QMCWaveFunctions/EinsplineSpinorSetBuilderT.h @@ -0,0 +1,54 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories +// +// File created by: Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories +////////////////////////////////////////////////////////////////////////////////////// + + +/** @file EinsplineSpinorSetBuilderT.h + * + * Derives EinsplineSetBuilder. Overrides the createSPOSetFromXML method to read an up and down channel from hdf5 + * and then construct an appropriate einspline spinor set object. 
+ * + */ +#ifndef QMCPLUSPLUS_EINSPLINE_SPINORSET_BUILDERT_H +#define QMCPLUSPLUS_EINSPLINE_SPINORSET_BUILDERT_H + +#include "QMCWaveFunctions/SPOSetT.h" +#include "QMCWaveFunctions/SPOSetBuilderT.h" +#include "QMCWaveFunctions/EinsplineSetBuilderT.h" +class Communicate; + +namespace qmcplusplus +{ + +template +class EinsplineSpinorSetBuilderT : public EinsplineSetBuilderT +{ + using ParticleSet = ParticleSetT; + using SPOSet = SPOSetT; + using PSetMap = std::map>; + +public: + ///constructor + EinsplineSpinorSetBuilderT(ParticleSet& p, const PSetMap& psets, Communicate* comm, xmlNodePtr cur) + : EinsplineSetBuilderT(p, psets, comm, cur){}; + + ///destructor + ~EinsplineSpinorSetBuilderT() override{}; + + /** initialize the Antisymmetric wave function for electrons + * @param cur the current xml node + */ + std::unique_ptr> createSPOSetFromXML(xmlNodePtr cur) override; +}; + +} // namespace qmcplusplus + + +#endif diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbital.cpp b/src/QMCWaveFunctions/ElectronGas/FreeOrbital.cpp deleted file mode 100644 index 3727a1e2e6..0000000000 --- a/src/QMCWaveFunctions/ElectronGas/FreeOrbital.cpp +++ /dev/null @@ -1,264 +0,0 @@ -#include "FreeOrbital.h" - -namespace qmcplusplus -{ -FreeOrbital::FreeOrbital(const std::string& my_name, const std::vector& kpts_cart) - : SPOSet(my_name), - kvecs(kpts_cart), -#ifdef QMC_COMPLEX - mink(0), // first k at twist may not be 0 -#else - mink(1), // treat k=0 as special case -#endif - maxk(kpts_cart.size()) -{ -#ifdef QMC_COMPLEX - OrbitalSetSize = maxk; -#else - OrbitalSetSize = 2 * maxk - 1; // k=0 has no (cos, sin) split -#endif - k2neg.resize(maxk); - for (int ik = 0; ik < maxk; ik++) - k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); -} - -FreeOrbital::~FreeOrbital() {} - -void FreeOrbital::evaluateVGL(const ParticleSet& P, int iat, ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec) -{ - const PosType& r = P.activeR(iat); - RealType sinkr, coskr; - for (int ik = mink; ik < maxk; ik++) - { 
- sincos(dot(kvecs[ik], r), &sinkr, &coskr); -#ifdef QMC_COMPLEX - pvec[ik] = ValueType(coskr, sinkr); - dpvec[ik] = ValueType(-sinkr, coskr) * kvecs[ik]; - d2pvec[ik] = ValueType(k2neg[ik] * coskr, k2neg[ik] * sinkr); -#else - const int j2 = 2 * ik; - const int j1 = j2 - 1; - pvec[j1] = coskr; - pvec[j2] = sinkr; - dpvec[j1] = -sinkr * kvecs[ik]; - dpvec[j2] = coskr * kvecs[ik]; - d2pvec[j1] = k2neg[ik] * coskr; - d2pvec[j2] = k2neg[ik] * sinkr; -#endif - } -#ifndef QMC_COMPLEX - pvec[0] = 1.0; - dpvec[0] = 0.0; - d2pvec[0] = 0.0; -#endif -} - -void FreeOrbital::evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec) -{ - const PosType& r = P.activeR(iat); - RealType sinkr, coskr; - for (int ik = mink; ik < maxk; ik++) - { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); -#ifdef QMC_COMPLEX - pvec[ik] = ValueType(coskr, sinkr); -#else - const int j2 = 2 * ik; - const int j1 = j2 - 1; - pvec[j1] = coskr; - pvec[j2] = sinkr; -#endif - } -#ifndef QMC_COMPLEX - pvec[0] = 1.0; -#endif -} - -void FreeOrbital::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - ValueMatrix& d2phi) -{ - for (int iat = first, i = 0; iat < last; iat++, i++) - { - ValueVector p(phi[i], OrbitalSetSize); - GradVector dp(dphi[i], OrbitalSetSize); - ValueVector d2p(d2phi[i], OrbitalSetSize); - evaluateVGL(P, iat, p, dp, d2p); - } -} - -void FreeOrbital::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - HessMatrix& d2phi_mat) -{ - RealType sinkr, coskr; - ValueType phi_of_r; - for (int iat = first, i = 0; iat < last; iat++, i++) - { - ValueVector p(phi[i], OrbitalSetSize); - GradVector dp(dphi[i], OrbitalSetSize); - HessVector hess(d2phi_mat[i], OrbitalSetSize); - - const PosType& r = P.activeR(iat); - for (int ik = mink; ik < maxk; ik++) - { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); -#ifdef QMC_COMPLEX - // phi(r) = cos(kr)+i*sin(kr) - phi_of_r = ValueType(coskr, sinkr); - 
p[ik] = phi_of_r; - // i*phi(r) = -sin(kr) + i*cos(kr) - dp[ik] = ValueType(-sinkr, coskr) * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) - { - hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) - { - hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[ik](lb, la) = hess[ik](la, lb); - } - } -#else - const int j2 = 2 * ik; - const int j1 = j2 - 1; - p[j1] = coskr; - p[j2] = sinkr; - dp[j1] = -sinkr * kvecs[ik]; - dp[j2] = coskr * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) - { - hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; - hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) - { - hess[j1](la, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j2](la, lb) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j1](lb, la) = hess[j1](la, lb); - hess[j2](lb, la) = hess[j2](la, lb); - } - } -#endif - } -#ifndef QMC_COMPLEX - p[0] = 1.0; - dp[0] = 0.0; - hess[0] = 0.0; -#endif - } -} - -void FreeOrbital::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - HessMatrix& d2phi_mat, - GGGMatrix& d3phi_mat) -{ - RealType sinkr, coskr; - ValueType phi_of_r; - for (int iat = first, i = 0; iat < last; iat++, i++) - { - ValueVector p(phi[i], OrbitalSetSize); - GradVector dp(dphi[i], OrbitalSetSize); - HessVector hess(d2phi_mat[i], OrbitalSetSize); - GGGVector ggg(d3phi_mat[i], OrbitalSetSize); - - const PosType& r = P.activeR(iat); - for (int ik = mink; ik < maxk; ik++) - { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); -#ifdef QMC_COMPLEX - const ValueType compi(0, 1); - phi_of_r = ValueType(coskr, sinkr); - p[ik] = phi_of_r; - dp[ik] = compi * phi_of_r * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) - { - hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) - { - hess[ik](la, lb) = 
-phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[ik](lb, la) = hess[ik](la, lb); - } - } - for (int la = 0; la < OHMMS_DIM; la++) - { - ggg[ik][la] = compi * (kvecs[ik])[la] * hess[ik]; - } -#else - const int j2 = 2 * ik; - const int j1 = j2 - 1; - p[j1] = coskr; - p[j2] = sinkr; - dp[j1] = -sinkr * kvecs[ik]; - dp[j2] = coskr * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) - { - hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; - hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; - ggg[j1][la](la, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la]; - ggg[j2][la](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) - { - hess[j1](la, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j2](la, lb) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j1](lb, la) = hess[j1](la, lb); - hess[j2](lb, la) = hess[j2](la, lb); - ggg[j1][la](lb, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la]; - ggg[j2][la](lb, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la]; - ggg[j1][la](la, lb) = ggg[j1][la](lb, la); - ggg[j2][la](la, lb) = ggg[j2][la](lb, la); - ggg[j1][lb](la, la) = ggg[j1][la](lb, la); - ggg[j2][lb](la, la) = ggg[j2][la](lb, la); - ggg[j1][la](lb, lb) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb]; - ggg[j2][la](lb, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb]; - ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb); - ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb); - ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb); - ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb); - for (int lc = lb + 1; lc < OHMMS_DIM; lc++) - { - ggg[j1][la](lb, lc) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc]; - ggg[j2][la](lb, lc) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc]; - ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc); - ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc); - ggg[j1][lb](la, lc) = 
ggg[j1][la](lb, lc); - ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc); - ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc); - ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc); - ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc); - ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc); - ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc); - ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc); - } - } - } -#endif - } -#ifndef QMC_COMPLEX - p[0] = 1.0; - dp[0] = 0.0; - hess[0] = 0.0; - ggg[0] = 0.0; -#endif - } -} - -void FreeOrbital::report(const std::string& pad) const -{ - app_log() << pad << "FreeOrbital report" << std::endl; - for (int ik = 0; ik < kvecs.size(); ik++) - { - app_log() << pad << ik << " " << kvecs[ik] << std::endl; - } - app_log() << pad << "end FreeOrbital report" << std::endl; - app_log().flush(); -} -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbital.h b/src/QMCWaveFunctions/ElectronGas/FreeOrbital.h index 0cbb684545..3901e8dd3f 100644 --- a/src/QMCWaveFunctions/ElectronGas/FreeOrbital.h +++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbital.h @@ -17,59 +17,12 @@ #ifndef QMCPLUSPLUS_FREE_ORBITAL #define QMCPLUSPLUS_FREE_ORBITAL -#include "QMCWaveFunctions/SPOSet.h" +#include "Configuration.h" +#include "QMCWaveFunctions/ElectronGas/FreeOrbitalT.h" namespace qmcplusplus { -class FreeOrbital : public SPOSet -{ -public: - FreeOrbital(const std::string& my_name, const std::vector& kpts_cart); - ~FreeOrbital(); - - std::string getClassName() const override { return "FreeOrbital"; } - - // phi[i][j] is phi_j(r_i), i.e. 
electron i in orbital j - // i \in [first, last) - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - ValueMatrix& d2phi) override; - - // plug r_i into all orbitals - void evaluateVGL(const ParticleSet& P, int i, ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec) override; - void evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec) override; - - // hessian matrix is needed by backflow - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - HessMatrix& d2phi_mat) override; - - // derivative of hessian is needed to optimize backflow - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - HessMatrix& d2phi_mat, - GGGMatrix& d3phi_mat) override; - - void report(const std::string& pad) const override; - // ---- begin required overrides - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - void setOrbitalSetSize(int norbs) override { throw std::runtime_error("not implemented"); } - // required overrides end ---- -private: - const std::vector kvecs; // kvecs vectors - const int mink; // minimum k index - const int maxk; // maximum number of kvecs vectors - std::vector k2neg; // minus kvecs^2 -}; +using FreeOrbital = FreeOrbitalT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilder.cpp b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilder.cpp deleted file mode 100644 index 5861dc9d0f..0000000000 --- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilder.cpp +++ /dev/null @@ -1,100 +0,0 @@ -#include "OhmmsData/AttributeSet.h" -#include "LongRange/StructFact.h" -#include "LongRange/KContainer.h" -#include "QMCWaveFunctions/ElectronGas/FreeOrbital.h" -#include "QMCWaveFunctions/ElectronGas/FreeOrbitalBuilder.h" - -namespace qmcplusplus -{ -FreeOrbitalBuilder::FreeOrbitalBuilder(ParticleSet& els, 
Communicate* comm, xmlNodePtr cur) - : SPOSetBuilder("PW", comm), targetPtcl(els) -{} - -std::unique_ptr FreeOrbitalBuilder::createSPOSetFromXML(xmlNodePtr cur) -{ - int norb = -1; - std::string spo_object_name; - PosType twist(0.0); - OhmmsAttributeSet attrib; - attrib.add(norb, "size"); - attrib.add(twist, "twist"); - attrib.add(spo_object_name, "name"); - attrib.put(cur); - - if (norb < 0) - throw std::runtime_error("free orbital SPO set require the \"size\" input"); - - auto lattice = targetPtcl.getLattice(); - - PosType tvec = lattice.k_cart(twist); -#ifdef QMC_COMPLEX - const int npw = norb; - targetPtcl.setTwist(twist); - app_log() << "twist fraction = " << twist << std::endl; - app_log() << "twist cartesian = " << tvec << std::endl; -#else - const int npw = std::ceil((norb + 1.0) / 2); - if (2 * npw - 1 != norb) - { - std::ostringstream msg; - msg << "norb = " << norb << " npw = " << npw; - msg << " cannot be ran in real PWs (sin, cos)" << std::endl; - msg << "either use complex build or change the size of SPO set" << std::endl; - msg << "ideally, set size to a closed shell of PWs." 
<< std::endl; - throw std::runtime_error(msg.str()); - } - for (int ldim = 0; ldim < twist.size(); ldim++) - { - if (std::abs(twist[ldim]) > 1e-16) - throw std::runtime_error("no twist for real orbitals"); - } -#endif - - // extract npw k-points from container - // kpts_cart is sorted by magnitude - std::vector kpts(npw); - KContainer klists; - RealType kcut = lattice.LR_kc; // to-do: reduce kcut to >~ kf - klists.updateKLists(lattice, kcut, lattice.ndim, twist); - - // k0 is not in kpts_cart - kpts[0] = tvec; -#ifdef QMC_COMPLEX - for (int ik = 1; ik < npw; ik++) - { - kpts[ik] = klists.kpts_cart[ik - 1]; - } -#else - const int nktot = klists.kpts.size(); - std::vector mkidx(npw, 0); - int ik = 1; - for (int jk = 0; jk < nktot; jk++) - { - // check if -k is already chosen - const int jmk = klists.minusk[jk]; - if (in_list(jk, mkidx)) - continue; - // if not, then add this kpoint - kpts[ik] = klists.kpts_cart[jk]; - mkidx[ik] = jmk; // keep track of its minus - ik++; - if (ik >= npw) - break; - } -#endif - auto sposet = std::make_unique(spo_object_name, kpts); - sposet->report(" "); - return sposet; -} - -bool FreeOrbitalBuilder::in_list(const int j, const std::vector l) -{ - for (int i = 0; i < l.size(); i++) - { - if (j == l[i]) - return true; - } - return false; -} - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilder.h b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilder.h index b193c67c66..95eb8b6c2a 100644 --- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilder.h +++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilder.h @@ -1,21 +1,11 @@ #ifndef QMCPLUSPLUS_FREE_ORBITAL_BUILDER_H #define QMCPLUSPLUS_FREE_ORBITAL_BUILDER_H -#include "QMCWaveFunctions/SPOSetBuilder.h" +#include "Configuration.h" +#include "QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h" namespace qmcplusplus { -class FreeOrbitalBuilder : public SPOSetBuilder -{ -public: - FreeOrbitalBuilder(ParticleSet& els, Communicate* comm, xmlNodePtr cur); - 
~FreeOrbitalBuilder() {} - - std::unique_ptr createSPOSetFromXML(xmlNodePtr cur) override; - -private: - ParticleSet& targetPtcl; - bool in_list(const int j, const std::vector l); -}; -} // namespace qmcplusplus +using FreeOrbitalBuilder = FreeOrbitalBuilderT; +} #endif diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp index 497f65227e..bc4bec5408 100644 --- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp +++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp @@ -155,12 +155,7 @@ FreeOrbitalT>::evaluateValue( RealType sinkr, coskr; for (int ik = mink; ik < maxk; ik++) { sincos(dot(kvecs[ik], r), &sinkr, &coskr); - pvec[ik] = std::complex(coskr, sinkr); - const int j2 = 2 * ik; - const int j1 = j2 - 1; - pvec[j1] = coskr; - pvec[j2] = sinkr; } } @@ -173,12 +168,7 @@ FreeOrbitalT>::evaluateValue( RealType sinkr, coskr; for (int ik = mink; ik < maxk; ik++) { sincos(dot(kvecs[ik], r), &sinkr, &coskr); - pvec[ik] = std::complex(coskr, sinkr); - const int j2 = 2 * ik; - const int j1 = j2 - 1; - pvec[j1] = coskr; - pvec[j2] = sinkr; } } diff --git a/src/QMCWaveFunctions/ExampleHeComponent.h b/src/QMCWaveFunctions/ExampleHeComponent.h index e8478dc44a..3199e33f9d 100644 --- a/src/QMCWaveFunctions/ExampleHeComponent.h +++ b/src/QMCWaveFunctions/ExampleHeComponent.h @@ -32,7 +32,7 @@ class ExampleHeComponent : public WaveFunctionComponent, OptimizableObject my_table_ee_idx_(els.addTable(els, DTModes::NEED_TEMP_DATA_ON_HOST | DTModes::NEED_VP_FULL_TABLE_ON_HOST)), my_table_ei_idx_(els.addTable(ions, DTModes::NEED_VP_FULL_TABLE_ON_HOST)){}; - using OptVariablesType = optimize::VariableSet; + //using OptVariablesType = optimize::VariableSet; using PtclGrpIndexes = QMCTraits::PtclGrpIndexes; std::string getClassName() const override { return "ExampleHeComponent"; } diff --git a/src/QMCWaveFunctions/Fermion/Backflow_ee_kSpace.h b/src/QMCWaveFunctions/Fermion/Backflow_ee_kSpace.h index 5178de3f5c..0fb838f063 
100644 --- a/src/QMCWaveFunctions/Fermion/Backflow_ee_kSpace.h +++ b/src/QMCWaveFunctions/Fermion/Backflow_ee_kSpace.h @@ -26,10 +26,6 @@ namespace qmcplusplus class Backflow_ee_kSpace : public BackflowFunctionBase { using ComplexType = QMCTraits::ComplexType; - ///typedef for real values - //using real_type = optimize::VariableSet::real_type; - ///typedef for variableset: this is going to be replaced - using opt_variables_type = optimize::VariableSet; public: //number of groups of the target particleset diff --git a/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.h b/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.h index e7513eeeae..3e50ea7f0a 100644 --- a/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.h +++ b/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.h @@ -19,7 +19,10 @@ #include #include "Configuration.h" #include "WaveFunctionComponentBuilder.h" +#include "QMCWaveFunctions/SPOSet.h" #include +#include "QMCWaveFunctions/SPOSetBuilderFactory.h" +#include "QMCWaveFunctions/SPOSetBuilder.h" namespace qmcplusplus { @@ -28,9 +31,6 @@ class BackflowTransformation; class DiracDeterminantBase; class MultiSlaterDetTableMethod; struct CSFData; -class SPOSet; -class SPOSetBuilder; -class SPOSetBuilderFactory; struct ci_configuration; /** derived class from WaveFunctionComponentBuilder diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSet.cpp b/src/QMCWaveFunctions/HarmonicOscillator/SHOSet.cpp deleted file mode 100644 index 3a6e5872e1..0000000000 --- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSet.cpp +++ /dev/null @@ -1,573 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -////////////////////////////////////////////////////////////////////////////////////// - - -#include "SHOSet.h" -#include "Utilities/string_utils.h" - -namespace qmcplusplus -{ -SHOSet::SHOSet(const std::string& my_name, RealType l, PosType c, const std::vector& sho_states) - : SPOSet(my_name), length(l), center(c) -{ - state_info.resize(sho_states.size()); - for (int s = 0; s < sho_states.size(); ++s) - state_info[s] = *sho_states[s]; - initialize(); -} - - -void SHOSet::initialize() -{ - using std::sqrt; - - OrbitalSetSize = state_info.size(); - - qn_max = -1; - for (int s = 0; s < state_info.size(); ++s) - for (int d = 0; d < DIM; ++d) - qn_max[d] = std::max(qn_max[d], state_info[s].quantum_number[d]); - qn_max += 1; - - nmax = -1; - for (int d = 0; d < DIM; ++d) - nmax = std::max(nmax, qn_max[d]); - - prefactors.resize(nmax); - hermite.resize(DIM, nmax); - bvalues.resize(DIM, nmax); - - //d0_values.resize(DIM,nmax); - //d1_values.resize(DIM,nmax); - //d2_values.resize(DIM,nmax); - - if (nmax > 0) - { - prefactors[0] = 1.0 / (sqrt(sqrt(M_PI) * length)); - for (int n = 1; n < nmax; ++n) - prefactors[n] = prefactors[n - 1] / sqrt(2. 
* n); - } -} - - -SHOSet::~SHOSet() {} - - -std::unique_ptr SHOSet::makeClone() const { return std::make_unique(*this); } - - -void SHOSet::report(const std::string& pad) const -{ - app_log() << pad << "SHOSet report" << std::endl; - app_log() << pad << " length = " << length << std::endl; - app_log() << pad << " center = " << center << std::endl; - app_log() << pad << " nmax = " << nmax << std::endl; - app_log() << pad << " qn_max = " << qn_max << std::endl; - app_log() << pad << " # states = " << state_info.size() << std::endl; - app_log() << pad << " states" << std::endl; - for (int s = 0; s < state_info.size(); ++s) - state_info[s].sho_report(pad + " " + int2string(s) + " "); - app_log() << pad << "end SHOSet report" << std::endl; - app_log().flush(); -} - - -void SHOSet::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) -{ - const PosType& r(P.activeR(iat)); - ValueVector p(&psi[0], size()); - evaluate_v(r, p); -} - - -void SHOSet::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) -{ - const PosType& r(P.activeR(iat)); - ValueVector p(&psi[0], size()); - GradVector dp(&dpsi[0], size()); - ValueVector d2p(&d2psi[0], size()); - evaluate_vgl(r, p, dp, d2p); -} - - -void SHOSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) -{ - for (int iat = first, i = 0; iat < last; ++iat, ++i) - { - ValueVector p(logdet[i], size()); - GradVector dp(dlogdet[i], size()); - ValueVector d2p(d2logdet[i], size()); - evaluate_vgl(P.R[iat], p, dp, d2p); - } -} - - -void SHOSet::evaluate_v(PosType r, ValueVector& psi) -{ - PosType x = (r - center) / length; - evaluate_hermite(x); - evaluate_d0(x, psi); -} - - -void SHOSet::evaluate_vgl(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) -{ - PosType x = (r - center) / length; - evaluate_hermite(x); - evaluate_d0(x, psi); - evaluate_d1(x, psi, dpsi); - evaluate_d2(x, 
psi, d2psi); -} - - -void SHOSet::evaluate_hermite(const PosType& xpos) -{ - for (int d = 0; d < DIM; ++d) - { - int nh = qn_max[d]; - if (nh > 0) - { - RealType x = xpos[d]; - hermite(d, 0) = 1.0; - RealType Hnm2 = 0.0; - RealType Hnm1 = 1.0; - for (int n = 1; n < nh; ++n) - { - RealType Hn = 2 * (x * Hnm1 - (n - 1) * Hnm2); - hermite(d, n) = Hn; - Hnm2 = Hnm1; - Hnm1 = Hn; - } - } - } -} - - -void SHOSet::evaluate_d0(const PosType& xpos, ValueVector& psi) -{ - using std::exp; - for (int d = 0; d < DIM; ++d) - { - RealType x = xpos[d]; - RealType g = exp(-.5 * x * x); - for (int n = 0; n < qn_max[d]; ++n) - { - bvalues(d, n) = prefactors[n] * g * hermite(d, n); - } - } - for (int s = 0; s < state_info.size(); ++s) - { - const SHOState& state = state_info[s]; - RealType phi = 1.0; - for (int d = 0; d < DIM; ++d) - phi *= bvalues(d, state.quantum_number[d]); - psi[s] = phi; - } -} - - -void SHOSet::evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi) -{ - RealType ol = 1.0 / length; - for (int d = 0; d < DIM; ++d) - { - RealType x = xpos[d]; - RealType Hnm1 = 0.0; - for (int n = 0; n < qn_max[d]; ++n) - { - RealType Hn = hermite(d, n); - bvalues(d, n) = (-x + 2 * n * Hnm1 / Hn) * ol; - Hnm1 = Hn; - } - } - for (int s = 0; s < state_info.size(); ++s) - { - const SHOState& state = state_info[s]; - TinyVector dphi; - for (int d = 0; d < DIM; ++d) - dphi[d] = bvalues(d, state.quantum_number[d]); - dphi *= psi[s]; - dpsi[s] = dphi; - } -} - - -void SHOSet::evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& d2psi) -{ - RealType ol2 = 1.0 / (length * length); - for (int d = 0; d < DIM; ++d) - { - RealType x = xpos[d]; - RealType x2 = x * x; - for (int n = 0; n < qn_max[d]; ++n) - { - bvalues(d, n) = (-1.0 + x2 - 2 * n) * ol2; - } - } - for (int s = 0; s < state_info.size(); ++s) - { - const SHOState& state = state_info[s]; - ValueType d2phi = 0.0; - for (int d = 0; d < DIM; ++d) - d2phi += bvalues(d, state.quantum_number[d]); - d2phi *= psi[s]; 
- d2psi[s] = d2phi; - } -} - - -void SHOSet::evaluate_check(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) -{ - using std::exp; - using std::sqrt; - - evaluate_vgl(r, psi, dpsi, d2psi); - - const int N = 6; - RealType H[N], dH[N], d2H[N], pre[N]; - RealType p[N], dp[N], d2p[N]; - - pre[0] = 1.0 / (sqrt(sqrt(M_PI) * length)); - for (int n = 1; n < N; ++n) - pre[n] = pre[n - 1] / sqrt(2. * n); - - for (int d = 0; d < DIM; ++d) - { - RealType x = (r[d] - center[d]) / length; - RealType x2 = x * x, x3 = x * x * x, x4 = x * x * x * x, x5 = x * x * x * x * x; - H[0] = 1; - dH[0] = 0; - d2H[0] = 0; - H[1] = 2 * x; - dH[1] = 2; - d2H[1] = 0; - H[2] = 4 * x2 - 2; - dH[2] = 8 * x; - d2H[2] = 8; - H[3] = 8 * x3 - 12 * x; - dH[3] = 24 * x2 - 12; - d2H[3] = 48 * x; - H[4] = 16 * x4 - 48 * x2 + 12; - dH[4] = 64 * x3 - 96 * x; - d2H[4] = 192 * x2 - 96; - H[5] = 32 * x5 - 160 * x3 + 120 * x; - dH[5] = 160 * x4 - 480 * x2 + 120; - d2H[5] = 640 * x3 - 960 * x; - RealType g = exp(-x2 / 2); - for (int n = 0; n < N; ++n) - { - p[n] = pre[n] * g * H[n]; - dp[n] = pre[n] * g * (-x * H[n] + dH[n]); - d2p[n] = pre[n] * g * ((x2 - 1) * H[n] - 2 * x * dH[n] + d2H[n]); - } - app_log() << "eval check dim = " << d << " x = " << x << std::endl; - app_log() << " hermite check" << std::endl; - for (int n = 0; n < qn_max[d]; ++n) - { - app_log() << " " << n << " " << H[n] << std::endl; - app_log() << " " << n << " " << hermite(d, n) << std::endl; - } - app_log() << " phi d0 check" << std::endl; - for (int n = 0; n < qn_max[d]; ++n) - { - app_log() << " " << n << " " << p[n] << std::endl; - app_log() << " " << n << " " << d0_values(d, n) << std::endl; - } - app_log() << " phi d1 check" << std::endl; - for (int n = 0; n < qn_max[d]; ++n) - { - app_log() << " " << n << " " << dp[n] / p[n] << std::endl; - app_log() << " " << n << " " << d1_values(d, n) << std::endl; - } - app_log() << " phi d2 check" << std::endl; - for (int n = 0; n < qn_max[d]; ++n) - { - app_log() << " " << n << 
" " << d2p[n] / p[n] << std::endl; - app_log() << " " << n << " " << d2_values(d, n) << std::endl; - } - } -} - - -void SHOSet::test_derivatives() -{ - int n = 3; - PosType c = 5.123; - PosType L = 1.0; - PosType drg = L / n; - PosType dr = L / 1000; - int nphi = state_info.size(); - - PosType o2dr, odr2; - - ValueVector vpsi, vpsitmp; - GradVector vdpsi, vdpsin; - ValueVector vd2psi, vd2psin; - - - vpsi.resize(nphi); - vdpsi.resize(nphi); - vd2psi.resize(nphi); - - vpsitmp.resize(nphi); - vdpsin.resize(nphi); - vd2psin.resize(nphi); - - - ValueVector psi(&vpsi[0], size()); - GradVector dpsi(&vdpsi[0], size()); - ValueVector d2psi(&vd2psi[0], size()); - - ValueVector psitmp(&vpsitmp[0], size()); - GradVector dpsin(&vdpsin[0], size()); - ValueVector d2psin(&vd2psin[0], size()); - - - app_log() << " loading dr" << std::endl; - - RealType odr2sum = 0.0; - for (int d = 0; d < DIM; ++d) - { - RealType odr = 1.0 / dr[d]; - o2dr[d] = .5 * odr; - odr2[d] = odr * odr; - odr2sum += odr2[d]; - } - - app_log() << "SHOSet::test_derivatives" << std::endl; - - const SimulationCell simulation_cell; - ParticleSet Ps(simulation_cell); - - int p = 0; - PosType r, rtmp; - for (int i = 0; i < n; ++i) - { - r[0] = c[0] + i * drg[0]; - for (int j = 0; j < n; ++j) - { - r[1] = c[1] + j * drg[1]; - for (int k = 0; k < n; ++k) - { - r[2] = c[2] + k * drg[2]; - - //evaluate_check(r,psi,dpsi,d2psi); - //APP_ABORT("SHOSet eval check"); - - evaluate_vgl(r, psi, dpsi, d2psi); - - for (int m = 0; m < nphi; ++m) - d2psin[m] = -2 * odr2sum * psi[m]; - for (int d = 0; d < DIM; ++d) - { - rtmp = r; - rtmp[d] += dr[d]; - evaluate_v(rtmp, psitmp); - for (int m = 0; m < nphi; ++m) - { - ValueType phi = psitmp[m]; - dpsin[m][d] = phi * o2dr[d]; - d2psin[m] += phi * odr2[d]; - } - rtmp = r; - rtmp[d] -= dr[d]; - evaluate_v(rtmp, psitmp); - for (int m = 0; m < nphi; ++m) - { - ValueType phi = psitmp[m]; - dpsin[m][d] -= phi * o2dr[d]; - d2psin[m] += phi * odr2[d]; - } - } - - RealType dphi_diff = 0.0; - 
RealType d2phi_diff = 0.0; - for (int m = 0; m < nphi; ++m) - for (int d = 0; d < DIM; ++d) - dphi_diff = std::max(dphi_diff, std::abs(dpsi[m][d] - dpsin[m][d]) / std::abs(dpsin[m][d])); - for (int m = 0; m < nphi; ++m) - d2phi_diff = std::max(d2phi_diff, std::abs(d2psi[m] - d2psin[m]) / std::abs(d2psin[m])); - app_log() << " " << p << " " << dphi_diff << " " << d2phi_diff << std::endl; - app_log() << " derivatives" << std::endl; - for (int m = 0; m < nphi; ++m) - { - std::string qn = ""; - for (int d = 0; d < DIM; ++d) - qn += int2string(state_info[m].quantum_number[d]) + " "; - app_log() << " " << qn; - for (int d = 0; d < DIM; ++d) - app_log() << real(dpsi[m][d]) << " "; - app_log() << std::endl; - app_log() << " " << qn; - for (int d = 0; d < DIM; ++d) - app_log() << real(dpsin[m][d]) << " "; - app_log() << std::endl; - } - app_log() << " laplacians" << std::endl; - PosType x = r / length; - for (int m = 0; m < nphi; ++m) - { - std::string qn = ""; - for (int d = 0; d < DIM; ++d) - qn += int2string(state_info[m].quantum_number[d]) + " "; - app_log() << " " << qn << real(d2psi[m] / psi[m]) << std::endl; - app_log() << " " << qn << real(d2psin[m] / psi[m]) << std::endl; - } - p++; - } - } - } - - app_log() << "end SHOSet::test_derivatives" << std::endl; -} - - -void SHOSet::test_overlap() -{ - app_log() << "SHOSet::test_overlap" << std::endl; - - - //linear - int d = 0; - - app_log() << " length = " << length << std::endl; - app_log() << " prefactors" << std::endl; - for (int n = 0; n < qn_max[d]; ++n) - app_log() << " " << n << " " << prefactors[n] << std::endl; - - app_log() << " 1d overlap" << std::endl; - - ValueVector vpsi; - vpsi.resize(size()); - ValueVector psi(&vpsi[0], size()); - - double xmax = 4.0; - double dx = .1; - double dr = length * dx; - - int nphi = qn_max[d]; - Array omat; - omat.resize(nphi, nphi); - for (int i = 0; i < nphi; ++i) - for (int j = 0; j < nphi; ++j) - omat(i, j) = 0.0; - - PosType xp = 0.0; - for (double x = -xmax; x < xmax; x 
+= dx) - { - xp[d] = x; - evaluate_hermite(xp); - evaluate_d0(xp, psi); - - for (int i = 0; i < nphi; ++i) - for (int j = 0; j < nphi; ++j) - omat(i, j) += bvalues(d, i) * bvalues(d, j) * dr; - } - - for (int i = 0; i < nphi; ++i) - { - app_log() << std::endl; - for (int j = 0; j < nphi; ++j) - app_log() << omat(i, j) << " "; - } - app_log() << std::endl; - - - //volumetric - app_log() << " 3d overlap" << std::endl; - double dV = dr * dr * dr; - nphi = size(); - omat.resize(nphi, nphi); - for (int i = 0; i < nphi; ++i) - for (int j = 0; j < nphi; ++j) - omat(i, j) = 0.0; - for (double x = -xmax; x < xmax; x += dx) - for (double y = -xmax; y < xmax; y += dx) - for (double z = -xmax; z < xmax; z += dx) - { - xp[0] = x; - xp[1] = y; - xp[2] = z; - evaluate_hermite(xp); - evaluate_d0(xp, psi); - - for (int i = 0; i < nphi; ++i) - for (int j = 0; j < nphi; ++j) - omat(i, j) += std::abs(psi[i] * psi[j]) * dV; - } - for (int i = 0; i < nphi; ++i) - { - app_log() << std::endl; - for (int j = 0; j < nphi; ++j) - app_log() << omat(i, j) << " "; - } - app_log() << std::endl; - - - app_log() << "end SHOSet::test_overlap" << std::endl; -} - - -void SHOSet::evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& grad_grad_grad_logdet) -{ - not_implemented("evaluateThirdDeriv(P,first,last,dddlogdet)"); -} - -void SHOSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) -{ - not_implemented("evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet)"); -} - -void SHOSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) -{ - not_implemented("evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet,dddlogdet)"); -} - -void SHOSet::evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, 
- GradMatrix& gradphi) -{ - not_implemented("evaluateGradSource(P,first,last,source,iat,dphi)"); -} - -void SHOSet::evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, - GradMatrix& grad_lapl_phi) -{ - not_implemented("evaluateGradSource(P,first,last,source,iat,dphi,ddphi,dd2phi)"); -} - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSet.h b/src/QMCWaveFunctions/HarmonicOscillator/SHOSet.h index 3503449b7f..cfce7722a2 100644 --- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSet.h +++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSet.h @@ -14,132 +14,12 @@ #ifndef QMCPLUSPLUS_SHOSET_H #define QMCPLUSPLUS_SHOSET_H -#include "QMCWaveFunctions/SPOSet.h" -#include "QMCWaveFunctions/SPOInfo.h" - +#include "Configuration.h" +#include "QMCWaveFunctions/HarmonicOscillator/SHOSetT.h" namespace qmcplusplus { -struct SHOState : public SPOInfo -{ - TinyVector quantum_number; - - SHOState() - { - quantum_number = -1; - energy = 0.0; - } - - ~SHOState() override {} - - inline void set(TinyVector qn, RealType e) - { - quantum_number = qn; - energy = e; - } - - inline void sho_report(const std::string& pad = "") const - { - app_log() << pad << "qn=" << quantum_number << " e=" << energy << std::endl; - } -}; - - -struct SHOSet : public SPOSet -{ - using value_type = ValueMatrix::value_type; - using grad_type = GradMatrix::value_type; - - RealType length; - PosType center; - - int nmax; - TinyVector qn_max; - std::vector state_info; - std::vector prefactors; - Array hermite; - Array bvalues; - Array d0_values; - Array d1_values; - Array d2_values; - - //construction/destruction - SHOSet(const std::string& my_name, RealType l, PosType c, const std::vector& sho_states); - - ~SHOSet() override; - - std::string getClassName() const override { return "SHOSet"; } - - void initialize(); - - //SPOSet interface methods - std::unique_ptr makeClone() 
const override; - - void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override; - - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - - - //local functions - void evaluate_v(PosType r, ValueVector& psi); - void evaluate_vgl(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - void evaluate_hermite(const PosType& xpos); - void evaluate_d0(const PosType& xpos, ValueVector& psi); - void evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi); - void evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& d2psi); - void report(const std::string& pad = "") const override; - void test_derivatives(); - void test_overlap(); - void evaluate_check(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - //empty methods - /// number of orbitals is determined only by initial request - inline void setOrbitalSetSize(int norbs) override {} - - ///unimplemented functions call this to abort - inline void not_implemented(const std::string& method) - { - APP_ABORT("SHOSet::" + method + " has not been implemented."); - } - - - //methods to be implemented in the future (possibly) - void evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& dddlogdet) override; - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& ddlogdet) override; - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& ddlogdet, - GGGMatrix& dddlogdet) override; - void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi) override; - void evaluateGradSource(const 
ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& dphi, - HessMatrix& ddphi, - GradMatrix& dlapl_phi) override; -}; +using SHOSet = SHOSetT; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilder.cpp b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilder.cpp deleted file mode 100644 index bc3adf1d7a..0000000000 --- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilder.cpp +++ /dev/null @@ -1,217 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -////////////////////////////////////////////////////////////////////////////////////// - - -#include "SHOSetBuilder.h" -#include "QMCWaveFunctions/SPOSetInputInfo.h" -#include "OhmmsData/AttributeSet.h" -#include "Utilities/IteratorUtility.h" -#include "Utilities/string_utils.h" - - -namespace qmcplusplus -{ -SHOSetBuilder::SHOSetBuilder(ParticleSet& P, Communicate* comm) : SPOSetBuilder("SHO", comm), Ps(P) -{ - ClassName = "SHOSetBuilder"; - legacy = false; - app_log() << "Constructing SHOSetBuilder" << std::endl; - reset(); -} - - -SHOSetBuilder::~SHOSetBuilder() {} - - -void SHOSetBuilder::reset() -{ - nstates = 0; - mass = -1.0; - energy = -1.0; - length = -1.0; - center = 0.0; -} - - -std::unique_ptr SHOSetBuilder::createSPOSetFromXML(xmlNodePtr cur) -{ - APP_ABORT("SHOSetBuilder::createSPOSetFromXML SHOSetBuilder should not use legacy interface"); - - app_log() << "SHOSetBuilder::createSHOSet(xml) " << std::endl; - - SPOSetInputInfo 
input(cur); - - return createSPOSet(cur, input); -} - - -std::unique_ptr SHOSetBuilder::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) -{ - app_log() << "SHOSetBuilder::createSHOSet(indices) " << std::endl; - - using std::ceil; - using std::sqrt; - - reset(); - - // read parameters - std::string spo_name = "sho"; - OhmmsAttributeSet attrib; - attrib.add(spo_name, "name"); - attrib.add(spo_name, "id"); - attrib.add(mass, "mass"); - attrib.add(energy, "energy"); - attrib.add(energy, "frequency"); - attrib.add(length, "length"); - attrib.add(center, "center"); - attrib.add(nstates, "size"); - attrib.put(cur); - - if (energy < 0.0) - energy = 1.0; - if (mass < 0.0 && length < 0.0) - length = 1.0; - if (mass < 0.0) - mass = 1.0 / (energy * length * length); - else if (length < 0.0) - length = 1.0 / sqrt(mass * energy); - - // initialize states and/or adjust basis - int smax = -1; - if (input.has_index_info) - smax = std::max(smax, input.max_index()); - if (input.has_energy_info) - { - smax = std::max(smax, (int)ceil(input.max_energy() / energy)); - } - if (smax < 0) - APP_ABORT("SHOSetBuilder::Initialize\n invalid basis size"); - update_basis_states(smax); - - // create sho state request - indices_t& indices = input.get_indices(states); - std::vector sho_states; - for (int i = 0; i < indices.size(); ++i) - sho_states.push_back(basis_states[indices[i]]); - - // make the sposet - auto sho = std::make_unique(spo_name, length, center, sho_states); - - sho->report(" "); - //sho->test_derivatives(); - //sho->test_overlap(); - //APP_ABORT("SHOSetBuilder check"); - - return sho; -} - - -void SHOSetBuilder::update_basis_states(int smax) -{ - using std::ceil; - using std::exp; - using std::log; - using std::sort; - using std::sqrt; - - int states_required = smax - basis_states.size() + 1; - if (states_required > 0) - { - RealType N = smax + 1; - if (DIM == 1) - nmax = smax; - else if (DIM == 2) - nmax = ceil(.5 * sqrt(8. * N + 1.) 
- 1.5); - else if (DIM == 3) - { - RealType f = exp(1.0 / 3.0 * log(81. * N + 3. * sqrt(729. * N * N - 3.))); - nmax = ceil(f / 3. + 1. / f - 2.); - } - else - APP_ABORT("SHOSetBuilder::update_basis_states dimensions other than 1, 2, or 3 are not supported"); - int ndim = nmax + 1; - ind_dims[DIM - 1] = 1; - for (int d = DIM - 2; d > -1; --d) - ind_dims[d] = ind_dims[d + 1] * ndim; - int s = 0; - int ntot = pow(ndim, DIM); - TinyVector qnumber; - for (int m = 0; m < ntot; ++m) - { - int n = 0; // principal quantum number - int nrem = m; - for (int d = 0; d < DIM; ++d) - { - int i = nrem / ind_dims[d]; - nrem -= i * ind_dims[d]; - qnumber[d] = i; - n += i; - } - if (n <= nmax) - { - SHOState* st; - if (s < basis_states.size()) - st = basis_states[s]; - else - { - st = new SHOState(); - basis_states.add(st); - } - RealType e = energy * (n + .5 * DIM); - st->set(qnumber, e); - s++; - } - } - basis_states.energy_sort(1e-6, true); - } - - // reset energy scale even if no states need to be added - for (int i = 0; i < basis_states.size(); ++i) - { - SHOState& state = *basis_states[i]; - const TinyVector& qnumber = state.quantum_number; - int n = 0; - for (int d = 0; d < DIM; ++d) - n += qnumber[d]; - state.energy = energy * (n + .5 * DIM); - } - - //somewhat redundant, but necessary - clear_states(0); - states[0]->finish(basis_states.states); - - if (basis_states.size() <= smax) - APP_ABORT("SHOSetBuilder::update_basis_states failed to make enough states"); -} - - -void SHOSetBuilder::report(const std::string& pad) -{ - app_log() << pad << "SHOSetBuilder report" << std::endl; - app_log() << pad << " dimension = " << DIM << std::endl; - app_log() << pad << " mass = " << mass << std::endl; - app_log() << pad << " frequency = " << energy << std::endl; - app_log() << pad << " energy = " << energy << std::endl; - app_log() << pad << " length = " << length << std::endl; - app_log() << pad << " center = " << center << std::endl; - app_log() << pad << " nstates = " << nstates << 
std::endl; - app_log() << pad << " nmax = " << nmax << std::endl; - app_log() << pad << " ind_dims = " << ind_dims << std::endl; - app_log() << pad << " # basis states = " << basis_states.size() << std::endl; - app_log() << pad << " basis_states" << std::endl; - for (int s = 0; s < basis_states.size(); ++s) - basis_states[s]->report(pad + " " + int2string(s) + " "); - app_log() << pad << "end SHOSetBuilder report" << std::endl; - app_log().flush(); -} - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilder.h b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilder.h index fc2b75be22..a35851c32a 100644 --- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilder.h +++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilder.h @@ -14,48 +14,12 @@ #ifndef QMCPLUSPLUS_SHO_BASIS_BUILDER_H #define QMCPLUSPLUS_SHO_BASIS_BUILDER_H -#include "QMCWaveFunctions/HarmonicOscillator/SHOSet.h" -#include "QMCWaveFunctions/SPOSetBuilder.h" -#include "QMCWaveFunctions/SPOSetInfo.h" +#include "Configuration.h" +#include "QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h" namespace qmcplusplus { -struct SHOSetBuilder : public SPOSetBuilder -{ - //enum{DIM=OHMMS_DIM} - - ParticleSet& Ps; - - RealType length; - RealType mass; - RealType energy; - PosType center; - - int nstates; - int nmax; - TinyVector ind_dims; - - SPOSetInfoSimple basis_states; - - //construction/destruction - SHOSetBuilder(ParticleSet& P, Communicate* comm); - - ~SHOSetBuilder() override; - - //reset parameters - void reset(); - - //SPOSetBuilder interface - std::unique_ptr createSPOSetFromXML(xmlNodePtr cur) override; - - std::unique_ptr createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) override; - - //local functions - void update_basis_states(int smax); - void report(const std::string& pad = ""); -}; - +using SHOSetBuilder = SHOSetBuilderT; } // namespace qmcplusplus - #endif diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp 
b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp index 77ae1eda5a..7c309d5b87 100644 --- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp +++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp @@ -208,9 +208,12 @@ SHOSetBuilderT::report(const std::string& pad) app_log().flush(); } +#ifndef QMC_COMPLEX template class SHOSetBuilderT; template class SHOSetBuilderT; +#else template class SHOSetBuilderT>; template class SHOSetBuilderT>; +#endif } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp index b4e55a258d..1286b07393 100644 --- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp +++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp @@ -553,9 +553,12 @@ SHOSetT::evaluateGradSource(const ParticleSetT& P, int first, int last, } // Class concrete types from ValueType +#ifndef QMC_COMPLEX template class SHOSetT; template class SHOSetT; +#else template class SHOSetT>; template class SHOSetT>; +#endif } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/Jastrow/CountingGaussian.h b/src/QMCWaveFunctions/Jastrow/CountingGaussian.h index b8b99e451b..3ac5ba0dea 100644 --- a/src/QMCWaveFunctions/Jastrow/CountingGaussian.h +++ b/src/QMCWaveFunctions/Jastrow/CountingGaussian.h @@ -14,6 +14,8 @@ #include "OhmmsData/AttributeSet.h" #include "VariableSet.h" +#include "QMCWaveFunctions/OptimizableObject.h" + #include namespace qmcplusplus @@ -28,7 +30,6 @@ class CountingGaussian using TensorType = QMCTraits::TensorType; using real_type = optimize::VariableSet::real_type; - using opt_variables_type = optimize::VariableSet; // enumerations for axis parameters enum A_vars diff --git a/src/QMCWaveFunctions/Jastrow/CountingGaussianRegion.h b/src/QMCWaveFunctions/Jastrow/CountingGaussianRegion.h index 3543b80270..2a46a3f76d 100644 --- a/src/QMCWaveFunctions/Jastrow/CountingGaussianRegion.h +++ 
b/src/QMCWaveFunctions/Jastrow/CountingGaussianRegion.h @@ -30,7 +30,6 @@ class CountingGaussianRegion using TensorType = QMCTraits::TensorType; using real_type = optimize::VariableSet::real_type; - using opt_variables_type = optimize::VariableSet; // counting function pointers std::vector> C; diff --git a/src/QMCWaveFunctions/Jastrow/CountingJastrow.h b/src/QMCWaveFunctions/Jastrow/CountingJastrow.h index eb0e10b867..178d23d8c8 100644 --- a/src/QMCWaveFunctions/Jastrow/CountingJastrow.h +++ b/src/QMCWaveFunctions/Jastrow/CountingJastrow.h @@ -16,6 +16,7 @@ #include "Particle/ParticleSet.h" #include "QMCWaveFunctions/WaveFunctionComponent.h" #include "QMCWaveFunctions/Jastrow/CountingGaussianRegion.h" +#include "QMCWaveFunctions/OptimizableObject.h" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/Jastrow/eeI_JastrowBuilder.h b/src/QMCWaveFunctions/Jastrow/eeI_JastrowBuilder.h index 2d5942e745..f4e28f31f8 100644 --- a/src/QMCWaveFunctions/Jastrow/eeI_JastrowBuilder.h +++ b/src/QMCWaveFunctions/Jastrow/eeI_JastrowBuilder.h @@ -14,11 +14,12 @@ #ifndef QMCPLUSPLUS_EEI_JASTROW_BUILDER_H #define QMCPLUSPLUS_EEI_JASTROW_BUILDER_H #include "QMCWaveFunctions/WaveFunctionComponentBuilder.h" +#include "ParticleSet.h" namespace qmcplusplus { //forward declaration -class ParticleSet; + class eeI_JastrowBuilder : public WaveFunctionComponentBuilder { diff --git a/src/QMCWaveFunctions/Jastrow/kSpaceJastrowBuilder.h b/src/QMCWaveFunctions/Jastrow/kSpaceJastrowBuilder.h index 4c65edbedc..7c00d15a3d 100644 --- a/src/QMCWaveFunctions/Jastrow/kSpaceJastrowBuilder.h +++ b/src/QMCWaveFunctions/Jastrow/kSpaceJastrowBuilder.h @@ -16,12 +16,10 @@ #define QMCPLUSPLUS_KSPACE_JASTROW_BUILDER_H #include "QMCWaveFunctions/WaveFunctionComponentBuilder.h" #include "QMCWaveFunctions/Jastrow/kSpaceJastrow.h" +#include "Particle/ParticleSet.h" namespace qmcplusplus { -//forward declaration -class ParticleSet; - class kSpaceJastrowBuilder : public WaveFunctionComponentBuilder { public: diff 
--git a/src/QMCWaveFunctions/LCAO/AOBasisBuilder.cpp b/src/QMCWaveFunctions/LCAO/AOBasisBuilder.cpp deleted file mode 100644 index 46dcde65d6..0000000000 --- a/src/QMCWaveFunctions/LCAO/AOBasisBuilder.cpp +++ /dev/null @@ -1,846 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2020 QMCPACK developers. -// -// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "AOBasisBuilder.h" -#include "Utilities/ProgressReportEngine.h" -#include "OhmmsData/AttributeSet.h" -#include "RadialOrbitalSetBuilder.h" -#include "SoaAtomicBasisSet.h" -#include "MultiQuinticSpline1D.h" -#include "MultiFunctorAdapter.h" -#include "Numerics/SoaCartesianTensor.h" -#include "Numerics/SoaSphericalTensor.h" - -namespace qmcplusplus -{ -template -AOBasisBuilder::AOBasisBuilder(const std::string& eName, Communicate* comm) - : MPIObjectBase(comm), - addsignforM(false), - expandlm(GAUSSIAN_EXPAND), - Morder("gaussian"), - sph("default"), - basisType("Numerical"), - elementType(eName), - Normalized("yes") -{ - // mmorales: for "Cartesian Gaussian", m is an integer that maps - // the component to Gamess notation, see Numerics/CartesianTensor.h - nlms_id["n"] = q_n; - nlms_id["l"] = q_l; - nlms_id["m"] = q_m; - nlms_id["s"] = q_s; -} - 
-template -bool AOBasisBuilder::put(xmlNodePtr cur) -{ - ReportEngine PRE("AtomicBasisBuilder", "put(xmlNodePtr)"); - //Register valid attributes attributes - OhmmsAttributeSet aAttrib; - aAttrib.add(basisType, "type"); - aAttrib.add(sph, "angular"); - aAttrib.add(addsignforM, "expM"); - aAttrib.add(Morder, "expandYlm"); - aAttrib.add(Normalized, "normalized"); - aAttrib.put(cur); - PRE.echo(cur); - if (sph == "spherical") - addsignforM = 1; //include (-1)^m - - if (Morder == "gaussian") - expandlm = GAUSSIAN_EXPAND; - else if (Morder == "natural") - expandlm = NATURAL_EXPAND; - else if (Morder == "no") - expandlm = DONOT_EXPAND; - else if (Morder == "pyscf") - { - expandlm = MOD_NATURAL_EXPAND; - addsignforM = 1; - if (sph != "spherical") - { - myComm->barrier_and_abort(" Error: expandYlm='pyscf' only compatible with angular='spherical'. Aborting.\n"); - } - } - - if (sph == "cartesian" || Morder == "Gamess") - { - expandlm = CARTESIAN_EXPAND; - addsignforM = 0; - } - - if (Morder == "Dirac") - { - expandlm = DIRAC_CARTESIAN_EXPAND; - addsignforM = 0; - if (sph != "cartesian") - myComm->barrier_and_abort(" Error: expandYlm='Dirac' only compatible with angular='cartesian'. 
Aborting\n"); - } - - // Numerical basis is a special case - if (basisType == "Numerical") - myComm->barrier_and_abort("Purely numerical atomic orbitals are not supported any longer."); - - return true; -} - -template -bool AOBasisBuilder::putH5(hdf_archive& hin) -{ - ReportEngine PRE("AtomicBasisBuilder", "putH5(hin)"); - std::string CenterID, basisName; - - if (myComm->rank() == 0) - { - hin.read(sph, "angular"); - hin.read(CenterID, "elementType"); - hin.read(Normalized, "normalized"); - hin.read(Morder, "expandYlm"); - hin.read(basisName, "name"); - } - - myComm->bcast(sph); - myComm->bcast(Morder); - myComm->bcast(CenterID); - myComm->bcast(Normalized); - myComm->bcast(basisName); - myComm->bcast(basisType); - myComm->bcast(addsignforM); - - if (sph == "spherical") - addsignforM = 1; //include (-1)^m - - if (Morder == "gaussian") - expandlm = GAUSSIAN_EXPAND; - else if (Morder == "natural") - expandlm = NATURAL_EXPAND; - else if (Morder == "no") - expandlm = DONOT_EXPAND; - else if (Morder == "pyscf") - { - expandlm = MOD_NATURAL_EXPAND; - addsignforM = 1; - if (sph != "spherical") - { - myComm->barrier_and_abort(" Error: expandYlm='pyscf' only compatible with angular='spherical'. Aborting.\n"); - } - } - - if (sph == "cartesian" || Morder == "Gamess") - { - expandlm = CARTESIAN_EXPAND; - addsignforM = 0; - } - - if (Morder == "Dirac") - { - expandlm = DIRAC_CARTESIAN_EXPAND; - addsignforM = 0; - if (sph != "cartesian") - myComm->barrier_and_abort(" Error: expandYlm='Dirac' only compatible with angular='cartesian'. 
Aborting\n"); - } - app_log() << R"(" << std::endl; - - return true; -} - - -template -std::unique_ptr AOBasisBuilder::createAOSet(xmlNodePtr cur) -{ - ReportEngine PRE("AtomicBasisBuilder", "createAOSet(xmlNodePtr)"); - app_log() << " AO BasisSet for " << elementType << "\n"; - - if (expandlm != CARTESIAN_EXPAND) - { - if (addsignforM) - app_log() << " Spherical Harmonics contain (-1)^m factor" << std::endl; - else - app_log() << " Spherical Harmonics DO NOT contain (-1)^m factor" << std::endl; - } - - switch (expandlm) - { - case (GAUSSIAN_EXPAND): - app_log() << " Angular momentum m expanded according to Gaussian" << std::endl; - break; - case (NATURAL_EXPAND): - app_log() << " Angular momentum m expanded as -l, ... ,l" << std::endl; - break; - case (MOD_NATURAL_EXPAND): - app_log() << " Angular momentum m expanded as -l, ... ,l, with the exception of L=1 (1,-1,0)" << std::endl; - break; - case (CARTESIAN_EXPAND): - app_log() << " Angular momentum expanded in cartesian functions x^lx y^ly z^lz according to Gamess" << std::endl; - break; - case (DIRAC_CARTESIAN_EXPAND): - app_log() << " Angular momentum expanded in cartesian functions in DIRAC ordering" << std::endl; - break; - default: - app_log() << " Angular momentum m is explicitly given." 
<< std::endl; - } - - QuantumNumberType nlms; - std::string rnl; - int Lmax(0); //maxmimum angular momentum of this center - int num(0); //the number of localized basis functions of this center - //process the basic property: maximun angular momentum, the number of basis functions to be added - std::vector radGroup; - xmlNodePtr cur1 = cur->xmlChildrenNode; - xmlNodePtr gptr = 0; - while (cur1 != NULL) - { - std::string cname1((const char*)(cur1->name)); - if (cname1 == "basisGroup") - { - radGroup.push_back(cur1); - const int l = std::stoi(getXMLAttributeValue(cur1, "l")); - Lmax = std::max(Lmax, l); - //expect that only Rnl is given - if (expandlm == CARTESIAN_EXPAND || expandlm == DIRAC_CARTESIAN_EXPAND) - num += (l + 1) * (l + 2) / 2; - else if (expandlm) - num += 2 * l + 1; - else - num++; - } - else if (cname1 == "grid") - { - gptr = cur1; - } - cur1 = cur1->next; - } - - //create a new set of atomic orbitals sharing a center with (Lmax, num) - //if(addsignforM) the basis function has (-1)^m sqrt(2)Re(Ylm) - auto aos = std::make_unique(Lmax, addsignforM); - aos->LM.resize(num); - aos->NL.resize(num); - - //Now, add distinct Radial Orbitals and (l,m) channels - RadialOrbitalSetBuilder radFuncBuilder(myComm, *aos); - radFuncBuilder.Normalized = (Normalized == "yes"); - radFuncBuilder.addGrid(gptr, basisType); //assign a radial grid for the new center - std::vector::iterator it(radGroup.begin()); - std::vector::iterator it_end(radGroup.end()); - std::vector all_nl; - while (it != it_end) - { - cur1 = (*it); - xmlAttrPtr att = cur1->properties; - while (att != NULL) - { - std::string aname((const char*)(att->name)); - if (aname == "rid" || aname == "id") - //accept id/rid - { - rnl = (const char*)(att->children->content); - } - else - { - std::map::iterator iit = nlms_id.find(aname); - if (iit != nlms_id.end()) - //valid for n,l,m,s - { - nlms[(*iit).second] = atoi((const char*)(att->children->content)); - } - } - att = att->next; - } - //add Ylm channels - 
app_log() << " R(n,l,m,s) " << nlms[0] << " " << nlms[1] << " " << nlms[2] << " " << nlms[3] << std::endl; - std::map::iterator rnl_it = RnlID.find(rnl); - if (rnl_it == RnlID.end()) - { - int nl = aos->RnlID.size(); - if (radFuncBuilder.addRadialOrbital(cur1, basisType, nlms)) - RnlID[rnl] = nl; - all_nl.push_back(nl); - } - else - { - all_nl.push_back((*rnl_it).second); - } - ++it; - } - - if (expandYlm(aos.get(), all_nl, expandlm) != num) - myComm->barrier_and_abort("expandYlm doesn't match the number of basis."); - radFuncBuilder.finalize(); - //aos->Rmax can be set small - //aos->setRmax(0); - aos->setBasisSetSize(-1); - app_log() << " Maximum Angular Momentum = " << aos->Ylm.lmax() << std::endl - << " Number of Radial functors = " << aos->RnlID.size() << std::endl - << " Basis size = " << aos->getBasisSetSize() << "\n\n"; - return aos; -} - - -template -std::unique_ptr AOBasisBuilder::createAOSetH5(hdf_archive& hin) -{ - ReportEngine PRE("AOBasisBuilder:", "createAOSetH5(std::string)"); - app_log() << " AO BasisSet for " << elementType << "\n"; - - if (expandlm != CARTESIAN_EXPAND) - { - if (addsignforM) - app_log() << " Spherical Harmonics contain (-1)^m factor" << std::endl; - else - app_log() << " Spherical Harmonics DO NOT contain (-1)^m factor" << std::endl; - } - - switch (expandlm) - { - case (GAUSSIAN_EXPAND): - app_log() << " Angular momentum m expanded according to Gaussian" << std::endl; - break; - case (NATURAL_EXPAND): - app_log() << " Angular momentum m expanded as -l, ... ,l" << std::endl; - break; - case (MOD_NATURAL_EXPAND): - app_log() << " Angular momentum m expanded as -l, ... 
,l, with the exception of L=1 (1,-1,0)" << std::endl; - break; - case (CARTESIAN_EXPAND): - app_log() << " Angular momentum expanded in cartesian functions x^lx y^ly z^lz according to Gamess" << std::endl; - break; - case (DIRAC_CARTESIAN_EXPAND): - app_log() << " Angular momentum expanded in cartesian functions in DIRAC ordering" << std::endl; - break; - default: - app_log() << " Angular momentum m is explicitly given." << std::endl; - } - - QuantumNumberType nlms; - std::string rnl; - int Lmax(0); //maxmimum angular momentum of this center - int num(0); //the number of localized basis functions of this center - - int numbasisgroups(0); - if (myComm->rank() == 0) - { - if (!hin.readEntry(numbasisgroups, "NbBasisGroups")) - PRE.error("Could not read NbBasisGroups in H5; Probably Corrupt H5 file", true); - } - myComm->bcast(numbasisgroups); - - for (int i = 0; i < numbasisgroups; i++) - { - std::string basisGroupID = "basisGroup" + std::to_string(i); - int l(0); - if (myComm->rank() == 0) - { - hin.push(basisGroupID); - hin.read(l, "l"); - hin.pop(); - } - myComm->bcast(l); - - Lmax = std::max(Lmax, l); - //expect that only Rnl is given - if (expandlm == CARTESIAN_EXPAND || expandlm == DIRAC_CARTESIAN_EXPAND) - num += (l + 1) * (l + 2) / 2; - else if (expandlm) - num += 2 * l + 1; - else - num++; - } - - //create a new set of atomic orbitals sharing a center with (Lmax, num) - //if(addsignforM) the basis function has (-1)^m sqrt(2)Re(Ylm) - auto aos = std::make_unique(Lmax, addsignforM); - aos->LM.resize(num); - aos->NL.resize(num); - - //Now, add distinct Radial Orbitals and (l,m) channels - RadialOrbitalSetBuilder radFuncBuilder(myComm, *aos); - radFuncBuilder.Normalized = (Normalized == "yes"); - radFuncBuilder.addGridH5(hin); //assign a radial grid for the new center - std::vector all_nl; - for (int i = 0; i < numbasisgroups; i++) - { - std::string basisGroupID = "basisGroup" + std::to_string(i); - if (myComm->rank() == 0) - { - hin.push(basisGroupID); - 
hin.read(rnl, "rid"); - hin.read(nlms[0], "n"); - hin.read(nlms[1], "l"); - } - myComm->bcast(rnl); - myComm->bcast(nlms[0]); - myComm->bcast(nlms[1]); - - //add Ylm channels - app_log() << " R(n,l,m,s) " << nlms[0] << " " << nlms[1] << " " << nlms[2] << " " << nlms[3] << std::endl; - std::map::iterator rnl_it = RnlID.find(rnl); - if (rnl_it == RnlID.end()) - { - int nl = aos->RnlID.size(); - if (radFuncBuilder.addRadialOrbitalH5(hin, basisType, nlms)) - RnlID[rnl] = nl; - all_nl.push_back(nl); - } - else - { - all_nl.push_back((*rnl_it).second); - } - - if (myComm->rank() == 0) - hin.pop(); - } - - if (expandYlm(aos.get(), all_nl, expandlm) != num) - myComm->barrier_and_abort("expandYlm doesn't match the number of basis."); - radFuncBuilder.finalize(); - //aos->Rmax can be set small - //aos->setRmax(0); - aos->setBasisSetSize(-1); - app_log() << " Maximum Angular Momentum = " << aos->Ylm.lmax() << std::endl - << " Number of Radial functors = " << aos->RnlID.size() << std::endl - << " Basis size = " << aos->getBasisSetSize() << "\n\n"; - return aos; -} - - -template -int AOBasisBuilder::expandYlm(COT* aos, std::vector& all_nl, int expandlm) -{ - int num = 0; - if (expandlm == GAUSSIAN_EXPAND) - { - app_log() << "Expanding Ylm according to Gaussian98" << std::endl; - for (int nl = 0; nl < aos->RnlID.size(); nl++) - { - int l = aos->RnlID[nl][q_l]; - app_log() << "Adding " << 2 * l + 1 << " spherical orbitals for l= " << l << std::endl; - switch (l) - { - case (0): - aos->LM[num] = aos->Ylm.index(0, 0); - aos->NL[num] = nl; - num++; - break; - case (1): //px(1),py(-1),pz(0) - aos->LM[num] = aos->Ylm.index(1, 1); - aos->NL[num] = nl; - num++; - aos->LM[num] = aos->Ylm.index(1, -1); - aos->NL[num] = nl; - num++; - aos->LM[num] = aos->Ylm.index(1, 0); - aos->NL[num] = nl; - num++; - break; - default: //0,1,-1,2,-2,...,l,-l - aos->LM[num] = aos->Ylm.index(l, 0); - aos->NL[num] = nl; - num++; - for (int tm = 1; tm <= l; tm++) - { - aos->LM[num] = aos->Ylm.index(l, tm); - 
aos->NL[num] = nl; - num++; - aos->LM[num] = aos->Ylm.index(l, -tm); - aos->NL[num] = nl; - num++; - } - break; - } - } - } - else if (expandlm == MOD_NATURAL_EXPAND) - { - app_log() << "Expanding Ylm as L=1 as (1,-1,0) and L>1 as -l,-l+1,...,l-1,l" << std::endl; - for (int nl = 0; nl < aos->RnlID.size(); nl++) - { - int l = aos->RnlID[nl][q_l]; - app_log() << " Adding " << 2 * l + 1 << " spherical orbitals" << std::endl; - if (l == 1) - { - //px(1),py(-1),pz(0) - aos->LM[num] = aos->Ylm.index(1, 1); - aos->NL[num] = nl; - num++; - aos->LM[num] = aos->Ylm.index(1, -1); - aos->NL[num] = nl; - num++; - aos->LM[num] = aos->Ylm.index(1, 0); - aos->NL[num] = nl; - num++; - } - else - { - for (int tm = -l; tm <= l; tm++, num++) - { - aos->LM[num] = aos->Ylm.index(l, tm); - aos->NL[num] = nl; - } - } - } - } - else if (expandlm == NATURAL_EXPAND) - { - app_log() << "Expanding Ylm as -l,-l+1,...,l-1,l" << std::endl; - for (int nl = 0; nl < aos->RnlID.size(); nl++) - { - int l = aos->RnlID[nl][q_l]; - app_log() << " Adding " << 2 * l + 1 << " spherical orbitals" << std::endl; - for (int tm = -l; tm <= l; tm++, num++) - { - aos->LM[num] = aos->Ylm.index(l, tm); - aos->NL[num] = nl; - } - } - } - else if (expandlm == CARTESIAN_EXPAND) - { - app_log() << "Expanding Ylm (angular function) according to Gamess using cartesian gaussians" << std::endl; - for (int nl = 0; nl < aos->RnlID.size(); nl++) - { - int l = aos->RnlID[nl][q_l]; - app_log() << "Adding " << (l + 1) * (l + 2) / 2 << " cartesian gaussian orbitals for l= " << l << std::endl; - int nbefore = 0; - for (int i = 0; i < l; i++) - nbefore += (i + 1) * (i + 2) / 2; - for (int i = 0; i < (l + 1) * (l + 2) / 2; i++) - { - aos->LM[num] = nbefore + i; - aos->NL[num] = nl; - num++; - } - } - } - else if (expandlm == DIRAC_CARTESIAN_EXPAND) - { - app_log() << "Expanding Ylm (angular function) according to DIRAC using cartesian gaussians" << std::endl; - for (int nl = 0; nl < aos->RnlID.size(); nl++) - { - int l = 
aos->RnlID[nl][q_l]; - app_log() << "Adding " << (l + 1) * (l + 2) / 2 << " cartesian gaussian orbitals for l= " << l << std::endl; - int nbefore = 0; - for (int i = 0; i < l; i++) - nbefore += (i + 1) * (i + 2) / 2; - switch (l) - { - case (0): - aos->LM[num] = nbefore + 0; - aos->NL[num] = nl; - num++; - break; - case (1): - aos->LM[num] = nbefore + 0; - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 1; - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 2; - aos->NL[num] = nl; - num++; - break; - case (2): - aos->LM[num] = nbefore + 0; //xx - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 3; //xy - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 4; //xz - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 1; //yy - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 5; //yz - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 2; //zz - aos->NL[num] = nl; - num++; - break; - case (3): - aos->LM[num] = nbefore + 0; //xxx - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 3; //xxy - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 4; //xxz - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 5; //xyy - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 9; //xyz - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 7; //xzz - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 1; //yyy - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 6; //yyz - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 8; //yzz - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 2; //zzz - aos->NL[num] = nl; - num++; - break; - case (4): - aos->LM[num] = nbefore + 0; //400 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 3; //310 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 4; //301 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 9; //220 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 12; //211 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 10; //202 - 
aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 5; //130 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 13; //121 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 14; //112 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 7; //103 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 1; //040 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 6; //031 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 11; //022 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 8; //013 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 2; //004 - aos->NL[num] = nl; - num++; - break; - case (5): - aos->LM[num] = nbefore + 0; //500 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 3; //410 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 4; //401 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 9; //320 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 15; //311 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 10; //302 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 11; //230 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 18; //221 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 19; //212 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 13; //203 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 5; //140 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 16; //131 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 20; //122 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 17; //113 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 7; //104 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 1; //050 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 6; //041 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 12; //032 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 14; //023 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 8; //014 - aos->NL[num] = nl; - num++; - aos->LM[num] = 
nbefore + 2; //005 - aos->NL[num] = nl; - num++; - break; - case (6): - aos->LM[num] = nbefore + 0; //600 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 3; //510 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 4; //501 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 9; //420 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 15; //411 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 10; //402 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 18; //330 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 21; //321 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 22; //312 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 19; //303 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 11; //240 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 23; //231 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 27; //222 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 25; //213 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 13; //204 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 5; //150 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 16; //141 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 24; //132 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 26; //123 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 17; //114 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 7; //105 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 1; //060 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 6; //051 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 12; //042 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 20; //033 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 14; //024 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 8; //015 - aos->NL[num] = nl; - num++; - aos->LM[num] = nbefore + 2; //006 - aos->NL[num] = nl; - num++; - break; - default: - myComm->barrier_and_abort("Cartesian 
Tensor only defined up to Lmax=6. Aborting\n"); - break; - } - } - } - else - { - for (int ind = 0; ind < all_nl.size(); ind++) - { - int nl = all_nl[ind]; - int l = aos->RnlID[nl][q_l]; - int m = aos->RnlID[nl][q_m]; - //assign the index for real Spherical Harmonic with (l,m) - aos->LM[num] = aos->Ylm.index(l, m); - //assign the index for radial orbital with (n,l) - aos->NL[num] = nl; - //increment number of basis functions - num++; - } - } - return num; -} - -template class AOBasisBuilder< - SoaAtomicBasisSet, SoaCartesianTensor>>; -template class AOBasisBuilder< - SoaAtomicBasisSet, SoaSphericalTensor>>; -template class AOBasisBuilder>, - SoaCartesianTensor>>; -template class AOBasisBuilder>, - SoaSphericalTensor>>; -template class AOBasisBuilder< - SoaAtomicBasisSet>, SoaCartesianTensor>>; -template class AOBasisBuilder< - SoaAtomicBasisSet>, SoaSphericalTensor>>; - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/AOBasisBuilder.h b/src/QMCWaveFunctions/LCAO/AOBasisBuilder.h index 3ca6015545..eb708e7794 100644 --- a/src/QMCWaveFunctions/LCAO/AOBasisBuilder.h +++ b/src/QMCWaveFunctions/LCAO/AOBasisBuilder.h @@ -17,59 +17,11 @@ #ifndef QMCPLUSPLUS_ATOMICORBITALBUILDER_H #define QMCPLUSPLUS_ATOMICORBITALBUILDER_H - -#include "Message/MPIObjectBase.h" -#include "hdf/hdf_archive.h" -#include "QMCWaveFunctions/SPOSet.h" +#include "Configuration.h" +#include "QMCWaveFunctions/LCAO/AOBasisBuilderT.h" namespace qmcplusplus { -/** atomic basisset builder - * @tparam COT, CenteredOrbitalType = SoaAtomicBasisSet - * - * Reimplement AtomiSPOSetBuilder.h - */ -template -class AOBasisBuilder : public MPIObjectBase -{ -public: - enum - { - DONOT_EXPAND = 0, - GAUSSIAN_EXPAND = 1, - NATURAL_EXPAND, - CARTESIAN_EXPAND, - MOD_NATURAL_EXPAND, - DIRAC_CARTESIAN_EXPAND - }; - -private: - bool addsignforM; - int expandlm; - std::string Morder; - std::string sph; - std::string basisType; - std::string elementType; - std::string Normalized; - - ///map for the radial 
orbitals - std::map RnlID; - - ///map for (n,l,m,s) to its quantum number index - std::map nlms_id; - -public: - AOBasisBuilder(const std::string& eName, Communicate* comm); - - bool put(xmlNodePtr cur); - bool putH5(hdf_archive& hin); - - SPOSet* createSPOSetFromXML(xmlNodePtr cur) { return 0; } - - std::unique_ptr createAOSet(xmlNodePtr cur); - std::unique_ptr createAOSetH5(hdf_archive& hin); - - int expandYlm(COT* aos, std::vector& all_nl, int expandlm = DONOT_EXPAND); -}; +using AOBasisBuilder = AOBasisBuilderT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/CuspCorrection.h b/src/QMCWaveFunctions/LCAO/CuspCorrection.h index 08c2c42219..f89779584a 100644 --- a/src/QMCWaveFunctions/LCAO/CuspCorrection.h +++ b/src/QMCWaveFunctions/LCAO/CuspCorrection.h @@ -20,74 +20,15 @@ #ifndef QMCPLUSPLUS_CUSPCORRECTION_H #define QMCPLUSPLUS_CUSPCORRECTION_H -#include #include "Configuration.h" +#include "QMCWaveFunctions/LCAO/CuspCorrectionT.h" namespace qmcplusplus { -/** - * @brief Cusp correction parameters - * - * From "Scheme for adding electron-nuclear cusps to Gaussian orbitals" Ma, Towler, Drummond, and Needs - * JCP 122, 224322 (2005) - * - * Equations 7 and 8 in the paper define the correction. These are the parameters in those equations. 
- */ - -struct CuspCorrectionParameters -{ - using ValueType = QMCTraits::ValueType; - using RealType = QMCTraits::RealType; - - /// The cutoff radius - RealType Rc; - - /// A shift to keep correction to a single sign - RealType C; - - /// The sign of the wavefunction at the nucleus - RealType sg; - - /// The coefficients of the polynomial \f$p(r)\f$ in Eq 8 - TinyVector alpha; - - /// Flag to indicate the correction should be recalculated - int redo; - - CuspCorrectionParameters() : Rc(0.0), C(0.0), sg(1.0), alpha(0.0), redo(0) {} -}; - -/// Formulas for applying the cusp correction - -class CuspCorrection -{ - using RealType = QMCTraits::RealType; - -public: - inline RealType Rr(RealType r) const { return cparam.sg * std::exp(pr(r)); } - - inline RealType pr(RealType r) const - { - auto& alpha = cparam.alpha; - return alpha[0] + alpha[1] * r + alpha[2] * r * r + alpha[3] * r * r * r + alpha[4] * r * r * r * r; - } - - inline RealType dpr(RealType r) const - { - auto& alpha = cparam.alpha; - return alpha[1] + 2.0 * alpha[2] * r + 3.0 * alpha[3] * r * r + 4.0 * alpha[4] * r * r * r; - } - - inline RealType d2pr(RealType r) const - { - auto& alpha = cparam.alpha; - return 2.0 * alpha[2] + 6.0 * alpha[3] * r + 12.0 * alpha[4] * r * r; - } +using CuspCorrectionParameters = CuspCorrectionParametersT; - CuspCorrection(const CuspCorrectionParameters& param) : cparam(param) {} +using CuspCorrection = CuspCorrectionT; - CuspCorrectionParameters cparam; -}; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.cpp b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.cpp deleted file mode 100644 index 588f323eff..0000000000 --- a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.cpp +++ /dev/null @@ -1,789 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. 
-// See LICENSE file in top directory for details. -// -// Copyright (c) 2023 QMCPACK developers. -// -// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory -// Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory -////////////////////////////////////////////////////////////////////////////////////// - - -#include "CuspCorrectionConstruction.h" -#include "Message/Communicate.h" -#include "SoaCuspCorrectionBasisSet.h" -#include "Utilities/FairDivide.h" -#include "SoaLocalizedBasisSet.h" -#include "SoaAtomicBasisSet.h" -#include "MultiQuinticSpline1D.h" -#include "Numerics/MinimizeOneDim.h" -#include "OhmmsData/AttributeSet.h" - - -namespace qmcplusplus -{ -bool readCuspInfo(const std::string& cuspInfoFile, - const std::string& objectName, - int OrbitalSetSize, - Matrix& info) -{ - bool success = true; - int ncenter = info.rows(); - app_log() << "Reading cusp info from : " << cuspInfoFile << std::endl; - Libxml2Document adoc; - if (!adoc.parse(cuspInfoFile)) - { - app_log() << "Could not find precomputed cusp data for spo set: " << objectName << std::endl; - app_log() << "Recalculating data.\n"; - return false; - } - xmlNodePtr head = adoc.getRoot(); - head = head->children; - xmlNodePtr cur = NULL, ctr; - while (head != NULL) - { - std::string cname(getNodeName(head)); - if (cname == "sposet") - { - std::string name; - OhmmsAttributeSet spoAttrib; - spoAttrib.add(name, "name"); - spoAttrib.put(head); - if (name == objectName) - { - cur = head; - break; - } - } - head = head->next; - } - if (cur == NULL) - { - app_log() << "Could not find precomputed cusp data for spo set: " << objectName << std::endl; - app_log() << "Recalculating 
data.\n"; - return false; - } - else - { - app_log() << "Found precomputed cusp data for spo set: " << objectName << std::endl; - } - cur = cur->children; - while (cur != NULL) - { - std::string cname(getNodeName(cur)); - if (cname == "center") - { - int num = -1; - OhmmsAttributeSet Attrib; - Attrib.add(num, "num"); - Attrib.put(cur); - if (num < 0 || num >= ncenter) - { - APP_ABORT("Error with cusp info xml block. incorrect center number. \n"); - } - ctr = cur->children; - while (ctr != NULL) - { - std::string cname(getNodeName(ctr)); - if (cname == "orbital") - { - int orb = -1; - OhmmsAttributeSet orbAttrib; - QMCTraits::RealType a1(0.0), a2, a3, a4, a5, a6, a7, a8, a9; - orbAttrib.add(orb, "num"); - orbAttrib.add(a1, "redo"); - orbAttrib.add(a2, "C"); - orbAttrib.add(a3, "sg"); - orbAttrib.add(a4, "rc"); - orbAttrib.add(a5, "a1"); - orbAttrib.add(a6, "a2"); - orbAttrib.add(a7, "a3"); - orbAttrib.add(a8, "a4"); - orbAttrib.add(a9, "a5"); - orbAttrib.put(ctr); - if (orb < OrbitalSetSize) - { - info(num, orb).redo = a1; - info(num, orb).C = a2; - info(num, orb).sg = a3; - info(num, orb).Rc = a4; - info(num, orb).alpha[0] = a5; - info(num, orb).alpha[1] = a6; - info(num, orb).alpha[2] = a7; - info(num, orb).alpha[3] = a8; - info(num, orb).alpha[4] = a9; - } - } - ctr = ctr->next; - } - } - cur = cur->next; - } - return success; -} - -void saveCusp(const std::string& filename, const Matrix& info, const std::string& id) -{ - const int num_centers = info.rows(); - const int orbital_set_size = info.cols(); - xmlDocPtr doc = xmlNewDoc((const xmlChar*)"1.0"); - xmlNodePtr cuspRoot = xmlNewNode(NULL, BAD_CAST "qmcsystem"); - xmlNodePtr spo = xmlNewNode(NULL, (const xmlChar*)"sposet"); - xmlNewProp(spo, (const xmlChar*)"name", (const xmlChar*)id.c_str()); - xmlAddChild(cuspRoot, spo); - xmlDocSetRootElement(doc, cuspRoot); - - for (int center_idx = 0; center_idx < num_centers; center_idx++) - { - xmlNodePtr ctr = xmlNewNode(NULL, (const xmlChar*)"center"); - 
std::ostringstream num; - num << center_idx; - xmlNewProp(ctr, (const xmlChar*)"num", (const xmlChar*)num.str().c_str()); - - for (int mo_idx = 0; mo_idx < orbital_set_size; mo_idx++) - { - std::ostringstream num0, C, sg, rc, a1, a2, a3, a4, a5; - xmlNodePtr orb = xmlNewNode(NULL, (const xmlChar*)"orbital"); - num0 << mo_idx; - xmlNewProp(orb, (const xmlChar*)"num", (const xmlChar*)num0.str().c_str()); - - - C.setf(std::ios::scientific, std::ios::floatfield); - C.precision(14); - C << info(center_idx, mo_idx).C; - sg.setf(std::ios::scientific, std::ios::floatfield); - sg.precision(14); - sg << info(center_idx, mo_idx).sg; - rc.setf(std::ios::scientific, std::ios::floatfield); - rc.precision(14); - rc << info(center_idx, mo_idx).Rc; - a1.setf(std::ios::scientific, std::ios::floatfield); - a1.precision(14); - a1 << info(center_idx, mo_idx).alpha[0]; - a2.setf(std::ios::scientific, std::ios::floatfield); - a2.precision(14); - a2 << info(center_idx, mo_idx).alpha[1]; - a3.setf(std::ios::scientific, std::ios::floatfield); - a3.precision(14); - a3 << info(center_idx, mo_idx).alpha[2]; - a4.setf(std::ios::scientific, std::ios::floatfield); - a4.precision(14); - a4 << info(center_idx, mo_idx).alpha[3]; - a5.setf(std::ios::scientific, std::ios::floatfield); - a5.precision(14); - a5 << info(center_idx, mo_idx).alpha[4]; - xmlNewProp(orb, (const xmlChar*)"C", (const xmlChar*)C.str().c_str()); - xmlNewProp(orb, (const xmlChar*)"sg", (const xmlChar*)sg.str().c_str()); - xmlNewProp(orb, (const xmlChar*)"rc", (const xmlChar*)rc.str().c_str()); - xmlNewProp(orb, (const xmlChar*)"a1", (const xmlChar*)a1.str().c_str()); - xmlNewProp(orb, (const xmlChar*)"a2", (const xmlChar*)a2.str().c_str()); - xmlNewProp(orb, (const xmlChar*)"a3", (const xmlChar*)a3.str().c_str()); - xmlNewProp(orb, (const xmlChar*)"a4", (const xmlChar*)a4.str().c_str()); - xmlNewProp(orb, (const xmlChar*)"a5", (const xmlChar*)a5.str().c_str()); - xmlAddChild(ctr, orb); - } - xmlAddChild(spo, ctr); - } - - 
app_log() << "Saving resulting cusp Info xml block to: " << filename << std::endl; - xmlSaveFormatFile(filename.c_str(), doc, 1); - xmlFreeDoc(doc); -} - -void broadcastCuspInfo(CuspCorrectionParameters& param, Communicate& Comm, int root) -{ -#ifdef HAVE_MPI - std::vector buffer(9); - buffer[0] = param.Rc; - buffer[1] = param.C; - buffer[2] = param.sg; - buffer[3] = param.alpha[0]; - buffer[4] = param.alpha[1]; - buffer[5] = param.alpha[2]; - buffer[6] = param.alpha[3]; - buffer[7] = param.alpha[4]; - buffer[8] = param.redo; - - Comm.comm.broadcast(buffer.begin(), buffer.end(), root); - - param.Rc = buffer[0]; - param.C = buffer[1]; - param.sg = buffer[2]; - param.alpha[0] = buffer[3]; - param.alpha[1] = buffer[4]; - param.alpha[2] = buffer[5]; - param.alpha[3] = buffer[6]; - param.alpha[4] = buffer[7]; - param.redo = buffer[8] == 0.0 ? 0 : 1; -#endif -} - -void splitPhiEta(int center, const std::vector& corrCenter, LCAOrbitalSet& Phi, LCAOrbitalSet& Eta) -{ - using RealType = QMCTraits::RealType; - - std::vector is_s_orbital(Phi.myBasisSet->BasisSetSize, false); - std::vector correct_this_center(corrCenter.size(), false); - correct_this_center[center] = corrCenter[center]; - - Phi.myBasisSet->queryOrbitalsForSType(correct_this_center, is_s_orbital); - - int nOrbs = Phi.getOrbitalSetSize(); - int bss = Phi.getBasisSetSize(); - - for (int i = 0; i < bss; i++) - { - if (is_s_orbital[i]) - { - auto& cref(*(Eta.C)); - for (int k = 0; k < nOrbs; k++) - cref(k, i) = 0.0; //Eta->C(k,i) = 0.0; - } - else - { - auto& cref(*(Phi.C)); - for (int k = 0; k < nOrbs; k++) - cref(k, i) = 0.0; //Phi->C(k,i) = 0.0; - } - } -} - -void removeSTypeOrbitals(const std::vector& corrCenter, LCAOrbitalSet& Phi) -{ - using RealType = QMCTraits::RealType; - - std::vector is_s_orbital(Phi.myBasisSet->BasisSetSize, false); - - Phi.myBasisSet->queryOrbitalsForSType(corrCenter, is_s_orbital); - - int nOrbs = Phi.getOrbitalSetSize(); - int bss = Phi.getBasisSetSize(); - - for (int i = 0; i < bss; 
i++) - { - if (is_s_orbital[i]) - { - auto& cref(*(Phi.C)); - for (int k = 0; k < nOrbs; k++) - cref(k, i) = 0.0; - } - } -} - - -// Will be the corrected value for r < rc and the original wavefunction for r > rc -void computeRadialPhiBar(ParticleSet* targetP, - ParticleSet* sourceP, - int curOrb_, - int curCenter_, - SPOSet* Phi, - Vector& xgrid, - Vector& rad_orb, - const CuspCorrectionParameters& data) -{ - OneMolecularOrbital phiMO(targetP, sourceP, Phi); - phiMO.changeOrbital(curCenter_, curOrb_); - CuspCorrection cusp(data); - - for (int i = 0; i < xgrid.size(); i++) - { - rad_orb[i] = phiBar(cusp, xgrid[i], phiMO); - } -} - -using RealType = QMCTraits::RealType; - -// Get the ideal local energy at one point -// Eq. 17 in the paper. Coefficients are taken from the paper. -RealType getOneIdealLocalEnergy(RealType r, RealType Z, RealType beta0) -{ - RealType beta[7] = {3.25819, -15.0126, 33.7308, -42.8705, 31.2276, -12.1316, 1.94692}; - RealType idealEL = beta0; - RealType r1 = r * r; - for (int i = 0; i < 7; i++) - { - idealEL += beta[i] * r1; - r1 *= r; - } - return idealEL * Z * Z; -} - -// Get the ideal local energy for a vector of positions -void getIdealLocalEnergy(const ValueVector& pos, RealType Z, RealType Rc, RealType ELorigAtRc, ValueVector& ELideal) -{ - // assert(pos.size() == ELideal.size() - RealType beta0 = 0.0; - RealType tmp = getOneIdealLocalEnergy(Rc, Z, beta0); - beta0 = (ELorigAtRc - tmp) / (Z * Z); - for (int i = 0; i < pos.size(); i++) - { - ELideal[i] = getOneIdealLocalEnergy(pos[i], Z, beta0); - } -} - -// Evaluate constraints. Equations 9-13 in the paper. 
-void evalX(RealType valRc, - GradType gradRc, - ValueType lapRc, - RealType Rc, - RealType Z, - RealType C, - RealType valAtZero, - RealType eta0, - TinyVector& X) -{ - X[0] = std::log(std::abs(valRc - C)); - X[1] = gradRc[0] / (valRc - C); - X[2] = (lapRc - 2.0 * gradRc[0] / Rc) / (valRc - C); - X[3] = -Z * (valAtZero + eta0) / (valAtZero - C); - X[4] = std::log(std::abs(valAtZero - C)); -} - -// Compute polynomial coefficients from constraints. Eq. 14 in the paper. -void X2alpha(const TinyVector& X, RealType Rc, TinyVector& alpha) -{ - RealType RcInv = 1.0 / Rc, RcInv2 = RcInv * RcInv; - alpha[0] = X[4]; - alpha[1] = X[3]; - alpha[2] = 6.0 * X[0] * RcInv2 - 3.0 * X[1] * RcInv + X[2] * 0.5 - 3.0 * X[3] * RcInv - 6.0 * X[4] * RcInv2 - - 0.5 * X[1] * X[1]; - alpha[3] = -8.0 * X[0] * RcInv2 * RcInv + 5.0 * X[1] * RcInv2 - X[2] * RcInv + 3.0 * X[3] * RcInv2 + - 8.0 * X[4] * RcInv2 * RcInv + X[1] * X[1] * RcInv; - alpha[4] = 3.0 * X[0] * RcInv2 * RcInv2 - 2.0 * X[1] * RcInv2 * RcInv + 0.5 * X[2] * RcInv2 - X[3] * RcInv2 * RcInv - - 3.0 * X[4] * RcInv2 * RcInv2 - 0.5 * X[1] * X[1] * RcInv2; -} - -// Eq. 16 in the paper. -RealType getZeff(RealType Z, RealType etaAtZero, RealType phiBarAtZero) { return Z * (1.0 + etaAtZero / phiBarAtZero); } - -RealType phiBar(const CuspCorrection& cusp, RealType r, OneMolecularOrbital& phiMO) -{ - if (r <= cusp.cparam.Rc) - return cusp.cparam.C + cusp.Rr(r); - else - return phiMO.phi(r); -} - -// Compute the effective one-electron local energy at a vector of points. -// Eq. 15 in the paper for r < Rc. Normal local energy for R > Rc. 
-void getCurrentLocalEnergy(const ValueVector& pos, - RealType Zeff, - RealType Rc, - RealType originalELatRc, - CuspCorrection& cusp, - OneMolecularOrbital& phiMO, - ValueVector& ELcurr) -{ - // assert(pos.size() == ELcurr.size()); - ValueType val; - GradType grad; - ValueType lap; - phiMO.phi_vgl(Rc, val, grad, lap); - RealType dE = originalELatRc - (-0.5 * lap / val - Zeff / Rc); - for (int i = 0; i < pos.size(); i++) - { - RealType r = pos[i]; - // prevent NaN's if phiBar is zero - RealType offset = 1e-12; - if (r <= Rc) - { - RealType dp = cusp.dpr(r); - ELcurr[i] = -0.5 * cusp.Rr(r) * (2.0 * dp / r + cusp.d2pr(r) + dp * dp) / (offset + phiBar(cusp, r, phiMO)) - - Zeff / r + dE; - } - else - { - phiMO.phi_vgl(pos[i], val, grad, lap); - ELcurr[i] = -0.5 * lap / val - Zeff / r + dE; - } - } -} - -// Return value is local energy at Rc -RealType getOriginalLocalEnergy(const ValueVector& pos, - RealType Zeff, - RealType Rc, - OneMolecularOrbital& phiMO, - ValueVector& ELorig) -{ - // assert(pos.size() == ELorig.size()); - - ValueType val; - GradType grad; - ValueType lap; - for (int i = 0; i < pos.size(); i++) - { - RealType r = pos[i]; - phiMO.phi_vgl(r, val, grad, lap); - ELorig[i] = -0.5 * lap / val - Zeff / r; - } - - phiMO.phi_vgl(Rc, val, grad, lap); - return -0.5 * lap / val - Zeff / Rc; -} - -// Sum of squares difference between the current local energy and the ideal local energy. -// This is the objective function to minimize. -RealType getELchi2(const ValueVector& ELcurr, const ValueVector& ELideal) -{ - assert(ELcurr.size() == ELideal.size()); - - RealType chi2 = 0.0; - for (int i = 0; i < ELcurr.size(); i++) - { - RealType diff = ELcurr[i] - ELideal[i]; - chi2 += diff * diff; - } - return chi2; -} - -struct ValGradLap -{ - ValueType val; - GradType grad; - ValueType lap; -}; - -// Compute the chi squared distance given a value for phi at zero. 
-RealType evaluateForPhi0Body(RealType phi0, - ValueVector& pos, - ValueVector& ELcurr, - ValueVector& ELideal, - CuspCorrection& cusp, - OneMolecularOrbital& phiMO, - ValGradLap phiAtRc, - RealType etaAtZero, - RealType ELorigAtRc, - RealType Z) -{ - cusp.cparam.sg = phi0 > 0.0 ? 1.0 : -1.0; - cusp.cparam.C = (phiAtRc.val * phi0 < 0.0) ? 1.5 * phiAtRc.val : 0.0; - TinyVector X; - evalX(phiAtRc.val, phiAtRc.grad, phiAtRc.lap, cusp.cparam.Rc, Z, cusp.cparam.C, phi0, etaAtZero, X); - X2alpha(X, cusp.cparam.Rc, cusp.cparam.alpha); - RealType Zeff = getZeff(Z, etaAtZero, phiBar(cusp, 0.0, phiMO)); - getCurrentLocalEnergy(pos, Zeff, cusp.cparam.Rc, ELorigAtRc, cusp, phiMO, ELcurr); - RealType chi2 = getELchi2(ELcurr, ELideal); - return chi2; -} - -// Optimize free parameter (value of phi at zero) to minimize distance to ideal local energy. -// Output is return value and parameter values are in cusp.cparam -RealType minimizeForPhiAtZero(CuspCorrection& cusp, - OneMolecularOrbital& phiMO, - RealType Z, - RealType eta0, - ValueVector& pos, - ValueVector& ELcurr, - ValueVector& ELideal, - RealType start_phi0) -{ - ValGradLap vglAtRc; - ValueVector tmp_pos(0); - ValueVector ELorig(0); - RealType Zeff = getZeff(Z, eta0, phiBar(cusp, 0.0, phiMO)); - - RealType ELorigAtRc = getOriginalLocalEnergy(tmp_pos, Zeff, cusp.cparam.Rc, phiMO, ELorig); - getIdealLocalEnergy(pos, Z, cusp.cparam.Rc, ELorigAtRc, ELideal); - phiMO.phi_vgl(cusp.cparam.Rc, vglAtRc.val, vglAtRc.grad, vglAtRc.lap); - - Bracket_min_t bracket(start_phi0, 0.0, 0.0, false); - try - { - bracket = bracket_minimum( - [&](RealType x) -> RealType { - return evaluateForPhi0Body(x, pos, ELcurr, ELideal, cusp, phiMO, vglAtRc, eta0, ELorigAtRc, Z); - }, - start_phi0); - } - catch (const std::runtime_error& e) - { - APP_ABORT("Bracketing minimum failed for finding phi0. 
\n"); - } - - auto min_res = find_minimum( - [&](RealType x) -> RealType { - return evaluateForPhi0Body(x, pos, ELcurr, ELideal, cusp, phiMO, vglAtRc, eta0, ELorigAtRc, Z); - }, - bracket); - - start_phi0 = min_res.first; - - return min_res.second; -} - - -// Optimize the cutoff radius. There is an inner loop optimizing for phi0 for each value of Rc. -// Elcurr and ELideal are expected to have the correct size on input (same size as pos) -// Output is parameter values in cusp.cparam -void minimizeForRc(CuspCorrection& cusp, - OneMolecularOrbital& phiMO, - RealType Z, - RealType Rc_init, - RealType Rc_max, - RealType eta0, - ValueVector& pos, - ValueVector& ELcurr, - ValueVector& ELideal) -{ - Bracket_min_t bracket(Rc_init, 0.0, 0.0, false); - RealType start_phi0 = phiMO.phi(0.0); - try - { - bracket = bracket_minimum( - [&](RealType x) -> RealType { - cusp.cparam.Rc = x; - return minimizeForPhiAtZero(cusp, phiMO, Z, eta0, pos, ELcurr, ELideal, start_phi0); - }, - Rc_init, Rc_max); - } - catch (const std::runtime_error& e) - { - APP_ABORT("Bracketing minimum failed for finding rc. 
\n"); - } - - - if (bracket.success) - { - auto min_res = find_minimum( - [&](RealType x) -> RealType { - cusp.cparam.Rc = x; - return minimizeForPhiAtZero(cusp, phiMO, Z, eta0, pos, ELcurr, ELideal, start_phi0); - }, - bracket); - } - else - { - cusp.cparam.Rc = bracket.a; - minimizeForPhiAtZero(cusp, phiMO, Z, eta0, pos, ELcurr, ELideal, start_phi0); - } -} - -// Modifies orbital set lcwc -void applyCuspCorrection(const Matrix& info, - ParticleSet& targetPtcl, - ParticleSet& sourcePtcl, - LCAOrbitalSet& lcao, - SoaCuspCorrection& cusp, - const std::string& id) -{ - const int num_centers = info.rows(); - const int orbital_set_size = info.cols(); - using RealType = QMCTraits::RealType; - - NewTimer& cuspApplyTimer = createGlobalTimer("CuspCorrectionConstruction::applyCuspCorrection", timer_level_medium); - - ScopedTimer cuspApplyTimerWrapper(cuspApplyTimer); - - LCAOrbitalSet phi("phi", std::unique_ptr(lcao.myBasisSet->makeClone())); - phi.setOrbitalSetSize(lcao.getOrbitalSetSize()); - - LCAOrbitalSet eta("eta", std::unique_ptr(lcao.myBasisSet->makeClone())); - eta.setOrbitalSetSize(lcao.getOrbitalSetSize()); - - std::vector corrCenter(num_centers, "true"); - - //What's this grid's lifespan? Why on the heap? - auto radial_grid = std::make_unique>(); - radial_grid->set(0.000001, 100.0, 1001); - - - Vector xgrid; - Vector rad_orb; - xgrid.resize(radial_grid->size()); - rad_orb.resize(radial_grid->size()); - for (int ig = 0; ig < radial_grid->size(); ig++) - { - xgrid[ig] = radial_grid->r(ig); - } - - for (int ic = 0; ic < num_centers; ic++) - { - *eta.C = *lcao.C; - *phi.C = *lcao.C; - - splitPhiEta(ic, corrCenter, phi, eta); - - // loop over MO index - cot must be an array (of len MO size) - // the loop is inside cot - in the multiqunitic - auto cot = std::make_unique>(); - cot->initializeRadialSet(*radial_grid, orbital_set_size); - //How is this useful? 
- // cot->ID.resize(orbital_set_size); - // for (int mo_idx = 0; mo_idx < orbital_set_size; mo_idx++) { - // cot->ID[mo_idx] = mo_idx; - // } - - for (int mo_idx = 0; mo_idx < orbital_set_size; mo_idx++) - { - computeRadialPhiBar(&targetPtcl, &sourcePtcl, mo_idx, ic, &phi, xgrid, rad_orb, info(ic, mo_idx)); - RealType yprime_i = (rad_orb[1] - rad_orb[0]) / (radial_grid->r(1) - radial_grid->r(0)); - OneDimQuinticSpline radial_spline(radial_grid->makeClone(), rad_orb); - radial_spline.spline(0, yprime_i, rad_orb.size() - 1, 0.0); - cot->addSpline(mo_idx, radial_spline); - - if (outputManager.isDebugActive()) - { - // For testing against AoS output - // Output phiBar to soaOrbs.downdet.C0.MO0 - int nElms = 500; - RealType dx = info(ic, mo_idx).Rc * 1.2 / nElms; - Vector pos; - Vector output_orb; - pos.resize(nElms); - output_orb.resize(nElms); - for (int i = 0; i < nElms; i++) - { - pos[i] = (i + 1.0) * dx; - } - computeRadialPhiBar(&targetPtcl, &sourcePtcl, mo_idx, ic, &phi, pos, output_orb, info(ic, mo_idx)); - std::string filename = "soaOrbs." 
+ id + ".C" + std::to_string(ic) + ".MO" + std::to_string(mo_idx); - std::cout << "Writing to " << filename << std::endl; - std::ofstream out(filename.c_str()); - out << "# r phiBar(r)" << std::endl; - for (int i = 0; i < nElms; i++) - { - out << pos[i] << " " << output_orb[i] << std::endl; - } - out.close(); - } - } - cusp.add(ic, std::move(cot)); - } - removeSTypeOrbitals(corrCenter, lcao); -} - -void generateCuspInfo(Matrix& info, - const ParticleSet& targetPtcl, - const ParticleSet& sourcePtcl, - const LCAOrbitalSet& lcao, - const std::string& id, - Communicate& Comm) -{ - const int num_centers = info.rows(); - const int orbital_set_size = info.cols(); - using RealType = QMCTraits::RealType; - - NewTimer& cuspCreateTimer = createGlobalTimer("CuspCorrectionConstruction::createCuspParameters", timer_level_medium); - NewTimer& splitPhiEtaTimer = createGlobalTimer("CuspCorrectionConstruction::splitPhiEta", timer_level_fine); - NewTimer& computeTimer = createGlobalTimer("CuspCorrectionConstruction::computeCorrection", timer_level_fine); - - ScopedTimer createCuspTimerWrapper(cuspCreateTimer); - - LCAOrbitalSet phi("phi", std::unique_ptr(lcao.myBasisSet->makeClone())); - phi.setOrbitalSetSize(lcao.getOrbitalSetSize()); - - LCAOrbitalSet eta("eta", std::unique_ptr(lcao.myBasisSet->makeClone())); - eta.setOrbitalSetSize(lcao.getOrbitalSetSize()); - - - std::vector corrCenter(num_centers, "true"); - - using GridType = OneDimGridBase; - int npts = 500; - - // Parallelize correction of MO's across MPI ranks - std::vector offset; - FairDivideLow(orbital_set_size, Comm.size(), offset); - - int start_mo = offset[Comm.rank()]; - int end_mo = offset[Comm.rank() + 1]; - app_log() << " Number of molecular orbitals to compute correction on this rank: " << end_mo - start_mo << std::endl; - -// Specify dynamic scheduling explicitly for load balancing. Each iteration should take enough -// time that scheduling overhead is not an issue. 
-#pragma omp parallel for schedule(dynamic) collapse(2) - for (int center_idx = 0; center_idx < num_centers; center_idx++) - { - for (int mo_idx = start_mo; mo_idx < end_mo; mo_idx++) - { - ParticleSet localTargetPtcl(targetPtcl); - ParticleSet localSourcePtcl(sourcePtcl); - - LCAOrbitalSet local_phi("local_phi", std::unique_ptr(phi.myBasisSet->makeClone())); - local_phi.setOrbitalSetSize(phi.getOrbitalSetSize()); - - LCAOrbitalSet local_eta("local_eta", std::unique_ptr(eta.myBasisSet->makeClone())); - local_eta.setOrbitalSetSize(eta.getOrbitalSetSize()); - -#pragma omp critical - app_log() << " Working on MO: " << mo_idx << " Center: " << center_idx << std::endl; - - { - ScopedTimer local_timer(splitPhiEtaTimer); - - *local_eta.C = *lcao.C; - *local_phi.C = *lcao.C; - splitPhiEta(center_idx, corrCenter, local_phi, local_eta); - } - - bool corrO = false; - auto& cref(*(local_phi.C)); - for (int ip = 0; ip < cref.cols(); ip++) - { - if (std::abs(cref(mo_idx, ip)) > 0) - { - corrO = true; - break; - } - } - - if (corrO) - { - OneMolecularOrbital etaMO(&localTargetPtcl, &localSourcePtcl, &local_eta); - etaMO.changeOrbital(center_idx, mo_idx); - - OneMolecularOrbital phiMO(&localTargetPtcl, &localSourcePtcl, &local_phi); - phiMO.changeOrbital(center_idx, mo_idx); - - SpeciesSet& tspecies(localSourcePtcl.getSpeciesSet()); - int iz = tspecies.addAttribute("charge"); - RealType Z = tspecies(iz, localSourcePtcl.GroupID[center_idx]); - - RealType Rc_max = 0.2; - RealType rc = 0.1; - - RealType dx = rc * 1.2 / npts; - ValueVector pos(npts); - ValueVector ELideal(npts); - ValueVector ELcurr(npts); - for (int i = 0; i < npts; i++) - { - pos[i] = (i + 1.0) * dx; - } - - RealType eta0 = etaMO.phi(0.0); - ValueVector ELorig(npts); - CuspCorrection cusp(info(center_idx, mo_idx)); - { - ScopedTimer local_timer(computeTimer); - minimizeForRc(cusp, phiMO, Z, rc, Rc_max, eta0, pos, ELcurr, ELideal); - } - // Update shared object. 
Each iteration accesses a different element and - // this is an array (no bookkeeping data to update), so no synchronization - // is necessary. - info(center_idx, mo_idx) = cusp.cparam; - } - } - } - - for (int root = 0; root < Comm.size(); root++) - { - int start_mo = offset[root]; - int end_mo = offset[root + 1]; - for (int mo_idx = start_mo; mo_idx < end_mo; mo_idx++) - { - for (int center_idx = 0; center_idx < num_centers; center_idx++) - { - broadcastCuspInfo(info(center_idx, mo_idx), Comm, root); - } - } - } -} - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.h b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.h index 3d1854cea4..a64c0c178d 100644 --- a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.h +++ b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.h @@ -14,271 +14,7 @@ #ifndef QMCPLUSPLUS_CUSP_CORRECTION_CONSTRUCTOR_H #define QMCPLUSPLUS_CUSP_CORRECTION_CONSTRUCTOR_H -#include "LCAOrbitalSet.h" -#include "SoaCuspCorrection.h" -#include "CuspCorrection.h" - -class Communicate; -namespace qmcplusplus -{ - -class ParticleSet; -/// Broadcast cusp correction parameters -void broadcastCuspInfo(CuspCorrectionParameters& param, Communicate& Comm, int root); - -class OneMolecularOrbital -{ - using RealType = QMCTraits::RealType; - using ValueType = QMCTraits::ValueType; - using GradType = QMCTraits::GradType; - using ValueVector = OrbitalSetTraits::ValueVector; - using GradVector = OrbitalSetTraits::GradVector; - using SPOSetPtr = SPOSet*; - -public: - RealType phi(RealType r) - { - TinyVector dr = 0; - dr[0] = r; - - targetPtcl->R[0] = sourcePtcl->R[curCenter]; - targetPtcl->makeMove(0, dr); - Psi1->evaluateValue(*targetPtcl, 0, val1); - - return val1[curOrb]; - } - - void phi_vgl(RealType r, RealType& val, GradType& grad, RealType& lap) - { - TinyVector dr = 0; - dr[0] = r; - - targetPtcl->R[0] = sourcePtcl->R[curCenter]; - targetPtcl->makeMove(0, dr); - Psi1->evaluateVGL(*targetPtcl, 0, val1, 
grad1, lap1); - - val = val1[curOrb]; - grad = grad1[curOrb]; - lap = lap1[curOrb]; - } - - OneMolecularOrbital(ParticleSet* targetP, ParticleSet* sourceP, SPOSetPtr Phi) - : targetPtcl(targetP), sourcePtcl(sourceP), curOrb(0), curCenter(0) - { - Psi1 = Phi; - int norb = Psi1->getOrbitalSetSize(); - val1.resize(norb); - grad1.resize(norb); - lap1.resize(norb); - } - - void changeOrbital(int centerIdx, int orbIdx) - { - curCenter = centerIdx; - curOrb = orbIdx; - } - -private: - /// Temporary storage for real wavefunction values - ValueVector val1; - GradVector grad1; - ValueVector lap1; - - /// target ParticleSet - ParticleSet* targetPtcl; - /// source ParticleSet - ParticleSet* sourcePtcl; - - /// Index of orbital - int curOrb; - - /// Index of atomic center - int curCenter; - - SPOSetPtr Psi1; -}; - -/// Read cusp correction parameters from XML file -bool readCuspInfo(const std::string& cuspInfoFile, - const std::string& objectName, - int OrbitalSetSize, - Matrix& info); - -/// save cusp correction info to a file. -void saveCusp(const std::string& filename, const Matrix& info, const std::string& id); - -/// Divide molecular orbital into atomic S-orbitals on this center (phi), and everything else (eta). -void splitPhiEta(int center, const std::vector& corrCenter, LCAOrbitalSet& phi, LCAOrbitalSet& eta); - -/// Remove S atomic orbitals from all molecular orbitals on all centers. 
-void removeSTypeOrbitals(const std::vector& corrCenter, LCAOrbitalSet& Phi); - -/// Compute the radial part of the corrected wavefunction -void computeRadialPhiBar(ParticleSet* targetP, - ParticleSet* sourceP, - int curOrb_, - int curCenter_, - SPOSet* Phi, - Vector& xgrid, - Vector& rad_orb, - const CuspCorrectionParameters& data); - -using RealType = QMCTraits::RealType; -using ValueType = QMCTraits::ValueType; -using GradType = QMCTraits::GradType; -using ValueVector = OrbitalSetTraits::ValueVector; - -/** Ideal local energy at one point - * @param r input radial distance - * @param Z nuclear charge - * @param beta0 adjustable parameter to make energy continuous at Rc - */ -RealType getOneIdealLocalEnergy(RealType r, RealType Z, RealType beta0); - -/** Ideal local energy at a vector of points - * @param pos input vector of radial distances - * @param Z nuclear charge - * @param Rc cutoff radius where the correction meets the actual orbital - * @param ELorigAtRc local energy at Rc. beta0 is adjusted to make energy continuous at Rc - * @param ELideal - output the ideal local energy at pos values - */ -void getIdealLocalEnergy(const ValueVector& pos, RealType Z, RealType Rc, RealType ELorigAtRc, ValueVector& ELideal); - -/** Evaluate various orbital quantities that enter as constraints on the correction - * @param valRc orbital value at Rc - * @param gradRc orbital gradient at Rc - * @param lapRc orbital laplacian at Rc - * @param Rc cutoff radius - * @param Z nuclear charge - * @param C offset to keep correction to a single sign - * @param valAtZero orbital value at zero - * @param eta0 value of non-corrected pieces of the orbital at zero - * @param X output - */ -void evalX(RealType valRc, - GradType gradRc, - ValueType lapRc, - RealType Rc, - RealType Z, - RealType C, - RealType valAtZero, - RealType eta0, - TinyVector& X); - -/** Convert constraints to polynomial parameters - * @param X input from evalX - * @param Rc cutoff radius - * @param alpha output the 
polynomial parameters for the correction - */ -void X2alpha(const TinyVector& X, RealType Rc, TinyVector& alpha); - -/** Effective nuclear charge to keep effective local energy finite at zero - * @param Z nuclear charge - * @param etaAtZero value of non-S orbitals at this center - * @param phiBarAtZero value of corrected orbital at zero - */ -RealType getZeff(RealType Z, RealType etaAtZero, RealType phiBarAtZero); - -RealType phiBar(const CuspCorrection& cusp, RealType r, OneMolecularOrbital& phiMO); - -/** Compute effective local energy at vector of points - * @param pos input vector of radial distances - * @param Zeff effective charge from getZeff - * @param Rc cutoff radius - * @param originalELatRc Local energy at the center from the uncorrected orbital - * @param cusp cusp correction parameters - * @param phiMO uncorrected orbital (S-orbitals on this center only) - * @param ELcurr output local energy at each distance in pos - */ -void getCurrentLocalEnergy(const ValueVector& pos, - RealType Zeff, - RealType Rc, - RealType originalELatRc, - CuspCorrection& cusp, - OneMolecularOrbital& phiMO, - ValueVector& ELcurr); - -/** Local energy from uncorrected orbital - * @param pos input vector of radial distances - * @param Zeff nuclear charge - * @param Rc cutoff radius - * @param phiMO uncorrected orbital (S-orbitals on this center only) - * @param ELorig output local energy at each distance in pos - * - * Return is value of local energy at zero. This is the value needed for subsequent computations. - * The routine can be called with an empty vector of positions to get just this value. - */ -RealType getOriginalLocalEnergy(const ValueVector& pos, - RealType Zeff, - RealType Rc, - OneMolecularOrbital& phiMO, - ValueVector& Elorig); - -/** Sum of squares difference between the current and ideal local energies - * This is the objective function to be minimized. 
- * @param Elcurr current local energy - * @param Elideal ideal local energy - */ -RealType getELchi2(const ValueVector& ELcurr, const ValueVector& ELideal); - - -/** Minimize chi2 with respect to phi at zero for a fixed Rc - * @param cusp correction parameters - * @param phiMO uncorrected orbital (S-orbitals on this center only) - * @param Z nuclear charge - * @param eta0 value at zero for parts of the orbital that don't require correction - the non-S-orbitals on this center and all orbitals on other centers - * @param pos vector of radial positions - * @param Elcurr storage for current local energy - * @param Elideal storage for ideal local energy - */ -RealType minimizeForPhiAtZero(CuspCorrection& cusp, - OneMolecularOrbital& phiMO, - RealType Z, - RealType eta0, - ValueVector& pos, - ValueVector& ELcurr, - ValueVector& ELideal, - RealType start_phi0); - - -/** Minimize chi2 with respect to Rc and phi at zero. - * @param cusp correction parameters - * @param phiMO uncorrected orbital (S-orbitals on this center only) - * @param Z nuclear charge - * @param Rc_init initial value for Rc - * @param Rc_max maximum value for Rc - * @param eta0 value at zero for parts of the orbital that don't require correction - the non-S-orbitals on this center and all orbitals on other centers - * @param pos vector of radial positions - * @param Elcurr storage for current local energy - * @param Elideal storage for ideal local energy - * - * Output is parameter values in cusp.cparam - */ -void minimizeForRc(CuspCorrection& cusp, - OneMolecularOrbital& phiMO, - RealType Z, - RealType Rc_init, - RealType Rc_max, - RealType eta0, - ValueVector& pos, - ValueVector& ELcurr, - ValueVector& ELideal); - -// Modifies orbital set lcwc -void applyCuspCorrection(const Matrix& info, - ParticleSet& targetPtcl, - ParticleSet& sourcePtcl, - LCAOrbitalSet& lcao, - SoaCuspCorrection& cusp, - const std::string& id); - -void generateCuspInfo(Matrix& info, - const ParticleSet& targetPtcl, - const 
ParticleSet& sourcePtcl, - const LCAOrbitalSet& lcao, - const std::string& id, - Communicate& Comm); - -} // namespace qmcplusplus +#include "Configuration.h" +#include "QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h" #endif diff --git a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp index d41624e9db..8ae6df2262 100644 --- a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp +++ b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp @@ -798,4 +798,5 @@ CuspCorrectionConstructionT::saveCusp(const std::string& filename, template class CuspCorrectionConstructionT; template class CuspCorrectionConstructionT; + } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilder.cpp b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilder.cpp deleted file mode 100644 index 8906c0f42c..0000000000 --- a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilder.cpp +++ /dev/null @@ -1,206 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2020 QMCPACK developers. -// -// File developed by: Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories -// -// File created by: Cody A. 
Melton, cmelton@sandia.gov, Sandia National Laboratories -////////////////////////////////////////////////////////////////////////////////////// - -#include "LCAOSpinorBuilder.h" -#include "QMCWaveFunctions/SpinorSet.h" -#include "OhmmsData/AttributeSet.h" -#include "Utilities/ProgressReportEngine.h" -#include "hdf/hdf_archive.h" -#include "Message/CommOperators.h" - -namespace qmcplusplus -{ -LCAOSpinorBuilder::LCAOSpinorBuilder(ParticleSet& els, ParticleSet& ions, Communicate* comm, xmlNodePtr cur) - : LCAOrbitalBuilder(els, ions, comm, cur) -{ - ClassName = "LCAOSpinorBuilder"; - - if (h5_path == "") - myComm->barrier_and_abort("LCAOSpinorBuilder only works with href"); -} - -std::unique_ptr LCAOSpinorBuilder::createSPOSetFromXML(xmlNodePtr cur) -{ - ReportEngine PRE(ClassName, "createSPO(xmlNodePtr)"); - std::string spo_name(""), optimize("no"); - std::string basisset_name("LCAOBSet"); - OhmmsAttributeSet spoAttrib; - spoAttrib.add(spo_name, "name"); - spoAttrib.add(optimize, "optimize"); - spoAttrib.add(basisset_name, "basisset"); - spoAttrib.put(cur); - - BasisSet_t* myBasisSet = nullptr; - if (basisset_map_.find(basisset_name) == basisset_map_.end()) - myComm->barrier_and_abort("basisset \"" + basisset_name + "\" cannot be found\n"); - else - myBasisSet = basisset_map_[basisset_name].get(); - - if (optimize == "yes") - app_log() << " SPOSet " << spo_name << " is optimizable\n"; - - std::unique_ptr upspo = - std::make_unique(spo_name + "_up", std::unique_ptr(myBasisSet->makeClone())); - std::unique_ptr dnspo = - std::make_unique(spo_name + "_dn", std::unique_ptr(myBasisSet->makeClone())); - - loadMO(*upspo, *dnspo, cur); - - //create spinor and register up/dn - auto spinor_set = std::make_unique(spo_name); - spinor_set->set_spos(std::move(upspo), std::move(dnspo)); - return spinor_set; -} - -bool LCAOSpinorBuilder::loadMO(LCAOrbitalSet& up, LCAOrbitalSet& dn, xmlNodePtr cur) -{ - bool PBC = false; - int norb = up.getBasisSetSize(); - std::string debugc("no"); 
- OhmmsAttributeSet aAttrib; - aAttrib.add(norb, "size"); - aAttrib.add(debugc, "debug"); - aAttrib.put(cur); - - up.setOrbitalSetSize(norb); - dn.setOrbitalSetSize(norb); - - xmlNodePtr occ_ptr = nullptr; - cur = cur->xmlChildrenNode; - while (cur != nullptr) - { - std::string cname((const char*)(cur->name)); - if (cname == "occupation") - { - occ_ptr = cur; - } - cur = cur->next; - } - - hdf_archive hin(myComm); - if (myComm->rank() == 0) - { - if (!hin.open(h5_path, H5F_ACC_RDONLY)) - myComm->barrier_and_abort("LCAOSpinorBuilder::loadMO missing or incorrect path to H5 file."); - hin.push("PBC"); - PBC = false; - hin.read(PBC, "PBC"); - hin.close(); - } - myComm->bcast(PBC); - if (PBC) - myComm->barrier_and_abort("LCAOSpinorBuilder::loadMO lcao spinors not implemented in PBC"); - - bool success = putFromH5(up, dn, occ_ptr); - - - if (debugc == "yes") - { - app_log() << "UP: Single-particle orbital coefficients dims=" << up.C->rows() << " x " << up.C->cols() - << std::endl; - app_log() << *up.C << std::endl; - app_log() << "DN: Single-particle orbital coefficients dims=" << dn.C->rows() << " x " << dn.C->cols() - << std::endl; - app_log() << *dn.C << std::endl; - } - return success; -} - -bool LCAOSpinorBuilder::putFromH5(LCAOrbitalSet& up, LCAOrbitalSet& dn, xmlNodePtr occ_ptr) -{ -#ifdef QMC_COMPLEX - if (up.getBasisSetSize() == 0 || dn.getBasisSetSize() == 0) - { - myComm->barrier_and_abort("LCASpinorBuilder::loadMO detected ZERO BasisSetSize"); - return false; - } - - bool success = true; - hdf_archive hin(myComm); - if (myComm->rank() == 0) - { - if (!hin.open(h5_path, H5F_ACC_RDONLY)) - myComm->barrier_and_abort("LCAOSpinorBuilder::putFromH5 missing or incorrect path to H5 file"); - - Matrix upReal; - Matrix upImag; - std::string setname = "/Super_Twist/eigenset_0"; - readRealMatrixFromH5(hin, setname, upReal); - setname += "_imag"; - readRealMatrixFromH5(hin, setname, upImag); - - assert(upReal.rows() == upImag.rows()); - assert(upReal.cols() == 
upImag.cols()); - - Matrix upTemp(upReal.rows(), upReal.cols()); - for (int i = 0; i < upTemp.rows(); i++) - { - for (int j = 0; j < upTemp.cols(); j++) - { - upTemp[i][j] = ValueType(upReal[i][j], upImag[i][j]); - } - } - - Matrix dnReal; - Matrix dnImag; - setname = "/Super_Twist/eigenset_1"; - readRealMatrixFromH5(hin, setname, dnReal); - setname += "_imag"; - readRealMatrixFromH5(hin, setname, dnImag); - - assert(dnReal.rows() == dnImag.rows()); - assert(dnReal.cols() == dnImag.cols()); - - Matrix dnTemp(dnReal.rows(), dnReal.cols()); - for (int i = 0; i < dnTemp.rows(); i++) - { - for (int j = 0; j < dnTemp.cols(); j++) - { - dnTemp[i][j] = ValueType(dnReal[i][j], dnImag[i][j]); - } - } - - assert(upReal.rows() == dnReal.rows()); - assert(upReal.cols() == dnReal.cols()); - - Occ.resize(upReal.rows()); - success = putOccupation(up, occ_ptr); - - int norbs = up.getOrbitalSetSize(); - - int n = 0, i = 0; - while (i < norbs) - { - if (Occ[n] > 0.0) - { - std::copy(upTemp[n], upTemp[n + 1], (*up.C)[i]); - std::copy(dnTemp[n], dnTemp[n + 1], (*dn.C)[i]); - i++; - } - n++; - } - - hin.close(); - } - -#ifdef HAVE_MPI - myComm->comm.broadcast_n(up.C->data(), up.C->size()); - myComm->comm.broadcast_n(dn.C->data(), dn.C->size()); -#endif - -#else - myComm->barrier_and_abort("LCAOSpinorBuilder::putFromH5 Must build with QMC_COMPLEX"); -#endif - - return success; -} - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilder.h b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilder.h index 4c1d490b8b..0d6ff2118f 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilder.h +++ b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilder.h @@ -13,51 +13,12 @@ #ifndef QMCPLUSPLUS_SOA_LCAO_SPINOR_BUILDER_H #define QMCPLUSPLUS_SOA_LCAO_SPINOR_BUILDER_H -#include "QMCWaveFunctions/LCAO/LCAOrbitalBuilder.h" +#include "Configuration.h" +#include "QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h" namespace qmcplusplus { -/** @file LCAOSpinorBuidler.h - * - * Derives from 
LCAOrbitalBuilder.h. Overrides createSPOSetFromXML method to read up and - * down channel from HDF5 and construct SpinorSet - * - */ -class LCAOSpinorBuilder : public LCAOrbitalBuilder -{ -public: - /** constructor - * \param els reference to the electrons - * \param ions reference to the ions - * - * Derives from LCAOrbitalBuilder, but will require an h5_path to be set - */ - LCAOSpinorBuilder(ParticleSet& els, ParticleSet& ions, Communicate* comm, xmlNodePtr cur); - - /** creates and returns SpinorSet - * - * Creates an up and down LCAOrbitalSet - * calls LCAOSpinorBuilder::loadMO to build up and down from the H5 file - * registers up and down into a SpinorSet and returns - */ - std::unique_ptr createSPOSetFromXML(xmlNodePtr cur) override; - -private: - /** load the up and down MO sets - * - * checks to make sure not PBC and initialize the Occ vector. - * call putFromH5 to parse the up and down MO coefficients - */ - bool loadMO(LCAOrbitalSet& up, LCAOrbitalSet& dn, xmlNodePtr cur); +using LCAOSpinorBuilder = LCAOSpinorBuilderT; - /** parse h5 file for spinor info - * - * assumes the h5 file has KPTS_0/eigenset_0(_imag) for the real/imag part of up component of spinor - * assumes the h5 file as KPTS_0/eigenset_1(_imag) for the real/imag part of dn component of spinor - * reads the various coefficient matricies and broadcast - * after this, we have up/dn LCAOrbitalSet that can be registered to the SpinorSet - */ - bool putFromH5(LCAOrbitalSet& up, LCAOrbitalSet& dn, xmlNodePtr); -}; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp index 6b71c88bd3..14de5a549c 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp +++ b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp @@ -137,26 +137,28 @@ LCAOSpinorBuilderT::putFromH5( bool success = true; hdf_archive hin(this->myComm); if (this->myComm->rank() == 0) { - Matrix upReal; - Matrix upImag; - std::string setname = 
"/Super_Twist/eigenset_0"; - this->readRealMatrixFromH5(hin, setname, upReal); - setname += "_imag"; - this->readRealMatrixFromH5(hin, setname, upImag); - - if (!hin.open(this->h5_path, H5F_ACC_RDONLY)) - this->myComm->barrier_and_abort( - "LCAOSpinorBuilder::putFromH5 missing or " - "incorrect path to H5 file"); - - assert(upReal.rows() == upImag.rows()); - assert(upReal.cols() == upImag.cols()); - - Matrix upTemp(upReal.rows(), upReal.cols()); - for (int i = 0; i < upTemp.rows(); i++) { - for (int j = 0; j < upTemp.cols(); j++) { - upTemp[i][j] = ValueType{upReal[i][j], upImag[i][j]}; - } + if (!hin.open(this->h5_path, H5F_ACC_RDONLY)) + this->myComm->barrier_and_abort("LCAOSpinorBuilder::putFromH5 missing or " + "incorrect path to H5 file"); + + Matrix upReal; + Matrix upImag; + std::string setname = "/Super_Twist/eigenset_0"; + this->readRealMatrixFromH5(hin, setname, upReal); + setname += "_imag"; + this->readRealMatrixFromH5(hin, setname, upImag); + + + assert(upReal.rows() == upImag.rows()); + assert(upReal.cols() == upImag.cols()); + + Matrix upTemp(upReal.rows(), upReal.cols()); + for (int i = 0; i < upTemp.rows(); i++) + { + for (int j = 0; j < upTemp.cols(); j++) + { + upTemp[i][j] = ValueType{upReal[i][j], upImag[i][j]}; + } } Matrix dnReal; @@ -205,6 +207,11 @@ LCAOSpinorBuilderT::putFromH5( return success; } +#ifdef QMC_COMPLEX +#ifndef MIXED_PRECISION template class LCAOSpinorBuilderT>; +#else template class LCAOSpinorBuilderT>; +#endif +#endif } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.cpp deleted file mode 100644 index d524f60208..0000000000 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.cpp +++ /dev/null @@ -1,1037 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. 
-// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. -////////////////////////////////////////////////////////////////////////////////////// - - -#include "LCAOrbitalBuilder.h" -#include "OhmmsData/AttributeSet.h" -#include "QMCWaveFunctions/SPOSet.h" -#include "MultiQuinticSpline1D.h" -#include "Numerics/SoaCartesianTensor.h" -#include "Numerics/SoaSphericalTensor.h" -#include "SoaAtomicBasisSet.h" -#include "SoaLocalizedBasisSet.h" -#include "LCAOrbitalSet.h" -#include "AOBasisBuilder.h" -#include "MultiFunctorAdapter.h" -#if !defined(QMC_COMPLEX) -#include "LCAOrbitalSetWithCorrection.h" -#include "CuspCorrectionConstruction.h" -#endif -#include "hdf/hdf_archive.h" -#include "Message/CommOperators.h" -#include "Utilities/ProgressReportEngine.h" -#include "CPU/math.hpp" - -#include - -namespace qmcplusplus -{ -/** traits for a localized basis set; used by createBasisSet - * - * T radial function value type - * ORBT orbital value type, can be complex - * ROT {0=numuerica;, 1=gto; 2=sto} - * SH {0=cartesian, 1=spherical} - * If too confusing, inroduce enumeration. 
- */ -template -struct ao_traits -{}; - -/** specialization for numerical-cartesian AO */ -template -struct ao_traits -{ - using radial_type = MultiQuinticSpline1D; - using angular_type = SoaCartesianTensor; - using ao_type = SoaAtomicBasisSet; - using basis_type = SoaLocalizedBasisSet; -}; - -/** specialization for numerical-spherical AO */ -template -struct ao_traits -{ - using radial_type = MultiQuinticSpline1D; - using angular_type = SoaSphericalTensor; - using ao_type = SoaAtomicBasisSet; - using basis_type = SoaLocalizedBasisSet; -}; - -/** specialization for GTO-cartesian AO */ -template -struct ao_traits -{ - using radial_type = MultiFunctorAdapter>; - using angular_type = SoaCartesianTensor; - using ao_type = SoaAtomicBasisSet; - using basis_type = SoaLocalizedBasisSet; -}; - -/** specialization for GTO-cartesian AO */ -template -struct ao_traits -{ - using radial_type = MultiFunctorAdapter>; - using angular_type = SoaSphericalTensor; - using ao_type = SoaAtomicBasisSet; - using basis_type = SoaLocalizedBasisSet; -}; - -/** specialization for STO-spherical AO */ -template -struct ao_traits -{ - using radial_type = MultiFunctorAdapter>; - using angular_type = SoaSphericalTensor; - using ao_type = SoaAtomicBasisSet; - using basis_type = SoaLocalizedBasisSet; -}; - - -inline bool is_same(const xmlChar* a, const char* b) { return !strcmp((const char*)a, b); } - -using BasisSet_t = LCAOrbitalSet::basis_type; - -LCAOrbitalBuilder::LCAOrbitalBuilder(ParticleSet& els, ParticleSet& ions, Communicate* comm, xmlNodePtr cur) - : SPOSetBuilder("LCAO", comm), - targetPtcl(els), - sourcePtcl(ions), - h5_path(""), - SuperTwist(0.0), - doCuspCorrection(false) -{ - ClassName = "LCAOrbitalBuilder"; - ReportEngine PRE(ClassName, "createBasisSet"); - - std::string cuspC("no"); // cusp correction - OhmmsAttributeSet aAttrib; - aAttrib.add(cuspC, "cuspCorrection"); - aAttrib.add(h5_path, "href"); - aAttrib.add(PBCImages, "PBCimages"); - aAttrib.add(SuperTwist, "twist"); - 
aAttrib.put(cur); - - if (cuspC == "yes") - doCuspCorrection = true; - //Evaluate the Phase factor. Equals 1 for OBC. - EvalPeriodicImagePhaseFactors(SuperTwist, PeriodicImagePhaseFactors); - - // no need to wait but load the basis set - processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { - if (cname == "basisset") - { - std::string basisset_name_input(getXMLAttributeValue(element, "name")); - std::string basisset_name(basisset_name_input.empty() ? "LCAOBSet" : basisset_name_input); - if (basisset_map_.find(basisset_name) != basisset_map_.end()) - { - std::ostringstream err_msg; - err_msg << "Cannot create basisset " << basisset_name << " which already exists." << std::endl; - throw std::runtime_error(err_msg.str()); - } - if (h5_path != "") - basisset_map_[basisset_name] = loadBasisSetFromH5(element); - else - basisset_map_[basisset_name] = loadBasisSetFromXML(element, cur); - } - }); - - // deprecated h5 basis set handling when basisset element is missing - if (basisset_map_.size() == 0 && h5_path != "") - { - app_warning() << "!!!!!!! Deprecated input style: missing basisset element. " - << "LCAO needs an explicit basisset XML element. " - << "Fallback on loading an implicit one." 
<< std::endl; - basisset_map_["LCAOBSet"] = loadBasisSetFromH5(cur); - } - - if (basisset_map_.size() == 0) - throw std::runtime_error("No basisset found in the XML input!"); -} - -LCAOrbitalBuilder::~LCAOrbitalBuilder() -{ - //properly cleanup -} - -int LCAOrbitalBuilder::determineRadialOrbType(xmlNodePtr cur) const -{ - std::string keyOpt; - std::string transformOpt; - OhmmsAttributeSet aAttrib; - aAttrib.add(keyOpt, "keyword"); - aAttrib.add(keyOpt, "key"); - aAttrib.add(transformOpt, "transform"); - aAttrib.put(cur); - - int radialOrbType = -1; - if (transformOpt == "yes" || keyOpt == "NMO") - radialOrbType = 0; - else - { - if (keyOpt == "GTO") - radialOrbType = 1; - if (keyOpt == "STO") - radialOrbType = 2; - } - return radialOrbType; -} - -std::unique_ptr LCAOrbitalBuilder::loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent) -{ - ReportEngine PRE(ClassName, "loadBasisSetFromXML(xmlNodePtr)"); - int ylm = -1; - { - xmlNodePtr cur1 = cur->xmlChildrenNode; - while (cur1 != NULL && ylm < 0) - { - if (is_same(cur1->name, "atomicBasisSet")) - { - std::string sph; - OhmmsAttributeSet att; - att.add(sph, "angular"); - att.put(cur1); - ylm = (sph == "cartesian") ? 0 : 1; - } - cur1 = cur1->next; - } - } - - if (ylm < 0) - PRE.error("Missing angular attribute of atomicBasisSet.", true); - - int radialOrbType = determineRadialOrbType(cur); - if (radialOrbType < 0) - { - app_warning() << "Radial orbital type cannot be determined based on the attributes of basisset line. " - << "Trying the parent element." << std::endl; - radialOrbType = determineRadialOrbType(parent); - } - - if (radialOrbType < 0) - PRE.error("Unknown radial function for LCAO orbitals. 
Specify keyword=\"NMO/GTO/STO\" .", true); - - BasisSet_t* myBasisSet = nullptr; - /** process atomicBasisSet per ion species */ - switch (radialOrbType) - { - case (0): //numerical - app_log() << " LCAO: SoaAtomicBasisSet" << std::endl; - if (ylm) - myBasisSet = createBasisSet<0, 1>(cur); - else - myBasisSet = createBasisSet<0, 0>(cur); - break; - case (1): //gto - app_log() << " LCAO: SoaAtomicBasisSet" << std::endl; - if (ylm) - myBasisSet = createBasisSet<1, 1>(cur); - else - myBasisSet = createBasisSet<1, 0>(cur); - break; - case (2): //sto - app_log() << " LCAO: SoaAtomicBasisSet" << std::endl; - myBasisSet = createBasisSet<2, 1>(cur); - break; - default: - PRE.error("Cannot construct SoaAtomicBasisSet.", true); - break; - } - - return std::unique_ptr(myBasisSet); -} - -std::unique_ptr LCAOrbitalBuilder::loadBasisSetFromH5(xmlNodePtr parent) -{ - ReportEngine PRE(ClassName, "loadBasisSetFromH5()"); - - hdf_archive hin(myComm); - int ylm = -1; - if (myComm->rank() == 0) - { - if (!hin.open(h5_path, H5F_ACC_RDONLY)) - PRE.error("Could not open H5 file", true); - - hin.push("basisset", false); - - std::string sph; - std::string ElemID0 = "atomicBasisSet0"; - - hin.push(ElemID0.c_str(), false); - - if (!hin.readEntry(sph, "angular")) - PRE.error("Could not find name of basisset group in H5; Probably Corrupt H5 file", true); - ylm = (sph == "cartesian") ? 0 : 1; - hin.close(); - } - - myComm->bcast(ylm); - if (ylm < 0) - PRE.error("Missing angular attribute of atomicBasisSet.", true); - - int radialOrbType = determineRadialOrbType(parent); - if (radialOrbType < 0) - PRE.error("Unknown radial function for LCAO orbitals. 
Specify keyword=\"NMO/GTO/STO\" .", true); - - BasisSet_t* myBasisSet = nullptr; - /** process atomicBasisSet per ion species */ - switch (radialOrbType) - { - case (0): //numerical - app_log() << " LCAO: SoaAtomicBasisSet" << std::endl; - if (ylm) - myBasisSet = createBasisSetH5<0, 1>(); - else - myBasisSet = createBasisSetH5<0, 0>(); - break; - case (1): //gto - app_log() << " LCAO: SoaAtomicBasisSet" << std::endl; - if (ylm) - myBasisSet = createBasisSetH5<1, 1>(); - else - myBasisSet = createBasisSetH5<1, 0>(); - break; - case (2): //sto - app_log() << " LCAO: SoaAtomicBasisSet" << std::endl; - myBasisSet = createBasisSetH5<2, 1>(); - break; - default: - PRE.error("Cannot construct SoaAtomicBasisSet.", true); - break; - } - return std::unique_ptr(myBasisSet); -} - - -template -LCAOrbitalBuilder::BasisSet_t* LCAOrbitalBuilder::createBasisSet(xmlNodePtr cur) -{ - ReportEngine PRE(ClassName, "createBasisSet(xmlNodePtr)"); - - using ao_type = typename ao_traits::ao_type; - using basis_type = typename ao_traits::basis_type; - - basis_type* mBasisSet = new basis_type(sourcePtcl, targetPtcl); - - //list of built centers - std::vector ao_built_centers; - - /** process atomicBasisSet per ion species */ - cur = cur->xmlChildrenNode; - while (cur != NULL) //loop over unique ioons - { - std::string cname((const char*)(cur->name)); - - if (cname == "atomicBasisSet") - { - std::string elementType; - std::string sph; - OhmmsAttributeSet att; - att.add(elementType, "elementType"); - att.put(cur); - - if (elementType.empty()) - PRE.error("Missing elementType attribute of atomicBasisSet.", true); - - auto it = std::find(ao_built_centers.begin(), ao_built_centers.end(), elementType); - if (it == ao_built_centers.end()) - { - AOBasisBuilder any(elementType, myComm); - any.put(cur); - auto aoBasis = any.createAOSet(cur); - if (aoBasis) - { - //add the new atomic basis to the basis set - int activeCenter = sourcePtcl.getSpeciesSet().findSpecies(elementType); - 
mBasisSet->add(activeCenter, std::move(aoBasis)); - } - ao_built_centers.push_back(elementType); - } - } - cur = cur->next; - } // done with basis set - mBasisSet->setBasisSetSize(-1); - mBasisSet->setPBCParams(PBCImages, SuperTwist, PeriodicImagePhaseFactors); - return mBasisSet; -} - - -template -LCAOrbitalBuilder::BasisSet_t* LCAOrbitalBuilder::createBasisSetH5() -{ - ReportEngine PRE(ClassName, "createBasisSetH5(xmlNodePtr)"); - - using ao_type = typename ao_traits::ao_type; - using basis_type = typename ao_traits::basis_type; - - basis_type* mBasisSet = new basis_type(sourcePtcl, targetPtcl); - - //list of built centers - std::vector ao_built_centers; - - int Nb_Elements(0); - std::string basiset_name; - - /** process atomicBasisSet per ion species */ - app_log() << "Reading BasisSet from HDF5 file:" << h5_path << std::endl; - - hdf_archive hin(myComm); - if (myComm->rank() == 0) - { - if (!hin.open(h5_path, H5F_ACC_RDONLY)) - PRE.error("Could not open H5 file", true); - - hin.push("basisset", false); - - hin.read(Nb_Elements, "NbElements"); - } - - myComm->bcast(Nb_Elements); - if (Nb_Elements < 1) - PRE.error("Missing elementType attribute of atomicBasisSet.", true); - - for (int i = 0; i < Nb_Elements; i++) - { - std::string elementType, dataset; - std::stringstream tempElem; - std::string ElemID0 = "atomicBasisSet", ElemType; - tempElem << ElemID0 << i; - ElemType = tempElem.str(); - - if (myComm->rank() == 0) - { - hin.push(ElemType.c_str(), false); - - if (!hin.readEntry(basiset_name, "name")) - PRE.error("Could not find name of basisset group in H5; Probably Corrupt H5 file", true); - if (!hin.readEntry(elementType, "elementType")) - PRE.error("Could not read elementType in H5; Probably Corrupt H5 file", true); - } - myComm->bcast(basiset_name); - myComm->bcast(elementType); - - auto it = std::find(ao_built_centers.begin(), ao_built_centers.end(), elementType); - if (it == ao_built_centers.end()) - { - AOBasisBuilder any(elementType, myComm); - 
any.putH5(hin); - auto aoBasis = any.createAOSetH5(hin); - if (aoBasis) - { - //add the new atomic basis to the basis set - int activeCenter = sourcePtcl.getSpeciesSet().findSpecies(elementType); - mBasisSet->add(activeCenter, std::move(aoBasis)); - } - ao_built_centers.push_back(elementType); - } - - if (myComm->rank() == 0) - hin.pop(); - } - - if (myComm->rank() == 0) - { - hin.pop(); - hin.close(); - } - mBasisSet->setBasisSetSize(-1); - mBasisSet->setPBCParams(PBCImages, SuperTwist, PeriodicImagePhaseFactors); - return mBasisSet; -} - - -std::unique_ptr LCAOrbitalBuilder::createSPOSetFromXML(xmlNodePtr cur) -{ - ReportEngine PRE(ClassName, "createSPO(xmlNodePtr)"); - std::string spo_name(""), cusp_file(""), optimize("no"); - std::string basisset_name("LCAOBSet"); - OhmmsAttributeSet spoAttrib; - spoAttrib.add(spo_name, "name"); - spoAttrib.add(spo_name, "id"); - spoAttrib.add(cusp_file, "cuspInfo"); - spoAttrib.add(basisset_name, "basisset"); - spoAttrib.put(cur); - - std::unique_ptr myBasisSet; - if (basisset_map_.find(basisset_name) == basisset_map_.end()) - myComm->barrier_and_abort("basisset \"" + basisset_name + "\" cannot be found\n"); - else - myBasisSet.reset(basisset_map_[basisset_name]->makeClone()); - - std::unique_ptr sposet; - if (doCuspCorrection) - { -#if defined(QMC_COMPLEX) - myComm->barrier_and_abort( - "LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not supported on complex LCAO."); -#else - app_summary() << " Using cusp correction." << std::endl; - auto lcwc = std::make_unique(spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet)); - loadMO(lcwc->lcao, cur); - lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize()); - sposet = std::move(lcwc); -#endif - } - else - { - auto lcos = std::make_unique(spo_name, std::move(myBasisSet)); - loadMO(*lcos, cur); - sposet = std::move(lcos); - } - -#if !defined(QMC_COMPLEX) - if (doCuspCorrection) - { - // Create a temporary particle set to use for cusp initialization. 
- // The particle coordinates left at the end are unsuitable for further computations. - // The coordinates get set to nuclear positions, which leads to zero e-N distance, - // which causes a NaN in SoaAtomicBasisSet.h - // This problem only appears when the electron positions are specified in the input. - // The random particle placement step executes after this part of the code, overwriting - // the leftover positions from the cusp initialization. - ParticleSet tmp_targetPtcl(targetPtcl); - - const int num_centers = sourcePtcl.getTotalNum(); - auto& lcwc = dynamic_cast(*sposet); - - const int orbital_set_size = lcwc.getOrbitalSetSize(); - Matrix info(num_centers, orbital_set_size); - - // set a default file name if not given - if (cusp_file.empty()) - cusp_file = spo_name + ".cuspInfo.xml"; - - bool file_exists(myComm->rank() == 0 && std::ifstream(cusp_file).good()); - myComm->bcast(file_exists); - app_log() << " Cusp correction file " << cusp_file << (file_exists ? " exits." : " doesn't exist.") << std::endl; - - // validate file if it exists - if (file_exists) - { - bool valid = 0; - if (myComm->rank() == 0) - valid = readCuspInfo(cusp_file, spo_name, orbital_set_size, info); - myComm->bcast(valid); - if (!valid) - myComm->barrier_and_abort("Invalid cusp correction file " + cusp_file); -#ifdef HAVE_MPI - for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++) - for (int center_idx = 0; center_idx < num_centers; center_idx++) - broadcastCuspInfo(info(center_idx, orb_idx), *myComm, 0); -#endif - } - else - { - generateCuspInfo(info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name, *myComm); - if (myComm->rank() == 0) - saveCusp(cusp_file, info, spo_name); - } - - applyCuspCorrection(info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp, spo_name); - } -#endif - - return sposet; -} - - -/** Parse the xml file for information on the Dirac determinants. 
- *@param cur the current xmlNode - */ -bool LCAOrbitalBuilder::loadMO(LCAOrbitalSet& spo, xmlNodePtr cur) -{ -#undef FunctionName -#define FunctionName \ - printf("Calling FunctionName from %s\n", __FUNCTION__); \ - FunctionNameReal - //Check if HDF5 present - ReportEngine PRE("LCAOrbitalBuilder", "put(xmlNodePtr)"); - - //initialize the number of orbital by the basis set size - int norb = spo.getBasisSetSize(); - std::string debugc("no"); - double orbital_mix_magnitude = 0.0; - bool PBC = false; - OhmmsAttributeSet aAttrib; - aAttrib.add(norb, "orbitals"); - aAttrib.add(norb, "size"); - aAttrib.add(debugc, "debug"); - aAttrib.add(orbital_mix_magnitude, "orbital_mix_magnitude"); - aAttrib.put(cur); - xmlNodePtr occ_ptr = NULL; - xmlNodePtr coeff_ptr = NULL; - cur = cur->xmlChildrenNode; - while (cur != NULL) - { - std::string cname((const char*)(cur->name)); - if (cname == "occupation") - { - occ_ptr = cur; - } - else if (cname.find("coeff") < cname.size() || cname == "parameter" || cname == "Var") - { - coeff_ptr = cur; - } - cur = cur->next; - } - if (coeff_ptr == NULL) - { - app_log() << " Using Identity for the LCOrbitalSet " << std::endl; - return true; - } - spo.setOrbitalSetSize(norb); - bool success = putOccupation(spo, occ_ptr); - if (h5_path == "") - success = putFromXML(spo, coeff_ptr); - else - { - hdf_archive hin(myComm); - - if (myComm->rank() == 0) - { - if (!hin.open(h5_path, H5F_ACC_RDONLY)) - APP_ABORT("LCAOrbitalBuilder::putFromH5 missing or incorrect path to H5 file."); - - try - { - hin.push("PBC", false); - PBC = true; - } - catch (const std::exception& e) - { - app_debug() << e.what() << std::endl; - PBC = false; - } - - if (PBC) - hin.read(PBC, "PBC"); - - hin.close(); - } - myComm->bcast(PBC); - if (PBC) - success = putPBCFromH5(spo, coeff_ptr); - else - success = putFromH5(spo, coeff_ptr); - } - - // Ye: used to construct cusp correction - //bool success2 = transformSPOSet(); - if (debugc == "yes") - { - app_log() << " Single-particle 
orbital coefficients dims=" << spo.C->rows() << " x " << spo.C->cols() - << std::endl; - app_log() << *spo.C << std::endl; - } - - return success; -} - -bool LCAOrbitalBuilder::putFromXML(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr) -{ - int norbs = 0; - OhmmsAttributeSet aAttrib; - aAttrib.add(norbs, "size"); - aAttrib.add(norbs, "orbitals"); - aAttrib.put(coeff_ptr); - if (norbs < spo.getOrbitalSetSize()) - { - return false; - APP_ABORT("LCAOrbitalBuilder::putFromXML missing or incorrect size"); - } - if (norbs) - { - std::vector Ctemp; - int BasisSetSize = spo.getBasisSetSize(); - Ctemp.resize(norbs * BasisSetSize); - putContent(Ctemp, coeff_ptr); - int n = 0, i = 0; - std::vector::iterator cit(Ctemp.begin()); - while (i < spo.getOrbitalSetSize()) - { - if (Occ[n] > std::numeric_limits::epsilon()) - { - std::copy(cit, cit + BasisSetSize, (*spo.C)[i]); - i++; - } - n++; - cit += BasisSetSize; - } - } - return true; -} - -/** read data from a hdf5 file - * @param norb number of orbitals to be initialized - * @param coeff_ptr xmlnode for coefficients - */ -bool LCAOrbitalBuilder::putFromH5(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr) -{ - int neigs = spo.getBasisSetSize(); - int setVal = -1; - OhmmsAttributeSet aAttrib; - aAttrib.add(setVal, "spindataset"); - aAttrib.add(neigs, "size"); - aAttrib.add(neigs, "orbitals"); - aAttrib.put(coeff_ptr); - hdf_archive hin(myComm); - if (myComm->rank() == 0) - { - if (!hin.open(h5_path, H5F_ACC_RDONLY)) - APP_ABORT("LCAOrbitalBuilder::putFromH5 missing or incorrect path to H5 file."); - - Matrix Ctemp; - std::array name; - - - //This is to make sure of Backward compatibility with previous tags. 
- int name_len = std::snprintf(name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_", setVal); - if (name_len < 0) - throw std::runtime_error("Error generating name"); - std::string setname(name.data(), name_len); - if (!hin.readEntry(Ctemp, setname)) - { - name_len = std::snprintf(name.data(), name.size(), "%s%d", "/KPTS_0/eigenset_", setVal); - if (name_len < 0) - throw std::runtime_error("Error generating name"); - setname = std::string(name.data(), name_len); - hin.read(Ctemp, setname); - } - hin.close(); - - if (Ctemp.cols() != spo.getBasisSetSize()) - { - std::ostringstream err_msg; - err_msg << "Basis set size " << spo.getBasisSetSize() << " mismatched the number of MO coefficients columns " - << Ctemp.cols() << " from h5." << std::endl; - myComm->barrier_and_abort(err_msg.str()); - } - - int norbs = spo.getOrbitalSetSize(); - if (Ctemp.rows() < norbs) - { - std::ostringstream err_msg; - err_msg << "Need " << norbs << " orbitals. Insufficient rows of MO coefficients " << Ctemp.rows() << " from h5." 
- << std::endl; - myComm->barrier_and_abort(err_msg.str()); - } - - int n = 0, i = 0; - while (i < norbs) - { - if (Occ[n] > 0.0) - { - std::copy(Ctemp[n], Ctemp[n + 1], (*spo.C)[i]); - i++; - } - n++; - } - } - myComm->bcast(spo.C->data(), spo.C->size()); - return true; -} - - -/** read data from a hdf5 file - * @param norb number of orbitals to be initialized - * @param coeff_ptr xmlnode for coefficients - */ -bool LCAOrbitalBuilder::putPBCFromH5(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr) -{ - ReportEngine PRE("LCAOrbitalBuilder", "LCAOrbitalBuilder::putPBCFromH5"); - int norbs = spo.getOrbitalSetSize(); - int neigs = spo.getBasisSetSize(); - int setVal = -1; - bool IsComplex = false; - bool MultiDet = false; - PosType SuperTwist(0.0); - PosType SuperTwistH5(0.0); - OhmmsAttributeSet aAttrib; - aAttrib.add(setVal, "spindataset"); - aAttrib.add(neigs, "size"); - aAttrib.add(neigs, "orbitals"); - aAttrib.put(coeff_ptr); - hdf_archive hin(myComm); - - xmlNodePtr curtemp = coeff_ptr; - - std::string xmlTag("determinantset"); - std::string MSDTag("sposet"); - std::string SDTag("determinant"); - std::string EndTag("qmcsystem"); - std::string curname; - - do - { - std::stringstream ss; - curtemp = curtemp->parent; - ss << curtemp->name; - ss >> curname; - if (curname == MSDTag) - MultiDet = true; ///Used to know if running an MSD calculation - needed for order of Orbitals. - if (curname == SDTag) - MultiDet = false; - - } while ((xmlTag != curname) && (curname != EndTag)); - if (curname == EndTag) - { - APP_ABORT( - "Could not find in wf file the \"sposet\" or \"determinant\" tags. 
Please verify input or contact developers"); - } - - aAttrib.add(SuperTwist, "twist"); - aAttrib.put(curtemp); - - if (myComm->rank() == 0) - { - if (!hin.open(h5_path, H5F_ACC_RDONLY)) - APP_ABORT("LCAOrbitalBuilder::putFromH5 missing or incorrect path to H5 file."); - hin.push("parameters"); - hin.read(IsComplex, "IsComplex"); - hin.pop(); - - std::string setname("/Super_Twist/Coord"); - hin.read(SuperTwistH5, setname); - if (std::abs(SuperTwistH5[0] - SuperTwist[0]) >= 1e-6 || std::abs(SuperTwistH5[1] - SuperTwist[1]) >= 1e-6 || - std::abs(SuperTwistH5[2] - SuperTwist[2]) >= 1e-6) - { - app_log() << "Super Twist in XML : " << SuperTwist[0] << " In H5:" << SuperTwistH5[0] << std::endl; - app_log() << " " << SuperTwist[1] << " " << SuperTwistH5[1] << std::endl; - app_log() << " " << SuperTwist[2] << " " << SuperTwistH5[2] << std::endl; - app_log() << "Diff in Coord x :" << std::abs(SuperTwistH5[0] - SuperTwist[0]) << std::endl; - app_log() << " y :" << std::abs(SuperTwistH5[1] - SuperTwist[1]) << std::endl; - app_log() << " z :" << std::abs(SuperTwistH5[2] - SuperTwist[2]) << std::endl; - APP_ABORT("Requested Super Twist in XML and Super Twist in HDF5 do not Match!!! Aborting."); - } - //SuperTwist=SuperTwistH5; - Matrix Ctemp; - LoadFullCoefsFromH5(hin, setVal, SuperTwist, Ctemp, MultiDet); - - int n = 0, i = 0; - while (i < norbs) - { - if (Occ[n] > 0.0) - { - std::copy(Ctemp[n], Ctemp[n + 1], (*spo.C)[i]); - i++; - } - n++; - } - - hin.close(); - } -#ifdef HAVE_MPI - myComm->comm.broadcast_n(spo.C->data(), spo.C->size()); -#endif - return true; -} - - -bool LCAOrbitalBuilder::putOccupation(LCAOrbitalSet& spo, xmlNodePtr occ_ptr) -{ - //die?? 
- if (spo.getBasisSetSize() == 0) - { - APP_ABORT("LCAOrbitalBuilder::putOccupation detected ZERO BasisSetSize"); - return false; - } - Occ.resize(std::max(spo.getBasisSetSize(), spo.getOrbitalSetSize())); - Occ = 0.0; - for (int i = 0; i < spo.getOrbitalSetSize(); i++) - Occ[i] = 1.0; - std::vector occ_in; - std::string occ_mode("table"); - if (occ_ptr == NULL) - { - occ_mode = "ground"; - } - else - { - const std::string o(getXMLAttributeValue(occ_ptr, "mode")); - if (!o.empty()) - occ_mode = o; - } - //Do nothing if mode == ground - if (occ_mode == "excited") - { - putContent(occ_in, occ_ptr); - for (int k = 0; k < occ_in.size(); k++) - { - if (occ_in[k] < 0) //remove this, -1 is to adjust the base - Occ[-occ_in[k] - 1] = 0.0; - else - Occ[occ_in[k] - 1] = 1.0; - } - } - else if (occ_mode == "table") - { - putContent(Occ, occ_ptr); - } - return true; -} - -void LCAOrbitalBuilder::readRealMatrixFromH5(hdf_archive& hin, - const std::string& setname, - Matrix& Creal) const -{ - hin.read(Creal, setname); -} - -void LCAOrbitalBuilder::LoadFullCoefsFromH5(hdf_archive& hin, - int setVal, - PosType& SuperTwist, - Matrix>& Ctemp, - bool MultiDet) -{ - Matrix Creal; - Matrix Ccmplx; - - std::array name; - int name_len{0}; - ///When running Single Determinant calculations, MO coeff loaded based on occupation and lowest eingenvalue. - ///However, for solids with multideterminants, orbitals are order by kpoints; first all MOs for kpoint 1, then 2 etc - /// The multideterminants occupation is specified in the input/HDF5 and theefore as long as there is consistency between - /// the order in which we read the orbitals and the occupation, we are safe. In the case of Multideterminants generated - /// by pyscf and Quantum Package, They are stored in the same order as generated for quantum package and one should use - /// the orbitals labelled eigenset_unsorted. 
- - if (MultiDet == false) - name_len = std::snprintf(name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_", setVal); - else - name_len = std::snprintf(name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_unsorted_", setVal); - if (name_len < 0) - throw std::runtime_error("Error generating name"); - - std::string setname(name.data(), name_len); - readRealMatrixFromH5(hin, setname, Creal); - - bool IsComplex = true; - hin.read(IsComplex, "/parameters/IsComplex"); - if (IsComplex == false) - { - Ccmplx.resize(Creal.rows(), Creal.cols()); - Ccmplx = 0.0; - } - else - { - setname += "_imag"; - readRealMatrixFromH5(hin, setname, Ccmplx); - } - - Ctemp.resize(Creal.rows(), Creal.cols()); - for (int i = 0; i < Ctemp.rows(); i++) - for (int j = 0; j < Ctemp.cols(); j++) - Ctemp[i][j] = std::complex(Creal[i][j], Ccmplx[i][j]); -} - -void LCAOrbitalBuilder::LoadFullCoefsFromH5(hdf_archive& hin, - int setVal, - PosType& SuperTwist, - Matrix& Creal, - bool MultiDet) -{ - bool IsComplex = false; - hin.read(IsComplex, "/parameters/IsComplex"); - if (IsComplex && - (std::abs(SuperTwist[0]) >= 1e-6 || std::abs(SuperTwist[1]) >= 1e-6 || std::abs(SuperTwist[2]) >= 1e-6)) - { - std::string setname("This Wavefunction is Complex and you are using the real version of QMCPACK. 
" - "Please re-run this job with the Complex build of QMCPACK."); - APP_ABORT(setname.c_str()); - } - - std::array name; - int name_len{0}; - bool PBC = false; - hin.read(PBC, "/PBC/PBC"); - if (MultiDet && PBC) - name_len = std::snprintf(name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_unsorted_", setVal); - else - name_len = std::snprintf(name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_", setVal); - if (name_len < 0) - throw std::runtime_error("Error generating name"); - - readRealMatrixFromH5(hin, std::string(name.data(), name_len), Creal); -} - -/// Periodic Image Phase Factors computation to be determined -void LCAOrbitalBuilder::EvalPeriodicImagePhaseFactors(PosType SuperTwist, - std::vector& LocPeriodicImagePhaseFactors) -{ - const int NbImages = (PBCImages[0] + 1) * (PBCImages[1] + 1) * (PBCImages[2] + 1); - LocPeriodicImagePhaseFactors.resize(NbImages); - for (size_t i = 0; i < NbImages; i++) - LocPeriodicImagePhaseFactors[i] = 1.0; -} - -void LCAOrbitalBuilder::EvalPeriodicImagePhaseFactors(PosType SuperTwist, - std::vector>& LocPeriodicImagePhaseFactors) -{ - // Allow computation to continue with no HDF file if the system has open boundary conditions. - // The complex build is usually only used with open BC for testing. - bool usesOpenBC = PBCImages[0] == 0 && PBCImages[1] == 0 && PBCImages[2] == 0; - - ///Exp(ik.g) where i is imaginary, k is the supertwist and g is the translation vector PBCImage. - if (h5_path != "" && !usesOpenBC) - { - hdf_archive hin(myComm); - if (myComm->rank() == 0) - { - if (!hin.open(h5_path, H5F_ACC_RDONLY)) - APP_ABORT("Could not open H5 file"); - - hin.push("Cell", false); - - hin.read(Lattice, "LatticeVectors"); - hin.close(); - } - for (int i = 0; i < 3; i++) - for (int j = 0; j < 3; j++) - myComm->bcast(Lattice(i, j)); - } - else if (!usesOpenBC) - { - APP_ABORT("Attempting to run PBC LCAO with no HDF5 support. Behaviour is unknown. 
Safer to exit"); - } - - int phase_idx = 0; - int TransX, TransY, TransZ; - RealType phase; - - for (int i = 0; i <= PBCImages[0]; i++) //loop Translation over X - { - TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); - for (int j = 0; j <= PBCImages[1]; j++) //loop Translation over Y - { - TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); - for (int k = 0; k <= PBCImages[2]; k++) //loop Translation over Z - { - TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); - RealType s, c; - PosType Val; - Val[0] = TransX * Lattice(0, 0) + TransY * Lattice(1, 0) + TransZ * Lattice(2, 0); - Val[1] = TransX * Lattice(0, 1) + TransY * Lattice(1, 1) + TransZ * Lattice(2, 1); - Val[2] = TransX * Lattice(0, 2) + TransY * Lattice(1, 2) + TransZ * Lattice(2, 2); - - phase = dot(SuperTwist, Val); - qmcplusplus::sincos(phase, &s, &c); - - LocPeriodicImagePhaseFactors.emplace_back(c, s); - } - } - } -} -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.h index 9bc344b285..997468fd82 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.h +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.h @@ -18,101 +18,13 @@ #ifndef QMCPLUSPLUS_SOA_LCAO_ORBITAL_BUILDER_H #define QMCPLUSPLUS_SOA_LCAO_ORBITAL_BUILDER_H -#include -#include "QMCWaveFunctions/BasisSetBase.h" -#include "QMCWaveFunctions/LCAO/LCAOrbitalSet.h" -#include "QMCWaveFunctions/SPOSetBuilder.h" +#include "Configuration.h" +#include "QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h" + namespace qmcplusplus { -/** SPOSetBuilder using new LCAOrbitalSet and Soa versions - * - * Reimplement MolecularSPOSetBuilder - * - support both CartesianTensor and SphericalTensor - */ -class LCAOrbitalBuilder : public SPOSetBuilder -{ -public: - using BasisSet_t = LCAOrbitalSet::basis_type; - /** constructor - * \param els reference to the electrons - * \param ions reference to the ions - */ - LCAOrbitalBuilder(ParticleSet& els, ParticleSet& ions, Communicate* comm, xmlNodePtr cur); - 
~LCAOrbitalBuilder() override; - std::unique_ptr createSPOSetFromXML(xmlNodePtr cur) override; - -protected: - ///target ParticleSet - ParticleSet& targetPtcl; - ///source ParticleSet - ParticleSet& sourcePtcl; - /// localized basis set map - std::map> basisset_map_; - /// if true, add cusp correction to orbitals - bool cuspCorr; - ///Path to HDF5 Wavefunction - std::string h5_path; - ///Number of periodic Images for Orbital evaluation - TinyVector PBCImages; - ///Coordinates Super Twist - PosType SuperTwist; - ///Periodic Image Phase Factors. Correspond to the phase from the PBCImages. Computed only once. - std::vector PeriodicImagePhaseFactors; - ///Store Lattice parameters from HDF5 to use in PeriodicImagePhaseFactors - Tensor Lattice; - - /// Enable cusp correction - bool doCuspCorrection; - - /** create basis set - * - * Use ao_traits to match (ROT)x(SH) combo - */ - template - BasisSet_t* createBasisSet(xmlNodePtr cur); - template - BasisSet_t* createBasisSetH5(); - - // The following items were previously in SPOSet - ///occupation number - Vector Occ; - bool loadMO(LCAOrbitalSet& spo, xmlNodePtr cur); - bool putOccupation(LCAOrbitalSet& spo, xmlNodePtr occ_ptr); - bool putFromXML(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr); - bool putFromH5(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr); - bool putPBCFromH5(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr); - // the dimensions of Ctemp are determined by the dataset on file - void LoadFullCoefsFromH5(hdf_archive& hin, - int setVal, - PosType& SuperTwist, - Matrix>& Ctemp, - bool MultiDet); - // the dimensions of Creal are determined by the dataset on file - void LoadFullCoefsFromH5(hdf_archive& hin, int setVal, PosType& SuperTwist, Matrix& Creal, bool Multidet); - void EvalPeriodicImagePhaseFactors(PosType SuperTwist, std::vector& LocPeriodicImagePhaseFactors); - void EvalPeriodicImagePhaseFactors(PosType SuperTwist, - std::vector>& LocPeriodicImagePhaseFactors); - /** read matrix from h5 file - * \param[in] hin: hdf5 
arhive to be read from - * \param setname: where to read from in hdf5 archive - * \param[out] Creal: matrix read from h5 - * - * added in header to allow use from derived class LCAOSpinorBuilder as well - */ - void readRealMatrixFromH5(hdf_archive& hin, - const std::string& setname, - Matrix& Creal) const; - -private: - ///load a basis set from XML input - std::unique_ptr loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent); - ///load a basis set from h5 file - std::unique_ptr loadBasisSetFromH5(xmlNodePtr parent); - ///determine radial orbital type based on "keyword" and "transform" attributes - int determineRadialOrbType(xmlNodePtr cur) const; -}; - +using LCAOrbitalBuilder = LCAOrbitalBuilderT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp index 39ea3953ee..5abad9e950 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp @@ -475,7 +475,7 @@ LCAOrbitalBuilderT::createBasisSetH5() mBasisSet->setPBCParams(PBCImages, SuperTwist, PeriodicImagePhaseFactors); return mBasisSet; } - +#ifndef QMC_COMPLEX template <> std::unique_ptr> LCAOrbitalBuilderT::createWithCuspCorrection(xmlNodePtr cur, @@ -624,6 +624,8 @@ LCAOrbitalBuilderT::createWithCuspCorrection(xmlNodePtr cur, return sposet; } +#else + template <> std::unique_ptr>> LCAOrbitalBuilderT>::createWithCuspCorrection( @@ -646,6 +648,8 @@ LCAOrbitalBuilderT>::createWithCuspCorrection( return std::unique_ptr>>{}; } +#endif + template std::unique_ptr> LCAOrbitalBuilderT::createSPOSetFromXML(xmlNodePtr cur) @@ -669,8 +673,7 @@ LCAOrbitalBuilderT::createSPOSetFromXML(xmlNodePtr cur) std::unique_ptr> sposet; if (doCuspCorrection) { - createWithCuspCorrection( - cur, spo_name, cusp_file, std::move(myBasisSet)); + sposet = createWithCuspCorrection(cur, spo_name, cusp_file, std::move(myBasisSet)); } else { auto lcos = std::make_unique>( @@ -1180,8 +1183,17 @@ 
LCAOrbitalBuilderT::EvalPeriodicImagePhaseFactors(PosType SuperTwist, } } +#ifndef QMC_COMPLEX +#ifndef MIXED_PRECISION template class LCAOrbitalBuilderT; +#else template class LCAOrbitalBuilderT; +#endif +#else +#ifndef MIXED_PRECISION template class LCAOrbitalBuilderT>; +#else template class LCAOrbitalBuilderT>; +#endif +#endif } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.cpp deleted file mode 100644 index 71e9ed3f6f..0000000000 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.cpp +++ /dev/null @@ -1,983 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. -////////////////////////////////////////////////////////////////////////////////////// - - -#include "LCAOrbitalSet.h" -#include "Numerics/MatrixOperators.h" -#include "CPU/BLAS.hpp" -#include - -namespace qmcplusplus -{ - -struct LCAOrbitalSet::LCAOMultiWalkerMem : public Resource -{ - LCAOMultiWalkerMem() : Resource("LCAOrbitalSet") {} - LCAOMultiWalkerMem(const LCAOMultiWalkerMem&) : LCAOMultiWalkerMem() {} - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - - OffloadMWVGLArray phi_vgl_v; // [5][NW][NumMO] - OffloadMWVGLArray basis_vgl_mw; // [5][NW][NumAO] - OffloadMWVArray phi_v; // [NW][NumMO] - OffloadMWVArray basis_v_mw; // [NW][NumAO] - OffloadMWVArray vp_phi_v; // [NVPs][NumMO] - OffloadMWVArray vp_basis_v_mw; // [NVPs][NumAO] -}; - -LCAOrbitalSet::LCAOrbitalSet(const std::string& my_name, std::unique_ptr&& bs) - : SPOSet(my_name), - BasisSetSize(bs ? 
bs->getBasisSetSize() : 0), - Identity(true), - basis_timer_(createGlobalTimer("LCAOrbitalSet::Basis", timer_level_fine)), - mo_timer_(createGlobalTimer("LCAOrbitalSet::MO", timer_level_fine)) -{ - if (!bs) - throw std::runtime_error("LCAOrbitalSet cannot take nullptr as its basis set!"); - myBasisSet = std::move(bs); - Temp.resize(BasisSetSize); - Temph.resize(BasisSetSize); - Tempgh.resize(BasisSetSize); - OrbitalSetSize = BasisSetSize; - LCAOrbitalSet::checkObject(); -} - -LCAOrbitalSet::LCAOrbitalSet(const LCAOrbitalSet& in) - : SPOSet(in), - myBasisSet(in.myBasisSet->makeClone()), - C(in.C), - BasisSetSize(in.BasisSetSize), - C_copy(in.C_copy), - Identity(in.Identity), - basis_timer_(in.basis_timer_), - mo_timer_(in.mo_timer_) -{ - Temp.resize(BasisSetSize); - Temph.resize(BasisSetSize); - Tempgh.resize(BasisSetSize); - if (!in.Identity) - { - Tempv.resize(OrbitalSetSize); - Temphv.resize(OrbitalSetSize); - Tempghv.resize(OrbitalSetSize); - } - LCAOrbitalSet::checkObject(); -} - -void LCAOrbitalSet::setOrbitalSetSize(int norbs) -{ - if (C) - throw std::runtime_error("LCAOrbitalSet::setOrbitalSetSize cannot reset existing MO coefficients"); - - Identity = false; - OrbitalSetSize = norbs; - C = std::make_shared(OrbitalSetSize, BasisSetSize); - Tempv.resize(OrbitalSetSize); - Temphv.resize(OrbitalSetSize); - Tempghv.resize(OrbitalSetSize); - LCAOrbitalSet::checkObject(); -} - -void LCAOrbitalSet::checkObject() const -{ - if (Identity) - { - if (OrbitalSetSize != BasisSetSize) - throw std::runtime_error( - "LCAOrbitalSet::checkObject OrbitalSetSize and BasisSetSize must be equal if Identity = true!"); - if (C) - throw std::runtime_error("LCAOrbitalSet::checkObject C should be nullptr if Identity = true!"); - } - else - { - if (!C) - throw std::runtime_error("LCAOrbitalSet::checkObject C should not be nullptr if Identity = false!"); - if (OrbitalSetSize != C->rows()) - throw std::runtime_error("LCAOrbitalSet::checkObject C rows doesn't match OrbitalSetSize."); - if 
(BasisSetSize != C->cols()) - throw std::runtime_error("LCAOrbitalSet::checkObject C columns doesn't match BasisSetSize."); - } -} - -void LCAOrbitalSet::createResource(ResourceCollection& collection) const -{ - myBasisSet->createResource(collection); - - auto resource_index = collection.addResource(std::make_unique()); -} - -void LCAOrbitalSet::acquireResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.getCastedLeader(); - - spo_leader.myBasisSet->acquireResource(collection, extractBasisRefList(spo_list)); - - spo_leader.mw_mem_handle_ = collection.lendResource(); -} - -void LCAOrbitalSet::releaseResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.getCastedLeader(); - - spo_leader.myBasisSet->releaseResource(collection, extractBasisRefList(spo_list)); - - collection.takebackResource(spo_leader.mw_mem_handle_); -} - -RefVectorWithLeader LCAOrbitalSet::extractBasisRefList( - const RefVectorWithLeader& spo_list) const -{ - RefVectorWithLeader basis_list(*spo_list.getCastedLeader().myBasisSet); - basis_list.reserve(spo_list.size()); - for (size_t iw = 0; iw < spo_list.size(); iw++) - basis_list.push_back(*spo_list.getCastedElement(iw).myBasisSet); - return basis_list; -} -std::unique_ptr LCAOrbitalSet::makeClone() const { return std::make_unique(*this); } - -void LCAOrbitalSet::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) -{ - if (Identity) - { //PAY ATTENTION TO COMPLEX - myBasisSet->evaluateV(P, iat, psi.data()); - } - else - { - Vector vTemp(Temp.data(0), BasisSetSize); - myBasisSet->evaluateV(P, iat, vTemp.data()); - assert(psi.size() <= OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); - MatrixOperators::product(C_partial_view, vTemp, psi); - } -} - -/** Find a better place for other user classes, Matrix 
should be padded as well */ -template -inline void Product_ABt(const VectorSoaContainer& A, const Matrix& B, VectorSoaContainer& C) -{ - constexpr char transa = 't'; - constexpr char transb = 'n'; - constexpr T zone(1); - constexpr T zero(0); - BLAS::gemm(transa, transb, B.rows(), D, B.cols(), zone, B.data(), B.cols(), A.data(), A.capacity(), zero, C.data(), - C.capacity()); -} - -inline void LCAOrbitalSet::evaluate_vgl_impl(const vgl_type& temp, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) const -{ - const size_t output_size = psi.size(); - std::copy_n(temp.data(0), output_size, psi.data()); - const ValueType* restrict gx = temp.data(1); - const ValueType* restrict gy = temp.data(2); - const ValueType* restrict gz = temp.data(3); - for (size_t j = 0; j < output_size; j++) - { - dpsi[j][0] = gx[j]; - dpsi[j][1] = gy[j]; - dpsi[j][2] = gz[j]; - } - std::copy_n(temp.data(4), output_size, d2psi.data()); -} - -inline void LCAOrbitalSet::evaluate_vgh_impl(const vgh_type& temp, - ValueVector& psi, - GradVector& dpsi, - HessVector& d2psi) const -{ - const size_t output_size = psi.size(); - std::copy_n(temp.data(0), output_size, psi.data()); - const ValueType* restrict gx = temp.data(1); - const ValueType* restrict gy = temp.data(2); - const ValueType* restrict gz = temp.data(3); - const ValueType* restrict hxx = temp.data(4); - const ValueType* restrict hxy = temp.data(5); - const ValueType* restrict hxz = temp.data(6); - const ValueType* restrict hyy = temp.data(7); - const ValueType* restrict hyz = temp.data(8); - const ValueType* restrict hzz = temp.data(9); - - for (size_t j = 0; j < output_size; j++) - { - dpsi[j][0] = gx[j]; - dpsi[j][1] = gy[j]; - dpsi[j][2] = gz[j]; - - d2psi[j](0, 0) = hxx[j]; - d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j]; - d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j]; - d2psi[j](1, 1) = hyy[j]; - d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j]; - d2psi[j](2, 2) = hzz[j]; - } -} - -inline void LCAOrbitalSet::evaluate_vghgh_impl(const 
vghgh_type& temp, - int i, - ValueMatrix& psi, - GradMatrix& dpsi, - HessMatrix& d2psi, - GGGMatrix& dghpsi) const -{ - const size_t output_size = psi.cols(); - std::copy_n(temp.data(0), output_size, psi[i]); - const ValueType* restrict gx = temp.data(1); - const ValueType* restrict gy = temp.data(2); - const ValueType* restrict gz = temp.data(3); - const ValueType* restrict hxx = temp.data(4); - const ValueType* restrict hxy = temp.data(5); - const ValueType* restrict hxz = temp.data(6); - const ValueType* restrict hyy = temp.data(7); - const ValueType* restrict hyz = temp.data(8); - const ValueType* restrict hzz = temp.data(9); - const ValueType* restrict gh_xxx = temp.data(10); - const ValueType* restrict gh_xxy = temp.data(11); - const ValueType* restrict gh_xxz = temp.data(12); - const ValueType* restrict gh_xyy = temp.data(13); - const ValueType* restrict gh_xyz = temp.data(14); - const ValueType* restrict gh_xzz = temp.data(15); - const ValueType* restrict gh_yyy = temp.data(16); - const ValueType* restrict gh_yyz = temp.data(17); - const ValueType* restrict gh_yzz = temp.data(18); - const ValueType* restrict gh_zzz = temp.data(19); - - for (size_t j = 0; j < output_size; j++) - { - dpsi[i][j][0] = gx[j]; - dpsi[i][j][1] = gy[j]; - dpsi[i][j][2] = gz[j]; - - d2psi[i][j](0, 0) = hxx[j]; - d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j]; - d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j]; - d2psi[i][j](1, 1) = hyy[j]; - d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j]; - d2psi[i][j](2, 2) = hzz[j]; - - dghpsi[i][j][0](0, 0) = gh_xxx[j]; //x|xx - dghpsi[i][j][0](0, 1) = gh_xxy[j]; //x|xy - dghpsi[i][j][0](0, 2) = gh_xxz[j]; //x|xz - dghpsi[i][j][0](1, 0) = gh_xxy[j]; //x|yx = xxy - dghpsi[i][j][0](1, 1) = gh_xyy[j]; //x|yy - dghpsi[i][j][0](1, 2) = gh_xyz[j]; //x|yz - dghpsi[i][j][0](2, 0) = gh_xxz[j]; //x|zx = xxz - dghpsi[i][j][0](2, 1) = gh_xyz[j]; //x|zy = xyz - dghpsi[i][j][0](2, 2) = gh_xzz[j]; //x|zz - - dghpsi[i][j][1](0, 0) = gh_xxy[j]; //y|xx = xxy - 
dghpsi[i][j][1](0, 1) = gh_xyy[j]; //y|xy = xyy - dghpsi[i][j][1](0, 2) = gh_xyz[j]; //y|xz = xyz - dghpsi[i][j][1](1, 0) = gh_xyy[j]; //y|yx = xyy - dghpsi[i][j][1](1, 1) = gh_yyy[j]; //y|yy - dghpsi[i][j][1](1, 2) = gh_yyz[j]; //y|yz - dghpsi[i][j][1](2, 0) = gh_xyz[j]; //y|zx = xyz - dghpsi[i][j][1](2, 1) = gh_yyz[j]; //y|zy = yyz - dghpsi[i][j][1](2, 2) = gh_yzz[j]; //y|zz - - dghpsi[i][j][2](0, 0) = gh_xxz[j]; //z|xx = xxz - dghpsi[i][j][2](0, 1) = gh_xyz[j]; //z|xy = xyz - dghpsi[i][j][2](0, 2) = gh_xzz[j]; //z|xz = xzz - dghpsi[i][j][2](1, 0) = gh_xyz[j]; //z|yx = xyz - dghpsi[i][j][2](1, 1) = gh_yyz[j]; //z|yy = yyz - dghpsi[i][j][2](1, 2) = gh_yzz[j]; //z|yz = yzz - dghpsi[i][j][2](2, 0) = gh_xzz[j]; //z|zx = xzz - dghpsi[i][j][2](2, 1) = gh_yzz[j]; //z|zy = yzz - dghpsi[i][j][2](2, 2) = gh_zzz[j]; //z|zz - } -} - -inline void LCAOrbitalSet::evaluate_vghgh_impl(const vghgh_type& temp, - ValueVector& psi, - GradVector& dpsi, - HessVector& d2psi, - GGGVector& dghpsi) const -{ - const size_t output_size = psi.size(); - std::copy_n(temp.data(0), output_size, psi.data()); - const ValueType* restrict gx = temp.data(1); - const ValueType* restrict gy = temp.data(2); - const ValueType* restrict gz = temp.data(3); - const ValueType* restrict hxx = temp.data(4); - const ValueType* restrict hxy = temp.data(5); - const ValueType* restrict hxz = temp.data(6); - const ValueType* restrict hyy = temp.data(7); - const ValueType* restrict hyz = temp.data(8); - const ValueType* restrict hzz = temp.data(9); - const ValueType* restrict gh_xxx = temp.data(10); - const ValueType* restrict gh_xxy = temp.data(11); - const ValueType* restrict gh_xxz = temp.data(12); - const ValueType* restrict gh_xyy = temp.data(13); - const ValueType* restrict gh_xyz = temp.data(14); - const ValueType* restrict gh_xzz = temp.data(15); - const ValueType* restrict gh_yyy = temp.data(16); - const ValueType* restrict gh_yyz = temp.data(17); - const ValueType* restrict gh_yzz = temp.data(18); - const 
ValueType* restrict gh_zzz = temp.data(19); - - for (size_t j = 0; j < output_size; j++) - { - dpsi[j][0] = gx[j]; - dpsi[j][1] = gy[j]; - dpsi[j][2] = gz[j]; - - d2psi[j](0, 0) = hxx[j]; - d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j]; - d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j]; - d2psi[j](1, 1) = hyy[j]; - d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j]; - d2psi[j](2, 2) = hzz[j]; - - dghpsi[j][0](0, 0) = gh_xxx[j]; //x|xx - dghpsi[j][0](0, 1) = gh_xxy[j]; //x|xy - dghpsi[j][0](0, 2) = gh_xxz[j]; //x|xz - dghpsi[j][0](1, 0) = gh_xxy[j]; //x|yx = xxy - dghpsi[j][0](1, 1) = gh_xyy[j]; //x|yy - dghpsi[j][0](1, 2) = gh_xyz[j]; //x|yz - dghpsi[j][0](2, 0) = gh_xxz[j]; //x|zx = xxz - dghpsi[j][0](2, 1) = gh_xyz[j]; //x|zy = xyz - dghpsi[j][0](2, 2) = gh_xzz[j]; //x|zz - - dghpsi[j][1](0, 0) = gh_xxy[j]; //y|xx = xxy - dghpsi[j][1](0, 1) = gh_xyy[j]; //y|xy = xyy - dghpsi[j][1](0, 2) = gh_xyz[j]; //y|xz = xyz - dghpsi[j][1](1, 0) = gh_xyy[j]; //y|yx = xyy - dghpsi[j][1](1, 1) = gh_yyy[j]; //y|yy - dghpsi[j][1](1, 2) = gh_yyz[j]; //y|yz - dghpsi[j][1](2, 0) = gh_xyz[j]; //y|zx = xyz - dghpsi[j][1](2, 1) = gh_xyy[j]; //y|xy = xyy - dghpsi[j][1](2, 2) = gh_yzz[j]; //y|zz - - dghpsi[j][2](0, 0) = gh_xzz[j]; //z|xx = xzz - dghpsi[j][2](0, 1) = gh_xyz[j]; //z|xy = xyz - dghpsi[j][2](0, 2) = gh_xzz[j]; //z|xz = xzz - dghpsi[j][2](1, 0) = gh_xyz[j]; //z|yx = xyz - dghpsi[j][2](1, 1) = gh_yyz[j]; //z|yy = yyz - dghpsi[j][2](1, 2) = gh_yzz[j]; //z|yz = yzz - dghpsi[j][2](2, 0) = gh_xzz[j]; //z|zx = xzz - dghpsi[j][2](2, 1) = gh_yzz[j]; //z|zy = yzz - dghpsi[j][2](2, 2) = gh_zzz[j]; //z|zz - } -} - -inline void LCAOrbitalSet::evaluate_ionderiv_v_row_impl(const vgl_type& temp, GradVector& dpsi) const -{ - const size_t output_size = dpsi.size(); - const ValueType* restrict gx = temp.data(1); - const ValueType* restrict gy = temp.data(2); - const ValueType* restrict gz = temp.data(3); - - for (size_t j = 0; j < output_size; j++) - { - //As mentioned in SoaLocalizedBasisSet, LCAO's have a nice 
property that - // for an atomic center, the ion gradient is the negative of the elecron gradient. - // Hence minus signs for each of these. - dpsi[j][0] = -gx[j]; - dpsi[j][1] = -gy[j]; - dpsi[j][2] = -gz[j]; - } -} - - -void LCAOrbitalSet::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) -{ - //TAKE CARE OF IDENTITY - { - ScopedTimer local(basis_timer_); - myBasisSet->evaluateVGL(P, iat, Temp); - } - - if (Identity) - evaluate_vgl_impl(Temp, psi, dpsi, d2psi); - else - { - assert(psi.size() <= OrbitalSetSize); - { - ScopedTimer local(mo_timer_); - ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); - Product_ABt(Temp, C_partial_view, Tempv); - } - evaluate_vgl_impl(Tempv, psi, dpsi, d2psi); - } -} - -void LCAOrbitalSet::mw_evaluateVGL(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.getCastedLeader(); - auto& phi_vgl_v = spo_leader.mw_mem_handle_.getResource().phi_vgl_v; - - phi_vgl_v.resize(DIM_VGL, spo_list.size(), OrbitalSetSize); - mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v); - - const size_t nw = phi_vgl_v.size(1); - - //TODO: make this cleaner? 
- for (int iw = 0; iw < nw; iw++) - { - const size_t output_size = psi_v_list[iw].get().size(); - std::copy_n(phi_vgl_v.data_at(0, iw, 0), output_size, psi_v_list[iw].get().data()); - std::copy_n(phi_vgl_v.data_at(4, iw, 0), output_size, d2psi_v_list[iw].get().data()); - // grads are [dim, walker, orb] in phi_vgl_v - // [walker][orb, dim] in dpsi_v_list - for (size_t idim = 0; idim < DIM; idim++) - BLAS::copy(output_size, phi_vgl_v.data_at(idim + 1, iw, 0), 1, &dpsi_v_list[iw].get().data()[0][idim], DIM); - } -} - -void LCAOrbitalSet::mw_evaluateVGLImplGEMM(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - OffloadMWVGLArray& phi_vgl_v) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.getCastedLeader(); - auto& basis_vgl_mw = spo_leader.mw_mem_handle_.getResource().basis_vgl_mw; - basis_vgl_mw.resize(DIM_VGL, spo_list.size(), BasisSetSize); - - { - ScopedTimer local(basis_timer_); - myBasisSet->mw_evaluateVGL(P_list, iat, basis_vgl_mw); - } - - if (Identity) - { - // output_size can be smaller than BasisSetSize - const size_t output_size = phi_vgl_v.size(2); - const size_t nw = phi_vgl_v.size(1); - - for (size_t idim = 0; idim < DIM_VGL; idim++) - for (int iw = 0; iw < nw; iw++) - std::copy_n(basis_vgl_mw.data_at(idim, iw, 0), output_size, phi_vgl_v.data_at(idim, iw, 0)); - } - else - { - const size_t requested_orb_size = phi_vgl_v.size(2); - assert(requested_orb_size <= OrbitalSetSize); - { - ScopedTimer local(mo_timer_); - ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize); - // TODO: make class for general blas interface in Platforms - // have instance of that class as member of LCAOrbitalSet, call gemm through that - BLAS::gemm('T', 'N', - requested_orb_size, // MOs - spo_list.size() * DIM_VGL, // walkers * DIM_VGL - BasisSetSize, // AOs - 1, C_partial_view.data(), BasisSetSize, basis_vgl_mw.data(), BasisSetSize, 0, phi_vgl_v.data(), - requested_orb_size); - } - } -} - -void 
LCAOrbitalSet::mw_evaluateValueVPsImplGEMM(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - OffloadMWVArray& vp_phi_v) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.getCastedLeader(); - //const size_t nw = spo_list.size(); - auto& vp_basis_v_mw = spo_leader.mw_mem_handle_.getResource().vp_basis_v_mw; - //Splatter basis_v - const size_t nVPs = vp_phi_v.size(0); - vp_basis_v_mw.resize(nVPs, BasisSetSize); - - myBasisSet->mw_evaluateValueVPs(vp_list, vp_basis_v_mw); - - if (Identity) - { - std::copy_n(vp_basis_v_mw.data_at(0, 0), OrbitalSetSize * nVPs, vp_phi_v.data_at(0, 0)); - } - else - { - const size_t requested_orb_size = vp_phi_v.size(1); - assert(requested_orb_size <= OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize); - BLAS::gemm('T', 'N', - requested_orb_size, // MOs - nVPs, // walkers * Virtual Particles - BasisSetSize, // AOs - 1, C_partial_view.data(), BasisSetSize, vp_basis_v_mw.data(), BasisSetSize, 0, vp_phi_v.data(), - requested_orb_size); - } -} -void LCAOrbitalSet::mw_evaluateValue(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.getCastedLeader(); - auto& phi_v = spo_leader.mw_mem_handle_.getResource().phi_v; - phi_v.resize(spo_list.size(), OrbitalSetSize); - mw_evaluateValueImplGEMM(spo_list, P_list, iat, phi_v); - - const size_t output_size = phi_v.size(1); - const size_t nw = phi_v.size(0); - - for (int iw = 0; iw < nw; iw++) - std::copy_n(phi_v.data_at(iw, 0), output_size, psi_v_list[iw].get().data()); -} - -void LCAOrbitalSet::mw_evaluateValueImplGEMM(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - OffloadMWVArray& phi_v) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.getCastedLeader(); - const size_t nw = spo_list.size(); - 
auto& basis_v_mw = spo_leader.mw_mem_handle_.getResource().basis_v_mw; - basis_v_mw.resize(nw, BasisSetSize); - - myBasisSet->mw_evaluateValue(P_list, iat, basis_v_mw); - - if (Identity) - { - std::copy_n(basis_v_mw.data_at(0, 0), OrbitalSetSize * nw, phi_v.data_at(0, 0)); - } - else - { - const size_t requested_orb_size = phi_v.size(1); - assert(requested_orb_size <= OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize); - BLAS::gemm('T', 'N', - requested_orb_size, // MOs - spo_list.size(), // walkers - BasisSetSize, // AOs - 1, C_partial_view.data(), BasisSetSize, basis_v_mw.data(), BasisSetSize, 0, phi_v.data(), - requested_orb_size); - } -} - -void LCAOrbitalSet::mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.getCastedLeader(); - auto& vp_phi_v = spo_leader.mw_mem_handle_.getResource().vp_phi_v; - - const size_t nVPs = VirtualParticleSet::countVPs(vp_list); - const size_t requested_orb_size = psi_list[0].get().size(); - vp_phi_v.resize(nVPs, requested_orb_size); - - mw_evaluateValueVPsImplGEMM(spo_list, vp_list, vp_phi_v); - - ///To be computed on Device through new varuable mw_ratios_list, then copied to ratios_list on host. - size_t index = 0; - for (size_t iw = 0; iw < vp_list.size(); iw++) - for (size_t iat = 0; iat < vp_list[iw].getTotalNum(); iat++) - ratios_list[iw][iat] = simd::dot(vp_phi_v.data_at(index++, 0), invRow_ptr_list[iw], requested_orb_size); -} - -void LCAOrbitalSet::evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) -{ - Vector vTemp(Temp.data(0), BasisSetSize); - Vector invTemp(Temp.data(1), BasisSetSize); - - { - ScopedTimer local(mo_timer_); - // when only a subset of orbitals is used, extract limited rows of C. 
- Matrix C_occupied(C->data(), psiinv.size(), BasisSetSize); - MatrixOperators::product_Atx(C_occupied, psiinv, invTemp); - } - - for (size_t j = 0; j < VP.getTotalNum(); j++) - { - { - ScopedTimer local(basis_timer_); - myBasisSet->evaluateV(VP, j, vTemp.data()); - } - ratios[j] = simd::dot(vTemp.data(), invTemp.data(), BasisSetSize); - } -} - -void LCAOrbitalSet::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const -{ - assert(this == &spo_list.getLeader()); - assert(phi_vgl_v.size(0) == DIM_VGL); - assert(phi_vgl_v.size(1) == spo_list.size()); - - mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v); - // Device data of phi_vgl_v must be up-to-date upon return - phi_vgl_v.updateTo(); - - const size_t nw = spo_list.size(); - const size_t norb_requested = phi_vgl_v.size(2); - for (int iw = 0; iw < nw; iw++) - { - ratios[iw] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(0, iw, 0), norb_requested); - GradType dphi; - for (size_t idim = 0; idim < DIM; idim++) - dphi[idim] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(idim + 1, iw, 0), norb_requested) / ratios[iw]; - grads[iw] = dphi; - } -} - -void LCAOrbitalSet::evaluateVGH(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, HessVector& dhpsi) -{ - //TAKE CARE OF IDENTITY - myBasisSet->evaluateVGH(P, iat, Temph); - if (Identity) - evaluate_vgh_impl(Temph, psi, dpsi, dhpsi); - else - { - assert(psi.size() <= OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); - Product_ABt(Temph, C_partial_view, Temphv); - evaluate_vgh_impl(Temphv, psi, dpsi, dhpsi); - } -} - -void LCAOrbitalSet::evaluateVGHGH(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& dhpsi, - GGGVector& dghpsi) -{ - // APP_ABORT("LCAORbitalSet::evaluate(psi,gpsi,hpsi,ghpsi) not 
implemented\n"); - - //TAKE CARE OF IDENTITY - myBasisSet->evaluateVGHGH(P, iat, Tempgh); - if (Identity) - evaluate_vghgh_impl(Tempgh, psi, dpsi, dhpsi, dghpsi); - else - { - assert(psi.size() <= OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); - Product_ABt(Tempgh, C_partial_view, Tempghv); - evaluate_vghgh_impl(Tempghv, psi, dpsi, dhpsi, dghpsi); - } -} - -/* implement using gemm algorithm */ -inline void LCAOrbitalSet::evaluate_vgl_impl(const vgl_type& temp, - int i, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) const -{ - const size_t output_size = logdet.cols(); - std::copy_n(temp.data(0), output_size, logdet[i]); - const ValueType* restrict gx = temp.data(1); - const ValueType* restrict gy = temp.data(2); - const ValueType* restrict gz = temp.data(3); - for (size_t j = 0; j < output_size; j++) - { - dlogdet[i][j][0] = gx[j]; - dlogdet[i][j][1] = gy[j]; - dlogdet[i][j][2] = gz[j]; - } - std::copy_n(temp.data(4), output_size, d2logdet[i]); -} - -inline void LCAOrbitalSet::evaluate_vgh_impl(const vgh_type& temp, - int i, - ValueMatrix& psi, - GradMatrix& dpsi, - HessMatrix& d2psi) const -{ - const size_t output_size = psi.cols(); - std::copy_n(temp.data(0), output_size, psi[i]); - const ValueType* restrict gx = temp.data(1); - const ValueType* restrict gy = temp.data(2); - const ValueType* restrict gz = temp.data(3); - const ValueType* restrict hxx = temp.data(4); - const ValueType* restrict hxy = temp.data(5); - const ValueType* restrict hxz = temp.data(6); - const ValueType* restrict hyy = temp.data(7); - const ValueType* restrict hyz = temp.data(8); - const ValueType* restrict hzz = temp.data(9); - - for (size_t j = 0; j < output_size; j++) - { - dpsi[i][j][0] = gx[j]; - dpsi[i][j][1] = gy[j]; - dpsi[i][j][2] = gz[j]; - - d2psi[i][j](0, 0) = hxx[j]; - d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j]; - d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j]; - d2psi[i][j](1, 1) = hyy[j]; - d2psi[i][j](2, 1) = 
d2psi[i][j](1, 2) = hyz[j]; - d2psi[i][j](2, 2) = hzz[j]; - } -} - -inline void LCAOrbitalSet::evaluate_ionderiv_v_impl(const vgl_type& temp, int i, GradMatrix& dpsi) const -{ - const size_t output_size = dpsi.cols(); - const ValueType* restrict gx = temp.data(1); - const ValueType* restrict gy = temp.data(2); - const ValueType* restrict gz = temp.data(3); - - for (size_t j = 0; j < output_size; j++) - { - //As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property that - // for an atomic center, the ion gradient is the negative of the elecron gradient. - // Hence minus signs for each of these. - dpsi[i][j][0] = -gx[j]; - dpsi[i][j][1] = -gy[j]; - dpsi[i][j][2] = -gz[j]; - } -} - -inline void LCAOrbitalSet::evaluate_ionderiv_vgl_impl(const vghgh_type& temp, - int i, - GradMatrix& dpsi, - HessMatrix& dgpsi, - GradMatrix& dlpsi) const -{ - const size_t output_size = dpsi.cols(); - const ValueType* restrict gx = temp.data(1); - const ValueType* restrict gy = temp.data(2); - const ValueType* restrict gz = temp.data(3); - const ValueType* restrict hxx = temp.data(4); - const ValueType* restrict hxy = temp.data(5); - const ValueType* restrict hxz = temp.data(6); - const ValueType* restrict hyy = temp.data(7); - const ValueType* restrict hyz = temp.data(8); - const ValueType* restrict hzz = temp.data(9); - const ValueType* restrict gh_xxx = temp.data(10); - const ValueType* restrict gh_xxy = temp.data(11); - const ValueType* restrict gh_xxz = temp.data(12); - const ValueType* restrict gh_xyy = temp.data(13); - const ValueType* restrict gh_xzz = temp.data(15); - const ValueType* restrict gh_yyy = temp.data(16); - const ValueType* restrict gh_yyz = temp.data(17); - const ValueType* restrict gh_yzz = temp.data(18); - const ValueType* restrict gh_zzz = temp.data(19); - - for (size_t j = 0; j < output_size; j++) - { - //As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property that - // for an atomic center, the ion gradient is the negative of the elecron 
gradient. - // Hence minus signs for each of these. - dpsi[i][j][0] = -gx[j]; - dpsi[i][j][1] = -gy[j]; - dpsi[i][j][2] = -gz[j]; - - dgpsi[i][j](0, 0) = -hxx[j]; - dgpsi[i][j](0, 1) = dgpsi[i][j](1, 0) = -hxy[j]; - dgpsi[i][j](0, 2) = dgpsi[i][j](2, 0) = -hxz[j]; - dgpsi[i][j](1, 1) = -hyy[j]; - dgpsi[i][j](2, 1) = dgpsi[i][j](1, 2) = -hyz[j]; - dgpsi[i][j](2, 2) = -hzz[j]; - - //Since this returns the ion gradient of the laplacian, we have to trace the grad hessian vector. - dlpsi[i][j][0] = -(gh_xxx[j] + gh_xyy[j] + gh_xzz[j]); - dlpsi[i][j][1] = -(gh_xxy[j] + gh_yyy[j] + gh_yzz[j]); - dlpsi[i][j][2] = -(gh_xxz[j] + gh_yyz[j] + gh_zzz[j]); - } -} - -void LCAOrbitalSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) -{ - if (Identity) - { - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateVGL(P, iat, Temp); - evaluate_vgl_impl(Temp, i, logdet, dlogdet, d2logdet); - } - } - else - { - assert(logdet.cols() <= OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateVGL(P, iat, Temp); - Product_ABt(Temp, C_partial_view, Tempv); - evaluate_vgl_impl(Tempv, i, logdet, dlogdet, d2logdet); - } - } -} - -void LCAOrbitalSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) -{ - if (Identity) - { - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateVGH(P, iat, Temph); - evaluate_vgh_impl(Temph, i, logdet, dlogdet, grad_grad_logdet); - } - } - else - { - assert(logdet.cols() <= OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateVGH(P, iat, Temph); - Product_ABt(Temph, C_partial_view, Temphv); - 
evaluate_vgh_impl(Temphv, i, logdet, dlogdet, grad_grad_logdet); - } - } -} - -void LCAOrbitalSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) -{ - if (Identity) - { - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateVGHGH(P, iat, Tempgh); - evaluate_vghgh_impl(Tempgh, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet); - } - } - else - { - assert(logdet.cols() <= OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateVGHGH(P, iat, Tempgh); - Product_ABt(Tempgh, C_partial_view, Tempghv); - evaluate_vghgh_impl(Tempghv, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet); - } - } -} - -void LCAOrbitalSet::evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi) -{ - if (Identity) - { - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateGradSourceV(P, iat, source, iat_src, Temp); - evaluate_ionderiv_v_impl(Temp, i, gradphi); - } - } - else - { - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateGradSourceV(P, iat, source, iat_src, Temp); - Product_ABt(Temp, *C, Tempv); - evaluate_ionderiv_v_impl(Tempv, i, gradphi); - } - } -} - -void LCAOrbitalSet::evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, - GradMatrix& grad_lapl_phi) -{ - if (Identity) - { - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateGradSourceVGL(P, iat, source, iat_src, Tempgh); - evaluate_ionderiv_vgl_impl(Tempgh, i, grad_phi, grad_grad_phi, grad_lapl_phi); - } - } - else - { - for (size_t i = 0, iat = first; iat < last; 
i++, iat++) - { - myBasisSet->evaluateGradSourceVGL(P, iat, source, iat_src, Tempgh); - Product_ABt(Tempgh, *C, Tempghv); - evaluate_ionderiv_vgl_impl(Tempghv, i, grad_phi, grad_grad_phi, grad_lapl_phi); - // evaluate_vghgh_impl(Tempghv, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet); - } - } -} - -void LCAOrbitalSet::evaluateGradSourceRow(const ParticleSet& P, - int iel, - const ParticleSet& source, - int iat_src, - GradVector& gradphi) -{ - if (Identity) - { - myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, Temp); - evaluate_ionderiv_v_row_impl(Temp, gradphi); - } - else - { - myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, Temp); - Product_ABt(Temp, *C, Tempv); - evaluate_ionderiv_v_row_impl(Tempv, gradphi); - } -} - -void LCAOrbitalSet::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) -{ - if (!use_stored_copy) - *C_copy = *C; - //gemm is out-of-place - BLAS::gemm('N', 'T', BasisSetSize, OrbitalSetSize, OrbitalSetSize, RealType(1.0), C_copy->data(), BasisSetSize, - rot_mat.data(), OrbitalSetSize, RealType(0.0), C->data(), BasisSetSize); - - /* debugging code - app_log() << "PRINTING MO COEFFICIENTS AFTER ROTATION " << objectName << std::endl; - for (int j = 0; j < OrbitalSetSize; j++) - for (int i = 0; i < BasisSetSize; i++) - { - app_log() << " " << std::right << std::fixed << std::setprecision(16) << std::setw(23) << std::scientific - << *(C->data() + j * BasisSetSize + i); - - if ((j * BasisSetSize + i + 1) % 4 == 0) - app_log() << std::endl; - } - */ -} - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.h index cf6706df95..811105330d 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.h +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.h @@ -13,310 +13,13 @@ #ifndef QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_TEMP_H #define QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_TEMP_H -#include -#include "QMCWaveFunctions/SPOSet.h" -#include 
"QMCWaveFunctions/BasisSetBase.h" - -#include "Numerics/MatrixOperators.h" -#include "Numerics/DeterminantOperators.h" +#include "Configuration.h" +#include "QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h" namespace qmcplusplus { -/** class to handle linear combinations of basis orbitals used to evaluate the Dirac determinants. - * - * SoA verson of LCOrtbitalSet - * Localized basis set is always real - */ -struct LCAOrbitalSet : public SPOSet -{ -public: - using basis_type = SoaBasisSetBase; - using vgl_type = basis_type::vgl_type; - using vgh_type = basis_type::vgh_type; - using vghgh_type = basis_type::vghgh_type; - - ///pointer to the basis set - std::unique_ptr myBasisSet; - /// pointer to matrix containing the coefficients - std::shared_ptr C; - - /** constructor - * @param bs pointer to the BasisSet - */ - LCAOrbitalSet(const std::string& my_name, std::unique_ptr&& bs); - - LCAOrbitalSet(const LCAOrbitalSet& in); - - std::string getClassName() const final { return "LCAOrbitalSet"; } - - bool isRotationSupported() const final { return true; } - - bool hasIonDerivs() const final { return true; } - - std::unique_ptr makeClone() const final; - - void storeParamsBeforeRotation() final { C_copy = std::make_shared(*C); } - - void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) final; - - /** set the OrbitalSetSize and Identity=false and initialize internal storages - */ - void setOrbitalSetSize(int norbs) final; - - /** return the size of the basis set - */ - int getBasisSetSize() const { return (myBasisSet == nullptr) ? 
0 : myBasisSet->getBasisSetSize(); } - - bool isIdentity() const { return Identity; }; - - /** check consistency between Identity and C - * - */ - void checkObject() const final; - - void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) final; - - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final; - - void mw_evaluateValue(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list) const final; - - void mw_evaluateVGL(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const final; - - void mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const final; - - void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) final; - - void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const final; - - void evaluateVGH(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi) final; - - void evaluateVGHGH(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) final; - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) final; - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) final; - - void evaluate_notranspose(const 
ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) final; - - //NOTE: The data types get complicated here, so here's an overview of the - // data types associated with ionic derivatives, and how to get their data. - // - //NOTE: These data structures hold the data for one particular ion, and so the ID is implicit. - // It's up to the user to keep track of which ion these derivatives refer to. - // - // 1.) GradMatrix grad_phi: Holds the ionic derivatives of each SPO for each electron. - // Example: grad_phi[iel][iorb][idim]. iel -- electron index. - // iorb -- orbital index. - // idim -- cartesian index of ionic derivative. - // X=0, Y=1, Z=2. - // - // 2.) HessMatrix grad_grad_phi: Holds the ionic derivatives of the electron gradient components - // for each SPO and each electron. - // Example: grad_grad_phi[iel][iorb](idim,edim) iel -- electron index. - // iorb -- orbital index. - // idim -- ionic derivative's cartesian index. - // X=0, Y=1, Z=2 - // edim -- electron derivative's cartesian index. - // x=0, y=1, z=2. - // - // 3.) GradMatrix grad_lapl_phi: Holds the ionic derivatives of the electron laplacian for each SPO and each electron. - // Example: grad_lapl_phi[iel][iorb][idim]. iel -- electron index. - // iorb -- orbital index. - // idim -- cartesian index of ionic derivative. - // X=0, Y=1, Z=2. - - /** - * \brief Calculate ion derivatives of SPO's. - * - * @param P Electron particle set. - * @param first index of first electron - * @@param last index of last electron - * @param source Ion particle set. - * @param iat_src Index of ion. - * @param gradphi Container storing ion gradients for all particles and all orbitals. 
- */ - void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi) final; - - /** - * \brief Calculate ion derivatives of SPO's, their gradients, and their laplacians. - * - * @param P Electron particle set. - * @param first index of first electron. - * @@param last index of last electron - * @param source Ion particle set. - * @param iat_src Index of ion. - * @param grad_phi Container storing ion gradients for all particles and all orbitals. - * @param grad_grad_phi Container storing ion gradients of electron gradients for all particles and all orbitals. - * @param grad_lapl_phi Container storing ion gradients of SPO laplacians for all particles and all orbitals. - */ - void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, - GradMatrix& grad_lapl_phi) final; - - void evaluateGradSourceRow(const ParticleSet& P, - int iel, - const ParticleSet& source, - int iat_src, - GradVector& grad_phi) final; - - void createResource(ResourceCollection& collection) const final; - void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const final; - void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const final; - -protected: - ///number of Single-particle orbitals - const IndexType BasisSetSize; - /// a copy of the original C before orbital rotation is applied; - std::shared_ptr C_copy; - - ///true if C is an identity matrix - bool Identity; - ///Temp(BasisSetSize) : Row index=V,Gx,Gy,Gz,L - vgl_type Temp; - ///Tempv(OrbitalSetSize) Tempv=C*Temp - vgl_type Tempv; - - ///These are temporary VectorSoAContainers to hold value, gradient, and hessian for - ///all basis or SPO functions evaluated at a given point. 
- ///Nbasis x [1(value)+3(gradient)+6(hessian)] - vgh_type Temph; - ///Norbitals x [1(value)+3(gradient)+6(hessian)] - vgh_type Temphv; - - ///These are temporary VectorSoAContainers to hold value, gradient, hessian, and - /// gradient hessian for all basis or SPO functions evaluated at a given point. - ///Nbasis x [1(value)+3(gradient)+6(hessian)+10(grad_hessian)] - vghgh_type Tempgh; - ///Nbasis x [1(value)+3(gradient)+6(hessian)+10(grad_hessian)] - vghgh_type Tempghv; - -private: - ///helper functions to handle Identity - void evaluate_vgl_impl(const vgl_type& temp, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) const; - - void evaluate_vgl_impl(const vgl_type& temp, - int i, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) const; - ///These two functions unpack the data in vgh_type temp object into wavefunction friendly data structures. - - - ///This unpacks temp into vectors psi, dpsi, and d2psi. - void evaluate_vgh_impl(const vgh_type& temp, ValueVector& psi, GradVector& dpsi, HessVector& d2psi) const; - - ///Unpacks temp into the ith row (or electron index) of logdet, dlogdet, dhlogdet. - void evaluate_vgh_impl(const vgh_type& temp, - int i, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& dhlogdet) const; - ///Unpacks data in vghgh_type temp object into wavefunction friendly data structures for value, gradient, hessian - ///and gradient hessian. - void evaluate_vghgh_impl(const vghgh_type& temp, - ValueVector& psi, - GradVector& dpsi, - HessVector& d2psi, - GGGVector& dghpsi) const; - - void evaluate_vghgh_impl(const vghgh_type& temp, - int i, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& dhlogdet, - GGGMatrix& dghlogdet) const; - - - ///Unpacks data in vgl object and calculates/places ionic gradient result into dlogdet. 
- void evaluate_ionderiv_v_impl(const vgl_type& temp, int i, GradMatrix& dlogdet) const; - - ///Unpacks data in vgl object and calculates/places ionic gradient of value, - /// electron gradient, and electron laplacian result into dlogdet, dglogdet, and dllogdet respectively. - void evaluate_ionderiv_vgl_impl(const vghgh_type& temp, - int i, - GradMatrix& dlogdet, - HessMatrix& dglogdet, - GradMatrix& dllogdet) const; - - ///Unpacks data in vgl object and calculates/places ionic gradient of a single row (phi_j(r)) into dlogdet. - void evaluate_ionderiv_v_row_impl(const vgl_type& temp, GradVector& dlogdet) const; - - void mw_evaluateVGLImplGEMM(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - OffloadMWVGLArray& phi_vgl_v) const; - - /// packed walker GEMM implementation - void mw_evaluateValueImplGEMM(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - OffloadMWVArray& phi_v) const; - /// packed walker GEMM implementation with multi virtual particle sets - void mw_evaluateValueVPsImplGEMM(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - OffloadMWVArray& phi_v) const; +using LCAOrbitalSet = LCAOrbitalSetT; - /// helper function for extracting a list of basis sets from a list of LCAOrbitalSet - RefVectorWithLeader extractBasisRefList(const RefVectorWithLeader& spo_list) const; - struct LCAOMultiWalkerMem; - ResourceHandle mw_mem_handle_; - /// timer for basis set - NewTimer& basis_timer_; - /// timer for MO - NewTimer& mo_timer_; -}; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp index 6abd2d8b22..81f6b64da4 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp @@ -129,6 +129,7 @@ template void LCAOrbitalSetT::createResource(ResourceCollection& collection) const { + myBasisSet->createResource(collection); auto resource_index 
= collection.addResource(std::make_unique()); } @@ -140,6 +141,7 @@ LCAOrbitalSetT::acquireResource(ResourceCollection& collection, { assert(this == &spo_list.getLeader()); auto& spo_leader = spo_list.template getCastedLeader>(); + spo_leader.myBasisSet->acquireResource(collection, extractBasisRefList(spo_list)); spo_leader.mw_mem_handle_ = collection.lendResource(); } @@ -150,9 +152,21 @@ LCAOrbitalSetT::releaseResource(ResourceCollection& collection, { assert(this == &spo_list.getLeader()); auto& spo_leader = spo_list.template getCastedLeader>(); + spo_leader.myBasisSet->releaseResource(collection, extractBasisRefList(spo_list)); collection.takebackResource(spo_leader.mw_mem_handle_); } +template +RefVectorWithLeader::basis_type> LCAOrbitalSetT::extractBasisRefList( + const RefVectorWithLeader>& spo_list) const +{ + RefVectorWithLeader basis_list(*spo_list.template getCastedLeader>().myBasisSet); + basis_list.reserve(spo_list.size()); + for (size_t iw = 0; iw < spo_list.size(); iw++) + basis_list.push_back(*spo_list.template getCastedElement>(iw).myBasisSet); + return basis_list; +} + template std::unique_ptr> LCAOrbitalSetT::makeClone() const @@ -954,9 +968,12 @@ LCAOrbitalSetT::applyRotation( } // Class concrete types from ValueType +#ifndef QMC_COMPLEX template class LCAOrbitalSetT; template class LCAOrbitalSetT; +#else template class LCAOrbitalSetT>; template class LCAOrbitalSetT>; +#endif } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h index f8bf40d017..29f8c897d2 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h @@ -371,6 +371,9 @@ class LCAOrbitalSetT : public SPOSetT const RefVectorWithLeader>& P_list, int iat, OffloadMWVArray& phi_v) const; + /// helper function for extracting a list of basis sets from a list of LCAOrbitalSet + RefVectorWithLeader extractBasisRefList(const RefVectorWithLeader>& spo_list) const; + struct 
LCAOMultiWalkerMem; ResourceHandle mw_mem_handle_; /// timer for basis set diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.cpp deleted file mode 100644 index 674a9a6c1f..0000000000 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.cpp +++ /dev/null @@ -1,65 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2018 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. -////////////////////////////////////////////////////////////////////////////////////// - - -#include "LCAOrbitalSetWithCorrection.h" - -namespace qmcplusplus -{ -LCAOrbitalSetWithCorrection::LCAOrbitalSetWithCorrection(const std::string& my_name, - ParticleSet& ions, - ParticleSet& els, - std::unique_ptr&& bs) - : SPOSet(my_name), lcao(my_name + "_modified", std::move(bs)), cusp(ions, els) -{} - -void LCAOrbitalSetWithCorrection::setOrbitalSetSize(int norbs) -{ - assert(lcao.getOrbitalSetSize() == norbs && "norbs doesn't agree with lcao!"); - OrbitalSetSize = norbs; - cusp.setOrbitalSetSize(norbs); -} - - -std::unique_ptr LCAOrbitalSetWithCorrection::makeClone() const -{ - return std::make_unique(*this); -} - -void LCAOrbitalSetWithCorrection::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) -{ - lcao.evaluateValue(P, iat, psi); - cusp.addV(P, iat, psi); -} - -void LCAOrbitalSetWithCorrection::evaluateVGL(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) -{ - lcao.evaluateVGL(P, iat, psi, dpsi, d2psi); - cusp.add_vector_vgl(P, iat, psi, dpsi, d2psi); -} - -void LCAOrbitalSetWithCorrection::evaluate_notranspose(const 
ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) -{ - lcao.evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet); - for (size_t i = 0, iat = first; iat < last; i++, iat++) - cusp.add_vgl(P, iat, i, logdet, dlogdet, d2logdet); -} - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.h index 65185973d2..6b25e719e2 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.h +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.h @@ -13,57 +13,12 @@ #ifndef QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_WITH_CORRECTION_TEMP_H #define QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_WITH_CORRECTION_TEMP_H -#include "QMCWaveFunctions/SPOSet.h" -#include "QMCWaveFunctions/BasisSetBase.h" -#include "LCAOrbitalSet.h" -#include "SoaCuspCorrection.h" - +#include "Configuration.h" +#include "QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h" namespace qmcplusplus { -/** class to add cusp correction to LCAOrbitalSet. 
- * - */ -class LCAOrbitalSetWithCorrection : public SPOSet -{ -public: - using basis_type = LCAOrbitalSet::basis_type; - /** constructor - * @param ions - * @param els - * @param bs pointer to the BasisSet - * @param rl report level - */ - LCAOrbitalSetWithCorrection(const std::string& my_name, - ParticleSet& ions, - ParticleSet& els, - std::unique_ptr&& bs); - - LCAOrbitalSetWithCorrection(const LCAOrbitalSetWithCorrection& in) = default; - - std::string getClassName() const final { return "LCAOrbitalSetWithCorrection"; } - - std::unique_ptr makeClone() const final; - - void setOrbitalSetSize(int norbs) final; - - void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) final; - - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final; - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) final; - - friend class LCAOrbitalBuilder; - -private: - LCAOrbitalSet lcao; +using LCAOrbitalSetWithCorrection = LCAOrbitalSetWithCorrectionT; - SoaCuspCorrection cusp; -}; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h b/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h index 1f1bc53d5e..33aa707096 100644 --- a/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h +++ b/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h @@ -63,7 +63,15 @@ struct SoaAtomicBasisSetT using RealType = typename ROT::RealType; using GridType = typename ROT::GridType; using ValueType = ORBT; - + using OffloadArray4D = Array>; + using OffloadArray3D = Array>; + using OffloadMatrix = Matrix>; + using OffloadVector = Vector>; + + /// multi walker shared memory buffer + struct SoaAtomicBSetMultiWalkerMem; + /// multi walker resource handle + ResourceHandle mw_mem_handle_; /// size of the basis set int BasisSetSize; /// Number of Cell images for the evaluation of the orbital with PBC. 
If No @@ -769,6 +777,47 @@ struct SoaAtomicBasisSetT } } } + + void createResource(ResourceCollection& collection) const + { + collection.addResource(std::make_unique()); + } + + void acquireResource(ResourceCollection& collection, + const RefVectorWithLeader& atom_basis_list) const + { + assert(this == &atom_basis_list.getLeader()); + atom_basis_list.template getCastedLeader().mw_mem_handle_ = + collection.lendResource(); + } + + void releaseResource(ResourceCollection& collection, + const RefVectorWithLeader& atom_basis_list) const + { + assert(this == &atom_basis_list.getLeader()); + collection.takebackResource(atom_basis_list.template getCastedLeader().mw_mem_handle_); + } + + struct SoaAtomicBSetMultiWalkerMem : public Resource + { + SoaAtomicBSetMultiWalkerMem() : Resource("SoaAtomicBasisSet") {} + + SoaAtomicBSetMultiWalkerMem(const SoaAtomicBSetMultiWalkerMem&) : SoaAtomicBSetMultiWalkerMem() {} + + std::unique_ptr makeClone() const override + { + return std::make_unique(*this); + } + + OffloadArray4D ylm_vgl; // [5][Nelec][PBC][NYlm] + OffloadArray4D rnl_vgl; // [5][Nelec][PBC][NRnl] + OffloadArray3D ylm_v; // [Nelec][PBC][NYlm] + OffloadArray3D rnl_v; // [Nelec][PBC][NRnl] + OffloadMatrix dr_pbc; // [PBC][xyz] translation vector for each image + OffloadArray3D dr; // [Nelec][PBC][xyz] ion->elec displacement for each image + OffloadMatrix r; // [Nelec][PBC] ion->elec distance for each image + OffloadVector correctphase; // [Nelec] overall phase + }; }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/SoaCuspCorrection.cpp b/src/QMCWaveFunctions/LCAO/SoaCuspCorrection.cpp deleted file mode 100644 index 0b41857fb9..0000000000 --- a/src/QMCWaveFunctions/LCAO/SoaCuspCorrection.cpp +++ /dev/null @@ -1,160 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. 
-// -// Copyright (c) 2021 QMCPACK developers. -// -// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. -////////////////////////////////////////////////////////////////////////////////////// - - -/** @file SoaCuspCorrection.cpp - */ -#include "SoaCuspCorrection.h" -#include "SoaCuspCorrectionBasisSet.h" - -namespace qmcplusplus -{ -SoaCuspCorrection::SoaCuspCorrection(ParticleSet& ions, ParticleSet& els) : myTableIndex(els.addTable(ions)) -{ - NumCenters = ions.getTotalNum(); - NumTargets = els.getTotalNum(); - LOBasisSet.resize(NumCenters); -} - -SoaCuspCorrection::SoaCuspCorrection(const SoaCuspCorrection& a) = default; - -void SoaCuspCorrection::setOrbitalSetSize(int norbs) -{ - MaxOrbSize = norbs; - myVGL.resize(5, MaxOrbSize); -} - -inline void SoaCuspCorrection::evaluateVGL(const ParticleSet& P, int iat, VGLVector& vgl) -{ - assert(MaxOrbSize >= vgl.size()); - myVGL = 0.0; - - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); - const auto& displ = (P.getActivePtcl() == iat) ? 
d_table.getTempDispls() : d_table.getDisplRow(iat); - for (int c = 0; c < NumCenters; c++) - if (LOBasisSet[c]) - LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], myVGL[2], myVGL[3], myVGL[4]); - - { - const auto v_in = myVGL[0]; - const auto gx_in = myVGL[1]; - const auto gy_in = myVGL[2]; - const auto gz_in = myVGL[3]; - const auto l_in = myVGL[4]; - auto v_out = vgl.data(0); - auto gx_out = vgl.data(1); - auto gy_out = vgl.data(2); - auto gz_out = vgl.data(3); - auto l_out = vgl.data(4); - for (size_t i = 0; i < vgl.size(); ++i) - { - v_out[i] += v_in[i]; - gx_out[i] += gx_in[i]; - gy_out[i] += gy_in[i]; - gz_out[i] += gz_in[i]; - l_out[i] += l_in[i]; - } - } -} - -void SoaCuspCorrection::evaluate_vgl(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) -{ - assert(MaxOrbSize >= psi.size()); - myVGL = 0.0; - - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); - const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat); - for (int c = 0; c < NumCenters; c++) - if (LOBasisSet[c]) - LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], myVGL[2], myVGL[3], myVGL[4]); - - const auto v_in = myVGL[0]; - const auto gx_in = myVGL[1]; - const auto gy_in = myVGL[2]; - const auto gz_in = myVGL[3]; - const auto l_in = myVGL[4]; - for (size_t i = 0; i < psi.size(); ++i) - { - psi[i] += v_in[i]; - dpsi[i][0] += gx_in[i]; - dpsi[i][1] += gy_in[i]; - dpsi[i][2] += gz_in[i]; - d2psi[i] += l_in[i]; - } -} - -void SoaCuspCorrection::evaluate_vgl(const ParticleSet& P, - int iat, - int idx, - ValueMatrix& psi, - GradMatrix& dpsi, - ValueMatrix& d2psi) -{ - assert(MaxOrbSize >= psi.cols()); - myVGL = 0.0; - - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? 
d_table.getTempDists() : d_table.getDistRow(iat); - const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat); - for (int c = 0; c < NumCenters; c++) - if (LOBasisSet[c]) - LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], myVGL[2], myVGL[3], myVGL[4]); - - const auto v_in = myVGL[0]; - const auto gx_in = myVGL[1]; - const auto gy_in = myVGL[2]; - const auto gz_in = myVGL[3]; - const auto l_in = myVGL[4]; - for (size_t i = 0; i < psi.cols(); ++i) - { - psi[idx][i] += v_in[i]; - dpsi[idx][i][0] += gx_in[i]; - dpsi[idx][i][1] += gy_in[i]; - dpsi[idx][i][2] += gz_in[i]; - d2psi[idx][i] += l_in[i]; - } -} - -void SoaCuspCorrection::evaluateV(const ParticleSet& P, int iat, ValueVector& psi) -{ - assert(MaxOrbSize >= psi.size()); - ValueType* tmp_vals = myVGL[0]; - - std::fill_n(tmp_vals, myVGL.size(), 0.0); - - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); - - //THIS IS SERIAL, only way to avoid this is to use myVGL - for (int c = 0; c < NumCenters; c++) - if (LOBasisSet[c]) - LOBasisSet[c]->evaluate(dist[c], tmp_vals); - - { //collect - const auto v_in = myVGL[0]; - for (size_t i = 0; i < psi.size(); ++i) - psi[i] += v_in[i]; - } -} - -void SoaCuspCorrection::add(int icenter, std::unique_ptr aos) -{ - assert(MaxOrbSize == aos->getNumOrbs() && "All the centers should support the same number of orbitals!"); - LOBasisSet[icenter].reset(aos.release()); -} - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.cpp b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.cpp deleted file mode 100644 index cdc5363544..0000000000 --- a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.cpp +++ /dev/null @@ -1,417 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source 
License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: -// -// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. -////////////////////////////////////////////////////////////////////////////////////// - - -#include -#include "SoaLocalizedBasisSet.h" -#include "Particle/DistanceTable.h" -#include "SoaAtomicBasisSet.h" -#include "MultiQuinticSpline1D.h" -#include "MultiFunctorAdapter.h" -#include "Numerics/SoaCartesianTensor.h" -#include "Numerics/SoaSphericalTensor.h" - -namespace qmcplusplus -{ - -template -void SoaLocalizedBasisSet::createResource(ResourceCollection& collection) const -{ - for (int i = 0; i < LOBasisSet.size(); i++) - LOBasisSet[i]->createResource(collection); -} -template -void SoaLocalizedBasisSet::acquireResource( - ResourceCollection& collection, - const RefVectorWithLeader>& basisset_list) const -{ - // need to cast to SoaLocalizedBasisSet to access LOBasisSet (atomic basis) - auto& loc_basis_leader = basisset_list.template getCastedLeader>(); - auto& basisset_leader = loc_basis_leader.LOBasisSet; - for (int i = 0; i < basisset_leader.size(); i++) - { - const auto one_species_basis_list(extractOneSpeciesBasisRefList(basisset_list, i)); - basisset_leader[i]->acquireResource(collection, one_species_basis_list); - } -} -template -void SoaLocalizedBasisSet::releaseResource( - ResourceCollection& collection, - const RefVectorWithLeader>& basisset_list) const -{ - // need to cast to SoaLocalizedBasisSet to access LOBasisSet (atomic basis) - auto& loc_basis_leader = basisset_list.template getCastedLeader>(); - auto& basisset_leader = loc_basis_leader.LOBasisSet; - for (int i = 0; i < basisset_leader.size(); i++) - { - const auto one_species_basis_list(extractOneSpeciesBasisRefList(basisset_list, i)); - basisset_leader[i]->releaseResource(collection, one_species_basis_list); - } -} -template -RefVectorWithLeader 
SoaLocalizedBasisSet::extractOneSpeciesBasisRefList( - const RefVectorWithLeader>& basisset_list, - int id) -{ - auto& loc_basis_leader = basisset_list.template getCastedLeader>(); - RefVectorWithLeader one_species_basis_list(*loc_basis_leader.LOBasisSet[id]); - one_species_basis_list.reserve(basisset_list.size()); - for (size_t iw = 0; iw < basisset_list.size(); iw++) - one_species_basis_list.push_back( - *basisset_list.template getCastedElement>(iw).LOBasisSet[id]); - return one_species_basis_list; -} - - -template -SoaLocalizedBasisSet::SoaLocalizedBasisSet(ParticleSet& ions, ParticleSet& els) - : ions_(ions), - myTableIndex(els.addTable(ions, DTModes::NEED_FULL_TABLE_ANYTIME | DTModes::NEED_VP_FULL_TABLE_ON_HOST)), - SuperTwist(0.0) -{ - NumCenters = ions.getTotalNum(); - NumTargets = els.getTotalNum(); - LOBasisSet.resize(ions.getSpeciesSet().getTotalNum()); - BasisOffset.resize(NumCenters + 1); - BasisSetSize = 0; -} - -template -SoaLocalizedBasisSet::SoaLocalizedBasisSet(const SoaLocalizedBasisSet& a) - : SoaBasisSetBase(a), - NumCenters(a.NumCenters), - NumTargets(a.NumTargets), - ions_(a.ions_), - myTableIndex(a.myTableIndex), - SuperTwist(a.SuperTwist), - BasisOffset(a.BasisOffset) -{ - LOBasisSet.reserve(a.LOBasisSet.size()); - for (auto& elem : a.LOBasisSet) - LOBasisSet.push_back(std::make_unique(*elem)); -} - -template -void SoaLocalizedBasisSet::setPBCParams(const TinyVector& PBCImages, - const TinyVector Sup_Twist, - const std::vector& phase_factor) -{ - for (int i = 0; i < LOBasisSet.size(); ++i) - LOBasisSet[i]->setPBCParams(PBCImages, Sup_Twist, phase_factor); - - SuperTwist = Sup_Twist; -} - -template -void SoaLocalizedBasisSet::setBasisSetSize(int nbs) -{ - const auto& IonID(ions_.GroupID); - if (BasisSetSize > 0 && nbs == BasisSetSize) - return; - - if (auto& mapping = ions_.get_map_storage_to_input(); mapping.empty()) - { - //evaluate the total basis dimension and offset for each center - BasisOffset[0] = 0; - for (int c = 0; c < NumCenters; 
c++) - BasisOffset[c + 1] = BasisOffset[c] + LOBasisSet[IonID[c]]->getBasisSetSize(); - BasisSetSize = BasisOffset[NumCenters]; - } - else - { - // when particles are reordered due to grouping, AOs need to restore the input order to match MOs. - std::vector map_input_to_storage(mapping.size()); - for (int c = 0; c < NumCenters; c++) - map_input_to_storage[mapping[c]] = c; - - std::vector basis_offset_input_order(BasisOffset.size(), 0); - for (int c = 0; c < NumCenters; c++) - basis_offset_input_order[c + 1] = - basis_offset_input_order[c] + LOBasisSet[IonID[map_input_to_storage[c]]]->getBasisSetSize(); - - for (int c = 0; c < NumCenters; c++) - BasisOffset[c] = basis_offset_input_order[mapping[c]]; - - BasisSetSize = basis_offset_input_order[NumCenters]; - } -} - -template -void SoaLocalizedBasisSet::queryOrbitalsForSType(const std::vector& corrCenter, - std::vector& is_s_orbital) const -{ - const auto& IonID(ions_.GroupID); - for (int c = 0; c < NumCenters; c++) - { - int idx = BasisOffset[c]; - int bss = LOBasisSet[IonID[c]]->BasisSetSize; - std::vector local_is_s_orbital(bss); - LOBasisSet[IonID[c]]->queryOrbitalsForSType(local_is_s_orbital); - for (int k = 0; k < bss; k++) - { - if (corrCenter[c]) - { - is_s_orbital[idx++] = local_is_s_orbital[k]; - } - else - { - is_s_orbital[idx++] = false; - } - } - } -} - -template -void SoaLocalizedBasisSet::evaluateVGL(const ParticleSet& P, int iat, vgl_type& vgl) -{ - const auto& IonID(ions_.GroupID); - const auto& coordR = P.activeR(iat); - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); - const auto& displ = (P.getActivePtcl() == iat) ? 
d_table.getTempDispls() : d_table.getDisplRow(iat); - - PosType Tv; - for (int c = 0; c < NumCenters; c++) - { - Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0]; - Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1]; - Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2]; - LOBasisSet[IonID[c]]->evaluateVGL(P.getLattice(), dist[c], displ[c], BasisOffset[c], vgl, Tv); - } -} - -template -void SoaLocalizedBasisSet::mw_evaluateVGL(const RefVectorWithLeader& P_list, - int iat, - OffloadMWVGLArray& vgl_v) -{ - for (size_t iw = 0; iw < P_list.size(); iw++) - { - const auto& IonID(ions_.GroupID); - const auto& coordR = P_list[iw].activeR(iat); - const auto& d_table = P_list[iw].getDistTableAB(myTableIndex); - const auto& dist = (P_list[iw].getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); - const auto& displ = (P_list[iw].getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat); - - PosType Tv; - - // number of walkers * BasisSetSize - auto stride = vgl_v.size(1) * BasisSetSize; - assert(BasisSetSize == vgl_v.size(2)); - vgl_type vgl_iw(vgl_v.data_at(0, iw, 0), BasisSetSize, stride); - - for (int c = 0; c < NumCenters; c++) - { - Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0]; - Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1]; - Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2]; - LOBasisSet[IonID[c]]->evaluateVGL(P_list[iw].getLattice(), dist[c], displ[c], BasisOffset[c], vgl_iw, Tv); - } - } -} - - -template -void SoaLocalizedBasisSet::evaluateVGH(const ParticleSet& P, int iat, vgh_type& vgh) -{ - const auto& IonID(ions_.GroupID); - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); - const auto& displ = (P.getActivePtcl() == iat) ? 
d_table.getTempDispls() : d_table.getDisplRow(iat); - for (int c = 0; c < NumCenters; c++) - { - LOBasisSet[IonID[c]]->evaluateVGH(P.getLattice(), dist[c], displ[c], BasisOffset[c], vgh); - } -} - -template -void SoaLocalizedBasisSet::evaluateVGHGH(const ParticleSet& P, int iat, vghgh_type& vghgh) -{ - // APP_ABORT("SoaLocalizedBasisSet::evaluateVGH() not implemented\n"); - - const auto& IonID(ions_.GroupID); - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); - const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat); - for (int c = 0; c < NumCenters; c++) - { - LOBasisSet[IonID[c]]->evaluateVGHGH(P.getLattice(), dist[c], displ[c], BasisOffset[c], vghgh); - } -} - -template -void SoaLocalizedBasisSet::evaluateV(const ParticleSet& P, int iat, ORBT* restrict vals) -{ - const auto& IonID(ions_.GroupID); - const auto& coordR = P.activeR(iat); - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); - const auto& displ = (P.getActivePtcl() == iat) ? 
d_table.getTempDispls() : d_table.getDisplRow(iat); - - PosType Tv; - for (int c = 0; c < NumCenters; c++) - { - Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0]; - Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1]; - Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2]; - LOBasisSet[IonID[c]]->evaluateV(P.getLattice(), dist[c], displ[c], vals + BasisOffset[c], Tv); - } -} - -template -void SoaLocalizedBasisSet::mw_evaluateValue(const RefVectorWithLeader& P_list, - int iat, - OffloadMWVArray& v) -{ - for (size_t iw = 0; iw < P_list.size(); iw++) - evaluateV(P_list[iw], iat, v.data_at(iw, 0)); -} - -template -void SoaLocalizedBasisSet::mw_evaluateValueVPs(const RefVectorWithLeader& vp_list, - OffloadMWVArray& v) -{ - assert(BasisSetSize == v.size(1)); - size_t index = 0; - for (size_t iw = 0; iw < vp_list.size(); iw++) - for (int iat = 0; iat < vp_list[iw].getTotalNum(); iat++) - evaluateV(vp_list[iw], iat, v.data_at(index++, 0)); -} - - -template -void SoaLocalizedBasisSet::evaluateGradSourceV(const ParticleSet& P, - int iat, - const ParticleSet& ions, - int jion, - vgl_type& vgl) -{ - //We need to zero out the temporary array vgl. - auto* restrict gx = vgl.data(1); - auto* restrict gy = vgl.data(2); - auto* restrict gz = vgl.data(3); - - for (int ib = 0; ib < BasisSetSize; ib++) - { - gx[ib] = 0; - gy[ib] = 0; - gz[ib] = 0; - } - - const auto& IonID(ions_.GroupID); - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); - const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat); - - - PosType Tv; - Tv[0] = Tv[1] = Tv[2] = 0; - //Since LCAO's are written only in terms of (r-R), ionic derivatives only exist for the atomic center - //that we wish to take derivatives of. Moreover, we can obtain an ion derivative by multiplying an electron - //derivative by -1.0. Handling this sign is left to LCAOrbitalSet. 
For now, just note this is the electron VGL function. - LOBasisSet[IonID[jion]]->evaluateVGL(P.getLattice(), dist[jion], displ[jion], BasisOffset[jion], vgl, Tv); -} - -template -void SoaLocalizedBasisSet::evaluateGradSourceVGL(const ParticleSet& P, - int iat, - const ParticleSet& ions, - int jion, - vghgh_type& vghgh) -{ - //We need to zero out the temporary array vghgh. - auto* restrict gx = vghgh.data(1); - auto* restrict gy = vghgh.data(2); - auto* restrict gz = vghgh.data(3); - - auto* restrict hxx = vghgh.data(4); - auto* restrict hxy = vghgh.data(5); - auto* restrict hxz = vghgh.data(6); - auto* restrict hyy = vghgh.data(7); - auto* restrict hyz = vghgh.data(8); - auto* restrict hzz = vghgh.data(9); - - auto* restrict gxxx = vghgh.data(10); - auto* restrict gxxy = vghgh.data(11); - auto* restrict gxxz = vghgh.data(12); - auto* restrict gxyy = vghgh.data(13); - auto* restrict gxyz = vghgh.data(14); - auto* restrict gxzz = vghgh.data(15); - auto* restrict gyyy = vghgh.data(16); - auto* restrict gyyz = vghgh.data(17); - auto* restrict gyzz = vghgh.data(18); - auto* restrict gzzz = vghgh.data(19); - - - for (int ib = 0; ib < BasisSetSize; ib++) - { - gx[ib] = 0; - gy[ib] = 0; - gz[ib] = 0; - - hxx[ib] = 0; - hxy[ib] = 0; - hxz[ib] = 0; - hyy[ib] = 0; - hyz[ib] = 0; - hzz[ib] = 0; - - gxxx[ib] = 0; - gxxy[ib] = 0; - gxxz[ib] = 0; - gxyy[ib] = 0; - gxyz[ib] = 0; - gxzz[ib] = 0; - gyyy[ib] = 0; - gyyz[ib] = 0; - gyzz[ib] = 0; - gzzz[ib] = 0; - } - - // Since jion is indexed on the source ions not the ions_ the distinction between - // ions_ and ions is extremely important. - const auto& IonID(ions.GroupID); - const auto& d_table = P.getDistTableAB(myTableIndex); - const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); - const auto& displ = (P.getActivePtcl() == iat) ? 
d_table.getTempDispls() : d_table.getDisplRow(iat); - - //Since LCAO's are written only in terms of (r-R), ionic derivatives only exist for the atomic center - //that we wish to take derivatives of. Moreover, we can obtain an ion derivative by multiplying an electron - //derivative by -1.0. Handling this sign is left to LCAOrbitalSet. For now, just note this is the electron VGL function. - - LOBasisSet[IonID[jion]]->evaluateVGHGH(P.getLattice(), dist[jion], displ[jion], BasisOffset[jion], vghgh); -} - -template -void SoaLocalizedBasisSet::add(int icenter, std::unique_ptr aos) -{ - LOBasisSet[icenter] = std::move(aos); -} - -template class SoaLocalizedBasisSet< - SoaAtomicBasisSet, SoaCartesianTensor>, - QMCTraits::ValueType>; -template class SoaLocalizedBasisSet< - SoaAtomicBasisSet, SoaSphericalTensor>, - QMCTraits::ValueType>; -template class SoaLocalizedBasisSet< - SoaAtomicBasisSet>, SoaCartesianTensor>, - QMCTraits::ValueType>; -template class SoaLocalizedBasisSet< - SoaAtomicBasisSet>, SoaSphericalTensor>, - QMCTraits::ValueType>; -template class SoaLocalizedBasisSet< - SoaAtomicBasisSet>, SoaCartesianTensor>, - QMCTraits::ValueType>; -template class SoaLocalizedBasisSet< - SoaAtomicBasisSet>, SoaSphericalTensor>, - QMCTraits::ValueType>; -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.h b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.h index caeaa69071..49358e1e6b 100644 --- a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.h +++ b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.h @@ -19,182 +19,13 @@ #ifndef QMCPLUSPLUS_SOA_LOCALIZEDBASISSET_H #define QMCPLUSPLUS_SOA_LOCALIZEDBASISSET_H -#include -#include "QMCWaveFunctions/BasisSetBase.h" -#include "OMPTarget/OffloadAlignedAllocators.hpp" +#include "Configuration.h" +#include "QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h" namespace qmcplusplus { -/** A localized basis set derived from SoaBasisSetBase - * - * This class performs the evaluation of the basis functions 
and their - * derivatives for each of the N-particles in a configuration. - * The template parameter COT denotes Centered-Orbital-Type which provides - * a set of localized orbitals associated with a center. - * The template parameter ORBT denotes the orbital value return type - */ template -class SoaLocalizedBasisSet : public SoaBasisSetBase -{ -public: - using RealType = typename COT::RealType; - using BaseType = SoaBasisSetBase; - using ValueType = QMCTraits::ValueType; - - using vgl_type = typename BaseType::vgl_type; - using vgh_type = typename BaseType::vgh_type; - using vghgh_type = typename BaseType::vghgh_type; - using PosType = typename ParticleSet::PosType; - using OffloadMWVGLArray = typename BaseType::OffloadMWVGLArray; - using OffloadMWVArray = typename BaseType::OffloadMWVArray; - - using BaseType::BasisSetSize; - - ///number of centers, e.g., ions - size_t NumCenters; - ///number of quantum particles - size_t NumTargets; - ///ion particle set - const ParticleSet& ions_; - ///number of quantum particles - const int myTableIndex; - ///Global Coordinate of Supertwist read from HDF5 - PosType SuperTwist; - - - /** container to store the offsets of the basis functions for each center - * Due to potential reordering of ions, offsets can be in any order. - */ - std::vector BasisOffset; - - /** container of the unique pointers to the Atomic Orbitals - * - * size of LOBasisSet = number of unique centers - */ - std::vector> LOBasisSet; - - /** constructor - * @param ions ionic system - * @param els electronic system - */ - SoaLocalizedBasisSet(ParticleSet& ions, ParticleSet& els); - - /** copy constructor */ - SoaLocalizedBasisSet(const SoaLocalizedBasisSet& a); - - /** makeClone */ - BaseType* makeClone() const override { return new SoaLocalizedBasisSet(*this); } - - /** set Number of periodic Images to evaluate the orbitals. - Set to 0 for non-PBC, and set manually in the input. - Passes the pre-computed phase factor for evaluation of complex wavefunction. 
If WF is real Phase_factor is real and equals 1 if gamma or -1 if non-Gamma. - */ - void setPBCParams(const TinyVector& PBCImages, - const TinyVector Sup_Twist, - const std::vector& phase_factor); - - /** set BasisSetSize and allocate mVGL container - */ - void setBasisSetSize(int nbs) override; - - /** Determine which orbitals are S-type. Used by cusp correction. - */ - void queryOrbitalsForSType(const std::vector& corrCenter, std::vector& is_s_orbital) const override; - - /** compute VGL - * @param P quantum particleset - * @param iat active particle - * @param vgl Matrix(5,BasisSetSize) - * @param trialMove if true, use getTempDists()/getTempDispls() - */ - void evaluateVGL(const ParticleSet& P, int iat, vgl_type& vgl) override; - - /** compute V using packed array with all walkers - * @param P_list list of quantum particleset (one for each walker) - * @param iat active particle - * @param v Array(n_walkers, BasisSetSize) - */ - void mw_evaluateValue(const RefVectorWithLeader& P_list, int iat, OffloadMWVArray& v) override; - - /** compute V using packed array with all walkers - * @param vp_list list of quantum virtual particleset (one for each walker) - * @param v Array(n_walkers, BasisSetSize) - */ - void mw_evaluateValueVPs(const RefVectorWithLeader& vp_list, OffloadMWVArray& v) override; - - - /** compute VGL using packed array with all walkers - * @param P_list list of quantum particleset (one for each walker) - * @param iat active particle - * @param vgl Array(n_walkers, 5, BasisSetSize) - */ - void mw_evaluateVGL(const RefVectorWithLeader& P_list, int iat, OffloadMWVGLArray& vgl) override; - - /** compute VGH - * @param P quantum particleset - * @param iat active particle - * @param vgl Matrix(10,BasisSetSize) - * @param trialMove if true, use getTempDists()/getTempDispls() - */ - void evaluateVGH(const ParticleSet& P, int iat, vgh_type& vgh) override; - - /** compute VGHGH - * @param P quantum particleset - * @param iat active particle - * @param vghgh 
Matrix(20,BasisSetSize) - * @param trialMove if true, use getTempDists()/getTempDispls() - */ - void evaluateVGHGH(const ParticleSet& P, int iat, vghgh_type& vghgh) override; - - /** compute values for the iat-paricle move - * - * Always uses getTempDists() and getTempDispls() - * Tv is a translation vector; In PBC, in order to reduce the number - * of images that need to be summed over when generating the AO the - * nearest image displacement, dr, is used. Tv corresponds to the - * translation that takes the 'general displacement' (displacement - * between ion position and electron position) to the nearest image - * displacement. We need to keep track of Tv because it must be add - * as a phase factor, i.e., exp(i*k*Tv). - */ - void evaluateV(const ParticleSet& P, int iat, ORBT* restrict vals) override; - - void evaluateGradSourceV(const ParticleSet& P, int iat, const ParticleSet& ions, int jion, vgl_type& vgl) override; - - void evaluateGradSourceVGL(const ParticleSet& P, - int iat, - const ParticleSet& ions, - int jion, - vghgh_type& vghgh) override; - - /** add a new set of Centered Atomic Orbitals - * @param icenter the index of the center - * @param aos a set of Centered Atomic Orbitals - */ - void add(int icenter, std::unique_ptr aos); - - - /** initialize a shared resource and hand it to collection - */ - void createResource(ResourceCollection& collection) const override; - - /** acquire a shared resource from collection - */ - void acquireResource(ResourceCollection& collection, - const RefVectorWithLeader>& basisset_list) const override; - - /** return a shared resource to collection - */ - void releaseResource(ResourceCollection& collection, - const RefVectorWithLeader>& basisset_list) const override; - +using SoaLocalizedBasisSet = SoaLocalizedBasisSetT; - /** helper function for extracting a list of atomic basis sets for a single species (indexed by `id`) - * from a list of basis sets - */ - static RefVectorWithLeader extractOneSpeciesBasisRefList( - 
const RefVectorWithLeader>& basisset_list, - int id); -}; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp index 7b62735768..8b8ab7c66c 100644 --- a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp +++ b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp @@ -22,6 +22,58 @@ namespace qmcplusplus { + +template +void SoaLocalizedBasisSetT::createResource(ResourceCollection& collection) const +{ + for (int i = 0; i < LOBasisSet.size(); i++) + LOBasisSet[i]->createResource(collection); +} + +template +void SoaLocalizedBasisSetT::acquireResource( + ResourceCollection& collection, + const RefVectorWithLeader>& basisset_list) const +{ + // need to cast to SoaLocalizedBasisSet to access LOBasisSet (atomic basis) + auto& loc_basis_leader = basisset_list.template getCastedLeader>(); + auto& basisset_leader = loc_basis_leader.LOBasisSet; + for (int i = 0; i < basisset_leader.size(); i++) + { + const RefVectorWithLeader one_species_basis_list(extractOneSpeciesBasisRefList(basisset_list, i)); + basisset_leader[i]->acquireResource(collection, one_species_basis_list); + } +} + +template +void SoaLocalizedBasisSetT::releaseResource( + ResourceCollection& collection, + const RefVectorWithLeader>& basisset_list) const +{ + // need to cast to SoaLocalizedBasisSet to access LOBasisSet (atomic basis) + auto& loc_basis_leader = basisset_list.template getCastedLeader>(); + auto& basisset_leader = loc_basis_leader.LOBasisSet; + for (int i = 0; i < basisset_leader.size(); i++) + { + const RefVectorWithLeader one_species_basis_list(extractOneSpeciesBasisRefList(basisset_list, i)); + basisset_leader[i]->releaseResource(collection, one_species_basis_list); + } +} + +template +RefVectorWithLeader SoaLocalizedBasisSetT::extractOneSpeciesBasisRefList( + const RefVectorWithLeader>& basisset_list, + int id) +{ + auto& loc_basis_leader = basisset_list.template getCastedLeader>(); + 
RefVectorWithLeader one_species_basis_list(*loc_basis_leader.LOBasisSet[id]); + one_species_basis_list.reserve(basisset_list.size()); + for (size_t iw = 0; iw < basisset_list.size(); iw++) + one_species_basis_list.push_back( + *basisset_list.template getCastedElement>(iw).LOBasisSet[id]); + return one_species_basis_list; +} + template SoaLocalizedBasisSetT::SoaLocalizedBasisSetT( ParticleSetT& ions, ParticleSetT& els) : @@ -365,105 +417,124 @@ SoaLocalizedBasisSetT::add(int icenter, std::unique_ptr aos) // TODO: this should be redone with template template parameters +#ifndef QMC_COMPLEX template class SoaLocalizedBasisSetT< SoaAtomicBasisSetT, SoaCartesianTensor, double>, double>; -template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT, SoaCartesianTensor, - std::complex>, - std::complex>; template class SoaLocalizedBasisSetT< SoaAtomicBasisSetT, SoaCartesianTensor, float>, float>; +#else +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT, SoaCartesianTensor, + std::complex>, + std::complex>; template class SoaLocalizedBasisSetT< SoaAtomicBasisSetT, SoaCartesianTensor, std::complex>, std::complex>; +#endif +#ifndef QMC_COMPLEX template class SoaLocalizedBasisSetT< SoaAtomicBasisSetT, SoaSphericalTensor, double>, double>; -template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT, SoaSphericalTensor, - std::complex>, - std::complex>; template class SoaLocalizedBasisSetT< SoaAtomicBasisSetT, SoaSphericalTensor, float>, float>; +#else +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT, SoaSphericalTensor, + std::complex>, + std::complex>; template class SoaLocalizedBasisSetT< SoaAtomicBasisSetT, SoaSphericalTensor, std::complex>, std::complex>; +#endif +#ifndef QMC_COMPLEX template class SoaLocalizedBasisSetT< SoaAtomicBasisSetT>, SoaCartesianTensor, double>, double>; -template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT>, - SoaCartesianTensor, std::complex>, - std::complex>; template class SoaLocalizedBasisSetT< SoaAtomicBasisSetT>, 
SoaCartesianTensor, float>, float>; +#else +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT>, + SoaCartesianTensor, std::complex>, + std::complex>; template class SoaLocalizedBasisSetT< SoaAtomicBasisSetT>, SoaCartesianTensor, std::complex>, std::complex>; +#endif +#ifndef QMC_COMPLEX template class SoaLocalizedBasisSetT< SoaAtomicBasisSetT>, SoaSphericalTensor, double>, double>; -template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT>, - SoaSphericalTensor, std::complex>, - std::complex>; template class SoaLocalizedBasisSetT< SoaAtomicBasisSetT>, SoaSphericalTensor, float>, float>; +#else +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT>, + SoaSphericalTensor, std::complex>, + std::complex>; template class SoaLocalizedBasisSetT< SoaAtomicBasisSetT>, SoaSphericalTensor, std::complex>, std::complex>; +#endif +#ifndef QMC_COMPLEX template class SoaLocalizedBasisSetT< SoaAtomicBasisSetT>, SoaCartesianTensor, double>, double>; -template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT>, - SoaCartesianTensor, std::complex>, - std::complex>; template class SoaLocalizedBasisSetT< SoaAtomicBasisSetT>, SoaCartesianTensor, float>, float>; +#else +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT>, + SoaCartesianTensor, std::complex>, + std::complex>; template class SoaLocalizedBasisSetT< SoaAtomicBasisSetT>, SoaCartesianTensor, std::complex>, std::complex>; +#endif +#ifndef QMC_COMPLEX template class SoaLocalizedBasisSetT< SoaAtomicBasisSetT>, SoaSphericalTensor, double>, double>; -template class SoaLocalizedBasisSetT< - SoaAtomicBasisSetT>, - SoaSphericalTensor, std::complex>, - std::complex>; template class SoaLocalizedBasisSetT< SoaAtomicBasisSetT>, SoaSphericalTensor, float>, float>; +#else +template class SoaLocalizedBasisSetT< + SoaAtomicBasisSetT>, + SoaSphericalTensor, std::complex>, + std::complex>; template class SoaLocalizedBasisSetT< SoaAtomicBasisSetT>, SoaSphericalTensor, std::complex>, std::complex>; +#endif + } // namespace 
qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h index 6f2e412413..6839fef181 100644 --- a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h +++ b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h @@ -185,6 +185,28 @@ class SoaLocalizedBasisSetT : public SoaBasisSetBaseT */ void add(int icenter, std::unique_ptr aos); + + /** initialize a shared resource and hand it to collection + */ + void createResource(ResourceCollection& collection) const override; + + /** acquire a shared resource from collection + */ + void acquireResource(ResourceCollection& collection, + const RefVectorWithLeader>& basisset_list) const override; + + /** return a shared resource to collection + */ + void releaseResource(ResourceCollection& collection, + const RefVectorWithLeader>& basisset_list) const override; + + + /** helper function for extracting a list of atomic basis sets for a single species (indexed by `id`) + * from a list of basis sets + */ + static RefVectorWithLeader extractOneSpeciesBasisRefList( + const RefVectorWithLeader>& basisset_list, + int id); }; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/OptimizableFunctorBase.h b/src/QMCWaveFunctions/OptimizableFunctorBase.h index 33048b3049..01c470e9a2 100644 --- a/src/QMCWaveFunctions/OptimizableFunctorBase.h +++ b/src/QMCWaveFunctions/OptimizableFunctorBase.h @@ -22,7 +22,7 @@ #include "OptimizableObject.h" #include "OhmmsData/OhmmsElementBase.h" #include "OhmmsPETE/TinyVector.h" -//#include +#include "QMCWaveFunctions/VariableSet.h" #include namespace qmcplusplus @@ -48,8 +48,6 @@ struct OptimizableFunctorBase : public OptimizableObject { ///typedef for real values using real_type = optimize::VariableSet::real_type; - ///typedef for variableset: this is going to be replaced - using opt_variables_type = optimize::VariableSet; ///maximum cutoff real_type cutoff_radius = 0.0; ///set of variables to be optimized diff --git 
a/src/QMCWaveFunctions/OptimizableObject.h b/src/QMCWaveFunctions/OptimizableObject.h index 17cf4af88e..2033e89fc9 100644 --- a/src/QMCWaveFunctions/OptimizableObject.h +++ b/src/QMCWaveFunctions/OptimizableObject.h @@ -13,103 +13,21 @@ #ifndef QMCPLUSPLUS_OPTIMIZABLEOBJECT_H #define QMCPLUSPLUS_OPTIMIZABLEOBJECT_H -#include "VariableSet.h" -#include "type_traits/template_types.hpp" +#include "Configuration.h" +#include "OptimizableObjectT.h" /**@file OptimizableObject.h *@brief Declaration of OptimizableObject */ namespace qmcplusplus { -using opt_variables_type = optimize::VariableSet; +using opt_variables_type = OptVariablesTypeT; -class OptimizableObject -{ -public: - OptimizableObject(const std::string& name) : name_(name) {} - - const std::string& getName() const { return name_; } - bool isOptimized() const { return is_optimized_; } - -private: - /** Name of the optimizable object - */ - const std::string name_; - /** If true, this object is actively modified during WFOpt - */ - bool is_optimized_ = false; - -public: - /** check in variational parameters to the global list of parameters used by the optimizer. - * @param active a super set of optimizable variables - * - * The existing checkInVariables implementation in WFC/SPO/.. are inclusive and it calls checkInVariables of its members - * class A: public SPOSet {} - * class B: public WFC - * { - * A objA; - * checkInVariables() { objA.checkInVariables(); } - * }; - * - * With OptimizableObject, - * class A: public OptimizableObject {} - * class B: public OptimizableObject - * { - * A objA; - * checkInVariablesExclusive() { // should not call objA.checkInVariablesExclusive() if objA has been extracted; } - * }; - * A vector of OptimizableObject, will be created by calling extractOptimizableObjects(). - * All the checkInVariablesExclusive() will be called through this vector and thus - * checkInVariablesExclusive implementation should only handle non-OptimizableObject members. 
- */ - virtual void checkInVariablesExclusive(opt_variables_type& active) = 0; - - /** reset the parameters during optimizations. Exclusive, see checkInVariablesExclusive - */ - virtual void resetParametersExclusive(const opt_variables_type& active) = 0; +using OptVariablesType = OptVariablesTypeT; - /** print the state, e.g., optimizables */ - virtual void reportStatus(std::ostream& os) {} - - void setOptimization(bool state) { is_optimized_ = state; } - - /** Write the variational parameters for this object to the VP HDF file - * - * The hout parameter should come from VariableSet::writeToHDF - * - * Objects can use this function to store additional information to the file. - * - * By default the parameters are saved in VariableSet::writeToHDF, and objects - * do not need to implement this function (yet). - * - */ - virtual void writeVariationalParameters(hdf_archive& hout){}; - - /** Read the variational parameters for this object from the VP HDF file - * - * The hin parameter should come from VariableSet::readFromHDF - * - * By default the parameters are read in VariableSet::readFromHDF, and objects - * do not need to implement this function (yet). - */ - virtual void readVariationalParameters(hdf_archive& hin){}; -}; - -class UniqueOptObjRefs : public RefVector -{ -public: - OptimizableObject& operator[](size_t i) const { return RefVector::operator[](i); } +using OptimizableObject = OptimizableObjectT; - void push_back(OptimizableObject& obj) - { - if (obj.getName().empty()) - throw std::logic_error("BUG!! 
Only named OptimizableObject object can be added to UniqueOptObjRefs!"); - auto result = - std::find_if(begin(), end(), [&](OptimizableObject& element) { return element.getName() == obj.getName(); }); - if (result == end()) - RefVector::push_back(obj); - } -}; +using UniqueOptObjRefs = UniqueOptObjRefsT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/OptimizableObjectT.h b/src/QMCWaveFunctions/OptimizableObjectT.h index 111d812ae4..1ab14979b7 100644 --- a/src/QMCWaveFunctions/OptimizableObjectT.h +++ b/src/QMCWaveFunctions/OptimizableObjectT.h @@ -20,8 +20,8 @@ */ namespace qmcplusplus { -template -using OptVariablesType = optimize::VariableSetT; +template +using OptVariablesTypeT = optimize::VariableSetT; template class OptimizableObjectT @@ -76,28 +76,19 @@ class OptimizableObjectT * called through this vector and thus checkInVariablesExclusive * implementation should only handle non-OptimizableObject members. */ - virtual void - checkInVariablesExclusive(OptVariablesType& active) = 0; + virtual void checkInVariablesExclusive(OptVariablesTypeT& active) = 0; - /** reset the parameters during optimizations. Exclusive, see + /** reset the parameters during optimizations. 
Exclusive, see * checkInVariablesExclusive */ - virtual void - resetParametersExclusive(const OptVariablesType& active) = 0; + virtual void resetParametersExclusive(const OptVariablesTypeT& active) = 0; - /** print the state, e.g., optimizables */ - virtual void - reportStatus(std::ostream& os) - { - } + /** print the state, e.g., optimizables */ + virtual void reportStatus(std::ostream& os) {} - void - setOptimization(bool state) - { - is_optimized_ = state; - } + void setOptimization(bool state) { is_optimized_ = state; } - /** Write the variational parameters for this object to the VP HDF file + /** Write the variational parameters for this object to the VP HDF file * * The hout parameter should come from VariableSet::writeToHDF * @@ -108,18 +99,16 @@ class OptimizableObjectT * objects do not need to implement this function (yet). * */ - virtual void - writeVariationalParameters(hdf_archive& hout){}; + virtual void writeVariationalParameters(hdf_archive& hout){}; - /** Read the variational parameters for this object from the VP HDF file + /** Read the variational parameters for this object from the VP HDF file * * The hin parameter should come from VariableSet::readFromHDF * * By default the parameters are read in VariableSet::readFromHDF, and * objects do not need to implement this function (yet). 
*/ - virtual void - readVariationalParameters(hdf_archive& hin){}; + virtual void readVariationalParameters(hdf_archive& hin){}; }; template diff --git a/src/QMCWaveFunctions/OrbitalSetTraits.h b/src/QMCWaveFunctions/OrbitalSetTraits.h index 881532fcef..39e56bdf62 100644 --- a/src/QMCWaveFunctions/OrbitalSetTraits.h +++ b/src/QMCWaveFunctions/OrbitalSetTraits.h @@ -54,6 +54,7 @@ struct OrbitalSetTraits //: public OrbitalTraits DIM = OHMMS_DIM }; using RealType = RealAlias; + using FullValueType = FullPrec; using ComplexType = std::complex; using ValueType = T; using IndexType = int; diff --git a/src/QMCWaveFunctions/PlaneWave/PWBasisT.h b/src/QMCWaveFunctions/PlaneWave/PWBasisT.h index 4a092961d9..e02706f7bf 100644 --- a/src/QMCWaveFunctions/PlaneWave/PWBasisT.h +++ b/src/QMCWaveFunctions/PlaneWave/PWBasisT.h @@ -23,7 +23,7 @@ #include "CPU/e2iphi.h" #include "Configuration.h" #include "Message/Communicate.h" -#include "Particle/ParticleSetT.h" +#include "Particle/ParticleSet.h" #include "hdf/hdf_archive.h" #include "type_traits/complex_help.hpp" diff --git a/src/QMCWaveFunctions/RotatedSPOs.cpp b/src/QMCWaveFunctions/RotatedSPOs.cpp deleted file mode 100644 index 0815484c5e..0000000000 --- a/src/QMCWaveFunctions/RotatedSPOs.cpp +++ /dev/null @@ -1,1727 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -//// This file is distributed under the University of Illinois/NCSA Open Source License. -//// See LICENSE file in top directory for details. -//// -//// Copyright (c) QMCPACK developers. -//// -//// File developed by: Sergio D. Pineda Flores, sergio_pinedaflores@berkeley.edu, University of California, Berkeley -//// Eric Neuscamman, eneuscamman@berkeley.edu, University of California, Berkeley -//// Ye Luo, yeluo@anl.gov, Argonne National Laboratory -//// -//// File created by: Sergio D. 
Pineda Flores, sergio_pinedaflores@berkeley.edu, University of California, Berkeley -//////////////////////////////////////////////////////////////////////////////////////// -#include "RotatedSPOs.h" -#include "Numerics/MatrixOperators.h" -#include "Numerics/DeterminantOperators.h" -#include "CPU/BLAS.hpp" -#include "io/hdf/hdf_archive.h" - - -namespace qmcplusplus -{ -RotatedSPOs::RotatedSPOs(const std::string& my_name, std::unique_ptr&& spos) - : SPOSet(my_name), - OptimizableObject(my_name), - Phi(std::move(spos)), - nel_major_(0), - params_supplied(false), - apply_rotation_timer_(createGlobalTimer("RotatedSPOs::apply_rotation", timer_level_fine)) -{ - OrbitalSetSize = Phi->getOrbitalSetSize(); -} - -RotatedSPOs::~RotatedSPOs() {} - - -void RotatedSPOs::setRotationParameters(const std::vector& param_list) -{ - params = param_list; - params_supplied = true; -} - -void RotatedSPOs::createRotationIndices(int nel, int nmo, RotationIndices& rot_indices) -{ - for (int i = 0; i < nel; i++) - for (int j = nel; j < nmo; j++) - rot_indices.emplace_back(i, j); -} - -void RotatedSPOs::createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices) -{ - rot_indices.reserve(nmo * (nmo - 1) / 2); - - // start with core-active rotations - put them at the beginning of the list - // so it matches the other list of rotation indices - for (int i = 0; i < nel; i++) - for (int j = nel; j < nmo; j++) - rot_indices.emplace_back(i, j); - - // Add core-core rotations - put them at the end of the list - for (int i = 0; i < nel; i++) - for (int j = i + 1; j < nel; j++) - rot_indices.emplace_back(i, j); - - // Add active-active rotations - put them at the end of the list - for (int i = nel; i < nmo; i++) - for (int j = i + 1; j < nmo; j++) - rot_indices.emplace_back(i, j); -} - -void RotatedSPOs::constructAntiSymmetricMatrix(const RotationIndices& rot_indices, - const std::vector& param, - ValueMatrix& rot_mat) -{ - assert(rot_indices.size() == param.size()); - // Assumes rot_mat 
is of the correct size - - rot_mat = 0.0; - - for (int i = 0; i < rot_indices.size(); i++) - { - const int p = rot_indices[i].first; - const int q = rot_indices[i].second; - const RealType x = param[i]; - - rot_mat[q][p] = x; - rot_mat[p][q] = -x; - } -} - -void RotatedSPOs::extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices, - const ValueMatrix& rot_mat, - std::vector& param) -{ - assert(rot_indices.size() == param.size()); - // Assumes rot_mat is of the correct size - - for (int i = 0; i < rot_indices.size(); i++) - { - const int p = rot_indices[i].first; - const int q = rot_indices[i].second; - param[i] = rot_mat[q][p]; - } -} - -void RotatedSPOs::resetParametersExclusive(const opt_variables_type& active) -{ - std::vector delta_param(m_act_rot_inds.size()); - - size_t psize = m_act_rot_inds.size(); - - if (use_global_rot_) - { - psize = m_full_rot_inds.size(); - assert(psize >= m_act_rot_inds.size()); - } - - std::vector old_param(psize); - std::vector new_param(psize); - - for (int i = 0; i < m_act_rot_inds.size(); i++) - { - int loc = myVars.where(i); - delta_param[i] = active[loc] - myVars[i]; - myVars[i] = active[loc]; - } - - if (use_global_rot_) - { - for (int i = 0; i < m_full_rot_inds.size(); i++) - old_param[i] = myVarsFull[i]; - - applyDeltaRotation(delta_param, old_param, new_param); - - // Save the the params - for (int i = 0; i < m_full_rot_inds.size(); i++) - myVarsFull[i] = new_param[i]; - } - else - { - apply_rotation(delta_param, false); - - // Save the parameters in the history list - history_params_.push_back(delta_param); - } -} - -void RotatedSPOs::writeVariationalParameters(hdf_archive& hout) -{ - hout.push("RotatedSPOs"); - if (use_global_rot_) - { - hout.push("rotation_global"); - std::string rot_global_name = std::string("rotation_global_") + SPOSet::getName(); - - int nparam_full = myVarsFull.size(); - std::vector full_params(nparam_full); - for (int i = 0; i < nparam_full; i++) - full_params[i] = myVarsFull[i]; - - 
hout.write(full_params, rot_global_name); - hout.pop(); - } - else - { - hout.push("rotation_history"); - size_t rows = history_params_.size(); - size_t cols = 0; - if (rows > 0) - cols = history_params_[0].size(); - - Matrix tmp(rows, cols); - for (size_t i = 0; i < rows; i++) - for (size_t j = 0; j < cols; j++) - tmp(i, j) = history_params_[i][j]; - - std::string rot_hist_name = std::string("rotation_history_") + SPOSet::getName(); - hout.write(tmp, rot_hist_name); - hout.pop(); - } - - // Save myVars in order to restore object state exactly - // The values aren't meaningful, but they need to match those saved in VariableSet - hout.push("rotation_params"); - std::string rot_params_name = std::string("rotation_params_") + SPOSet::getName(); - - int nparam = myVars.size(); - std::vector params(nparam); - for (int i = 0; i < nparam; i++) - params[i] = myVars[i]; - - hout.write(params, rot_params_name); - hout.pop(); - - hout.pop(); -} - -void RotatedSPOs::readVariationalParameters(hdf_archive& hin) -{ - hin.push("RotatedSPOs", false); - - bool grp_hist_exists = hin.is_group("rotation_history"); - bool grp_global_exists = hin.is_group("rotation_global"); - if (!grp_hist_exists && !grp_global_exists) - app_warning() << "Rotation parameters not found in VP file"; - - - if (grp_global_exists) - { - hin.push("rotation_global", false); - std::string rot_global_name = std::string("rotation_global_") + SPOSet::getName(); - - std::vector sizes(1); - if (!hin.getShape(rot_global_name, sizes)) - throw std::runtime_error("Failed to read rotation_global in VP file"); - - int nparam_full_actual = sizes[0]; - int nparam_full = myVarsFull.size(); - - if (nparam_full != nparam_full_actual) - { - std::ostringstream tmp_err; - tmp_err << "Expected number of full rotation parameters (" << nparam_full << ") does not match number in file (" - << nparam_full_actual << ")"; - throw std::runtime_error(tmp_err.str()); - } - std::vector full_params(nparam_full); - hin.read(full_params, 
rot_global_name); - for (int i = 0; i < nparam_full; i++) - myVarsFull[i] = full_params[i]; - - hin.pop(); - - applyFullRotation(full_params, true); - } - else if (grp_hist_exists) - { - hin.push("rotation_history", false); - std::string rot_hist_name = std::string("rotation_history_") + SPOSet::getName(); - std::vector sizes(2); - if (!hin.getShape(rot_hist_name, sizes)) - throw std::runtime_error("Failed to read rotation history in VP file"); - - int rows = sizes[0]; - int cols = sizes[1]; - history_params_.resize(rows); - Matrix tmp(rows, cols); - hin.read(tmp, rot_hist_name); - for (size_t i = 0; i < rows; i++) - { - history_params_[i].resize(cols); - for (size_t j = 0; j < cols; j++) - history_params_[i][j] = tmp(i, j); - } - - hin.pop(); - - applyRotationHistory(); - } - - hin.push("rotation_params", false); - std::string rot_param_name = std::string("rotation_params_") + SPOSet::getName(); - - std::vector sizes(1); - if (!hin.getShape(rot_param_name, sizes)) - throw std::runtime_error("Failed to read rotation_params in VP file"); - - int nparam_actual = sizes[0]; - int nparam = myVars.size(); - if (nparam != nparam_actual) - { - std::ostringstream tmp_err; - tmp_err << "Expected number of rotation parameters (" << nparam << ") does not match number in file (" - << nparam_actual << ")"; - throw std::runtime_error(tmp_err.str()); - } - - std::vector params(nparam); - hin.read(params, rot_param_name); - for (int i = 0; i < nparam; i++) - myVars[i] = params[i]; - - hin.pop(); - - hin.pop(); -} - -void RotatedSPOs::buildOptVariables(const size_t nel) -{ -#if !defined(QMC_COMPLEX) - /* Only rebuild optimized variables if more after-rotation orbitals are needed - * Consider ROHF, there is only one set of SPO for both spin up and down Nup > Ndown. - * nel_major_ will be set Nup. 
- * - * Use the size of myVars as a flag to avoid building the rotation parameters again - * when a clone is made (the DiracDeterminant constructor calls buildOptVariables) - */ - if (nel > nel_major_ && myVars.size() == 0) - { - nel_major_ = nel; - - const size_t nmo = Phi->getOrbitalSetSize(); - - // create active rotation parameter indices - RotationIndices created_m_act_rot_inds; - - RotationIndices created_full_rot_inds; - if (use_global_rot_) - createRotationIndicesFull(nel, nmo, created_full_rot_inds); - - createRotationIndices(nel, nmo, created_m_act_rot_inds); - - buildOptVariables(created_m_act_rot_inds, created_full_rot_inds); - } -#endif -} - -void RotatedSPOs::buildOptVariables(const RotationIndices& rotations, const RotationIndices& full_rotations) -{ -#if !defined(QMC_COMPLEX) - const size_t nmo = Phi->getOrbitalSetSize(); - - // create active rotations - m_act_rot_inds = rotations; - - if (use_global_rot_) - m_full_rot_inds = full_rotations; - - if (use_global_rot_) - app_log() << "Orbital rotation using global rotation" << std::endl; - else - app_log() << "Orbital rotation using history" << std::endl; - - // This will add the orbital rotation parameters to myVars - // and will also read in initial parameter values supplied in input file - int p, q; - int nparams_active = m_act_rot_inds.size(); - - app_log() << "nparams_active: " << nparams_active << " params2.size(): " << params.size() << std::endl; - if (params_supplied) - if (nparams_active != params.size()) - throw std::runtime_error( - "The number of supplied orbital rotation parameters does not match number prdouced by the slater " - "expansion. \n"); - - myVars.clear(); - for (int i = 0; i < nparams_active; i++) - { - p = m_act_rot_inds[i].first; - q = m_act_rot_inds[i].second; - std::stringstream sstr; - sstr << my_name_ << "_orb_rot_" << (p < 10 ? "0" : "") << (p < 100 ? "0" : "") << (p < 1000 ? "0" : "") << p << "_" - << (q < 10 ? "0" : "") << (q < 100 ? "0" : "") << (q < 1000 ? 
"0" : "") << q; - - // If the user input parameters, use those. Otherwise, initialize the parameters to zero - if (params_supplied) - { - myVars.insert(sstr.str(), params[i]); - } - else - { - myVars.insert(sstr.str(), 0.0); - } - } - - if (use_global_rot_) - { - myVarsFull.clear(); - for (int i = 0; i < m_full_rot_inds.size(); i++) - { - p = m_full_rot_inds[i].first; - q = m_full_rot_inds[i].second; - std::stringstream sstr; - sstr << my_name_ << "_orb_rot_" << (p < 10 ? "0" : "") << (p < 100 ? "0" : "") << (p < 1000 ? "0" : "") << p - << "_" << (q < 10 ? "0" : "") << (q < 100 ? "0" : "") << (q < 1000 ? "0" : "") << q; - - if (params_supplied && i < m_act_rot_inds.size()) - myVarsFull.insert(sstr.str(), params[i]); - else - myVarsFull.insert(sstr.str(), 0.0); - } - } - - - //Printing the parameters - if (true) - { - app_log() << std::string(16, ' ') << "Parameter name" << std::string(15, ' ') << "Value\n"; - myVars.print(app_log()); - } - - if (params_supplied) - { - std::vector param(m_act_rot_inds.size()); - for (int i = 0; i < m_act_rot_inds.size(); i++) - param[i] = myVars[i]; - apply_rotation(param, false); - } -#endif -} - -void RotatedSPOs::apply_rotation(const std::vector& param, bool use_stored_copy) -{ - assert(param.size() == m_act_rot_inds.size()); - - const size_t nmo = Phi->getOrbitalSetSize(); - ValueMatrix rot_mat(nmo, nmo); - - constructAntiSymmetricMatrix(m_act_rot_inds, param, rot_mat); - - /* - rot_mat is now an anti-hermitian matrix. Now we convert - it into a unitary matrix via rot_mat = exp(-rot_mat). - Finally, apply unitary matrix to orbs. 
- */ - exponentiate_antisym_matrix(rot_mat); - { - ScopedTimer local(apply_rotation_timer_); - Phi->applyRotation(rot_mat, use_stored_copy); - } -} - -void RotatedSPOs::applyDeltaRotation(const std::vector& delta_param, - const std::vector& old_param, - std::vector& new_param) -{ - const size_t nmo = Phi->getOrbitalSetSize(); - ValueMatrix new_rot_mat(nmo, nmo); - constructDeltaRotation(delta_param, old_param, m_act_rot_inds, m_full_rot_inds, new_param, new_rot_mat); - - { - ScopedTimer local(apply_rotation_timer_); - Phi->applyRotation(new_rot_mat, true); - } -} - -void RotatedSPOs::constructDeltaRotation(const std::vector& delta_param, - const std::vector& old_param, - const RotationIndices& act_rot_inds, - const RotationIndices& full_rot_inds, - std::vector& new_param, - ValueMatrix& new_rot_mat) -{ - assert(delta_param.size() == act_rot_inds.size()); - assert(old_param.size() == full_rot_inds.size()); - assert(new_param.size() == full_rot_inds.size()); - - const size_t nmo = new_rot_mat.rows(); - assert(new_rot_mat.rows() == new_rot_mat.cols()); - - ValueMatrix old_rot_mat(nmo, nmo); - - constructAntiSymmetricMatrix(full_rot_inds, old_param, old_rot_mat); - exponentiate_antisym_matrix(old_rot_mat); - - ValueMatrix delta_rot_mat(nmo, nmo); - - constructAntiSymmetricMatrix(act_rot_inds, delta_param, delta_rot_mat); - exponentiate_antisym_matrix(delta_rot_mat); - - // Apply delta rotation to old rotation. 
- BLAS::gemm('N', 'N', nmo, nmo, nmo, 1.0, delta_rot_mat.data(), nmo, old_rot_mat.data(), nmo, 0.0, new_rot_mat.data(), - nmo); - - ValueMatrix log_rot_mat(nmo, nmo); - log_antisym_matrix(new_rot_mat, log_rot_mat); - extractParamsFromAntiSymmetricMatrix(full_rot_inds, log_rot_mat, new_param); -} - -void RotatedSPOs::applyFullRotation(const std::vector& full_param, bool use_stored_copy) -{ - assert(full_param.size() == m_full_rot_inds.size()); - - const size_t nmo = Phi->getOrbitalSetSize(); - ValueMatrix rot_mat(nmo, nmo); - rot_mat = ValueType(0); - - constructAntiSymmetricMatrix(m_full_rot_inds, full_param, rot_mat); - - /* - rot_mat is now an anti-hermitian matrix. Now we convert - it into a unitary matrix via rot_mat = exp(-rot_mat). - Finally, apply unitary matrix to orbs. - */ - exponentiate_antisym_matrix(rot_mat); - Phi->applyRotation(rot_mat, use_stored_copy); -} - -void RotatedSPOs::applyRotationHistory() -{ - for (auto delta_param : history_params_) - { - apply_rotation(delta_param, false); - } -} - -// compute exponential of a real, antisymmetric matrix by diagonalizing and exponentiating eigenvalues -void RotatedSPOs::exponentiate_antisym_matrix(ValueMatrix& mat) -{ - const int n = mat.rows(); - std::vector> mat_h(n * n, 0); - std::vector eval(n, 0); - std::vector> work(2 * n, 0); - std::vector rwork(3 * n, 0); - std::vector> mat_d(n * n, 0); - std::vector> mat_t(n * n, 0); - // exponentiating e^X = e^iY (Y hermitian) - // i(-iX) = X, so -iX is hermitian - // diagonalize -iX = UDU^T, exponentiate e^iD, and return U e^iD U^T - // construct hermitian analogue of mat by multiplying by -i - for (int i = 0; i < n; ++i) - { - for (int j = i; j < n; ++j) - { - mat_h[i + n * j] = std::complex(0, -1.0 * mat[j][i]); - mat_h[j + n * i] = std::complex(0, 1.0 * mat[j][i]); - } - } - // diagonalize the matrix - char JOBZ('V'); - char UPLO('U'); - int N(n); - int LDA(n); - int LWORK(2 * n); - int info = 0; - LAPACK::heev(JOBZ, UPLO, N, &mat_h.at(0), LDA, &eval.at(0), 
&work.at(0), LWORK, &rwork.at(0), info); - if (info != 0) - { - std::ostringstream msg; - msg << "heev failed with info = " << info << " in RotatedSPOs::exponentiate_antisym_matrix"; - throw std::runtime_error(msg.str()); - } - // iterate through diagonal matrix, exponentiate terms - for (int i = 0; i < n; ++i) - { - for (int j = 0; j < n; ++j) - { - mat_d[i + j * n] = (i == j) ? std::exp(std::complex(0.0, eval[i])) : std::complex(0.0, 0.0); - } - } - // perform matrix multiplication - // assume row major - BLAS::gemm('N', 'C', n, n, n, std::complex(1.0, 0), &mat_d.at(0), n, &mat_h.at(0), n, - std::complex(0.0, 0.0), &mat_t.at(0), n); - BLAS::gemm('N', 'N', n, n, n, std::complex(1.0, 0), &mat_h.at(0), n, &mat_t.at(0), n, - std::complex(0.0, 0.0), &mat_d.at(0), n); - for (int i = 0; i < n; ++i) - for (int j = 0; j < n; ++j) - { - if (mat_d[i + n * j].imag() > 1e-12) - { - app_log() << "warning: large imaginary value in orbital rotation matrix: (i,j) = (" << i << "," << j - << "), im = " << mat_d[i + n * j].imag() << std::endl; - } - mat[j][i] = mat_d[i + n * j].real(); - } -} - -void RotatedSPOs::log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output) -{ - const int n = mat.rows(); - std::vector mat_h(n * n, 0); - std::vector eval_r(n, 0); - std::vector eval_i(n, 0); - std::vector mat_l(n * n, 0); - std::vector work(4 * n, 0); - - std::vector> mat_cd(n * n, 0); - std::vector> mat_cl(n * n, 0); - std::vector> mat_ch(n * n, 0); - - for (int i = 0; i < n; ++i) - for (int j = 0; j < n; ++j) - mat_h[i + n * j] = mat[i][j]; - - // diagonalize the matrix - char JOBL('V'); - char JOBR('N'); - int N(n); - int LDA(n); - int LWORK(4 * n); - int info = 0; - LAPACK::geev(&JOBL, &JOBR, &N, &mat_h.at(0), &LDA, &eval_r.at(0), &eval_i.at(0), &mat_l.at(0), &LDA, nullptr, &LDA, - &work.at(0), &LWORK, &info); - if (info != 0) - { - std::ostringstream msg; - msg << "heev failed with info = " << info << " in RotatedSPOs::log_antisym_matrix"; - throw std::runtime_error(msg.str()); 
- } - - // iterate through diagonal matrix, take log - for (int i = 0; i < n; ++i) - { - for (int j = 0; j < n; ++j) - { - auto tmp = (i == j) ? std::log(std::complex(eval_r[i], eval_i[i])) : std::complex(0.0, 0.0); - mat_cd[i + j * n] = tmp; - - if (eval_i[j] > 0.0) - { - mat_cl[i + j * n] = std::complex(mat_l[i + j * n], mat_l[i + (j + 1) * n]); - mat_cl[i + (j + 1) * n] = std::complex(mat_l[i + j * n], -mat_l[i + (j + 1) * n]); - } - else if (!(eval_i[j] < 0.0)) - { - mat_cl[i + j * n] = std::complex(mat_l[i + j * n], 0.0); - } - } - } - - RealType one(1.0); - RealType zero(0.0); - BLAS::gemm('N', 'N', n, n, n, one, &mat_cl.at(0), n, &mat_cd.at(0), n, zero, &mat_ch.at(0), n); - BLAS::gemm('N', 'C', n, n, n, one, &mat_ch.at(0), n, &mat_cl.at(0), n, zero, &mat_cd.at(0), n); - - - for (int i = 0; i < n; ++i) - for (int j = 0; j < n; ++j) - { - if (mat_cd[i + n * j].imag() > 1e-12) - { - app_log() << "warning: large imaginary value in antisymmetric matrix: (i,j) = (" << i << "," << j - << "), im = " << mat_cd[i + n * j].imag() << std::endl; - } - output[i][j] = mat_cd[i + n * j].real(); - } -} - -void RotatedSPOs::evaluateDerivRatios(const VirtualParticleSet& VP, - const opt_variables_type& optvars, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios, - Matrix& dratios, - int FirstIndex, - int LastIndex) -{ - Phi->evaluateDetRatios(VP, psi, psiinv, ratios); - - const size_t nel = LastIndex - FirstIndex; - const size_t nmo = Phi->getOrbitalSetSize(); - - psiM_inv.resize(nel, nel); - psiM_all.resize(nel, nmo); - dpsiM_all.resize(nel, nmo); - d2psiM_all.resize(nel, nmo); - - psiM_inv = 0; - psiM_all = 0; - dpsiM_all = 0; - d2psiM_all = 0; - - const ParticleSet& P = VP.getRefPS(); - int iel = VP.refPtcl; - - Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all); - - for (int i = 0; i < nel; i++) - for (int j = 0; j < nel; j++) - psiM_inv(i, j) = psiM_all(i, j); - - Invert(psiM_inv.data(), nel, nel); - - const ValueType* 
const A(psiM_all.data()); - const ValueType* const Ainv(psiM_inv.data()); - SPOSet::ValueMatrix T_orig; - T_orig.resize(nel, nmo); - - BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), A, nmo, Ainv, nel, ValueType(0.0), T_orig.data(), nmo); - - SPOSet::ValueMatrix T; - T.resize(nel, nmo); - - ValueVector tmp_psi; - tmp_psi.resize(nmo); - - for (int iat = 0; iat < VP.getTotalNum(); iat++) - { - Phi->evaluateValue(VP, iat, tmp_psi); - - for (int j = 0; j < nmo; j++) - psiM_all(iel - FirstIndex, j) = tmp_psi[j]; - - for (int i = 0; i < nel; i++) - for (int j = 0; j < nel; j++) - psiM_inv(i, j) = psiM_all(i, j); - - Invert(psiM_inv.data(), nel, nel); - - const ValueType* const A(psiM_all.data()); - const ValueType* const Ainv(psiM_inv.data()); - - // The matrix A is rectangular. Ainv is the inverse of the square part of the matrix. - // The multiply of Ainv and the square part of A is just the identity. - // This multiply could be reduced to Ainv and the non-square part of A. - BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), A, nmo, Ainv, nel, ValueType(0.0), T.data(), nmo); - - for (int i = 0; i < m_act_rot_inds.size(); i++) - { - int kk = myVars.where(i); - if (kk >= 0) - { - const int p = m_act_rot_inds.at(i).first; - const int q = m_act_rot_inds.at(i).second; - dratios(iat, kk) = T(p, q) - T_orig(p, q); // dratio size is (nknot, num_vars) - } - } - } -} - -void RotatedSPOs::evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - int FirstIndex, - int LastIndex) -{ - const size_t nel = LastIndex - FirstIndex; - const size_t nmo = Phi->getOrbitalSetSize(); - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1 - - psiM_inv.resize(nel, nel); - psiM_all.resize(nel, nmo); - dpsiM_all.resize(nel, nmo); - d2psiM_all.resize(nel, nmo); - - psiM_inv = 0; - psiM_all = 0; - dpsiM_all = 0; - d2psiM_all = 0; - - Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all); - - for (int i = 0; i < nel; i++) - for (int j 
= 0; j < nel; j++) - psiM_inv(i, j) = psiM_all(i, j); - - Invert(psiM_inv.data(), nel, nel); - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2 - const ValueType* const A(psiM_all.data()); - const ValueType* const Ainv(psiM_inv.data()); - SPOSet::ValueMatrix T; - T.resize(nel, nmo); - - BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), A, nmo, Ainv, nel, ValueType(0.0), T.data(), nmo); - - for (int i = 0; i < m_act_rot_inds.size(); i++) - { - int kk = myVars.where(i); - if (kk >= 0) - { - const int p = m_act_rot_inds.at(i).first; - const int q = m_act_rot_inds.at(i).second; - dlogpsi[kk] = T(p, q); - } - } -} - -void RotatedSPOs::evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const int& FirstIndex, - const int& LastIndex) -{ - const size_t nel = LastIndex - FirstIndex; - const size_t nmo = Phi->getOrbitalSetSize(); - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1 - myG_temp.resize(nel); - myG_J.resize(nel); - myL_temp.resize(nel); - myL_J.resize(nel); - - myG_temp = 0; - myG_J = 0; - myL_temp = 0; - myL_J = 0; - - Bbar.resize(nel, nmo); - psiM_inv.resize(nel, nel); - psiM_all.resize(nel, nmo); - dpsiM_all.resize(nel, nmo); - d2psiM_all.resize(nel, nmo); - - Bbar = 0; - psiM_inv = 0; - psiM_all = 0; - dpsiM_all = 0; - d2psiM_all = 0; - - - Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all); - - for (int i = 0; i < nel; i++) - for (int j = 0; j < nel; j++) - psiM_inv(i, j) = psiM_all(i, j); - - Invert(psiM_inv.data(), nel, nel); - - //current value of Gradient and Laplacian - // gradient components - for (int a = 0; a < nel; a++) - for (int i = 0; i < nel; i++) - for (int k = 0; k < 3; k++) - myG_temp[a][k] += psiM_inv(i, a) * dpsiM_all(a, i)[k]; - // laplacian components - for (int a = 0; a < nel; a++) - { - for (int i = 0; i < nel; i++) - myL_temp[a] += psiM_inv(i, a) * d2psiM_all(a, i); - } - - // calculation of myG_J which will be used to represent 
\frac{\nabla\psi_{J}}{\psi_{J}} - // calculation of myL_J will be used to represent \frac{\nabla^2\psi_{J}}{\psi_{J}} - // IMPORTANT NOTE: The value of P.L holds \nabla^2 ln[\psi] but we need \frac{\nabla^2 \psi}{\psi} and this is what myL_J will hold - for (int a = 0, iat = FirstIndex; a < nel; a++, iat++) - { - myG_J[a] = (P.G[iat] - myG_temp[a]); - myL_J[a] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[a]); - } - //possibly replace wit BLAS calls - for (int i = 0; i < nel; i++) - for (int j = 0; j < nmo; j++) - Bbar(i, j) = d2psiM_all(i, j) + 2 * dot(myG_J[i], dpsiM_all(i, j)) + myL_J[i] * psiM_all(i, j); - - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2 - const ValueType* const A(psiM_all.data()); - const ValueType* const Ainv(psiM_inv.data()); - const ValueType* const B(Bbar.data()); - SPOSet::ValueMatrix T; - SPOSet::ValueMatrix Y1; - SPOSet::ValueMatrix Y2; - SPOSet::ValueMatrix Y3; - SPOSet::ValueMatrix Y4; - T.resize(nel, nmo); - Y1.resize(nel, nel); - Y2.resize(nel, nmo); - Y3.resize(nel, nmo); - Y4.resize(nel, nmo); - - - BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), A, nmo, Ainv, nel, ValueType(0.0), T.data(), nmo); - BLAS::gemm('N', 'N', nel, nel, nel, ValueType(1.0), B, nmo, Ainv, nel, ValueType(0.0), Y1.data(), nel); - BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), T.data(), nmo, Y1.data(), nel, ValueType(0.0), Y2.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), B, nmo, Ainv, nel, ValueType(0.0), Y3.data(), nmo); - - //possibly replace with BLAS call - Y4 = Y3 - Y2; - - for (int i = 0; i < m_act_rot_inds.size(); i++) - { - int kk = myVars.where(i); - if (kk >= 0) - { - const int p = m_act_rot_inds.at(i).first; - const int q = m_act_rot_inds.at(i).second; - dlogpsi[kk] += T(p, q); - dhpsioverpsi[kk] += ValueType(-0.5) * Y4(p, q); - } - } -} - -void RotatedSPOs::evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const ValueType& psiCurrent, - const 
std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const GradMatrix& grads_up, - const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, - const ValueMatrix& B_lapl, - const std::vector& detData_up, - const size_t N1, - const size_t N2, - const size_t NP1, - const size_t NP2, - const std::vector>& lookup_tbl) -{ - bool recalculate(false); - for (int k = 0; k < myVars.size(); ++k) - { - int kk = myVars.where(k); - if (kk < 0) - continue; - if (optvars.recompute(kk)) - recalculate = true; - } - if (recalculate) - { - ParticleSet::ParticleGradient myG_temp, myG_J; - ParticleSet::ParticleLaplacian myL_temp, myL_J; - const int NP = P.getTotalNum(); - myG_temp.resize(NP); - myG_temp = 0.0; - myL_temp.resize(NP); - myL_temp = 0.0; - myG_J.resize(NP); - myG_J = 0.0; - myL_J.resize(NP); - myL_J = 0.0; - const size_t nmo = Phi->getOrbitalSetSize(); - const size_t nel = P.last(0) - P.first(0); - - const RealType* restrict C_p = Coeff.data(); - for (int i = 0; i < Coeff.size(); i++) - { - const size_t upC = C2node_up[i]; - const size_t dnC = C2node_dn[i]; - const ValueType tmp1 = C_p[i] * detValues_dn[dnC]; - const ValueType tmp2 = C_p[i] * detValues_up[upC]; - for (size_t k = 0, j = N1; k < NP1; k++, j++) - { - myG_temp[j] += tmp1 * grads_up(upC, k); - myL_temp[j] += tmp1 * lapls_up(upC, k); - } - for (size_t k = 0, j = N2; k < NP2; k++, j++) - { - myG_temp[j] += tmp2 * grads_dn(dnC, k); - myL_temp[j] += tmp2 * lapls_dn(dnC, k); - } - } - - myG_temp *= (1 / psiCurrent); - myL_temp *= (1 / psiCurrent); - - // calculation of myG_J which will be used to represent \frac{\nabla\psi_{J}}{\psi_{J}} - // calculation of myL_J will be used to represent \frac{\nabla^2\psi_{J}}{\psi_{J}} - // IMPORTANT NOTE: The 
value of P.L holds \nabla^2 ln[\psi] but we need \frac{\nabla^2 \psi}{\psi} and this is what myL_J will hold - for (int iat = 0; iat < (myL_temp.size()); iat++) - { - myG_J[iat] = (P.G[iat] - myG_temp[iat]); - myL_J[iat] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[iat]); - } - - - table_method_eval(dlogpsi, dhpsioverpsi, myL_J, myG_J, nel, nmo, psiCurrent, Coeff, C2node_up, C2node_dn, - detValues_up, detValues_dn, grads_up, grads_dn, lapls_up, lapls_dn, M_up, M_dn, Minv_up, Minv_dn, - B_grad, B_lapl, detData_up, N1, N2, NP1, NP2, lookup_tbl); - } -} - - -void RotatedSPOs::evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - const QTFull::ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl) -{ - bool recalculate(false); - for (int k = 0; k < myVars.size(); ++k) - { - int kk = myVars.where(k); - if (kk < 0) - continue; - if (optvars.recompute(kk)) - recalculate = true; - } - if (recalculate) - { - const size_t nmo = Phi->getOrbitalSetSize(); - const size_t nel = P.last(0) - P.first(0); - - table_method_evalWF(dlogpsi, nel, nmo, psiCurrent, Coeff, C2node_up, C2node_dn, detValues_up, detValues_dn, M_up, - M_dn, Minv_up, Minv_dn, detData_up, lookup_tbl); - } -} - -void RotatedSPOs::table_method_eval(Vector& dlogpsi, - Vector& dhpsioverpsi, - const ParticleSet::ParticleLaplacian& myL_J, - const ParticleSet::ParticleGradient& myG_J, - const size_t nel, - const size_t nmo, - const ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const GradMatrix& grads_up, - const GradMatrix& 
grads_dn, - const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, - const ValueMatrix& B_lapl, - const std::vector& detData_up, - const size_t N1, - const size_t N2, - const size_t NP1, - const size_t NP2, - const std::vector>& lookup_tbl) -/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -GUIDE TO THE MATICES BEING BUILT ----------------------------------------------- -The idea here is that there is a loop over all unique determinants. For each determiant the table method is employed to calculate the contributions to the parameter derivatives (dhpsioverpsi/dlogpsi) - - loop through unquie determinants - loop through parameters - evaluate contributaion to dlogpsi and dhpsioverpsi -\noindent - - BLAS GUIDE for matrix multiplication of [ alpha * A.B + beta * C = C ] - Matrix A is of dimensions a1,a2 and Matrix B is b1,b2 in which a2=b1 - The BLAS command is as follows... - - BLAS::gemm('N','N', b2, a1, a2 ,alpha, B, b2, A, a2, beta, C, b2); - -Below is a human readable format for the matrix multiplications performed below... 
- -This notation is inspired by http://dx.doi.org/10.1063/1.4948778 -\newline -\hfill\break -$ - A_{i,j}=\phi_j(r_{i}) \\ - T = A^{-1} \widetilde{A} \\ - B_{i,j} =\nabla^2 \phi_{j}(r_i) + \frac{\nabla_{i}J}{J} \cdot \nabla \phi_{j}(r_{i}) + \frac{\nabla^2_i J}{J} \phi_{j}(r_{i}) \\ - \hat{O_{I}} = \hat{O}D_{I} \\ - D_{I}=det(A_{I}) \newline - \psi_{MS} = \sum_{I=0} C_{I} D_{I\uparrow}D_{I\downarrow} \\ - \Psi_{total} = \psi_{J}\psi_{MS} \\ - \alpha_{I} = P^{T}_{I}TQ_{I} \\ - M_{I} = P^{T}_{I} \widetilde{M} Q_{I} = P^{T}_{I} (A^{-1}\widetilde{B} - A^{-1} B A^{-1}\widetilde{A} )Q_{I} \\ -$ -\newline -There are three constants I use in the expressions for dhpsioverpsi and dlogpsi -\newline -\hfill\break -$ - const0 = C_{0}*det(A_{0\downarrow})+\sum_{I=1} C_{I}*det(A_{I\downarrow})* det(\alpha_{I\uparrow}) \\ - const1 = C_{0}*\hat{O} det(A_{0\downarrow})+\sum_{I=1} C_{I}*\hat{O}det(A_{I\downarrow})* det(\alpha_{I\uparrow}) \\ - const2 = \sum_{I=1} C_{I}*det(A_{I\downarrow})* Tr[\alpha_{I}^{-1}M_{I}]*det(\alpha_{I}) \\ -$ -\newline -Below is a translation of the shorthand I use to represent matrices independent of ``excitation matrix". -\newline -\hfill\break -$ - Y_{1} = A^{-1}B \\ - Y_{2} = A^{-1}BA^{-1}\widetilde{A} \\ - Y_{3} = A^{-1}\widetilde{B} \\ - Y_{4} = \widetilde{M} = (A^{-1}\widetilde{B} - A^{-1} B A^{-1}\widetilde{A} )\\ -$ -\newline -Below is a translation of the shorthand I use to represent matrices dependent on ``excitation" with respect to the reference Matrix and sums of matrices. Above this line I have represented these excitation matrices with a subscript ``I" but from this point on The subscript will be omitted and it is clear that whenever a matrix depends on $P^{T}_I$ and $Q_{I}$ that this is an excitation matrix. The reference matrix is always $A_{0}$ and is always the Hartree Fock Matrix. 
-\newline -\hfill\break -$ - Y_{5} = TQ \\ - Y_{6} = (P^{T}TQ)^{-1} = \alpha_{I}^{-1}\\ - Y_{7} = \alpha_{I}^{-1} P^{T} \\ - Y_{11} = \widetilde{M}Q \\ - Y_{23} = P^{T}\widetilde{M}Q \\ - Y_{24} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q \\ - Y_{25} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q\alpha_{I}^{-1} \\ - Y_{26} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q\alpha_{I}^{-1}P^{T}\\ -$ -\newline -So far you will notice that I have not included up or down arrows to specify what spin the matrices are of. This is because we are calculating the derivative of all up or all down spin orbital rotation parameters at a time. If we are finding the up spin derivatives then any term that is down spin will be constant. The following assumes that we are taking up-spin MO rotation parameter derivatives. Of course the down spin expression can be retrieved by swapping the up and down arrows. I have dubbed any expression with lowercase p prefix as a "precursor" to an expression actually used... -\newline -\hfill\break -$ - \dot{C_{I}} = C_{I}*det(A_{I\downarrow})\\ - \ddot{C_{I}} = C_{I}*\hat{O}det(A_{I\downarrow}) \\ - pK1 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) Tr[\alpha_{I}^{-1}M_{I}] (Q\alpha_{I}^{-1}P^{T}) \\ - pK2 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}) \\ - pK3 = \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}) \\ - pK4 = \sum_{I=1} \dot{C_{I}} det(A_{I}) (Q\alpha_{I}^{-1}P^{T}) \\ - pK5 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1} M_{I} \alpha_{I}^{-1}P^{T}) \\ -$ -\newline -Now these p matrices will be used to make various expressions via BLAS commands. 
-\newline -\hfill\break -$ - K1T = const0^{-1}*pK1.T =const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) Tr[\alpha_{I}^{-1}M_{I}] (Q\alpha_{I}^{-1}P^{T}T) \\ - TK1T = T.K1T = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) Tr[\alpha_{I}^{-1}M_{I}] (TQ\alpha_{I}^{-1}P^{T}T)\\ \\ - K2AiB = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}A^{-1}\widetilde{B})\\ - TK2AiB = T.K2AiB = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (TQ\alpha_{I}^{-1}P^{T}A^{-1}\widetilde{B})\\ - K2XA = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}X\widetilde{A})\\ - TK2XA = T.K2XA = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (TQ\alpha_{I}^{-1}P^{T}X\widetilde{A})\\ \\ - K2T = \frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}T) \\ - TK2T = T.K2T =\frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (TQ\alpha_{I}^{-1}P^{T}T) \\ - MK2T = \frac{const0}{const1} Y_{4}.K2T= const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (\widetilde{M}Q\alpha_{I}^{-1}P^{T}T)\\ \\ - K3T = const0^{-1} \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}T) \\ - TK3T = T.K3T = const0^{-1} \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) (TQ\alpha_{I}^{-1}P^{T}T)\\ \\ - K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (Q\alpha_{I}^{-1}P^{T}T) \\ - TK4T = T.K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (TQ\alpha_{I}^{-1}P^{T}T) \\ \\ - K5T = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1} M_{I} \alpha_{I}^{-1}P^{T} T) \\ - TK5T = T.K5T = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (T Q\alpha_{I}^{-1} M_{I} \alpha_{I}^{-1}P^{T} T) \\ -$ -\newline -Now with all these matrices and constants the expressions of dhpsioverpsi and dlogpsi can be created. - - - - -In addition I will be using a special generalization of the kinetic operator which I will denote as O. 
Our Slater matrix with the special O operator applied to each element will be called B_bar - -$ -``Bbar"_{i,j} =\nabla^2 \phi_{j}(r_i) + \frac{\nabla_{i}J}{J} \cdot \nabla \phi_{j}(r_{i}) + \frac{\nabla^2_i J}{J} \phi_{j}(r_{i}) -$ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ -{ - ValueMatrix Table; - ValueMatrix Bbar; - ValueMatrix Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y11, Y23, Y24, Y25, Y26; - ValueMatrix pK1, K1T, TK1T, pK2, K2AiB, TK2AiB, K2XA, TK2XA, K2T, TK2T, MK2T, pK3, K3T, TK3T, pK5, K5T, TK5T; - - Table.resize(nel, nmo); - - Bbar.resize(nel, nmo); - - Y1.resize(nel, nel); - Y2.resize(nel, nmo); - Y3.resize(nel, nmo); - Y4.resize(nel, nmo); - - pK1.resize(nmo, nel); - K1T.resize(nmo, nmo); - TK1T.resize(nel, nmo); - - pK2.resize(nmo, nel); - K2AiB.resize(nmo, nmo); - TK2AiB.resize(nel, nmo); - K2XA.resize(nmo, nmo); - TK2XA.resize(nel, nmo); - K2T.resize(nmo, nmo); - TK2T.resize(nel, nmo); - MK2T.resize(nel, nmo); - - pK3.resize(nmo, nel); - K3T.resize(nmo, nmo); - TK3T.resize(nel, nmo); - - pK5.resize(nmo, nel); - K5T.resize(nmo, nmo); - TK5T.resize(nel, nmo); - - const int parameters_size(m_act_rot_inds.size()); - const int parameter_start_index(0); - - const size_t num_unique_up_dets(detValues_up.size()); - const size_t num_unique_dn_dets(detValues_dn.size()); - - const RealType* restrict cptr = Coeff.data(); - const size_t nc = Coeff.size(); - const size_t* restrict upC(C2node_up.data()); - const size_t* restrict dnC(C2node_dn.data()); - //B_grad holds the gradient operator - //B_lapl holds the laplacian operator - //B_bar will hold our special O operator - - const int offset1(N1); - const int offset2(N2); - const int NPother(NP2); - - RealType* T(Table.data()); - - //possibly replace wit BLAS calls - for (int i = 0; i < nel; i++) - for (int j = 0; j < nmo; j++) - Bbar(i, j) = B_lapl(i, j) + 2 * dot(myG_J[i + offset1], B_grad(i, j)) + myL_J[i + offset1] * M_up(i, j); - - const RealType* restrict B(Bbar.data()); - const RealType* restrict 
A(M_up.data()); - const RealType* restrict Ainv(Minv_up.data()); - //IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR THIS CASE - // The T matrix should be calculated and stored for use - // T = A^{-1} \widetilde A - //REMINDER: that the ValueMatrix "matrix" stores data in a row major order and that BLAS commands assume column major - BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), A, nmo, Ainv, nel, RealType(0.0), T, nmo); - - BLAS::gemm('N', 'N', nel, nel, nel, RealType(1.0), B, nmo, Ainv, nel, RealType(0.0), Y1.data(), nel); - BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), T, nmo, Y1.data(), nel, RealType(0.0), Y2.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), B, nmo, Ainv, nel, RealType(0.0), Y3.data(), nmo); - - //possibly replace with BLAS call - Y4 = Y3 - Y2; - - //Need to create the constants: (Oi, const0, const1, const2)to take advantage of minimal BLAS commands; - //Oi is the special operator applied to the slater matrix "A subscript i" from the total CI expansion - //\hat{O_{i}} = \hat{O}D_{i} with D_{i}=det(A_{i}) and Multi-Slater component defined as \sum_{i=0} C_{i} D_{i\uparrow}D_{i\downarrow} - std::vector Oi(num_unique_dn_dets); - - for (int index = 0; index < num_unique_dn_dets; index++) - for (int iat = 0; iat < NPother; iat++) - Oi[index] += lapls_dn(index, iat) + 2 * dot(grads_dn(index, iat), myG_J[offset2 + iat]) + - myL_J[offset2 + iat] * detValues_dn[index]; - - //const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1} C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow}) - //const1 = C_{0}*\hat{O} det(A_{0\downarrow})+\sum_{i=1} C_{i}*\hat{O}det(A_{i\downarrow})* det(\alpha_{i\uparrow}) - //const2 = \sum_{i=1} C_{i}*det(A_{i\downarrow})* Tr[\alpha_{i}^{-1}M_{i}]*det(\alpha_{i}) - RealType const0(0.0), const1(0.0), const2(0.0); - for (size_t i = 0; i < nc; ++i) - { - const RealType c = cptr[i]; - const size_t up = upC[i]; - const 
size_t down = dnC[i]; - - const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]); - const1 += c * Oi[down] * (detValues_up[up] / detValues_up[0]); - } - - std::fill(pK1.begin(), pK1.end(), 0.0); - std::fill(pK2.begin(), pK2.end(), 0.0); - std::fill(pK3.begin(), pK3.end(), 0.0); - std::fill(pK5.begin(), pK5.end(), 0.0); - - //Now we are going to loop through all unique determinants. - //The few lines above are for the reference matrix contribution. - //Although I start the loop below from index 0, the loop only performs actions when the index is >= 1 - //the detData object contains all the information about the P^T and Q matrices (projection matrices) needed in the table method - const int* restrict data_it = detData_up.data(); - for (int index = 0, datum = 0; index < num_unique_up_dets; index++) - { - const int k = data_it[datum]; - - if (k == 0) - { - datum += 3 * k + 1; - } - - else - { - //Number of rows and cols of P^T - const int prows = k; - const int pcols = nel; - //Number of rows and cols of Q - const int qrows = nmo; - const int qcols = k; - - Y5.resize(nel, k); - Y6.resize(k, k); - - //Any matrix multiplication of P^T or Q is simply a projection - //Explicit matrix multiplication can be avoided; instead column or row copying can be done - //BlAS::copy(size of col/row being copied, - // Matrix pointer + place to begin copying, - // storage spacing (number of elements btw next row/col element), - // Pointer to resultant matrix + place to begin pasting, - // storage spacing of resultant matrix) - //For example the next 4 lines is the matrix multiplication of T*Q = Y5 - std::fill(Y5.begin(), Y5.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(nel, T + data_it[datum + 1 + k + i], nmo, Y5.data() + i, k); - } - - std::fill(Y6.begin(), Y6.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, (Y6.data() + i * k), 1); - } - - - Vector WS; - Vector Piv; - WS.resize(k); - 
Piv.resize(k); - std::complex logdet = 0.0; - InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet); - - Y11.resize(nel, k); - Y23.resize(k, k); - Y24.resize(k, k); - Y25.resize(k, k); - Y26.resize(k, nel); - - std::fill(Y11.begin(), Y11.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(nel, Y4.data() + (data_it[datum + 1 + k + i]), nmo, Y11.data() + i, k); - } - - std::fill(Y23.begin(), Y23.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(k, Y11.data() + (data_it[datum + 1 + i]) * k, 1, (Y23.data() + i * k), 1); - } - - BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y23.data(), k, Y6.data(), k, RealType(0.0), Y24.data(), k); - BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y6.data(), k, Y24.data(), k, RealType(0.0), Y25.data(), k); - - - Y26.resize(k, nel); - - std::fill(Y26.begin(), Y26.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(k, Y25.data() + i, k, Y26.data() + (data_it[datum + 1 + i]), nel); - } - - - Y7.resize(k, nel); - - std::fill(Y7.begin(), Y7.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(k, Y6.data() + i, k, Y7.data() + (data_it[datum + 1 + i]), nel); - } - - // c_Tr_AlphaI_MI is a constant contributing to constant const2 - // c_Tr_AlphaI_MI = Tr[\alpha_{I}^{-1}(P^{T}\widetilde{M} Q)] - RealType c_Tr_AlphaI_MI = 0.0; - for (int i = 0; i < k; i++) - { - c_Tr_AlphaI_MI += Y24(i, i); - } - - for (int p = 0; p < lookup_tbl[index].size(); p++) - { - //el_p is the element position that contains information about the CI coefficient, and det up/dn values associated with the current unique determinant - const int el_p(lookup_tbl[index][p]); - const RealType c = cptr[el_p]; - const size_t up = upC[el_p]; - const size_t down = dnC[el_p]; - - const RealType alpha_1(c * detValues_dn[down] * detValues_up[up] / detValues_up[0] * c_Tr_AlphaI_MI); - const RealType alpha_2(c * detValues_dn[down] * detValues_up[up] / detValues_up[0]); - const RealType alpha_3(c * Oi[down] * detValues_up[up] / detValues_up[0]); - - 
const2 += alpha_1; - - for (int i = 0; i < k; i++) - { - BLAS::axpy(nel, alpha_1, Y7.data() + i * nel, 1, pK1.data() + (data_it[datum + 1 + k + i]) * nel, 1); - BLAS::axpy(nel, alpha_2, Y7.data() + i * nel, 1, pK2.data() + (data_it[datum + 1 + k + i]) * nel, 1); - BLAS::axpy(nel, alpha_3, Y7.data() + i * nel, 1, pK3.data() + (data_it[datum + 1 + k + i]) * nel, 1); - BLAS::axpy(nel, alpha_2, Y26.data() + i * nel, 1, pK5.data() + (data_it[datum + 1 + k + i]) * nel, 1); - } - } - datum += 3 * k + 1; - } - } - - - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T, nmo, pK1.data(), nel, RealType(0.0), K1T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K1T.data(), nmo, T, nmo, RealType(0.0), TK1T.data(), nmo); - - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y3.data(), nmo, pK2.data(), nel, RealType(0.0), K2AiB.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2AiB.data(), nmo, T, nmo, RealType(0.0), TK2AiB.data(), nmo); - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y2.data(), nmo, pK2.data(), nel, RealType(0.0), K2XA.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2XA.data(), nmo, T, nmo, RealType(0.0), TK2XA.data(), nmo); - - BLAS::gemm('N', 'N', nmo, nmo, nel, const1 / (const0 * const0), T, nmo, pK2.data(), nel, RealType(0.0), K2T.data(), - nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2T.data(), nmo, T, nmo, RealType(0.0), TK2T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, const0 / const1, K2T.data(), nmo, Y4.data(), nmo, RealType(0.0), MK2T.data(), - nmo); - - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T, nmo, pK3.data(), nel, RealType(0.0), K3T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K3T.data(), nmo, T, nmo, RealType(0.0), TK3T.data(), nmo); - - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T, nmo, pK5.data(), nel, RealType(0.0), K5T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K5T.data(), nmo, T, nmo, RealType(0.0), 
TK5T.data(), nmo); - - - for (int mu = 0, k = parameter_start_index; k < (parameter_start_index + parameters_size); k++, mu++) - { - int kk = myVars.where(k); - if (kk >= 0) - { - const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second); - if (i <= nel - 1 && j > nel - 1) - { - dhpsioverpsi[kk] += - ValueType(-0.5 * Y4(i, j) - - 0.5 * - (-K5T(i, j) + K5T(j, i) + TK5T(i, j) + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) - K2XA(i, j) + - K2XA(j, i) + TK2XA(i, j) - MK2T(i, j) + K1T(i, j) - K1T(j, i) - TK1T(i, j) - - const2 / const1 * K2T(i, j) + const2 / const1 * K2T(j, i) + const2 / const1 * TK2T(i, j) + - K3T(i, j) - K3T(j, i) - TK3T(i, j) - K2T(i, j) + K2T(j, i) + TK2T(i, j))); - } - else if (i <= nel - 1 && j <= nel - 1) - { - dhpsioverpsi[kk] += ValueType( - -0.5 * (Y4(i, j) - Y4(j, i)) - - 0.5 * - (-K5T(i, j) + K5T(j, i) + TK5T(i, j) - TK5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) + - TK2AiB(j, i) - K2XA(i, j) + K2XA(j, i) + TK2XA(i, j) - TK2XA(j, i) - MK2T(i, j) + MK2T(j, i) + - K1T(i, j) - K1T(j, i) - TK1T(i, j) + TK1T(j, i) - const2 / const1 * K2T(i, j) + - const2 / const1 * K2T(j, i) + const2 / const1 * TK2T(i, j) - const2 / const1 * TK2T(j, i) + K3T(i, j) - - K3T(j, i) - TK3T(i, j) + TK3T(j, i) - K2T(i, j) + K2T(j, i) + TK2T(i, j) - TK2T(j, i))); - } - else - { - dhpsioverpsi[kk] += ValueType(-0.5 * - (-K5T(i, j) + K5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - K2XA(i, j) + K2XA(j, i) - - + K1T(i, j) - K1T(j, i) - const2 / const1 * K2T(i, j) + - const2 / const1 * K2T(j, i) + K3T(i, j) - K3T(j, i) - K2T(i, j) + K2T(j, i))); - } - } - } -} - -void RotatedSPOs::table_method_evalWF(Vector& dlogpsi, - const size_t nel, - const size_t nmo, - const ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const 
std::vector& detData_up, - const std::vector>& lookup_tbl) -{ - ValueMatrix Table; - ValueMatrix Y5, Y6, Y7; - ValueMatrix pK4, K4T, TK4T; - - Table.resize(nel, nmo); - - Bbar.resize(nel, nmo); - - pK4.resize(nmo, nel); - K4T.resize(nmo, nmo); - TK4T.resize(nel, nmo); - - const int parameters_size(m_act_rot_inds.size()); - const int parameter_start_index(0); - - const size_t num_unique_up_dets(detValues_up.size()); - const size_t num_unique_dn_dets(detValues_dn.size()); - - const RealType* restrict cptr = Coeff.data(); - const size_t nc = Coeff.size(); - const size_t* restrict upC(C2node_up.data()); - const size_t* restrict dnC(C2node_dn.data()); - - RealType* T(Table.data()); - - const RealType* restrict A(M_up.data()); - const RealType* restrict Ainv(Minv_up.data()); - //IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR THIS CASE - // The T matrix should be calculated and stored for use - // T = A^{-1} \widetilde A - //REMINDER: that the ValueMatrix "matrix" stores data in a row major order and that BLAS commands assume column major - BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), A, nmo, Ainv, nel, RealType(0.0), T, nmo); - - //const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1} C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow}) - RealType const0(0.0), const1(0.0), const2(0.0); - for (size_t i = 0; i < nc; ++i) - { - const RealType c = cptr[i]; - const size_t up = upC[i]; - const size_t down = dnC[i]; - - const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]); - } - - std::fill(pK4.begin(), pK4.end(), 0.0); - - //Now we are going to loop through all unique determinants. - //The few lines above are for the reference matrix contribution. 
- //Although I start the loop below from index 0, the loop only performs actions when the index is >= 1 - //the detData object contains all the information about the P^T and Q matrices (projection matrices) needed in the table method - const int* restrict data_it = detData_up.data(); - for (int index = 0, datum = 0; index < num_unique_up_dets; index++) - { - const int k = data_it[datum]; - - if (k == 0) - { - datum += 3 * k + 1; - } - - else - { - //Number of rows and cols of P^T - const int prows = k; - const int pcols = nel; - //Number of rows and cols of Q - const int qrows = nmo; - const int qcols = k; - - Y5.resize(nel, k); - Y6.resize(k, k); - - //Any matrix multiplication of P^T or Q is simply a projection - //Explicit matrix multiplication can be avoided; instead column or row copying can be done - //BlAS::copy(size of col/row being copied, - // Matrix pointer + place to begin copying, - // storage spacing (number of elements btw next row/col element), - // Pointer to resultant matrix + place to begin pasting, - // storage spacing of resultant matrix) - //For example the next 4 lines is the matrix multiplication of T*Q = Y5 - std::fill(Y5.begin(), Y5.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(nel, T + data_it[datum + 1 + k + i], nmo, Y5.data() + i, k); - } - - std::fill(Y6.begin(), Y6.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, (Y6.data() + i * k), 1); - } - - Vector WS; - Vector Piv; - WS.resize(k); - Piv.resize(k); - std::complex logdet = 0.0; - InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet); - - Y7.resize(k, nel); - - std::fill(Y7.begin(), Y7.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(k, Y6.data() + i, k, Y7.data() + (data_it[datum + 1 + i]), nel); - } - - for (int p = 0; p < lookup_tbl[index].size(); p++) - { - //el_p is the element position that contains information about the CI coefficient, and det up/dn values associated with the current 
unique determinant - const int el_p(lookup_tbl[index][p]); - const RealType c = cptr[el_p]; - const size_t up = upC[el_p]; - const size_t down = dnC[el_p]; - - const RealType alpha_4(c * detValues_dn[down] * detValues_up[up] * (1 / psiCurrent)); - - for (int i = 0; i < k; i++) - { - BLAS::axpy(nel, alpha_4, Y7.data() + i * nel, 1, pK4.data() + (data_it[datum + 1 + k + i]) * nel, 1); - } - } - datum += 3 * k + 1; - } - } - - BLAS::gemm('N', 'N', nmo, nmo, nel, RealType(1.0), T, nmo, pK4.data(), nel, RealType(0.0), K4T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K4T.data(), nmo, T, nmo, RealType(0.0), TK4T.data(), nmo); - - for (int mu = 0, k = parameter_start_index; k < (parameter_start_index + parameters_size); k++, mu++) - { - int kk = myVars.where(k); - if (kk >= 0) - { - const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second); - if (i <= nel - 1 && j > nel - 1) - { - dlogpsi[kk] += ValueType(detValues_up[0] * (Table(i, j)) * const0 * (1 / psiCurrent) + - (K4T(i, j) - K4T(j, i) - TK4T(i, j))); - } - else if (i <= nel - 1 && j <= nel - 1) - { - dlogpsi[kk] += ValueType(detValues_up[0] * (Table(i, j) - Table(j, i)) * const0 * (1 / psiCurrent) + - (K4T(i, j) - TK4T(i, j) - K4T(j, i) + TK4T(j, i))); - } - else - { - dlogpsi[kk] += ValueType((K4T(i, j) - K4T(j, i))); - } - } - } -} - - -std::unique_ptr RotatedSPOs::makeClone() const -{ - auto myclone = std::make_unique(my_name_, std::unique_ptr(Phi->makeClone())); - - myclone->params = this->params; - myclone->params_supplied = this->params_supplied; - myclone->m_act_rot_inds = this->m_act_rot_inds; - myclone->m_full_rot_inds = this->m_full_rot_inds; - myclone->myVars = this->myVars; - myclone->myVarsFull = this->myVarsFull; - myclone->history_params_ = this->history_params_; - myclone->use_global_rot_ = this->use_global_rot_; - return myclone; -} - -void RotatedSPOs::mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& 
psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const -{ - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.mw_evaluateDetRatios(phi_list, vp_list, psi_list, invRow_ptr_list, ratios_list); -} - -void RotatedSPOs::mw_evaluateValue(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list) const -{ - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.mw_evaluateValue(phi_list, P_list, iat, psi_v_list); -} - -void RotatedSPOs::mw_evaluateVGL(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const -{ - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.mw_evaluateVGL(phi_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); -} - -void RotatedSPOs::mw_evaluateVGLWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list, - OffloadMatrix& mw_dspin) const -{ - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.mw_evaluateVGLWithSpin(phi_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list, mw_dspin); -} - -void RotatedSPOs::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const -{ - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.mw_evaluateVGLandDetRatioGrads(phi_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads); -} - -void RotatedSPOs::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader& spo_list, - const 
RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads, - std::vector& spingrads) const -{ - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.mw_evaluateVGLandDetRatioGradsWithSpin(phi_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads, - spingrads); -} - -void RotatedSPOs::mw_evaluate_notranspose(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int first, - int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const -{ - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.mw_evaluate_notranspose(phi_list, P_list, first, last, logdet_list, dlogdet_list, d2logdet_list); -} - -void RotatedSPOs::createResource(ResourceCollection& collection) const { Phi->createResource(collection); } - -void RotatedSPOs::acquireResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const -{ - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.acquireResource(collection, phi_list); -} - -void RotatedSPOs::releaseResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const -{ - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.releaseResource(collection, phi_list); -} - -RefVectorWithLeader RotatedSPOs::extractPhiRefList(const RefVectorWithLeader& spo_list) -{ - auto& spo_leader = spo_list.getCastedLeader(); - const auto nw = spo_list.size(); - RefVectorWithLeader phi_list(*spo_leader.Phi); - phi_list.reserve(nw); - for (int iw = 0; iw < nw; iw++) - { - RotatedSPOs& rot = spo_list.getCastedElement(iw); - phi_list.emplace_back(*rot.Phi); - } - return phi_list; -} - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/RotatedSPOs.h b/src/QMCWaveFunctions/RotatedSPOs.h index 
6e4c45d525..8c7e17a8f6 100644 --- a/src/QMCWaveFunctions/RotatedSPOs.h +++ b/src/QMCWaveFunctions/RotatedSPOs.h @@ -13,445 +13,11 @@ #ifndef QMCPLUSPLUS_ROTATION_HELPER_H #define QMCPLUSPLUS_ROTATION_HELPER_H -#include "QMCWaveFunctions/SPOSet.h" - +#include "QMCWaveFunctions/RotatedSPOsT.h" namespace qmcplusplus { -class RotatedSPOs; -namespace testing -{ -opt_variables_type& getMyVarsFull(RotatedSPOs& rot); -std::vector>& getHistoryParams(RotatedSPOs& rot); -} // namespace testing - -class RotatedSPOs : public SPOSet, public OptimizableObject -{ -public: - //constructor - RotatedSPOs(const std::string& my_name, std::unique_ptr&& spos); - //destructor - ~RotatedSPOs() override; - - std::string getClassName() const override { return "RotatedSPOs"; } - bool isOptimizable() const override { return true; } - bool isOMPoffload() const override { return Phi->isOMPoffload(); } - bool hasIonDerivs() const override { return Phi->hasIonDerivs(); } - - // Vector of rotation matrix indices - using RotationIndices = std::vector>; - - // Active orbital rotation parameter indices - RotationIndices m_act_rot_inds; - - // Full set of rotation values for global rotation - RotationIndices m_full_rot_inds; - - // Construct a list of the matrix indices for non-zero rotation parameters. - // (The structure for a sparse representation of the matrix) - // Only core->active rotations are created. - static void createRotationIndices(int nel, int nmo, RotationIndices& rot_indices); - - // Construct a list for all the matrix indices, including core->active, core->core and active->active - static void createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices); - - // Fill in antisymmetric matrix from the list of rotation parameter indices - // and a list of parameter values. - // This function assumes rot_mat is properly sized upon input and is set to zero. 
- static void constructAntiSymmetricMatrix(const RotationIndices& rot_indices, - const std::vector& param, - ValueMatrix& rot_mat); - - // Extract the list of rotation parameters from the entries in an antisymmetric matrix - // This function expects rot_indices and param are the same length. - static void extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices, - const ValueMatrix& rot_mat, - std::vector& param); - - //function to perform orbital rotations - void apply_rotation(const std::vector& param, bool use_stored_copy); - - // For global rotation, inputs are the old parameters and the delta parameters. - // The corresponding rotation matrices are constructed, multiplied together, - // and the new parameters extracted. - // The new rotation is applied to the underlying SPO coefficients - void applyDeltaRotation(const std::vector& delta_param, - const std::vector& old_param, - std::vector& new_param); - - // Perform the construction of matrices and extraction of parameters for a delta rotation. - // Split out and made static for testing. 
- static void constructDeltaRotation(const std::vector& delta_param, - const std::vector& old_param, - const RotationIndices& act_rot_inds, - const RotationIndices& full_rot_inds, - std::vector& new_param, - ValueMatrix& new_rot_mat); - - // When initializing the rotation from VP files - // This function applies the rotation history - void applyRotationHistory(); - - // This function applies the global rotation (similar to apply_rotation, but for the full - // set of rotation parameters) - void applyFullRotation(const std::vector& full_param, bool use_stored_copy); - - // Compute matrix exponential of an antisymmetric matrix (result is rotation matrix) - static void exponentiate_antisym_matrix(ValueMatrix& mat); - - // Compute matrix log of rotation matrix to produce antisymmetric matrix - static void log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output); - - //A particular SPOSet used for Orbitals - std::unique_ptr Phi; - - /// Set the rotation parameters (usually from input file) - void setRotationParameters(const std::vector& param_list); - - /// the number of electrons of the majority spin - size_t nel_major_; - - std::unique_ptr makeClone() const override; - - // myG_temp (myL_temp) is the Gradient (Laplacian) value of of the Determinant part of the wfn - // myG_J is the Gradient of the all other parts of the wavefunction (typically just the Jastrow). - // It represents \frac{\nabla\psi_{J}}{\psi_{J}} - // myL_J will be used to represent \frac{\nabla^2\psi_{J}}{\psi_{J}} . 
The Laplacian portion - // IMPORTANT NOTE: The value of P.L holds \nabla^2 ln[\psi] but we need \frac{\nabla^2 \psi}{\psi} and this is what myL_J will hold - ParticleSet::ParticleGradient myG_temp, myG_J; - ParticleSet::ParticleLaplacian myL_temp, myL_J; - - ValueMatrix Bbar; - ValueMatrix psiM_inv; - ValueMatrix psiM_all; - GradMatrix dpsiM_all; - ValueMatrix d2psiM_all; - - - // Single Slater creation - void buildOptVariables(size_t nel); - - // For the MSD case rotations must be created in MultiSlaterDetTableMethod class - void buildOptVariables(const RotationIndices& rotations, const RotationIndices& full_rotations); - - - void evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const int& FirstIndex, - const int& LastIndex) override; - - void evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - int FirstIndex, - int LastIndex) override; - - void evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const GradMatrix& grads_up, - const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, - const ValueMatrix& B_lapl, - const std::vector& detData_up, - const size_t N1, - const size_t N2, - const size_t NP1, - const size_t NP2, - const std::vector>& lookup_tbl) override; - - void evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - const QTFull::ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - 
const ValueVector& detValues_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl) override; - - //helper function to evaluatederivative; evaluate orbital rotation parameter derivative using table method - void table_method_eval(Vector& dlogpsi, - Vector& dhpsioverpsi, - const ParticleSet::ParticleLaplacian& myL_J, - const ParticleSet::ParticleGradient& myG_J, - const size_t nel, - const size_t nmo, - const ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const GradMatrix& grads_up, - const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, - const ValueMatrix& B_lapl, - const std::vector& detData_up, - const size_t N1, - const size_t N2, - const size_t NP1, - const size_t NP2, - const std::vector>& lookup_tbl); - - void table_method_evalWF(Vector& dlogpsi, - const size_t nel, - const size_t nmo, - const ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl); - - void extractOptimizableObjectRefs(UniqueOptObjRefs& opt_obj_refs) override { opt_obj_refs.push_back(*this); } - - void checkInVariablesExclusive(opt_variables_type& active) override - { - if (myVars.size()) - active.insertFrom(myVars); - } - - void checkOutVariables(const opt_variables_type& active) override { myVars.getIndex(active); } - - ///reset - void 
resetParametersExclusive(const opt_variables_type& active) override; - - void writeVariationalParameters(hdf_archive& hout) override; - - void readVariationalParameters(hdf_archive& hin) override; - - //********************************************************************************* - //the following functions simply call Phi's corresponding functions - void setOrbitalSetSize(int norbs) override { Phi->setOrbitalSetSize(norbs); } - - void checkObject() const override { Phi->checkObject(); } - - void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override - { - assert(psi.size() <= OrbitalSetSize); - Phi->evaluateValue(P, iat, psi); - } - - - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override - { - assert(psi.size() <= OrbitalSetSize); - Phi->evaluateVGL(P, iat, psi, dpsi, d2psi); - } - - void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override - { - Phi->evaluateDetRatios(VP, psi, psiinv, ratios); - } - - void evaluateDerivRatios(const VirtualParticleSet& VP, - const opt_variables_type& optvars, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios, - Matrix& dratios, - int FirstIndex, - int LastIndex) override; - - void evaluateVGH(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override - { - assert(psi.size() <= OrbitalSetSize); - Phi->evaluateVGH(P, iat, psi, dpsi, grad_grad_psi); - } - - - void evaluateVGHGH(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override - { - Phi->evaluateVGHGH(P, iat, psi, dpsi, grad_grad_psi, grad_grad_grad_psi); - } - - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override - { - Phi->evaluate_notranspose(P, first, last, 
logdet, dlogdet, d2logdet); - } - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) override - { - Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, grad_grad_logdet); - } - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) override - { - Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet); - } - - void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi) override - { - Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi); - } - - void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, - GradMatrix& grad_lapl_phi) override - { - Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi, grad_grad_phi, grad_lapl_phi); - } - - // void evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& grad_grad_grad_logdet) - // {Phi->evaluateThridDeriv(P, first, last, grad_grad_grad_logdet); } - - /// Use history list (false) or global rotation (true) - void set_use_global_rotation(bool use_global_rotation) { use_global_rot_ = use_global_rotation; } - - void mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const override; - - void mw_evaluateValue(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list) const override; - - void mw_evaluateVGL(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - 
const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const override; - - void mw_evaluateVGLWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list, - OffloadMatrix& mw_dspin) const override; - - void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const override; - - void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads, - std::vector& spingrads) const override; - - void mw_evaluate_notranspose(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int first, - int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const override; - - void createResource(ResourceCollection& collection) const override; - - void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const override; - - void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const override; - -private: - /// true if SPO parameters (orbital rotation parameters) have been supplied by input - bool params_supplied; - /// list of supplied orbital rotation parameters - std::vector params; - - /// Full set of rotation matrix parameters for use in global rotation method - opt_variables_type myVarsFull; - - /// timer for apply_rotation - NewTimer& apply_rotation_timer_; - - /// List of previously applied parameters - std::vector> history_params_; - - static RefVectorWithLeader extractPhiRefList(const RefVectorWithLeader& spo_list); - - /// Use global rotation or 
history list - bool use_global_rot_ = true; - - friend opt_variables_type& testing::getMyVarsFull(RotatedSPOs& rot); - friend std::vector>& testing::getHistoryParams(RotatedSPOs& rot); -}; - +using RotatedSPOs = RotatedSPOsT; } //namespace qmcplusplus diff --git a/src/QMCWaveFunctions/RotatedSPOsT.cpp b/src/QMCWaveFunctions/RotatedSPOsT.cpp index 1aa8af8ada..116cc3d143 100644 --- a/src/QMCWaveFunctions/RotatedSPOsT.cpp +++ b/src/QMCWaveFunctions/RotatedSPOsT.cpp @@ -117,9 +117,8 @@ RotatedSPOsT::extractParamsFromAntiSymmetricMatrix( } } -template -void -RotatedSPOsT::resetParametersExclusive(const OptVariablesType& active) +template +void RotatedSPOsT::resetParametersExclusive(const OptVariablesTypeT& active) { std::vector delta_param(m_act_rot_inds.size()); @@ -658,12 +657,15 @@ RotatedSPOsT::log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output) } } -template -void -RotatedSPOsT::evaluateDerivRatios(const VirtualParticleSetT& VP, - const OptVariablesType& optvars, ValueVector& psi, - const ValueVector& psiinv, std::vector& ratios, Matrix& dratios, - int FirstIndex, int LastIndex) +template +void RotatedSPOsT::evaluateDerivRatios(const VirtualParticleSetT& VP, + const OptVariablesTypeT& optvars, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios, + Matrix& dratios, + int FirstIndex, + int LastIndex) { Phi->evaluateDetRatios(VP, psi, psiinv, ratios); @@ -740,11 +742,12 @@ RotatedSPOsT::evaluateDerivRatios(const VirtualParticleSetT& VP, } } -template -void -RotatedSPOsT::evaluateDerivativesWF(ParticleSetT& P, - const OptVariablesType& optvars, Vector& dlogpsi, int FirstIndex, - int LastIndex) +template +void RotatedSPOsT::evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + int FirstIndex, + int LastIndex) { const size_t nel = LastIndex - FirstIndex; const size_t nmo = Phi->getOrbitalSetSize(); @@ -789,11 +792,13 @@ RotatedSPOsT::evaluateDerivativesWF(ParticleSetT& P, } } -template -void 
-RotatedSPOsT::evaluateDerivatives(ParticleSetT& P, - const OptVariablesType& optvars, Vector& dlogpsi, - Vector& dhpsioverpsi, const int& FirstIndex, const int& LastIndex) +template +void RotatedSPOsT::evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const int& FirstIndex, + const int& LastIndex) { const size_t nel = LastIndex - FirstIndex; const size_t nmo = Phi->getOrbitalSetSize(); @@ -895,21 +900,33 @@ RotatedSPOsT::evaluateDerivatives(ParticleSetT& P, } } -template -void -RotatedSPOsT::evaluateDerivatives(ParticleSetT& P, - const OptVariablesType& optvars, Vector& dlogpsi, - Vector& dhpsioverpsi, const T& psiCurrent, const std::vector& Coeff, - const std::vector& C2node_up, const std::vector& C2node_dn, - const ValueVector& detValues_up, const ValueVector& detValues_dn, - const GradMatrix& grads_up, const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, const ValueMatrix& B_lapl, - const std::vector& detData_up, const size_t N1, const size_t N2, - const size_t NP1, const size_t NP2, - const std::vector>& lookup_tbl) +template +void RotatedSPOsT::evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const T& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const GradMatrix& grads_up, + const GradMatrix& grads_dn, + const ValueMatrix& lapls_up, + const ValueMatrix& lapls_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const GradMatrix& B_grad, + const ValueMatrix& B_lapl, + const std::vector& detData_up, + const size_t N1, + const size_t N2, + const size_t NP1, + 
const size_t NP2, + const std::vector>& lookup_tbl) { bool recalculate(false); for (int k = 0; k < this->myVars.size(); ++k) { @@ -970,17 +987,22 @@ RotatedSPOsT::evaluateDerivatives(ParticleSetT& P, } } -template -void -RotatedSPOsT::evaluateDerivativesWF(ParticleSetT& P, - const OptVariablesType& optvars, Vector& dlogpsi, - const ValueType& psiCurrent, const std::vector& Coeff, - const std::vector& C2node_up, const std::vector& C2node_dn, - const ValueVector& detValues_up, const ValueVector& detValues_dn, - const ValueMatrix& M_up, const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl) +template +void RotatedSPOsT::evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + const FullValueType& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const std::vector& detData_up, + const std::vector>& lookup_tbl) { bool recalculate(false); for (int k = 0; k < this->myVars.size(); ++k) { diff --git a/src/QMCWaveFunctions/RotatedSPOsT.h b/src/QMCWaveFunctions/RotatedSPOsT.h index fa4778a6f4..1ee3b07533 100644 --- a/src/QMCWaveFunctions/RotatedSPOsT.h +++ b/src/QMCWaveFunctions/RotatedSPOsT.h @@ -23,10 +23,8 @@ template class RotatedSPOsT; namespace testing { -OptVariablesType& -getMyVarsFull(RotatedSPOsT& rot); -OptVariablesType& -getMyVarsFull(RotatedSPOsT& rot); +OptVariablesTypeT& getMyVarsFull(RotatedSPOsT& rot); +OptVariablesTypeT& getMyVarsFull(RotatedSPOsT& rot); std::vector>& getHistoryParams(RotatedSPOsT& rot); std::vector>& @@ -40,6 +38,7 @@ class RotatedSPOsT : public SPOSetT, public OptimizableObjectT using IndexType = typename SPOSetT::IndexType; using RealType = typename SPOSetT::RealType; 
using ValueType = typename SPOSetT::ValueType; + using FullValueType = typename SPOSetT::FullValueType; using GradType = typename SPOSetT::GradType; using ComplexType = typename SPOSetT::ComplexType; using FullRealType = typename SPOSetT::FullRealType; @@ -199,40 +198,61 @@ class RotatedSPOsT : public SPOSetT, public OptimizableObjectT buildOptVariables(const RotationIndices& rotations, const RotationIndices& full_rotations); - void - evaluateDerivatives(ParticleSetT& P, const OptVariablesType& optvars, - Vector& dlogpsi, Vector& dhpsioverpsi, const int& FirstIndex, - const int& LastIndex) override; - - void - evaluateDerivativesWF(ParticleSetT& P, - const OptVariablesType& optvars, Vector& dlogpsi, int FirstIndex, - int LastIndex) override; - - void - evaluateDerivatives(ParticleSetT& P, const OptVariablesType& optvars, - Vector& dlogpsi, Vector& dhpsioverpsi, const T& psiCurrent, - const std::vector& Coeff, const std::vector& C2node_up, - const std::vector& C2node_dn, const ValueVector& detValues_up, - const ValueVector& detValues_dn, const GradMatrix& grads_up, - const GradMatrix& grads_dn, const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, const ValueMatrix& M_up, - const ValueMatrix& M_dn, const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, const GradMatrix& B_grad, - const ValueMatrix& B_lapl, const std::vector& detData_up, - const size_t N1, const size_t N2, const size_t NP1, const size_t NP2, - const std::vector>& lookup_tbl) override; - - void - evaluateDerivativesWF(ParticleSetT& P, - const OptVariablesType& optvars, Vector& dlogpsi, - const ValueType& psiCurrent, const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, const ValueVector& detValues_up, - const ValueVector& detValues_dn, const ValueMatrix& M_up, - const ValueMatrix& M_dn, const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, const std::vector& detData_up, - const std::vector>& lookup_tbl) override; + void evaluateDerivatives(ParticleSetT& 
P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const int& FirstIndex, + const int& LastIndex) override; + + void evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + int FirstIndex, + int LastIndex) override; + + void evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const T& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const GradMatrix& grads_up, + const GradMatrix& grads_dn, + const ValueMatrix& lapls_up, + const ValueMatrix& lapls_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const GradMatrix& B_grad, + const ValueMatrix& B_lapl, + const std::vector& detData_up, + const size_t N1, + const size_t N2, + const size_t NP1, + const size_t NP2, + const std::vector>& lookup_tbl) override; + + void evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + const FullValueType& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const std::vector& detData_up, + const std::vector>& lookup_tbl) override; // helper function to evaluatederivative; evaluate orbital rotation // parameter derivative using table method @@ -268,22 +288,16 @@ class RotatedSPOsT : public SPOSetT, public OptimizableObjectT opt_obj_refs.push_back(*this); } - void - checkInVariablesExclusive(OptVariablesType& active) override + void checkInVariablesExclusive(OptVariablesTypeT& active) override { if (this->myVars.size()) active.insertFrom(this->myVars); } - void - 
checkOutVariables(const OptVariablesType& active) override - { - this->myVars.getIndex(active); - } + void checkOutVariables(const OptVariablesTypeT& active) override { this->myVars.getIndex(active); } /// reset - void - resetParametersExclusive(const OptVariablesType& active) override; + void resetParametersExclusive(const OptVariablesTypeT& active) override; void writeVariationalParameters(hdf_archive& hout) override; @@ -327,11 +341,14 @@ class RotatedSPOsT : public SPOSetT, public OptimizableObjectT Phi->evaluateDetRatios(VP, psi, psiinv, ratios); } - void - evaluateDerivRatios(const VirtualParticleSetT& VP, - const OptVariablesType& optvars, ValueVector& psi, - const ValueVector& psiinv, std::vector& ratios, Matrix& dratios, - int FirstIndex, int LastIndex) override; + void evaluateDerivRatios(const VirtualParticleSetT& VP, + const OptVariablesTypeT& optvars, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios, + Matrix& dratios, + int FirstIndex, + int LastIndex) override; void evaluateVGH(const ParticleSetT& P, int iat, ValueVector& psi, @@ -474,7 +491,7 @@ class RotatedSPOsT : public SPOSetT, public OptimizableObjectT std::vector params; /// Full set of rotation matrix parameters for use in global rotation method - OptVariablesType myVarsFull; + OptVariablesTypeT myVarsFull; /// List of previously applied parameters std::vector> history_params_; @@ -484,11 +501,8 @@ class RotatedSPOsT : public SPOSetT, public OptimizableObjectT static RefVectorWithLeader> extractPhiRefList(const RefVectorWithLeader>& spo_list); - - friend OptVariablesType& - testing::getMyVarsFull(RotatedSPOsT& rot); - friend OptVariablesType& - testing::getMyVarsFull(RotatedSPOsT& rot); + friend OptVariablesTypeT& testing::getMyVarsFull(RotatedSPOsT& rot); + friend OptVariablesTypeT& testing::getMyVarsFull(RotatedSPOsT& rot); friend std::vector>& testing::getHistoryParams(RotatedSPOsT& rot); friend std::vector>& diff --git a/src/QMCWaveFunctions/SPOSet.cpp 
b/src/QMCWaveFunctions/SPOSet.cpp deleted file mode 100644 index dacfd4423b..0000000000 --- a/src/QMCWaveFunctions/SPOSet.cpp +++ /dev/null @@ -1,406 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign -// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "SPOSet.h" -#include "Message/Communicate.h" -#include "Numerics/MatrixOperators.h" -#include "OhmmsData/AttributeSet.h" -#include "CPU/SIMD/inner_product.hpp" -#include "Utilities/ProgressReportEngine.h" -#include "hdf/hdf_archive.h" -#include - -namespace qmcplusplus -{ -SPOSet::SPOSet(const std::string& my_name) : my_name_(my_name), OrbitalSetSize(0) {} - -void SPOSet::extractOptimizableObjectRefs(UniqueOptObjRefs&) -{ - if (isOptimizable()) - throw std::logic_error("Bug!! " + getClassName() + - "::extractOptimizableObjectRefs " - "must be overloaded when the SPOSet is optimizable."); -} - -void SPOSet::checkOutVariables(const opt_variables_type& active) -{ - if (isOptimizable()) - throw std::logic_error("Bug!! 
" + getClassName() + - "::checkOutVariables " - "must be overloaded when the SPOSet is optimizable."); -} - -void SPOSet::evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) -{ - assert(psi.size() == psiinv.size()); - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - evaluateValue(VP, iat, psi); - ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size()); - } -} - -void SPOSet::mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const -{ - assert(this == &spo_list.getLeader()); - for (int iw = 0; iw < spo_list.size(); iw++) - { - Vector invRow(const_cast(invRow_ptr_list[iw]), psi_list[iw].get().size()); - spo_list[iw].evaluateDetRatios(vp_list[iw], psi_list[iw], invRow, ratios_list[iw]); - } -} - -void SPOSet::evaluateVGL_spin(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi, - ValueVector& dspin) -{ - throw std::runtime_error("Need specialization of SPOSet::evaluateVGL_spin"); -} - -void SPOSet::mw_evaluateVGL(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const -{ - assert(this == &spo_list.getLeader()); - for (int iw = 0; iw < spo_list.size(); iw++) - spo_list[iw].evaluateVGL(P_list[iw], iat, psi_v_list[iw], dpsi_v_list[iw], d2psi_v_list[iw]); -} - -void SPOSet::mw_evaluateValue(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list) const -{ - assert(this == &spo_list.getLeader()); - for (int iw = 0; iw < spo_list.size(); iw++) - spo_list[iw].evaluateValue(P_list[iw], iat, psi_v_list[iw]); -} - -void SPOSet::mw_evaluateVGLWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int 
iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list, - OffloadMatrix& mw_dspin) const -{ - throw std::runtime_error(getClassName() + "::mw_evaluateVGLWithSpin() is not supported. \n"); -} - -void SPOSet::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const -{ - assert(this == &spo_list.getLeader()); - assert(phi_vgl_v.size(0) == DIM_VGL); - assert(phi_vgl_v.size(1) == spo_list.size()); - const size_t nw = spo_list.size(); - const size_t norb_requested = phi_vgl_v.size(2); - GradVector dphi_v(norb_requested); - for (int iw = 0; iw < nw; iw++) - { - ValueVector phi_v(phi_vgl_v.data_at(0, iw, 0), norb_requested); - ValueVector d2phi_v(phi_vgl_v.data_at(4, iw, 0), norb_requested); - spo_list[iw].evaluateVGL(P_list[iw], iat, phi_v, dphi_v, d2phi_v); - - ratios[iw] = simd::dot(invRow_ptr_list[iw], phi_v.data(), norb_requested); - grads[iw] = simd::dot(invRow_ptr_list[iw], dphi_v.data(), norb_requested) / ratios[iw]; - - // transpose the array of gradients to SoA in phi_vgl_v - for (size_t idim = 0; idim < DIM; idim++) - { - ValueType* phi_g = phi_vgl_v.data_at(idim + 1, iw, 0); - for (size_t iorb = 0; iorb < norb_requested; iorb++) - phi_g[iorb] = dphi_v[iorb][idim]; - } - } - phi_vgl_v.updateTo(); -} - -void SPOSet::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads, - std::vector& spingrads) const -{ - throw std::runtime_error("Need specialization of " + getClassName() + - "::mw_evaluateVGLandDetRatioGradsWithSpin(). 
\n"); -} - -void SPOSet::evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& grad_grad_grad_logdet) -{ - throw std::runtime_error("Need specialization of SPOSet::evaluateThirdDeriv(). \n"); -} - -void SPOSet::evaluate_notranspose_spin(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet, - ValueMatrix& dspinlogdet) -{ - throw std::runtime_error("Need specialization of " + getClassName() + - "::evaluate_notranspose_spin(P,iat,psi,dpsi,d2logdet, dspin_logdet) (vector quantities)\n"); -} - -void SPOSet::mw_evaluate_notranspose(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int first, - int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const -{ - assert(this == &spo_list.getLeader()); - for (int iw = 0; iw < spo_list.size(); iw++) - spo_list[iw].evaluate_notranspose(P_list[iw], first, last, logdet_list[iw], dlogdet_list[iw], d2logdet_list[iw]); -} - -void SPOSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) -{ - throw std::runtime_error("Need specialization of SPOSet::evaluate_notranspose() for grad_grad_logdet. \n"); -} - -void SPOSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) -{ - throw std::runtime_error("Need specialization of SPOSet::evaluate_notranspose() for grad_grad_grad_logdet. 
\n"); -} - - -std::unique_ptr SPOSet::makeClone() const -{ - throw std::runtime_error("Missing SPOSet::makeClone for " + getClassName()); -} - -void SPOSet::basic_report(const std::string& pad) const -{ - app_log() << pad << "size = " << size() << std::endl; - app_log() << pad << "state info:" << std::endl; - //states.report(pad+" "); - app_log().flush(); -} - -void SPOSet::evaluateVGH(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi) -{ - throw std::runtime_error("Need specialization of " + getClassName() + - "::evaluate(P,iat,psi,dpsi,dhpsi) (vector quantities)\n"); -} - -void SPOSet::evaluateVGHGH(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) -{ - throw std::runtime_error("Need specialization of " + getClassName() + - "::evaluate(P,iat,psi,dpsi,dhpsi,dghpsi) (vector quantities)\n"); -} - -void SPOSet::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) -{ - if (isRotationSupported()) - throw std::logic_error("Bug!! " + getClassName() + - "::applyRotation " - "must be overloaded when the SPOSet supports rotation."); -} - -void SPOSet::evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const int& FirstIndex, - const int& LastIndex) -{ - if (isOptimizable()) - throw std::logic_error("Bug!! " + getClassName() + - "::evaluateDerivatives " - "must be overloaded when the SPOSet is optimizable."); -} - -void SPOSet::evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - int FirstIndex, - int LastIndex) -{ - if (isOptimizable()) - throw std::logic_error("Bug!! 
" + getClassName() + - "::evaluateDerivativesWF " - "must be overloaded when the SPOSet is optimizable."); -} - -void SPOSet::evaluateDerivRatios(const VirtualParticleSet& VP, - const opt_variables_type& optvars, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios, - Matrix& dratios, - int FirstIndex, - int LastIndex) -{ - // Match the fallback in WaveFunctionComponent that evaluates just the ratios - evaluateDetRatios(VP, psi, psiinv, ratios); - - if (isOptimizable()) - throw std::logic_error("Bug!! " + getClassName() + - "::evaluateDerivRatios " - "must be overloaded when the SPOSet is optimizable."); -} - - -/** Evaluate the derivative of the optimized orbitals with respect to the parameters - * this is used only for MSD, to be refined for better serving both single and multi SD - */ -void SPOSet::evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const GradMatrix& grads_up, - const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, - const ValueMatrix& B_lapl, - const std::vector& detData_up, - const size_t N1, - const size_t N2, - const size_t NP1, - const size_t NP2, - const std::vector>& lookup_tbl) -{ - if (isOptimizable()) - throw std::logic_error("Bug!! 
" + getClassName() + - "::evaluateDerivatives " - "must be overloaded when the SPOSet is optimizable."); -} - -/** Evaluate the derivative of the optimized orbitals with respect to the parameters - * this is used only for MSD, to be refined for better serving both single and multi SD - */ -void SPOSet::evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - const QTFull::ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl) -{ - if (isOptimizable()) - throw std::logic_error("Bug!! " + getClassName() + - "::evaluateDerivativesWF " - "must be overloaded when the SPOSet is optimizable."); -} - - -void SPOSet::evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi) -{ - if (hasIonDerivs()) - throw std::logic_error("Bug!! " + getClassName() + - "::evaluateGradSource " - "must be overloaded when the SPOSet has ion derivatives."); -} - -void SPOSet::evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, - GradMatrix& grad_lapl_phi) -{ - if (hasIonDerivs()) - throw std::logic_error("Bug!! " + getClassName() + - "::evaluateGradSource " - "must be overloaded when the SPOSet has ion derivatives."); -} - -void SPOSet::evaluateGradSourceRow(const ParticleSet& P, - int iel, - const ParticleSet& source, - int iat_src, - GradVector& gradphi) -{ - if (hasIonDerivs()) - throw std::logic_error("Bug!! 
" + getClassName() + - "::evaluateGradSourceRow " - "must be overloaded when the SPOSet has ion derivatives."); -} - -void SPOSet::evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi) -{ - throw std::runtime_error("Need specialization of " + getClassName() + - "::evaluate_spin(P,iat,psi,dpsi) (vector quantities)\n"); -} - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/SPOSet.h b/src/QMCWaveFunctions/SPOSet.h index 4f7c8e8175..0313f3ebfc 100644 --- a/src/QMCWaveFunctions/SPOSet.h +++ b/src/QMCWaveFunctions/SPOSet.h @@ -20,544 +20,11 @@ #ifndef QMCPLUSPLUS_SINGLEPARTICLEORBITALSETBASE_H #define QMCPLUSPLUS_SINGLEPARTICLEORBITALSETBASE_H -#include "OhmmsPETE/OhmmsArray.h" -#include "Particle/ParticleSet.h" -#include "Particle/VirtualParticleSet.h" -#include "QMCWaveFunctions/OrbitalSetTraits.h" -#include "OptimizableObject.h" -#include "OMPTarget/OffloadAlignedAllocators.hpp" -#include "DualAllocatorAliases.hpp" +#include "QMCWaveFunctions/SPOSetT.h" namespace qmcplusplus { -class ResourceCollection; - -class SPOSet; -namespace testing -{ -opt_variables_type& getMyVars(SPOSet& spo); -} - - -/** base class for Single-particle orbital sets - * - * SPOSet stands for S(ingle)P(article)O(rbital)Set which contains - * a number of single-particle orbitals with capabilities of evaluating \f$ \psi_j({\bf r}_i)\f$ - */ -class SPOSet : public QMCTraits -{ -public: - using ValueVector = OrbitalSetTraits::ValueVector; - using ValueMatrix = OrbitalSetTraits::ValueMatrix; - using GradVector = OrbitalSetTraits::GradVector; - using GradMatrix = OrbitalSetTraits::GradMatrix; - using HessVector = OrbitalSetTraits::HessVector; - using HessMatrix = OrbitalSetTraits::HessMatrix; - using GGGVector = OrbitalSetTraits::GradHessVector; - using GGGMatrix = OrbitalSetTraits::GradHessMatrix; - using SPOMap = std::map>; - using OffloadMWVGLArray = Array>; // [VGL, walker, Orbs] - using OffloadMWVArray = Array>; // [walker, Orbs] - template - using 
OffloadMatrix = Matrix>; - - /** constructor */ - SPOSet(const std::string& my_name); - - /** destructor - * - * Derived class destructor needs to pay extra attention to freeing memory shared among clones of SPOSet. - */ - virtual ~SPOSet() = default; - - /** return the size of the orbital set - * Ye: this needs to be replaced by getOrbitalSetSize(); - */ - inline int size() const { return OrbitalSetSize; } - - /** print basic SPOSet information - */ - void basic_report(const std::string& pad = "") const; - - /** print SPOSet information - */ - virtual void report(const std::string& pad = "") const { basic_report(pad); } - - - /** return the size of the orbitals - */ - inline int getOrbitalSetSize() const { return OrbitalSetSize; } - - /// Query if this SPOSet is optimizable - virtual bool isOptimizable() const { return false; } - - /** extract underlying OptimizableObject references - * @param opt_obj_refs aggregated list of optimizable object references - */ - virtual void extractOptimizableObjectRefs(UniqueOptObjRefs& opt_obj_refs); - - /** check out variational optimizable variables - * @param active a super set of optimizable variables - */ - virtual void checkOutVariables(const opt_variables_type& active); - - /// Query if this SPOSet uses OpenMP offload - virtual bool isOMPoffload() const { return false; } - - /** Query if this SPOSet has an explicit ion dependence. returns true if it does. - */ - virtual bool hasIonDerivs() const { return false; } - - /// check a few key parameters before putting the SPO into a determinant - virtual void checkObject() const {} - - /// return true if this SPOSet can be wrappered by RotatedSPO - virtual bool isRotationSupported() const { return false; } - /// store parameters before getting destroyed by rotation. 
- virtual void storeParamsBeforeRotation() {} - /// apply rotation to all the orbitals - virtual void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy = false); - - /// Parameter derivatives of the wavefunction and the Laplacian of the wavefunction - virtual void evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const int& FirstIndex, - const int& LastIndex); - - /// Parameter derivatives of the wavefunction - virtual void evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - int FirstIndex, - int LastIndex); - - /** Evaluate the derivative of the optimized orbitals with respect to the parameters - * this is used only for MSD, to be refined for better serving both single and multi SD - */ - virtual void evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const GradMatrix& grads_up, - const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, - const ValueMatrix& B_lapl, - const std::vector& detData_up, - const size_t N1, - const size_t N2, - const size_t NP1, - const size_t NP2, - const std::vector>& lookup_tbl); - - /** Evaluate the derivative of the optimized orbitals with respect to the parameters - * this is used only for MSD, to be refined for better serving both single and multi SD - */ - virtual void evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - const QTFull::ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& 
C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl); - - /** set the OrbitalSetSize - * @param norbs number of single-particle orbitals - * Ye: I prefer to remove this interface in the future. SPOSet builders need to handle the size correctly. - * It doesn't make sense allowing to set the value at any place in the code. - */ - virtual void setOrbitalSetSize(int norbs) = 0; - - /** evaluate the values of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - */ - virtual void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) = 0; - - /** evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP - * @param VP virtual particle set - * @param psi values of the SPO, used as a scratch space if needed - * @param psiinv the row of inverse slater matrix corresponding to the particle moved virtually - * @param ratios return determinant ratios - */ - virtual void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios); - - - /// Determinant ratios and parameter derivatives of the wavefunction for virtual moves - virtual void evaluateDerivRatios(const VirtualParticleSet& VP, - const opt_variables_type& optvars, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios, - Matrix& dratios, - int FirstIndex, - int LastIndex); - - - /** evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP, of multiple walkers - * @param spo_list the list of SPOSet pointers in a walker batch - * @param vp_list a list of virtual particle sets in a walker batch - * @param psi_list a list of values of the SPO, used as a scratch space if needed - * @param invRow_ptr_list a list of 
pointers to the rows of inverse slater matrix corresponding to the particles moved virtually - * @param ratios_list a list of returning determinant ratios - */ - virtual void mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const; - - /** evaluate the values, gradients and laplacians of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - * @param dpsi gradients of the SPO - * @param d2psi laplacians of the SPO - */ - virtual void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) = 0; - - /** evaluate the values, gradients and laplacians and spin gradient of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - * @param dpsi gradients of the SPO - * @param d2psi laplacians of the SPO - * @param dspin spin gradients of the SPO - */ - virtual void evaluateVGL_spin(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi, - ValueVector& dspin); - - /** evaluate the values this single-particle orbital sets of multiple walkers - * @param spo_list the list of SPOSet pointers in a walker batch - * @param P_list the list of ParticleSet pointers in a walker batch - * @param iat active particle - * @param psi_v_list the list of value vector pointers in a walker batch - */ - virtual void mw_evaluateValue(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list) const; - - /** evaluate the values, gradients and laplacians of this single-particle orbital sets of multiple walkers - * @param spo_list the list of SPOSet pointers in a walker batch - * @param P_list the list of ParticleSet pointers in a walker batch - * @param iat active particle 
- * @param psi_v_list the list of value vector pointers in a walker batch - * @param dpsi_v_list the list of gradient vector pointers in a walker batch - * @param d2psi_v_list the list of laplacian vector pointers in a walker batch - */ - virtual void mw_evaluateVGL(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const; - - /** evaluate the values, gradients and laplacians and spin gradient of this single-particle orbital sets of multiple walkers - * @param spo_list the list of SPOSet pointers in a walker batch - * @param P_list the list of ParticleSet pointers in a walker batch - * @param iat active particle - * @param psi_v_list the list of value vector pointers in a walker batch - * @param dpsi_v_list the list of gradient vector pointers in a walker batch - * @param d2psi_v_list the list of laplacian vector pointers in a walker batch - * @param mw_dspin is a dual matrix of spin gradients [nw][norb] - * Note that the device side of mw_dspin is up to date - */ - virtual void mw_evaluateVGLWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list, - OffloadMatrix& mw_dspin) const; - - /** evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio - * and grads of multiple walkers. 
Device data of phi_vgl_v must be up-to-date upon return - * @param spo_list the list of SPOSet pointers in a walker batch - * @param P_list the list of ParticleSet pointers in a walker batch - * @param iat active particle - * @param phi_vgl_v orbital values, gradients and laplacians of all the walkers - * @param psi_ratio_grads_v determinant ratio and grads of all the walkers - */ - virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const; - - /** evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio - * and grads of multiple walkers. Device data of phi_vgl_v must be up-to-date upon return. - * Includes spin gradients - * @param spo_list the list of SPOSet pointers in a walker batch - * @param P_list the list of ParticleSet pointers in a walker batch - * @param iat active particle - * @param phi_vgl_v orbital values, gradients and laplacians of all the walkers - * @param ratios, ratios of all walkers - * @param grads, spatial gradients of all walkers - * @param spingrads, spin gradients of all walkers - */ - virtual void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads, - std::vector& spingrads) const; - - /** evaluate the values, gradients and hessians of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - * @param dpsi gradients of the SPO - * @param grad_grad_psi hessians of the SPO - */ - virtual void evaluateVGH(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi); - - /** evaluate the values, gradients, 
hessians, and grad hessians of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - * @param dpsi gradients of the SPO - * @param grad_grad_psi hessians of the SPO - * @param grad_grad_grad_psi grad hessians of the SPO - */ - virtual void evaluateVGHGH(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi); - - /** evaluate the values of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - */ - virtual void evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi); - - /** evaluate the third derivatives of this single-particle orbital set - * @param P current ParticleSet - * @param first first particle - * @param last last particle - * @param grad_grad_grad_logdet third derivatives of the SPO - */ - virtual void evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& grad_grad_grad_logdet); - - /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles - * @param[in] P current ParticleSet - * @param[in] first starting index of the particles - * @param[in] last ending index of the particles - * @param[out] logdet determinant matrix to be inverted - * @param[out] dlogdet gradients - * @param[out] d2logdet laplacians - * - */ - virtual void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) = 0; - - /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles, including the spin gradient - * @param P current ParticleSet - * @param first starting index of the particles - * @param last ending index of the particles - * @param logdet determinant matrix to be inverted - * @param dlogdet gradients - * @param d2logdet 
laplacians - * @param dspinlogdet, spin gradients - * - * default implementation will abort for all SPOSets except SpinorSet - * - */ - virtual void evaluate_notranspose_spin(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet, - ValueMatrix& dspinlogdet); - - virtual void mw_evaluate_notranspose(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int first, - int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const; - - /** evaluate the values, gradients and hessians of this single-particle orbital for [first,last) particles - * @param P current ParticleSet - * @param first starting index of the particles - * @param last ending index of the particles - * @param logdet determinant matrix to be inverted - * @param dlogdet gradients - * @param grad_grad_logdet hessians - * - */ - virtual void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet); - - /** evaluate the values, gradients, hessians and third derivatives of this single-particle orbital for [first,last) particles - * @param P current ParticleSet - * @param first starting index of the particles - * @param last ending index of the particles - * @param logdet determinant matrix to be inverted - * @param dlogdet gradients - * @param grad_grad_logdet hessians - * @param grad_grad_grad_logdet third derivatives - * - */ - virtual void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet); - - /** evaluate the gradients of this single-particle orbital - * for [first,last) target particles with respect to the given source particle - * @param P current ParticleSet - * @param first starting index of the particles - * @param last ending index of the 
particles - * @param iat_src source particle index - * @param gradphi gradients - * - */ - virtual void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi); - - /** evaluate the gradients of values, gradients, laplacians of this single-particle orbital - * for [first,last) target particles with respect to the given source particle - * @param P current ParticleSet - * @param first starting index of the particles - * @param last ending index of the particles - * @param iat_src source particle index - * @param gradphi gradients of values - * @param grad_grad_phi gradients of gradients - * @param grad_lapl_phi gradients of laplacians - * - */ - virtual void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, - GradMatrix& grad_lapl_phi); - - /** @brief Returns a row of d/dR_iat phi_j(r) evaluated at position r. - * - * @param[in] P particle set. - * @param[in] iel The electron at which to evaluate phi(r_iel) - * @param[in] source ion particle set. - * @param[in] iat_src ion ID w.r.t. which to take derivative. - * @param[in,out] gradphi Vector of d/dR_iat phi_j(r). 
- * @return Void - */ - virtual void evaluateGradSourceRow(const ParticleSet& P, - int iel, - const ParticleSet& source, - int iat_src, - GradVector& gradphi); - - /** access the k point related to the given orbital */ - virtual PosType get_k(int orb) { return PosType(); } - - /** initialize a shared resource and hand it to collection - */ - virtual void createResource(ResourceCollection& collection) const {} - - /** acquire a shared resource from collection - */ - virtual void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const {} - - /** return a shared resource to collection - */ - virtual void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const {} - - /** make a clone of itself - * every derived class must implement this to have threading working correctly. - */ - [[noreturn]] virtual std::unique_ptr makeClone() const; - - /** Used only by cusp correction in AOS LCAO. - * Ye: the SoA LCAO moves all this responsibility to the builder. - * This interface should be removed with AoS. - */ - virtual bool transformSPOSet() { return true; } - - /** finalize the construction of SPOSet - * - * for example, classes serving accelerators may need to transfer data from host to device - * after the host side objects are built. 
- */ - virtual void finalizeConstruction() {} - - /// return object name - const std::string& getName() const { return my_name_; } - - /// return class name - virtual std::string getClassName() const = 0; - -protected: - /// name of the object, unique identifier - const std::string my_name_; - ///number of Single-particle orbitals - IndexType OrbitalSetSize; - /// Optimizable variables - opt_variables_type myVars; - - friend opt_variables_type& testing::getMyVars(SPOSet& spo); -}; - +using SPOSet = SPOSetT; using SPOSetPtr = SPOSet*; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/SPOSetBuilder.cpp b/src/QMCWaveFunctions/SPOSetBuilder.cpp deleted file mode 100644 index 4264cb15c4..0000000000 --- a/src/QMCWaveFunctions/SPOSetBuilder.cpp +++ /dev/null @@ -1,182 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Jaron T. 
Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "SPOSetBuilder.h" -#include "OhmmsData/AttributeSet.h" -#include - -#if !defined(QMC_COMPLEX) -#include "QMCWaveFunctions/RotatedSPOs.h" -#endif - -namespace qmcplusplus -{ -SPOSetBuilder::SPOSetBuilder(const std::string& type_name, Communicate* comm) - : MPIObjectBase(comm), legacy(true), type_name_(type_name) -{ - reserve_states(); -} - - -void SPOSetBuilder::reserve_states(int nsets) -{ - int sets_needed = nsets - states.size(); - if (sets_needed > 0) - for (int s = 0; s < sets_needed; ++s) - states.push_back(std::make_unique()); -} - - -std::unique_ptr SPOSetBuilder::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input_info) -{ - myComm->barrier_and_abort("BasisSetBase::createSPOSet(cur,input_info) has not been implemented"); - return 0; -} - - -std::unique_ptr SPOSetBuilder::createSPOSet(xmlNodePtr cur) -{ - std::string spo_object_name; - std::string optimize("no"); - - OhmmsAttributeSet attrib; - attrib.add(spo_object_name, "id"); - attrib.add(spo_object_name, "name"); - attrib.add(optimize, "optimize"); - attrib.put(cur); - - app_summary() << std::endl; - app_summary() << " Single particle orbitals (SPO)" << std::endl; - app_summary() << " ------------------------------" << std::endl; - app_summary() << " Name: " << spo_object_name << " Type: " << type_name_ - << " Builder class name: " << ClassName << std::endl; - app_summary() << std::endl; - - if (spo_object_name.empty()) - myComm->barrier_and_abort("SPOSet object \"name\" attribute not given in the input!"); - - // read specialized sposet construction requests - // and translate 
them into a set of orbital indices - SPOSetInputInfo input_info(cur); - - // process general sposet construction requests - // and preserve legacy interface - std::unique_ptr sposet; - - try - { - if (legacy && input_info.legacy_request) - sposet = createSPOSetFromXML(cur); - else - sposet = createSPOSet(cur, input_info); - } - catch (const UniformCommunicateError& ue) - { - myComm->barrier_and_abort(ue.what()); - } - - if (!sposet) - myComm->barrier_and_abort("SPOSetBuilder::createSPOSet sposet creation failed"); - - if (optimize == "rotation" || optimize == "yes") - { -#ifdef QMC_COMPLEX - app_error() << "Orbital optimization via rotation doesn't support complex wavefunction yet.\n"; - abort(); -#else - app_warning() << "Specifying orbital rotation via optimize tag is deprecated. Use the rotated_spo element instead" - << std::endl; - - sposet->storeParamsBeforeRotation(); - // create sposet with rotation - auto& sposet_ref = *sposet; - app_log() << " SPOSet " << sposet_ref.getName() << " is optimizable\n"; - if (!sposet_ref.isRotationSupported()) - myComm->barrier_and_abort("Orbital rotation not supported with '" + sposet_ref.getName() + "' of type '" + - sposet_ref.getClassName() + "'."); - auto rot_spo = std::make_unique(sposet_ref.getName(), std::move(sposet)); - xmlNodePtr tcur = cur->xmlChildrenNode; - while (tcur != NULL) - { - std::string cname((const char*)(tcur->name)); - if (cname == "opt_vars") - { - std::vector params; - putContent(params, tcur); - rot_spo->setRotationParameters(params); - } - tcur = tcur->next; - } - sposet = std::move(rot_spo); -#endif - } - - if (sposet->getName().empty()) - app_warning() << "SPOSet object doesn't have a name." << std::endl; - if (!spo_object_name.empty() && sposet->getName() != spo_object_name) - app_warning() << "SPOSet object name mismatched! 
input name: " << spo_object_name - << " object name: " << sposet->getName() << std::endl; - - sposet->checkObject(); - return sposet; -} - -std::unique_ptr SPOSetBuilder::createRotatedSPOSet(xmlNodePtr cur) -{ - std::string spo_object_name; - std::string method; - OhmmsAttributeSet attrib; - attrib.add(spo_object_name, "name"); - attrib.add(method, "method", {"global", "history"}); - attrib.put(cur); - - -#ifdef QMC_COMPLEX - myComm->barrier_and_abort("Orbital optimization via rotation doesn't support complex wavefunctions yet."); - return nullptr; -#else - std::unique_ptr sposet; - processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { - if (cname == "sposet") - { - sposet = createSPOSet(element); - } - }); - - if (!sposet) - myComm->barrier_and_abort("Rotated SPO needs an SPOset"); - - if (!sposet->isRotationSupported()) - myComm->barrier_and_abort("Orbital rotation not supported with '" + sposet->getName() + "' of type '" + - sposet->getClassName() + "'."); - - sposet->storeParamsBeforeRotation(); - auto rot_spo = std::make_unique(spo_object_name, std::move(sposet)); - - if (method == "history") - rot_spo->set_use_global_rotation(false); - - processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { - if (cname == "opt_vars") - { - std::vector params; - putContent(params, element); - rot_spo->setRotationParameters(params); - } - }); - return rot_spo; -#endif -} - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/SPOSetBuilder.h b/src/QMCWaveFunctions/SPOSetBuilder.h index 8827df190e..6f500ba61f 100644 --- a/src/QMCWaveFunctions/SPOSetBuilder.h +++ b/src/QMCWaveFunctions/SPOSetBuilder.h @@ -21,71 +21,12 @@ #ifndef QMCPLUSPLUS_SPOSET_BUILDER_H #define QMCPLUSPLUS_SPOSET_BUILDER_H -#include -#include -#include -#include "Message/MPIObjectBase.h" -#include "QMCWaveFunctions/SPOSetInfo.h" -#include "QMCWaveFunctions/SPOSetInputInfo.h" -#include "QMCWaveFunctions/SPOSet.h" -#include "hdf/hdf_archive.h" +#include 
"Configuration.h" +#include "QMCWaveFunctions/SPOSetBuilderT.h" namespace qmcplusplus { -/** base class for the real SPOSet builder - * - * \warning { - * We have not quite figured out how to use real/complex efficiently. - * There are three cases we have to deal with - * - real basis functions and real coefficients - * - real basis functions and complex coefficients - * - complex basis functions and complex coefficients - * For now, we decide to keep both real and complex basis sets and expect - * the user classes {\bf KNOW} what they need to use. - * } - */ -class SPOSetBuilder : public QMCTraits, public MPIObjectBase -{ -public: - using indices_t = std::vector; - using energies_t = std::vector; - - /// whether implementation conforms only to legacy standard - bool legacy; - - /// state info of all possible states available in the basis - std::vector> states; - - SPOSetBuilder(const std::string& type_name, Communicate* comm); - virtual ~SPOSetBuilder() {} - - /// reserve space for states (usually only one set, multiple for e.g. spin dependent einspline) - void reserve_states(int nsets = 1); - - /// allow modification of state information - inline void modify_states(int index = 0) { states[index]->modify(); } - - /// clear state information - inline void clear_states(int index = 0) { states[index]->clear(); } - - /// create an sposet from xml and save the resulting SPOSet - std::unique_ptr createSPOSet(xmlNodePtr cur); - - /// create orbital rotation transformation from xml and save the resulting SPOSet - std::unique_ptr createRotatedSPOSet(xmlNodePtr cur); - - const std::string& getTypeName() const { return type_name_; } - -protected: - /// create an sposet from xml (legacy) - virtual std::unique_ptr createSPOSetFromXML(xmlNodePtr cur) = 0; - - /// create an sposet from a general xml request - virtual std::unique_ptr createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input_info); - - /// type name of the SPO objects built by this builder. 
- const std::string type_name_; -}; +using SPOSetBuilder = SPOSetBuilderT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/SPOSetBuilderFactory.cpp b/src/QMCWaveFunctions/SPOSetBuilderFactory.cpp deleted file mode 100644 index 25932eeb45..0000000000 --- a/src/QMCWaveFunctions/SPOSetBuilderFactory.cpp +++ /dev/null @@ -1,229 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2020 QMCPACK developers. -// -// File developed by: Bryan Clark, bclark@Princeton.edu, Princeton University -// Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign -// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "SPOSetBuilderFactory.h" -#include "SPOSetScanner.h" -#include "HarmonicOscillator/SHOSetBuilder.h" -#include "PlaneWave/PWOrbitalSetBuilder.h" -#include "ModernStringUtils.hpp" -#include "ElectronGas/FreeOrbitalBuilder.h" -#if OHMMS_DIM == 3 -#include "LCAO/LCAOrbitalBuilder.h" - -#if defined(QMC_COMPLEX) -#include "BsplineFactory/EinsplineSpinorSetBuilder.h" -#include "LCAO/LCAOSpinorBuilder.h" -#endif - -#if defined(HAVE_EINSPLINE) -#include "BsplineFactory/EinsplineSetBuilder.h" -#endif -#endif -#include "CompositeSPOSet.h" -#include "Utilities/ProgressReportEngine.h" -#include "Utilities/IteratorUtility.h" -#include "OhmmsData/AttributeSet.h" -#include "Message/MPIObjectBase.h" - - -namespace qmcplusplus -{ -const SPOSet* SPOSetBuilderFactory::getSPOSet(const std::string& name) const -{ - if (auto spoit = sposets.find(name); spoit == sposets.end()) - { - // keep this commented until legacy input styles are moved. - // In legacy input styles, this look up may fail and need to build SPOSet on the fly. 
- return nullptr; - } - else - return spoit->second.get(); -} - -/** constructor - * \param els reference to the electrons - * \param psi reference to the wavefunction - * \param ions reference to the ions - */ -SPOSetBuilderFactory::SPOSetBuilderFactory(Communicate* comm, ParticleSet& els, const PSetMap& psets) - : MPIObjectBase(comm), targetPtcl(els), ptclPool(psets) -{ - ClassName = "SPOSetBuilderFactory"; -} - -SPOSetBuilderFactory::~SPOSetBuilderFactory() { DEBUG_MEMORY("SPOSetBuilderFactory::~SPOSetBuilderFactory"); } - -std::unique_ptr SPOSetBuilderFactory::createSPOSetBuilder(xmlNodePtr rootNode) -{ - ReportEngine PRE(ClassName, "createSPOSetBuilder"); - std::string sourceOpt("ion0"); - std::string type(""); - std::string name(""); - OhmmsAttributeSet aAttrib; - aAttrib.add(sourceOpt, "source"); - aAttrib.add(type, "type"); - aAttrib.add(name, "name"); - - if (rootNode != NULL) - aAttrib.put(rootNode); - - std::string type_in = type; - type = lowerCase(type); - - //when name is missing, type becomes the input - if (name.empty()) - name = type_in; - - std::unique_ptr bb; - - if (type == "composite") - { - app_log() << "Composite SPO set with existing SPOSets." 
<< std::endl; - bb = std::make_unique(myComm, *this); - } - else if (type == "jellium" || type == "heg" || type == "free") - { - app_log() << "Free-particle SPO set" << std::endl; - bb = std::make_unique(targetPtcl, myComm, rootNode); - } - else if (type == "sho") - { - app_log() << "Harmonic Oscillator SPO set" << std::endl; - bb = std::make_unique(targetPtcl, myComm); - } - else if (type == "PWBasis" || type == "PW" || type == "pw") - { - app_log() << "Planewave basis SPO set" << std::endl; - bb = std::make_unique(targetPtcl, myComm, rootNode); - } -#if OHMMS_DIM == 3 - else if (type.find("spline") < type.size()) - { - if (targetPtcl.isSpinor()) - { -#ifdef QMC_COMPLEX - app_log() << "Einspline Spinor Set\n"; - bb = std::make_unique(targetPtcl, ptclPool, myComm, rootNode); -#else - PRE.error("Use of einspline spinors requires QMC_COMPLEX=1. Rebuild with this option"); -#endif - } - else - { -#if defined(HAVE_EINSPLINE) - PRE << "EinsplineSetBuilder: using libeinspline for B-spline orbitals.\n"; - bb = std::make_unique(targetPtcl, ptclPool, myComm, rootNode); -#else - PRE.error("Einspline is missing for B-spline orbitals", true); -#endif - } - } - else if (type == "molecularorbital" || type == "mo") - { - ParticleSet* ions = nullptr; - //initialize with the source tag - auto pit(ptclPool.find(sourceOpt)); - if (pit == ptclPool.end()) - PRE.error("Missing basisset/@source.", true); - else - ions = pit->second.get(); - if (targetPtcl.isSpinor()) -#ifdef QMC_COMPLEX - bb = std::make_unique(targetPtcl, *ions, myComm, rootNode); -#else - PRE.error("Use of lcao spinors requires QMC_COMPLEX=1. 
Rebuild with this option"); -#endif - else - bb = std::make_unique(targetPtcl, *ions, myComm, rootNode); - } -#endif //OHMMS_DIM==3 - PRE.flush(); - - if (!bb) - myComm->barrier_and_abort("SPOSetBuilderFactory::createSPOSetBuilder SPOSetBuilder creation failed."); - - app_log() << " Created SPOSet builder named '" << name << "' of type " << type << std::endl; - return bb; -} - - -void SPOSetBuilderFactory::buildSPOSetCollection(xmlNodePtr cur) -{ - std::string collection_name; - std::string collection_type; - OhmmsAttributeSet attrib; - attrib.add(collection_name, "name"); - attrib.add(collection_type, "type"); - attrib.put(cur); - - // use collection_type as collection_name if collection_name is not given - if (collection_name.empty()) - collection_name = collection_type; - - app_summary() << std::endl; - app_summary() << " Single particle orbitals (SPO) collection" << std::endl; - app_summary() << " -----------------------------------------" << std::endl; - app_summary() << " Name: " << collection_name << " Type input: " << collection_type << std::endl; - app_summary() << std::endl; - - // create the SPOSet builder - auto bb = createSPOSetBuilder(cur); - - // going through a list of sposet entries - int nsposets = 0; - processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { - if (cname == "sposet") - { - addSPOSet(std::unique_ptr(bb->createSPOSet(element))); - nsposets++; - } - if (cname == "rotated_sposet") - { - addSPOSet(std::unique_ptr(bb->createRotatedSPOSet(element))); - nsposets++; - } - }); - - if (nsposets == 0) - myComm->barrier_and_abort("SPOSetBuilderFactory::buildSPOSetCollection no elements found"); - - // going through a list of spo_scanner entries - processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { - if (cname == "spo_scanner") - if (myComm->rank() == 0) - { - SPOSetScanner ascanner(sposets, targetPtcl, ptclPool); - ascanner.put(element); - } - }); -} - -void 
SPOSetBuilderFactory::addSPOSet(std::unique_ptr spo) -{ - if (spo->getName().empty()) - myComm->barrier_and_abort("sposet created in sposet_collection must have a name!"); - - if (sposets.find(spo->getName()) != sposets.end()) - myComm->barrier_and_abort("The name of each sposet must be unique! '" + spo->getName() + "' exists."); - else - sposets.emplace(spo->getName(), std::move(spo)); -} - -std::string SPOSetBuilderFactory::basisset_tag = "basisset"; - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/SPOSetBuilderFactory.h b/src/QMCWaveFunctions/SPOSetBuilderFactory.h index 78db70ca76..be31b52a5e 100644 --- a/src/QMCWaveFunctions/SPOSetBuilderFactory.h +++ b/src/QMCWaveFunctions/SPOSetBuilderFactory.h @@ -16,57 +16,11 @@ #ifndef QMCPLUSPLUS_BASISSETFACTORY_H #define QMCPLUSPLUS_BASISSETFACTORY_H -#include "QMCWaveFunctions/WaveFunctionComponentBuilder.h" -#include "QMCWaveFunctions/SPOSetBuilder.h" -#include "type_traits/template_types.hpp" +#include "Configuration.h" +#include "QMCWaveFunctions/SPOSetBuilderFactoryT.h" namespace qmcplusplus { -class SPOSetBuilderFactory : public MPIObjectBase -{ -public: - using SPOMap = SPOSet::SPOMap; - using PSetMap = std::map>; - - /** constructor - * \param comm communicator - * \param els reference to the electrons - * \param ions reference to the ions - */ - SPOSetBuilderFactory(Communicate* comm, ParticleSet& els, const PSetMap& psets); - - ~SPOSetBuilderFactory(); - - std::unique_ptr createSPOSetBuilder(xmlNodePtr rootNode); - - /** returns a named sposet from the pool - * only use in serial portion of execution - * ie during initialization prior to threaded code - */ - const SPOSet* getSPOSet(const std::string& name) const; - - void buildSPOSetCollection(xmlNodePtr cur); - - bool empty() const { return sposets.empty(); } - - /** add an SPOSet to sposets map. 
- * This is only used to handle legacy SPOSet input styles without using sposet_collection - */ - void addSPOSet(std::unique_ptr); - - SPOMap&& exportSPOSets() { return std::move(sposets); } - -private: - ///reference to the target particle - ParticleSet& targetPtcl; - - ///reference to the particle pool - const PSetMap& ptclPool; - - /// list of all sposets created by the builders of this factory - SPOMap sposets; - - static std::string basisset_tag; -}; +using SPOSetBuilderFactory = SPOSetBuilderFactoryT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp index 9b116c76a4..e8342a693b 100644 --- a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp +++ b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp @@ -25,16 +25,14 @@ #include "QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h" #include "QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h" #include "QMCWaveFunctions/SPOSetScannerT.h" +#include "PlaneWave/PWOrbitalSetBuilder.h" #if OHMMS_DIM == 3 #include "QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h" #include "QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h" -#if defined(QMC_COMPLEX) -#include "QMCWaveFunctions/BsplineFactory/EinsplineSpinorSetBuilder.h" -#endif - #if defined(HAVE_EINSPLINE) -#include "QMCWaveFunctions/BsplineFactory/EinsplineSetBuilder.h" +#include "QMCWaveFunctions/EinsplineSpinorSetBuilderT.h" #endif +#include "QMCWaveFunctions/EinsplineSetBuilderT.h" #endif #include "Message/MPIObjectBase.h" #include "OhmmsData/AttributeSet.h" @@ -101,8 +99,8 @@ std::unique_ptr> SPOSetBuilderFactoryT::createSPOSetBuilder { ReportEngine PRE(ClassName, "createSPOSetBuilder"); std::string sourceOpt("ion0"); - std::string type(""); - std::string name(""); + std::string type; + std::string name; OhmmsAttributeSet aAttrib; aAttrib.add(sourceOpt, "source"); aAttrib.add(type, "type"); @@ -135,16 +133,21 @@ std::unique_ptr> SPOSetBuilderFactoryT::createSPOSetBuilder app_log() << 
"Harmonic Oscillator SPO set" << std::endl; bb = std::make_unique>(targetPtcl, myComm); } + else if (type == "PWBasis" || type == "PW" || type == "pw") + { + app_log() << "Planewave basis SPO set" << std::endl; + bb = std::make_unique(targetPtcl, myComm, rootNode); + } #if OHMMS_DIM == 3 else if (type.find("spline") < type.size()) { if (targetPtcl.isSpinor()) { #ifdef QMC_COMPLEX - app_log() << "Einspline Spinor Set\n"; - // FIXME - // bb = std::make_unique(targetPtcl, - // ptclPool, myComm, rootNode); + app_log() << "Einspline Spinor Set\n"; + // FIXME + bb = std::make_unique>(targetPtcl, + ptclPool, myComm, rootNode); #else PRE.error("Use of einspline spinors requires QMC_COMPLEX=1. " "Rebuild with this option"); @@ -153,11 +156,11 @@ std::unique_ptr> SPOSetBuilderFactoryT::createSPOSetBuilder else { #if defined(HAVE_EINSPLINE) - PRE << "EinsplineSetBuilder: using libeinspline for B-spline " - "orbitals.\n"; - // FIXME - // bb = std::make_unique(targetPtcl, ptclPool, - // myComm, rootNode); + PRE << "EinsplineSetBuilder: using libeinspline for B-spline " + "orbitals.\n"; + // FIXME + bb = std::make_unique>(targetPtcl, ptclPool, + myComm, rootNode); #else PRE.error("Einspline is missing for B-spline orbitals", true); #endif @@ -265,8 +268,17 @@ void SPOSetBuilderFactoryT::addSPOSet(std::unique_ptr> spo) template std::string SPOSetBuilderFactoryT::basisset_tag = "basisset"; +#ifdef QMC_COMPLEX +#ifndef MIXED_PRECISION template class SPOSetBuilderFactoryT>; +#else template class SPOSetBuilderFactoryT>; +#endif +#else +#ifndef MIXED_PRECISION template class SPOSetBuilderFactoryT; +#else template class SPOSetBuilderFactoryT; +#endif +#endif } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/SPOSetBuilderT.cpp b/src/QMCWaveFunctions/SPOSetBuilderT.cpp index 389c91684e..b83c265af9 100644 --- a/src/QMCWaveFunctions/SPOSetBuilderT.cpp +++ b/src/QMCWaveFunctions/SPOSetBuilderT.cpp @@ -78,7 +78,7 @@ std::unique_ptr> SPOSetBuilderT::createSPOSet(xmlNodePtr c } if 
(!sposet) - myComm->barrier_and_abort("SPOSetBuilder::createSPOSet sposet creation failed"); + myComm->barrier_and_abort("SPOSetBuilderT::createSPOSet sposet creation failed"); if (optimize == "rotation" || optimize == "yes") { @@ -161,7 +161,7 @@ std::unique_ptr> SPOSetBuilderT::createSPOSet(xmlNodePtr } if (!sposet) - myComm->barrier_and_abort("SPOSetBuilder::createSPOSet sposet creation failed"); + myComm->barrier_and_abort("SPOSetBuilderT::createSPOSet sposet creation failed"); if (optimize == "rotation" || optimize == "yes") { @@ -244,7 +244,7 @@ std::unique_ptr>> SPOSetBuilderT } if (!sposet) - myComm->barrier_and_abort("SPOSetBuilder::createSPOSet sposet creation failed"); + myComm->barrier_and_abort("SPOSetBuilderT>::createSPOSet sposet creation failed"); if (optimize == "rotation" || optimize == "yes") { @@ -305,7 +305,7 @@ std::unique_ptr>> SPOSetBuilderTbarrier_and_abort("SPOSetBuilder::createSPOSet sposet creation failed"); + myComm->barrier_and_abort("SPOSetBuilderT>::createSPOSet sposet creation failed"); if (optimize == "rotation" || optimize == "yes") { @@ -327,7 +327,7 @@ template std::unique_ptr> SPOSetBuilderT::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input_info) { myComm->barrier_and_abort("BasisSetBase::createSPOSet(cur,input_info) has not been implemented"); - return 0; + return nullptr; } diff --git a/src/QMCWaveFunctions/SPOSetBuilderT.h b/src/QMCWaveFunctions/SPOSetBuilderT.h index 8bb3071df6..1183a56ad3 100644 --- a/src/QMCWaveFunctions/SPOSetBuilderT.h +++ b/src/QMCWaveFunctions/SPOSetBuilderT.h @@ -72,10 +72,10 @@ class SPOSetBuilderT : public QMCTraits, public MPIObjectBase inline void clear_states(int index = 0) { states[index]->clear(); } /// create an sposet from xml and save the resulting SPOSet - std::unique_ptr> createSPOSet(xmlNodePtr cur); + [[nodiscard]] std::unique_ptr> createSPOSet(xmlNodePtr cur); /// create orbital rotation transformation from xml and save the resulting SPOSet - std::unique_ptr> 
createRotatedSPOSet(xmlNodePtr cur); + [[nodiscard]] std::unique_ptr> createRotatedSPOSet(xmlNodePtr cur); const std::string& getTypeName() const { return type_name_; } diff --git a/src/QMCWaveFunctions/SPOSetInfo.h b/src/QMCWaveFunctions/SPOSetInfo.h index 8ee31c909d..04961ebe63 100644 --- a/src/QMCWaveFunctions/SPOSetInfo.h +++ b/src/QMCWaveFunctions/SPOSetInfo.h @@ -129,7 +129,6 @@ class SPOSetInfo /// empty collection and render mutable void clear(); - friend class SPOSetBuilder; template friend class SPOSetBuilderT; }; diff --git a/src/QMCWaveFunctions/SPOSetScannerT.h b/src/QMCWaveFunctions/SPOSetScannerT.h index e4841b90bb..814601bbac 100644 --- a/src/QMCWaveFunctions/SPOSetScannerT.h +++ b/src/QMCWaveFunctions/SPOSetScannerT.h @@ -13,7 +13,7 @@ #define QMCPLUSPLUS_SPOSET_SCANNERT_H #include "OhmmsData/AttributeSet.h" -#include "Particle/ParticleSetT.h" +#include "Particle/ParticleSet.h" #include "QMCWaveFunctions/OrbitalSetTraits.h" #include "QMCWaveFunctions/SPOSetT.h" diff --git a/src/QMCWaveFunctions/SPOSetT.cpp b/src/QMCWaveFunctions/SPOSetT.cpp index cd8ac79eb5..587156abf9 100644 --- a/src/QMCWaveFunctions/SPOSetT.cpp +++ b/src/QMCWaveFunctions/SPOSetT.cpp @@ -43,7 +43,7 @@ void SPOSetT::extractOptimizableObjectRefs(UniqueOptObjRefsT&) } template -void SPOSetT::checkOutVariables(const OptVariablesType& active) +void SPOSetT::checkOutVariables(const OptVariablesTypeT& active) { if (isOptimizable()) throw std::logic_error("Bug!! 
" + getClassName() + @@ -284,7 +284,7 @@ void SPOSetT::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) template void SPOSetT::evaluateDerivatives(ParticleSetT& P, - const OptVariablesType& optvars, + const OptVariablesTypeT& optvars, Vector& dlogpsi, Vector& dhpsioverpsi, const int& FirstIndex, @@ -298,7 +298,7 @@ void SPOSetT::evaluateDerivatives(ParticleSetT& P, template void SPOSetT::evaluateDerivativesWF(ParticleSetT& P, - const OptVariablesType& optvars, + const OptVariablesTypeT& optvars, Vector& dlogpsi, int FirstIndex, int LastIndex) @@ -311,7 +311,7 @@ void SPOSetT::evaluateDerivativesWF(ParticleSetT& P, template void SPOSetT::evaluateDerivRatios(const VirtualParticleSetT& VP, - const OptVariablesType& optvars, + const OptVariablesTypeT& optvars, ValueVector& psi, const ValueVector& psiinv, std::vector& ratios, @@ -331,7 +331,7 @@ void SPOSetT::evaluateDerivRatios(const VirtualParticleSetT& VP, template void SPOSetT::evaluateDerivatives(ParticleSetT& P, - const OptVariablesType& optvars, + const OptVariablesTypeT& optvars, Vector& dlogpsi, Vector& dhpsioverpsi, const T& psiCurrent, @@ -365,9 +365,9 @@ void SPOSetT::evaluateDerivatives(ParticleSetT& P, template void SPOSetT::evaluateDerivativesWF(ParticleSetT& P, - const OptVariablesType& optvars, + const OptVariablesTypeT& optvars, Vector& dlogpsi, - const ValueType& psiCurrent, + const FullValueType& psiCurrent, const std::vector& Coeff, const std::vector& C2node_up, const std::vector& C2node_dn, diff --git a/src/QMCWaveFunctions/SPOSetT.h b/src/QMCWaveFunctions/SPOSetT.h index 98c3743f7a..4900c0499f 100644 --- a/src/QMCWaveFunctions/SPOSetT.h +++ b/src/QMCWaveFunctions/SPOSetT.h @@ -29,7 +29,7 @@ #include "OMPTarget/OffloadAlignedAllocators.hpp" #include "OhmmsPETE/OhmmsArray.h" #include "OptimizableObjectT.h" -#include "Particle/ParticleSetT.h" +#include "Particle/ParticleSet.h" #include "Particle/VirtualParticleSetT.h" #include "QMCWaveFunctions/OrbitalSetTraits.h" @@ -41,14 +41,10 @@ 
template class SPOSetT; namespace testing { -OptVariablesType& -getMyVars(SPOSetT& spo); -OptVariablesType& -getMyVars(SPOSetT& spo); -OptVariablesType>& -getMyVars(SPOSetT>& spo); -OptVariablesType>& -getMyVars(SPOSetT>& spo); +OptVariablesTypeT& getMyVars(SPOSetT& spo); +OptVariablesTypeT& getMyVars(SPOSetT& spo); +OptVariablesTypeT>& getMyVars(SPOSetT>& spo); +OptVariablesTypeT>& getMyVars(SPOSetT>& spo); } // namespace testing /** base class for Single-particle orbital sets @@ -81,6 +77,8 @@ class SPOSetT : public QMCTraits using ComplexType = typename OrbitalSetTraits::ComplexType; using ValueType = typename OrbitalSetTraits::ValueType; using FullRealType = typename OrbitalSetTraits::RealType; + using FullValueType = typename OrbitalSetTraits::FullValueType; + ; template using OffloadMatrix = Matrix>; @@ -140,8 +138,7 @@ class SPOSetT : public QMCTraits /** check out variational optimizable variables * @param active a super set of optimizable variables */ - virtual void - checkOutVariables(const OptVariablesType& active); + virtual void checkOutVariables(const OptVariablesTypeT& active); /// Query if this SPOSet uses OpenMP offload virtual bool @@ -182,49 +179,70 @@ class SPOSetT : public QMCTraits /// Parameter derivatives of the wavefunction and the Laplacian of the /// wavefunction - virtual void - evaluateDerivatives(ParticleSetT& P, const OptVariablesType& optvars, - Vector& dlogpsi, Vector& dhpsioverpsi, const int& FirstIndex, - const int& LastIndex); + virtual void evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const int& FirstIndex, + const int& LastIndex); /// Parameter derivatives of the wavefunction - virtual void - evaluateDerivativesWF(ParticleSetT& P, - const OptVariablesType& optvars, Vector& dlogpsi, int FirstIndex, - int LastIndex); + virtual void evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + int FirstIndex, + int 
LastIndex); /** Evaluate the derivative of the optimized orbitals with respect to the * parameters this is used only for MSD, to be refined for better serving * both single and multi SD */ - virtual void - evaluateDerivatives(ParticleSetT& P, const OptVariablesType& optvars, - Vector& dlogpsi, Vector& dhpsioverpsi, const T& psiCurrent, - const std::vector& Coeff, const std::vector& C2node_up, - const std::vector& C2node_dn, const ValueVector& detValues_up, - const ValueVector& detValues_dn, const GradMatrix& grads_up, - const GradMatrix& grads_dn, const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, const ValueMatrix& M_up, - const ValueMatrix& M_dn, const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, const GradMatrix& B_grad, - const ValueMatrix& B_lapl, const std::vector& detData_up, - const size_t N1, const size_t N2, const size_t NP1, const size_t NP2, - const std::vector>& lookup_tbl); + virtual void evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const T& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const GradMatrix& grads_up, + const GradMatrix& grads_dn, + const ValueMatrix& lapls_up, + const ValueMatrix& lapls_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const GradMatrix& B_grad, + const ValueMatrix& B_lapl, + const std::vector& detData_up, + const size_t N1, + const size_t N2, + const size_t NP1, + const size_t NP2, + const std::vector>& lookup_tbl); /** Evaluate the derivative of the optimized orbitals with respect to the * parameters this is used only for MSD, to be refined for better serving * both single and multi SD */ - virtual void - evaluateDerivativesWF(ParticleSetT& P, - const OptVariablesType& optvars, Vector& dlogpsi, - const ValueType& psiCurrent, const 
std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, const ValueVector& detValues_up, - const ValueVector& detValues_dn, const ValueMatrix& M_up, - const ValueMatrix& M_dn, const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, const std::vector& detData_up, - const std::vector>& lookup_tbl); + virtual void evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + const FullValueType& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const std::vector& detData_up, + const std::vector>& lookup_tbl); /** set the OrbitalSetSize * @param norbs number of single-particle orbitals @@ -259,11 +277,14 @@ class SPOSetT : public QMCTraits /// Determinant ratios and parameter derivatives of the wavefunction for /// virtual moves - virtual void - evaluateDerivRatios(const VirtualParticleSetT& VP, - const OptVariablesType& optvars, ValueVector& psi, - const ValueVector& psiinv, std::vector& ratios, Matrix& dratios, - int FirstIndex, int LastIndex); + virtual void evaluateDerivRatios(const VirtualParticleSetT& VP, + const OptVariablesTypeT& optvars, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios, + Matrix& dratios, + int FirstIndex, + int LastIndex); /** evaluate determinant ratios for virtual moves, e.g., sphere move for * nonlocalPP, of multiple walkers @@ -630,16 +651,12 @@ class SPOSetT : public QMCTraits /// number of Single-particle orbitals IndexType OrbitalSetSize; /// Optimizable variables - OptVariablesType myVars; - - friend OptVariablesType& - testing::getMyVars(SPOSetT& spo); - friend OptVariablesType& - testing::getMyVars(SPOSetT& spo); - friend OptVariablesType>& - testing::getMyVars(SPOSetT>& spo); - friend OptVariablesType>& - 
testing::getMyVars(SPOSetT>& spo); + OptVariablesTypeT myVars; + + friend OptVariablesTypeT& testing::getMyVars(SPOSetT& spo); + friend OptVariablesTypeT& testing::getMyVars(SPOSetT& spo); + friend OptVariablesTypeT>& testing::getMyVars(SPOSetT>& spo); + friend OptVariablesTypeT>& testing::getMyVars(SPOSetT>& spo); }; template diff --git a/src/QMCWaveFunctions/SpinorSet.cpp b/src/QMCWaveFunctions/SpinorSet.cpp deleted file mode 100644 index 4f0531659b..0000000000 --- a/src/QMCWaveFunctions/SpinorSet.cpp +++ /dev/null @@ -1,558 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2022 QMCPACK developers -// -// File developed by: Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories -// Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories -// -// File created by: Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories -////////////////////////////////////////////////////////////////////////////////////// - -#include "SpinorSet.h" -#include "Utilities/ResourceCollection.h" -#include "Platforms/OMPTarget/OMPTargetMath.hpp" - -namespace qmcplusplus -{ -struct SpinorSet::SpinorSetMultiWalkerResource : public Resource -{ - SpinorSetMultiWalkerResource() : Resource("SpinorSet") {} - SpinorSetMultiWalkerResource(const SpinorSetMultiWalkerResource&) : SpinorSetMultiWalkerResource() {} - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - OffloadMWVGLArray up_phi_vgl_v, dn_phi_vgl_v; - std::vector up_ratios, dn_ratios; - std::vector up_grads, dn_grads; - std::vector spins; -}; - -SpinorSet::SpinorSet(const std::string& my_name) : SPOSet(my_name), spo_up(nullptr), spo_dn(nullptr) {} -SpinorSet::~SpinorSet() = default; - -void SpinorSet::set_spos(std::unique_ptr&& up, std::unique_ptr&& dn) -{ - //Sanity check for 
input SPO's. They need to be the same size or - IndexType spo_size_up = up->getOrbitalSetSize(); - IndexType spo_size_down = dn->getOrbitalSetSize(); - - if (spo_size_up != spo_size_down) - throw std::runtime_error("SpinorSet::set_spos(...): up and down SPO components have different sizes."); - - setOrbitalSetSize(spo_size_up); - - spo_up = std::move(up); - spo_dn = std::move(dn); - - psi_work_up.resize(OrbitalSetSize); - psi_work_down.resize(OrbitalSetSize); - - dpsi_work_up.resize(OrbitalSetSize); - dpsi_work_down.resize(OrbitalSetSize); - - d2psi_work_up.resize(OrbitalSetSize); - d2psi_work_down.resize(OrbitalSetSize); -} - -void SpinorSet::setOrbitalSetSize(int norbs) { OrbitalSetSize = norbs; }; - - -void SpinorSet::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) -{ - psi_work_up = 0.0; - psi_work_down = 0.0; - - spo_up->evaluateValue(P, iat, psi_work_up); - spo_dn->evaluateValue(P, iat, psi_work_down); - - ParticleSet::Scalar_t s = P.activeSpin(iat); - - RealType coss(0.0), sins(0.0); - - coss = std::cos(s); - sins = std::sin(s); - - //This is only supported in the complex build, so ValueType is some complex number depending on the precision. 
- ValueType eis(coss, sins); - ValueType emis(coss, -sins); - - psi = eis * psi_work_up + emis * psi_work_down; -} - -void SpinorSet::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) -{ - psi_work_up = 0.0; - psi_work_down = 0.0; - dpsi_work_up = 0.0; - dpsi_work_down = 0.0; - d2psi_work_up = 0.0; - d2psi_work_down = 0.0; - - spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up); - spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down); - - ParticleSet::Scalar_t s = P.activeSpin(iat); - - RealType coss(0.0), sins(0.0); - - coss = std::cos(s); - sins = std::sin(s); - - ValueType eis(coss, sins); - ValueType emis(coss, -sins); - - psi = eis * psi_work_up + emis * psi_work_down; - dpsi = eis * dpsi_work_up + emis * dpsi_work_down; - d2psi = eis * d2psi_work_up + emis * d2psi_work_down; -} - -void SpinorSet::evaluateVGL_spin(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi, - ValueVector& dspin) -{ - psi_work_up = 0.0; - psi_work_down = 0.0; - dpsi_work_up = 0.0; - dpsi_work_down = 0.0; - d2psi_work_up = 0.0; - d2psi_work_down = 0.0; - - spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up); - spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down); - - ParticleSet::Scalar_t s = P.activeSpin(iat); - - RealType coss(0.0), sins(0.0); - - coss = std::cos(s); - sins = std::sin(s); - - ValueType eis(coss, sins); - ValueType emis(coss, -sins); - ValueType eye(0, 1.0); - - psi = eis * psi_work_up + emis * psi_work_down; - dpsi = eis * dpsi_work_up + emis * dpsi_work_down; - d2psi = eis * d2psi_work_up + emis * d2psi_work_down; - dspin = eye * (eis * psi_work_up - emis * psi_work_down); -} - -void SpinorSet::mw_evaluateVGLWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list, - 
OffloadMatrix& mw_dspin) const -{ - auto& spo_leader = spo_list.getCastedLeader(); - auto& P_leader = P_list.getLeader(); - assert(this == &spo_leader); - - IndexType nw = spo_list.size(); - auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); - auto& up_spo_leader = up_spo_list.getLeader(); - auto& dn_spo_leader = dn_spo_list.getLeader(); - - RefVector up_psi_v_list, dn_psi_v_list; - RefVector up_dpsi_v_list, dn_dpsi_v_list; - RefVector up_d2psi_v_list, dn_d2psi_v_list; - for (int iw = 0; iw < nw; iw++) - { - auto& spo = spo_list.getCastedElement(iw); - up_psi_v_list.push_back(spo.psi_work_up); - dn_psi_v_list.push_back(spo.psi_work_down); - up_dpsi_v_list.push_back(spo.dpsi_work_up); - dn_dpsi_v_list.push_back(spo.dpsi_work_down); - up_d2psi_v_list.push_back(spo.d2psi_work_up); - dn_d2psi_v_list.push_back(spo.d2psi_work_down); - } - - up_spo_leader.mw_evaluateVGL(up_spo_list, P_list, iat, up_psi_v_list, up_dpsi_v_list, up_d2psi_v_list); - dn_spo_leader.mw_evaluateVGL(dn_spo_list, P_list, iat, dn_psi_v_list, dn_dpsi_v_list, dn_d2psi_v_list); - - for (int iw = 0; iw < nw; iw++) - { - ParticleSet::Scalar_t s = P_list[iw].activeSpin(iat); - RealType coss = std::cos(s); - RealType sins = std::sin(s); - - ValueType eis(coss, sins); - ValueType emis(coss, -sins); - ValueType eye(0, 1.0); - - psi_v_list[iw].get() = eis * up_psi_v_list[iw].get() + emis * dn_psi_v_list[iw].get(); - dpsi_v_list[iw].get() = eis * up_dpsi_v_list[iw].get() + emis * dn_dpsi_v_list[iw].get(); - d2psi_v_list[iw].get() = eis * up_d2psi_v_list[iw].get() + emis * dn_d2psi_v_list[iw].get(); - for (int iorb = 0; iorb < OrbitalSetSize; iorb++) - mw_dspin(iw, iorb) = eye * (eis * (up_psi_v_list[iw].get())[iorb] - emis * (dn_psi_v_list[iw].get())[iorb]); - } - //Data above is all on host, but since mw_dspin is DualMatrix we need to sync the host and device - mw_dspin.updateTo(); -} - -void SpinorSet::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader& spo_list, - const 
RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads, - std::vector& spingrads) const -{ - auto& spo_leader = spo_list.getCastedLeader(); - auto& P_leader = P_list.getLeader(); - assert(this == &spo_leader); - assert(phi_vgl_v.size(0) == DIM_VGL); - assert(phi_vgl_v.size(1) == spo_list.size()); - const size_t nw = spo_list.size(); - const size_t norb_requested = phi_vgl_v.size(2); - - auto& mw_res = spo_leader.mw_res_handle_.getResource(); - auto& up_phi_vgl_v = mw_res.up_phi_vgl_v; - auto& dn_phi_vgl_v = mw_res.dn_phi_vgl_v; - auto& up_ratios = mw_res.up_ratios; - auto& dn_ratios = mw_res.dn_ratios; - auto& up_grads = mw_res.up_grads; - auto& dn_grads = mw_res.dn_grads; - auto& spins = mw_res.spins; - - up_phi_vgl_v.resize(DIM_VGL, nw, norb_requested); - dn_phi_vgl_v.resize(DIM_VGL, nw, norb_requested); - up_ratios.resize(nw); - dn_ratios.resize(nw); - up_grads.resize(nw); - dn_grads.resize(nw); - spins.resize(nw); - - auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); - auto& up_spo_leader = up_spo_list.getLeader(); - auto& dn_spo_leader = dn_spo_list.getLeader(); - - up_spo_leader.mw_evaluateVGLandDetRatioGrads(up_spo_list, P_list, iat, invRow_ptr_list, up_phi_vgl_v, up_ratios, - up_grads); - dn_spo_leader.mw_evaluateVGLandDetRatioGrads(dn_spo_list, P_list, iat, invRow_ptr_list, dn_phi_vgl_v, dn_ratios, - dn_grads); - for (int iw = 0; iw < nw; iw++) - { - ParticleSet::Scalar_t s = P_list[iw].activeSpin(iat); - spins[iw] = s; - RealType coss = std::cos(s); - RealType sins = std::sin(s); - - ValueType eis(coss, sins); - ValueType emis(coss, -sins); - ValueType eye(0, 1.0); - - ratios[iw] = eis * up_ratios[iw] + emis * dn_ratios[iw]; - grads[iw] = (eis * up_grads[iw] * up_ratios[iw] + emis * dn_grads[iw] * dn_ratios[iw]) / ratios[iw]; - spingrads[iw] = eye * (eis * up_ratios[iw] - emis * dn_ratios[iw]) / ratios[iw]; - } - - auto* spins_ptr = 
spins.data(); - //This data lives on the device - auto* phi_vgl_ptr = phi_vgl_v.data(); - auto* up_phi_vgl_ptr = up_phi_vgl_v.data(); - auto* dn_phi_vgl_ptr = dn_phi_vgl_v.data(); - PRAGMA_OFFLOAD("omp target teams distribute map(to:spins_ptr[0:nw])") - for (int iw = 0; iw < nw; iw++) - { - RealType c, s; - omptarget::sincos(spins_ptr[iw], &s, &c); - ValueType eis(c, s), emis(c, -s); - PRAGMA_OFFLOAD("omp parallel for collapse(2)") - for (int idim = 0; idim < DIM_VGL; idim++) - for (int iorb = 0; iorb < norb_requested; iorb++) - { - auto offset = idim * nw * norb_requested + iw * norb_requested + iorb; - phi_vgl_ptr[offset] = eis * up_phi_vgl_ptr[offset] + emis * dn_phi_vgl_ptr[offset]; - } - } -} - -void SpinorSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) -{ - IndexType nelec = P.getTotalNum(); - - logpsi_work_up.resize(nelec, OrbitalSetSize); - logpsi_work_down.resize(nelec, OrbitalSetSize); - - dlogpsi_work_up.resize(nelec, OrbitalSetSize); - dlogpsi_work_down.resize(nelec, OrbitalSetSize); - - d2logpsi_work_up.resize(nelec, OrbitalSetSize); - d2logpsi_work_down.resize(nelec, OrbitalSetSize); - - spo_up->evaluate_notranspose(P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up); - spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down, dlogpsi_work_down, d2logpsi_work_down); - - - for (int iat = 0; iat < nelec; iat++) - { - ParticleSet::Scalar_t s = P.activeSpin(iat); - - RealType coss(0.0), sins(0.0); - - coss = std::cos(s); - sins = std::sin(s); - - ValueType eis(coss, sins); - ValueType emis(coss, -sins); - - for (int no = 0; no < OrbitalSetSize; no++) - { - logdet(iat, no) = eis * logpsi_work_up(iat, no) + emis * logpsi_work_down(iat, no); - dlogdet(iat, no) = eis * dlogpsi_work_up(iat, no) + emis * dlogpsi_work_down(iat, no); - d2logdet(iat, no) = eis * d2logpsi_work_up(iat, no) + emis * d2logpsi_work_down(iat, no); - } - } -} - -void 
SpinorSet::mw_evaluate_notranspose(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int first, - int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const -{ - auto& spo_leader = spo_list.getCastedLeader(); - auto& P_leader = P_list.getLeader(); - assert(this == &spo_leader); - - IndexType nw = spo_list.size(); - IndexType nelec = P_leader.getTotalNum(); - - auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); - auto& up_spo_leader = up_spo_list.getLeader(); - auto& dn_spo_leader = dn_spo_list.getLeader(); - - std::vector mw_up_logdet, mw_dn_logdet; - std::vector mw_up_dlogdet, mw_dn_dlogdet; - std::vector mw_up_d2logdet, mw_dn_d2logdet; - mw_up_logdet.reserve(nw); - mw_dn_logdet.reserve(nw); - mw_up_dlogdet.reserve(nw); - mw_dn_dlogdet.reserve(nw); - mw_up_d2logdet.reserve(nw); - mw_dn_d2logdet.reserve(nw); - - RefVector up_logdet_list, dn_logdet_list; - RefVector up_dlogdet_list, dn_dlogdet_list; - RefVector up_d2logdet_list, dn_d2logdet_list; - up_logdet_list.reserve(nw); - dn_logdet_list.reserve(nw); - up_dlogdet_list.reserve(nw); - dn_dlogdet_list.reserve(nw); - up_d2logdet_list.reserve(nw); - dn_d2logdet_list.reserve(nw); - - ValueMatrix tmp_val_mat(nelec, OrbitalSetSize); - GradMatrix tmp_grad_mat(nelec, OrbitalSetSize); - for (int iw = 0; iw < nw; iw++) - { - mw_up_logdet.emplace_back(tmp_val_mat); - up_logdet_list.emplace_back(mw_up_logdet.back()); - mw_dn_logdet.emplace_back(tmp_val_mat); - dn_logdet_list.emplace_back(mw_dn_logdet.back()); - - mw_up_dlogdet.emplace_back(tmp_grad_mat); - up_dlogdet_list.emplace_back(mw_up_dlogdet.back()); - mw_dn_dlogdet.emplace_back(tmp_grad_mat); - dn_dlogdet_list.emplace_back(mw_dn_dlogdet.back()); - - mw_up_d2logdet.emplace_back(tmp_val_mat); - up_d2logdet_list.emplace_back(mw_up_d2logdet.back()); - mw_dn_d2logdet.emplace_back(tmp_val_mat); - dn_d2logdet_list.emplace_back(mw_dn_d2logdet.back()); - } - - 
up_spo_leader.mw_evaluate_notranspose(up_spo_list, P_list, first, last, up_logdet_list, up_dlogdet_list, - up_d2logdet_list); - dn_spo_leader.mw_evaluate_notranspose(dn_spo_list, P_list, first, last, dn_logdet_list, dn_dlogdet_list, - dn_d2logdet_list); - - for (int iw = 0; iw < nw; iw++) - for (int iat = 0; iat < nelec; iat++) - { - ParticleSet::Scalar_t s = P_list[iw].activeSpin(iat); - RealType coss = std::cos(s); - RealType sins = std::sin(s); - ValueType eis(coss, sins); - ValueType emis(coss, -sins); - - for (int no = 0; no < OrbitalSetSize; no++) - { - logdet_list[iw].get()(iat, no) = - eis * up_logdet_list[iw].get()(iat, no) + emis * dn_logdet_list[iw].get()(iat, no); - dlogdet_list[iw].get()(iat, no) = - eis * up_dlogdet_list[iw].get()(iat, no) + emis * dn_dlogdet_list[iw].get()(iat, no); - d2logdet_list[iw].get()(iat, no) = - eis * up_d2logdet_list[iw].get()(iat, no) + emis * dn_d2logdet_list[iw].get()(iat, no); - } - } -} - -void SpinorSet::evaluate_notranspose_spin(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet, - ValueMatrix& dspinlogdet) -{ - IndexType nelec = P.getTotalNum(); - - logpsi_work_up.resize(nelec, OrbitalSetSize); - logpsi_work_down.resize(nelec, OrbitalSetSize); - - dlogpsi_work_up.resize(nelec, OrbitalSetSize); - dlogpsi_work_down.resize(nelec, OrbitalSetSize); - - d2logpsi_work_up.resize(nelec, OrbitalSetSize); - d2logpsi_work_down.resize(nelec, OrbitalSetSize); - - spo_up->evaluate_notranspose(P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up); - spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down, dlogpsi_work_down, d2logpsi_work_down); - - - for (int iat = 0; iat < nelec; iat++) - { - ParticleSet::Scalar_t s = P.activeSpin(iat); - - RealType coss(0.0), sins(0.0); - - coss = std::cos(s); - sins = std::sin(s); - - ValueType eis(coss, sins); - ValueType emis(coss, -sins); - ValueType eye(0, 1.0); - - for (int no = 0; no < OrbitalSetSize; 
no++) - { - logdet(iat, no) = eis * logpsi_work_up(iat, no) + emis * logpsi_work_down(iat, no); - dlogdet(iat, no) = eis * dlogpsi_work_up(iat, no) + emis * dlogpsi_work_down(iat, no); - d2logdet(iat, no) = eis * d2logpsi_work_up(iat, no) + emis * d2logpsi_work_down(iat, no); - dspinlogdet(iat, no) = eye * (eis * logpsi_work_up(iat, no) - emis * logpsi_work_down(iat, no)); - } - } -} - - -void SpinorSet::evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi) -{ - psi_work_up = 0.0; - psi_work_down = 0.0; - - spo_up->evaluateValue(P, iat, psi_work_up); - spo_dn->evaluateValue(P, iat, psi_work_down); - - ParticleSet::Scalar_t s = P.activeSpin(iat); - - RealType coss(0.0), sins(0.0); - - coss = std::cos(s); - sins = std::sin(s); - - ValueType eis(coss, sins); - ValueType emis(coss, -sins); - ValueType eye(0, 1.0); - - psi = eis * psi_work_up + emis * psi_work_down; - dpsi = eye * (eis * psi_work_up - emis * psi_work_down); -} - -void SpinorSet::evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi) -{ - IndexType nelec = P.getTotalNum(); - - GradMatrix gradphi_up(nelec, OrbitalSetSize); - GradMatrix gradphi_dn(nelec, OrbitalSetSize); - spo_up->evaluateGradSource(P, first, last, source, iat_src, gradphi_up); - spo_dn->evaluateGradSource(P, first, last, source, iat_src, gradphi_dn); - - for (int iat = 0; iat < nelec; iat++) - { - ParticleSet::Scalar_t s = P.activeSpin(iat); - RealType coss = std::cos(s); - RealType sins = std::sin(s); - ValueType eis(coss, sins); - ValueType emis(coss, -sins); - for (int imo = 0; imo < OrbitalSetSize; imo++) - gradphi(iat, imo) = gradphi_up(iat, imo) * eis + gradphi_dn(iat, imo) * emis; - } -} - -std::unique_ptr SpinorSet::makeClone() const -{ - auto myclone = std::make_unique(my_name_); - std::unique_ptr cloneup(spo_up->makeClone()); - std::unique_ptr clonedn(spo_dn->makeClone()); - myclone->set_spos(std::move(cloneup), 
std::move(clonedn)); - return myclone; -} - -void SpinorSet::createResource(ResourceCollection& collection) const -{ - spo_up->createResource(collection); - spo_dn->createResource(collection); - auto index = collection.addResource(std::make_unique()); -} - -void SpinorSet::acquireResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const -{ - auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); - auto& spo_leader = spo_list.getCastedLeader(); - auto& up_spo_leader = up_spo_list.getLeader(); - auto& dn_spo_leader = dn_spo_list.getLeader(); - up_spo_leader.acquireResource(collection, up_spo_list); - dn_spo_leader.acquireResource(collection, dn_spo_list); - spo_leader.mw_res_handle_ = collection.lendResource(); -} - -void SpinorSet::releaseResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const -{ - auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); - auto& spo_leader = spo_list.getCastedLeader(); - auto& up_spo_leader = up_spo_list.getLeader(); - auto& dn_spo_leader = dn_spo_list.getLeader(); - up_spo_leader.releaseResource(collection, up_spo_list); - dn_spo_leader.releaseResource(collection, dn_spo_list); - collection.takebackResource(spo_leader.mw_res_handle_); -} - -std::pair, RefVectorWithLeader> SpinorSet::extractSpinComponentRefList( - const RefVectorWithLeader& spo_list) const -{ - auto& spo_leader = spo_list.getCastedLeader(); - IndexType nw = spo_list.size(); - SPOSet& up_spo_leader = *(spo_leader.spo_up); - SPOSet& dn_spo_leader = *(spo_leader.spo_dn); - RefVectorWithLeader up_spo_list(up_spo_leader); - RefVectorWithLeader dn_spo_list(dn_spo_leader); - up_spo_list.reserve(nw); - dn_spo_list.reserve(nw); - for (int iw = 0; iw < nw; iw++) - { - SpinorSet& spinor = spo_list.getCastedElement(iw); - up_spo_list.emplace_back(*(spinor.spo_up)); - dn_spo_list.emplace_back(*(spinor.spo_dn)); - } - return std::make_pair(up_spo_list, dn_spo_list); -} - -} // namespace 
qmcplusplus diff --git a/src/QMCWaveFunctions/SpinorSet.h b/src/QMCWaveFunctions/SpinorSet.h index 63ebf5c688..91a22a4320 100644 --- a/src/QMCWaveFunctions/SpinorSet.h +++ b/src/QMCWaveFunctions/SpinorSet.h @@ -13,204 +13,12 @@ #ifndef QMCPLUSPLUS_SPINORSET_H #define QMCPLUSPLUS_SPINORSET_H -#include "QMCWaveFunctions/SPOSet.h" -#include +#include "Configuration.h" +#include "QMCWaveFunctions/SpinorSetT.h" namespace qmcplusplus { -/** Class for Melton & Mitas style Spinors. - * - */ -class SpinorSet : public SPOSet -{ -public: - /** constructor */ - SpinorSet(const std::string& my_name); - ~SpinorSet() override; - - std::string getClassName() const override { return "SpinorSet"; } - bool isOptimizable() const override { return spo_up->isOptimizable() || spo_dn->isOptimizable(); } - bool isOMPoffload() const override { return spo_up->isOMPoffload() || spo_dn->isOMPoffload(); } - bool hasIonDerivs() const override { return spo_up->hasIonDerivs() || spo_dn->hasIonDerivs(); } - - //This class is initialized by separately building the up and down channels of the spinor set and - //then registering them. 
- void set_spos(std::unique_ptr&& up, std::unique_ptr&& dn); - - /** set the OrbitalSetSize - * @param norbs number of single-particle orbitals - */ - void setOrbitalSetSize(int norbs) override; - - /** evaluate the values of this spinor set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - */ - void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override; - - /** evaluate the values, gradients and laplacians of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - * @param dpsi gradients of the SPO - * @param d2psi laplacians of the SPO - */ - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; - - /** evaluate the values, gradients and laplacians of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - * @param dpsi gradients of the SPO - * @param d2psi laplacians of the SPO - * @param dspin spin gradient of the SPO - */ - void evaluateVGL_spin(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi, - ValueVector& dspin) override; - - /** evaluate the values, gradients and laplacians and spin gradient of this single-particle orbital sets of multiple walkers - * @param spo_list the list of SPOSet pointers in a walker batch - * @param P_list the list of ParticleSet pointers in a walker batch - * @param iat active particle - * @param psi_v_list the list of value vector pointers in a walker batch - * @param dpsi_v_list the list of gradient vector pointers in a walker batch - * @param d2psi_v_list the list of laplacian vector pointers in a walker batch - * @param mw_dspin dual matrix of spin gradients. 
nw x num_orbitals - */ - void mw_evaluateVGLWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list, - OffloadMatrix& mw_dspin) const override; - - - /** evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio - * and grads of multiple walkers. Device data of phi_vgl_v must be up-to-date upon return. - * Includes spin gradients - * @param spo_list the list of SPOSet pointers in a walker batch - * @param P_list the list of ParticleSet pointers in a walker batch - * @param iat active particle - * @param phi_vgl_v orbital values, gradients and laplacians of all the walkers - * @param ratios, ratios of all walkers - * @param grads, spatial gradients of all walkers - * @param spingrads, spin gradients of all walkers - */ - void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads, - std::vector& spingrads) const override; - - /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles - * @param P current ParticleSet - * @param first starting index of the particles - * @param last ending index of the particles - * @param logdet determinant matrix to be inverted - * @param dlogdet gradients - * @param d2logdet laplacians - * - */ - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - - void mw_evaluate_notranspose(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int first, - int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const override; - - void 
evaluate_notranspose_spin(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet, - ValueMatrix& dspinlogdet) override; - /** Evaluate the values, spin gradients, and spin laplacians of single particle spinors corresponding to electron iat. - * @param P current particle set. - * @param iat electron index. - * @param spinor values. - * @param spin gradient values. d/ds phi(r,s). - * - */ - void evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi) override; - - /** evaluate the gradients of this single-particle orbital - * for [first,last) target particles with respect to the given source particle - * @param P current ParticleSet - * @param first starting index of the particles - * @param last ending index of the particles - * @param iat_src source particle index - * @param gradphi gradients - * - */ - virtual void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi) override; - - std::unique_ptr makeClone() const override; - - void createResource(ResourceCollection& collection) const override; - - void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const override; - - void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const override; - - /// check if the multi walker resource is owned. For testing only. - bool isResourceOwned() const { return bool(mw_res_handle_); } - -private: - struct SpinorSetMultiWalkerResource; - ResourceHandle mw_res_handle_; - - std::pair, RefVectorWithLeader> extractSpinComponentRefList( - const RefVectorWithLeader& spo_list) const; - - //Sposet for the up and down channels of our spinors. - std::unique_ptr spo_up; - std::unique_ptr spo_dn; - - //temporary arrays for holding the values of the up and down channels respectively. 
- ValueVector psi_work_up; - ValueVector psi_work_down; - - //temporary arrays for holding the gradients of the up and down channels respectively. - GradVector dpsi_work_up; - GradVector dpsi_work_down; - - //temporary arrays for holding the laplacians of the up and down channels respectively. - ValueVector d2psi_work_up; - ValueVector d2psi_work_down; - - //Same as above, but these are the full matrices containing all spinor/particle combinations. - ValueMatrix logpsi_work_up; - ValueMatrix logpsi_work_down; - - GradMatrix dlogpsi_work_up; - GradMatrix dlogpsi_work_down; - - ValueMatrix d2logpsi_work_up; - ValueMatrix d2logpsi_work_down; -}; +using SpinorSet = SpinorSetT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/TrialWaveFunction.h b/src/QMCWaveFunctions/TrialWaveFunction.h index c27196f494..24a7e22a7d 100644 --- a/src/QMCWaveFunctions/TrialWaveFunction.h +++ b/src/QMCWaveFunctions/TrialWaveFunction.h @@ -31,6 +31,7 @@ #include "QMCWaveFunctions/TWFFastDerivWrapper.h" #include "TWFGrads.hpp" #include "Utilities/RuntimeOptions.h" +#include "SPOSetT.h" /**@defgroup MBWfs Many-body wave function group * @brief Classes to handle many-body trial wave functions diff --git a/src/QMCWaveFunctions/VariableSet.cpp b/src/QMCWaveFunctions/VariableSet.cpp deleted file mode 100644 index e9ee94daf1..0000000000 --- a/src/QMCWaveFunctions/VariableSet.cpp +++ /dev/null @@ -1,314 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "VariableSet.h" -#include "io/hdf/hdf_archive.h" -#include "Host/sysutil.h" -#include -#include -#include -#include -#include - -using std::setw; - -namespace optimize -{ -void VariableSet::clear() -{ - num_active_vars = 0; - Index.clear(); - NameAndValue.clear(); - Recompute.clear(); - ParameterType.clear(); -} - -void VariableSet::insertFrom(const VariableSet& input) -{ - for (int i = 0; i < input.size(); ++i) - { - iterator loc = find(input.name(i)); - if (loc == NameAndValue.end()) - { - Index.push_back(input.Index[i]); - NameAndValue.push_back(input.NameAndValue[i]); - ParameterType.push_back(input.ParameterType[i]); - Recompute.push_back(input.Recompute[i]); - } - else - (*loc).second = input.NameAndValue[i].second; - } - num_active_vars = input.num_active_vars; -} - -void VariableSet::insertFromSum(const VariableSet& input_1, const VariableSet& input_2) -{ - value_type sum_val; - std::string vname; - - // Check that objects to be summed together have the same number of active - // variables. - if (input_1.num_active_vars != input_2.num_active_vars) - throw std::runtime_error("Inconsistent number of parameters in two provided " - "variable sets."); - - for (int i = 0; i < input_1.size(); ++i) - { - // Check that each of the equivalent variables in both VariableSet objects - // have the same name - otherwise we certainly shouldn't be adding them. 
- if (input_1.NameAndValue[i].first != input_2.NameAndValue[i].first) - throw std::runtime_error("Inconsistent parameters exist in the two provided " - "variable sets."); - - sum_val = input_1.NameAndValue[i].second + input_2.NameAndValue[i].second; - - iterator loc = find(input_1.name(i)); - if (loc == NameAndValue.end()) - { - Index.push_back(input_1.Index[i]); - ParameterType.push_back(input_1.ParameterType[i]); - Recompute.push_back(input_1.Recompute[i]); - - // We can reuse the above values, which aren't summed between the - // objects, but the parameter values themselves need to use the summed - // values. - vname = input_1.NameAndValue[i].first; - NameAndValue.push_back(pair_type(vname, sum_val)); - } - else - (*loc).second = sum_val; - } - num_active_vars = input_1.num_active_vars; -} - -void VariableSet::insertFromDiff(const VariableSet& input_1, const VariableSet& input_2) -{ - value_type diff_val; - std::string vname; - - // Check that objects to be subtracted have the same number of active - // variables. - if (input_1.num_active_vars != input_2.num_active_vars) - throw std::runtime_error("Inconsistent number of parameters in two provided " - "variable sets."); - - for (int i = 0; i < input_1.size(); ++i) - { - // Check that each of the equivalent variables in both VariableSet objects - // have the same name - otherwise we certainly shouldn't be subtracting them. 
- if (input_1.NameAndValue[i].first != input_2.NameAndValue[i].first) - throw std::runtime_error("Inconsistent parameters exist in the two provided " - "variable sets."); - - diff_val = input_1.NameAndValue[i].second - input_2.NameAndValue[i].second; - - iterator loc = find(input_1.name(i)); - if (loc == NameAndValue.end()) - { - Index.push_back(input_1.Index[i]); - ParameterType.push_back(input_1.ParameterType[i]); - Recompute.push_back(input_1.Recompute[i]); - - // We can reuse the above values, which aren't subtracted between the - // objects, but the parameter values themselves need to use the - // subtracted values. - vname = input_1.NameAndValue[i].first; - NameAndValue.push_back(pair_type(vname, diff_val)); - } - else - (*loc).second = diff_val; - } - num_active_vars = input_1.num_active_vars; -} - -void VariableSet::removeInactive() -{ - std::vector valid(Index); - std::vector acopy(NameAndValue); - std::vector bcopy(Recompute), ccopy(ParameterType); - num_active_vars = 0; - Index.clear(); - NameAndValue.clear(); - Recompute.clear(); - ParameterType.clear(); - for (int i = 0; i < valid.size(); ++i) - { - if (valid[i] > -1) - { - Index.push_back(num_active_vars++); - NameAndValue.push_back(acopy[i]); - Recompute.push_back(bcopy[i]); - ParameterType.push_back(ccopy[i]); - } - } -} - -void VariableSet::resetIndex() -{ - num_active_vars = 0; - for (int i = 0; i < Index.size(); ++i) - { - Index[i] = (Index[i] < 0) ? 
-1 : num_active_vars++; - } -} - -void VariableSet::getIndex(const VariableSet& selected) -{ - num_active_vars = 0; - for (int i = 0; i < NameAndValue.size(); ++i) - { - Index[i] = selected.getIndex(NameAndValue[i].first); - if (Index[i] >= 0) - num_active_vars++; - } -} - -int VariableSet::getIndex(const std::string& vname) const -{ - int loc = 0; - while (loc != NameAndValue.size()) - { - if (NameAndValue[loc].first == vname) - return Index[loc]; - ++loc; - } - return -1; -} - -void VariableSet::setIndexDefault() -{ - for (int i = 0; i < Index.size(); ++i) - Index[i] = i; -} - -void VariableSet::print(std::ostream& os, int leftPadSpaces, bool printHeader) const -{ - std::string pad_str = std::string(leftPadSpaces, ' '); - int max_name_len = 0; - if (NameAndValue.size() > 0) - max_name_len = - std::max_element(NameAndValue.begin(), NameAndValue.end(), [](const pair_type& e1, const pair_type& e2) { - return e1.first.length() < e2.first.length(); - })->first.length(); - - int max_value_len = 28; // 6 for the precision and 7 for minus sign, leading value, period, and exponent. 
- int max_type_len = 1; - int max_recompute_len = 1; - int max_use_len = 3; - int max_index_len = 1; - if (printHeader) - { - max_name_len = std::max(max_name_len, 4); // size of "Name" header - max_type_len = 4; - max_recompute_len = 9; - max_index_len = 5; - os << pad_str << setw(max_name_len) << "Name" - << " " << setw(max_value_len) << "Value" - << " " << setw(max_type_len) << "Type" - << " " << setw(max_recompute_len) << "Recompute" - << " " << setw(max_use_len) << "Use" - << " " << setw(max_index_len) << "Index" << std::endl; - os << pad_str << std::setfill('-') << setw(max_name_len) << "" - << " " << setw(max_value_len) << "" - << " " << setw(max_type_len) << "" - << " " << setw(max_recompute_len) << "" - << " " << setw(max_use_len) << "" - << " " << setw(max_index_len) << "" << std::endl; - os << std::setfill(' '); - } - - for (int i = 0; i < NameAndValue.size(); ++i) - { - os << pad_str << setw(max_name_len) << NameAndValue[i].first << " " << std::setprecision(6) << std::scientific - << setw(max_value_len) << NameAndValue[i].second << " " << setw(max_type_len) << ParameterType[i].second << " " - << setw(max_recompute_len) << Recompute[i].second << " "; - - os << std::defaultfloat; - - if (Index[i] < 0) - os << setw(max_use_len) << "OFF" << std::endl; - else - os << setw(max_use_len) << "ON" - << " " << setw(max_index_len) << Index[i] << std::endl; - } -} - -void VariableSet::writeToHDF(const std::string& filename, qmcplusplus::hdf_archive& hout) const -{ - hout.create(filename); - - // File Versioning - // 1.0.0 Initial file version - // 1.1.0 Files could have object-specific data from OptimizableObject::read/writeVariationalParameters - std::vector vp_file_version{1, 1, 0}; - hout.write(vp_file_version, "version"); - - std::string timestamp(getDateAndTime("%Y-%m-%d %H:%M:%S %Z")); - hout.write(timestamp, "timestamp"); - - hout.push("name_value_lists"); - - std::vector param_values; - std::vector param_names; - for (auto& pair_it : NameAndValue) - { - 
param_names.push_back(pair_it.first); - param_values.push_back(pair_it.second); - } - - hout.write(param_names, "parameter_names"); - hout.write(param_values, "parameter_values"); - hout.pop(); -} - -void VariableSet::readFromHDF(const std::string& filename, qmcplusplus::hdf_archive& hin) -{ - if (!hin.open(filename, H5F_ACC_RDONLY)) - { - std::ostringstream err_msg; - err_msg << "Unable to open VP file: " << filename; - throw std::runtime_error(err_msg.str()); - } - - try - { - hin.push("name_value_lists", false); - } - catch (std::runtime_error&) - { - std::ostringstream err_msg; - err_msg << "The group name_value_lists in not present in file: " << filename; - throw std::runtime_error(err_msg.str()); - } - - std::vector param_values; - hin.read(param_values, "parameter_values"); - - std::vector param_names; - hin.read(param_names, "parameter_names"); - - for (int i = 0; i < param_names.size(); i++) - { - std::string& vp_name = param_names[i]; - // Find and set values by name. - // Values that are not present do not get added. - if (find(vp_name) != end()) - (*this)[vp_name] = param_values[i]; - } - - hin.pop(); -} - - -} // namespace optimize diff --git a/src/QMCWaveFunctions/VariableSet.h b/src/QMCWaveFunctions/VariableSet.h index 66ba4da3bf..c2c88a271c 100644 --- a/src/QMCWaveFunctions/VariableSet.h +++ b/src/QMCWaveFunctions/VariableSet.h @@ -12,323 +12,15 @@ // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// - #ifndef QMCPLUSPLUS_OPTIMIZE_VARIABLESET_H #define QMCPLUSPLUS_OPTIMIZE_VARIABLESET_H -#include "config.h" -#include -#include -#include -#include -#include "Configuration.h" -namespace qmcplusplus -{ -class hdf_archive; -} +#include "Configuration.h" +#include "QMCWaveFunctions/VariableSetT.h" namespace optimize { -/** An enum useful for determining the type of parameter is being optimized. 
-* knowing this in the opt routine can reduce the computational load. -*/ -enum -{ - OTHER_P = 0, - LOGLINEAR_P, //B-spline Jastrows - LOGLINEAR_K, //K space Jastrows - LINEAR_P, //Multi-determinant coefficients - SPO_P, //SPO set Parameters - BACKFLOW_P //Backflow parameters -}; - -/** class to handle a set of variables that can be modified during optimizations - * - * A serialized container of named variables. - */ -struct VariableSet -{ - using value_type = qmcplusplus::QMCTraits::ValueType; - using real_type = qmcplusplus::QMCTraits::RealType; - - using pair_type = std::pair; - using index_pair_type = std::pair; - using iterator = std::vector::iterator; - using const_iterator = std::vector::const_iterator; - using size_type = std::vector::size_type; - - ///number of active variables - int num_active_vars; - /** store locator of the named variable - * - * if(Index[i] == -1), the named variable is not active - */ - std::vector Index; - std::vector NameAndValue; - std::vector ParameterType; - std::vector Recompute; - - ///default constructor - inline VariableSet() : num_active_vars(0) {} - ///viturval destructor for safety - virtual ~VariableSet() = default; - /** if any of Index value is not zero, return true - */ - inline bool is_optimizable() const { return num_active_vars > 0; } - ///return the number of active variables - inline int size_of_active() const { return num_active_vars; } - ///return the first const_iterator - inline const_iterator begin() const { return NameAndValue.begin(); } - ///return the last const_iterator - inline const_iterator end() const { return NameAndValue.end(); } - ///return the first iterator - inline iterator begin() { return NameAndValue.begin(); } - ///return the last iterator - inline iterator end() { return NameAndValue.end(); } - ///return the size - inline size_type size() const { return NameAndValue.size(); } - ///return the locator of the i-th Index - inline int where(int i) const { return Index[i]; } - /** return the 
iterator of a named parameter - * @param vname name of a parameter - * @return the locator of vname - * - * If vname is not found among the Names, return NameAndValue.end() - * so that ::end() member function can be used to validate the iterator. - */ - inline iterator find(const std::string& vname) - { - return std::find_if(NameAndValue.begin(), NameAndValue.end(), - [&vname](const auto& value) { return value.first == vname; }); - } - - /** return the Index vaule for the named parameter - * @param vname name of the variable - * - * If vname is not found in this variables, return -1; - */ - int getIndex(const std::string& vname) const; - - /* return the NameAndValue index for the named parameter - * @ param vname name of the variable - * - * Differs from getIndex by not relying on the indices cached in Index - * myVars[i] will always return the value of the parameter if it is stored - * regardless of whether or not the Index array has been correctly reset - * - * if vname is not found, return -1 - * - */ - inline int getLoc(const std::string& vname) const - { - int loc = 0; - while (loc != NameAndValue.size()) - { - if (NameAndValue[loc].first == vname) - return loc; - ++loc; - } - return -1; - } - - inline void insert(const std::string& vname, value_type v, bool enable = true, int type = OTHER_P) - { - iterator loc = find(vname); - int ind_loc = loc - NameAndValue.begin(); - if (loc == NameAndValue.end()) // && enable==true) - { - Index.push_back(ind_loc); - NameAndValue.push_back(pair_type(vname, v)); - ParameterType.push_back(index_pair_type(vname, type)); - Recompute.push_back(index_pair_type(vname, 1)); - } - //disable it if enable == false - if (!enable) - Index[ind_loc] = -1; - } - - inline void setParameterType(int type) - { - std::vector::iterator PTit(ParameterType.begin()), PTend(ParameterType.end()); - while (PTit != PTend) - { - (*PTit).second = type; - PTit++; - } - } - - inline void getParameterTypeList(std::vector& types) const - { - auto 
ptit(ParameterType.begin()), ptend(ParameterType.end()); - types.resize(ptend - ptit); - auto tit(types.begin()); - while (ptit != ptend) - (*tit++) = (*ptit++).second; - } - - - /** equivalent to std::map[string] operator - */ - inline value_type& operator[](const std::string& vname) - { - iterator loc = find(vname); - if (loc == NameAndValue.end()) - { - Index.push_back(-1); - NameAndValue.push_back(pair_type(vname, 0)); - ParameterType.push_back(index_pair_type(vname, 0)); - Recompute.push_back(index_pair_type(vname, 1)); - return NameAndValue.back().second; - } - return (*loc).second; - } - - - /** return the name of i-th variable - * @param i index - */ - const std::string& name(int i) const { return NameAndValue[i].first; } - - /** return the i-th value - * @param i index - */ - inline value_type operator[](int i) const { return NameAndValue[i].second; } - - /** assign the i-th value - * @param i index - */ - inline value_type& operator[](int i) { return NameAndValue[i].second; } - - /** get the i-th parameter's type - * @param i index - */ - inline int getType(int i) const { return ParameterType[i].second; } - - inline bool recompute(int i) const { return (Recompute[i].second == 1); } - - inline int& recompute(int i) { return Recompute[i].second; } - - inline void setComputed() - { - for (int i = 0; i < Recompute.size(); i++) - { - if (ParameterType[i].second == LOGLINEAR_P) - Recompute[i].second = 0; - else if (ParameterType[i].second == LOGLINEAR_K) - Recompute[i].second = 0; - else - Recompute[i].second = 1; - } - } - - inline void setRecompute() - { - for (int i = 0; i < Recompute.size(); i++) - Recompute[i].second = 1; - } - - /** clear the variable set - * - * Remove all the data. 
- */ - void clear(); - - /** insert a VariableSet to the list - * @param input variables - */ - void insertFrom(const VariableSet& input); - - /** sum together the values of the optimizable parameter values in - * two VariableSet objects, and set this object's values to equal them. - * @param first set of input variables - * @param second set of input variables - */ - void insertFromSum(const VariableSet& input_1, const VariableSet& input_2); - - /** take the difference (input_1-input_2) of values of the optimizable - * parameter values in two VariableSet objects, and set this object's - * values to equal them. - * @param first set of input variables - * @param second set of input variables - */ - void insertFromDiff(const VariableSet& input_1, const VariableSet& input_2); - - /** activate variables for optimization - * @param first iterator of the first name - * @param last iterator of the last name - * @param reindex if true, Index is updated - * - * The status of a variable that is not included in the [first,last) - * remains the same. 
- */ - template - void activate(ForwardIterator first, ForwardIterator last, bool reindex) - { - while (first != last) - { - iterator loc = find(*first++); - if (loc != NameAndValue.end()) - { - int i = loc - NameAndValue.begin(); - if (Index[i] < 0) - Index[i] = num_active_vars++; - } - } - if (reindex) - { - removeInactive(); - resetIndex(); - } - } - - /** deactivate variables for optimization - * @param first iterator of the first name - * @param last iterator of the last name - * @param reindex if true, the variales are removed and Index is updated - */ - template - void disable(ForwardIterator first, ForwardIterator last, bool reindex) - { - while (first != last) - { - int loc = find(*first++) - NameAndValue.begin(); - if (loc < NameAndValue.size()) - Index[loc] = -1; - } - if (reindex) - { - removeInactive(); - resetIndex(); - } - } - - /** reset Index - */ - void resetIndex(); - /** remove inactive variables and trim the internal data - */ - void removeInactive(); - - /** set the index table of this VariableSet - * @param selected input variables - * - * This VariableSet is a subset of selected. - */ - void getIndex(const VariableSet& selected); - - /** set default Indices, namely all the variables are active - */ - void setIndexDefault(); - - void print(std::ostream& os, int leftPadSpaces = 0, bool printHeader = false) const; - - // Save variational parameters to an HDF file - void writeToHDF(const std::string& filename, qmcplusplus::hdf_archive& hout) const; - - /// Read variational parameters from an HDF file. - /// This assumes VariableSet is already set up. 
- void readFromHDF(const std::string& filename, qmcplusplus::hdf_archive& hin); -}; +using VariableSet = VariableSetT; } // namespace optimize #endif diff --git a/src/QMCWaveFunctions/VariableSetT.cpp b/src/QMCWaveFunctions/VariableSetT.cpp index 064ac26a13..2c49401066 100644 --- a/src/QMCWaveFunctions/VariableSetT.cpp +++ b/src/QMCWaveFunctions/VariableSetT.cpp @@ -338,9 +338,9 @@ VariableSetT::readFromHDF( hin.pop(); } -template struct VariableSetT; -template struct VariableSetT; -template struct VariableSetT>; -template struct VariableSetT>; +template class VariableSetT; +template class VariableSetT; +template class VariableSetT>; +template class VariableSetT>; } // namespace optimize diff --git a/src/QMCWaveFunctions/VariableSetT.h b/src/QMCWaveFunctions/VariableSetT.h index 9a0675a184..807ede25b1 100644 --- a/src/QMCWaveFunctions/VariableSetT.h +++ b/src/QMCWaveFunctions/VariableSetT.h @@ -20,7 +20,6 @@ #include #include #include -#include "VariableSet.h" #include "OrbitalSetTraits.h" namespace qmcplusplus @@ -33,23 +32,24 @@ namespace optimize /** An enum useful for determining the type of parameter is being optimized. * knowing this in the opt routine can reduce the computational load. */ -// enum -// { -// OTHER_P = 0, -// LOGLINEAR_P, //B-spline Jastrows -// LOGLINEAR_K, //K space Jastrows -// LINEAR_P, //Multi-determinant coefficients -// SPO_P, //SPO set Parameters -// BACKFLOW_P //Backflow parameters -// }; +enum +{ + OTHER_P = 0, + LOGLINEAR_P, //B-spline Jastrows + LOGLINEAR_K, //K space Jastrows + LINEAR_P, //Multi-determinant coefficients + SPO_P, //SPO set Parameters + BACKFLOW_P //Backflow parameters +}; /** class to handle a set of variables that can be modified during optimizations * * A serialized container of named variables. 
*/ -template -struct VariableSetT +template +class VariableSetT { +public: using value_type = typename qmcplusplus::OrbitalSetTraits::ValueType; using real_type = typename qmcplusplus::OrbitalSetTraits::RealType; diff --git a/src/QMCWaveFunctions/WaveFunctionPool.h b/src/QMCWaveFunctions/WaveFunctionPool.h index 36e562367b..81459ae6dc 100644 --- a/src/QMCWaveFunctions/WaveFunctionPool.h +++ b/src/QMCWaveFunctions/WaveFunctionPool.h @@ -21,13 +21,14 @@ #include "Message/MPIObjectBase.h" #include "QMCWaveFunctions/WaveFunctionFactory.h" #include "Utilities/RuntimeOptions.h" +#include "Particle/ParticleSet.h" +#include "Particle/ParticleSetPool.h" + #include #include namespace qmcplusplus { -class ParticleSetPool; -class ParticleSet; /** @ingroup qmcapp * @brief Manage a collection of TrialWaveFunction objects diff --git a/src/QMCWaveFunctions/tests/CMakeLists.txt b/src/QMCWaveFunctions/tests/CMakeLists.txt index ee68f38a1e..01db0f9123 100644 --- a/src/QMCWaveFunctions/tests/CMakeLists.txt +++ b/src/QMCWaveFunctions/tests/CMakeLists.txt @@ -141,17 +141,10 @@ set(DETERMINANT_SRC test_ci_configuration.cpp test_multi_slater_determinant.cpp) -add_library(sposets_for_testing FakeSPOT.cpp FakeSPO.cpp ConstantSPOSet.cpp ConstantSPOSetT.cpp) +add_library(sposets_for_testing FakeSPOT.cpp FakeSPO.cpp ConstantSPOSetT.cpp) target_include_directories(sposets_for_testing PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}") target_link_libraries(sposets_for_testing PUBLIC qmcwfs) -# @TODO: Remove when rotations work for complex stuff -if(NOT QMC_COMPLEX) - if(NOT ENABLE_CUDA) - set(SPOSET_SRC test_RotatedSPOs.cpp ${SPOSET_SRC}) - endif() -endif() - if(ENABLE_CUDA) set(DETERMINANT_SRC ${DETERMINANT_SRC} test_DiracMatrixComputeCUDA.cpp test_cuBLAS_LU.cpp) if(NOT QMC_CUDA2HIP) diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSet.cpp b/src/QMCWaveFunctions/tests/ConstantSPOSet.cpp deleted file mode 100644 index f9ad56e330..0000000000 --- a/src/QMCWaveFunctions/tests/ConstantSPOSet.cpp +++ /dev/null 
@@ -1,100 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2023 Raymond Clay and QMCPACK developers. -// -// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories -// -// File created by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories -////////////////////////////////////////////////////////////////////////////////////// - -#include "QMCWaveFunctions/tests/ConstantSPOSet.h" - -namespace qmcplusplus -{ -ConstantSPOSet::ConstantSPOSet(const std::string& my_name, const int nparticles, const int norbitals) - : SPOSet(my_name), numparticles_(nparticles) -{ - OrbitalSetSize = norbitals; - ref_psi_.resize(numparticles_, OrbitalSetSize); - ref_egrad_.resize(numparticles_, OrbitalSetSize); - ref_elapl_.resize(numparticles_, OrbitalSetSize); - - ref_psi_ = 0.0; - ref_egrad_ = 0.0; - ref_elapl_ = 0.0; -}; - -std::unique_ptr ConstantSPOSet::makeClone() const -{ - auto myclone = std::make_unique(my_name_, numparticles_, OrbitalSetSize); - myclone->setRefVals(ref_psi_); - myclone->setRefEGrads(ref_egrad_); - myclone->setRefELapls(ref_elapl_); - return myclone; -}; - -std::string ConstantSPOSet::getClassName() const { return "ConstantSPOSet"; }; - -void ConstantSPOSet::checkOutVariables(const opt_variables_type& active) -{ - APP_ABORT("ConstantSPOSet should not call checkOutVariables"); -}; - -void ConstantSPOSet::setOrbitalSetSize(int norbs) { APP_ABORT("ConstantSPOSet should not call setOrbitalSetSize()"); } - -void ConstantSPOSet::setRefVals(const ValueMatrix& vals) -{ - assert(vals.cols() == OrbitalSetSize); - assert(vals.rows() == numparticles_); - ref_psi_ = vals; -}; -void ConstantSPOSet::setRefEGrads(const GradMatrix& grads) -{ - assert(grads.cols() == OrbitalSetSize); - assert(grads.rows() == numparticles_); - ref_egrad_ = 
grads; -}; -void ConstantSPOSet::setRefELapls(const ValueMatrix& lapls) -{ - assert(lapls.cols() == OrbitalSetSize); - assert(lapls.rows() == numparticles_); - ref_elapl_ = lapls; -}; - -void ConstantSPOSet::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) -{ - const auto* vp = dynamic_cast(&P); - int ptcl = vp ? vp->refPtcl : iat; - assert(psi.size() == OrbitalSetSize); - for (int iorb = 0; iorb < OrbitalSetSize; iorb++) - psi[iorb] = ref_psi_(ptcl, iorb); -}; - -void ConstantSPOSet::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) -{ - for (int iorb = 0; iorb < OrbitalSetSize; iorb++) - { - psi[iorb] = ref_psi_(iat, iorb); - dpsi[iorb] = ref_egrad_(iat, iorb); - d2psi[iorb] = ref_elapl_(iat, iorb); - } -}; - -void ConstantSPOSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) -{ - for (int iat = first, i = 0; iat < last; ++iat, ++i) - { - ValueVector v(logdet[i], logdet.cols()); - GradVector g(dlogdet[i], dlogdet.cols()); - ValueVector l(d2logdet[i], d2logdet.cols()); - evaluateVGL(P, iat, v, g, l); - } -} -} //namespace qmcplusplus diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSet.h b/src/QMCWaveFunctions/tests/ConstantSPOSet.h index c42461856d..5e139d57ba 100644 --- a/src/QMCWaveFunctions/tests/ConstantSPOSet.h +++ b/src/QMCWaveFunctions/tests/ConstantSPOSet.h @@ -13,75 +13,12 @@ #ifndef QMCPLUSPLUS_CONSTANTSPOSET_H #define QMCPLUSPLUS_CONSTANTSPOSET_H -#include "QMCWaveFunctions/SPOSet.h" +#include "Configuration.h" +#include "QMCWaveFunctions/tests/ConstantSPOSetT.h" namespace qmcplusplus { -/** Constant SPOSet for testing purposes. Fixed N_elec x N_orb matrices storing value, gradients, and laplacians, etc., - * These values are accessed through standard SPOSet calls like evaluateValue, evaluateVGL, etc. - * Exists to provide deterministic and known output to objects requiring SPOSet evaluations. 
- * - */ -class ConstantSPOSet : public SPOSet -{ -public: - ConstantSPOSet(const std::string& my_name) = delete; - - //Constructor needs number of particles and number of orbitals. This is the minimum - //amount of information needed to sanely construct all data members and perform size - //checks later. - ConstantSPOSet(const std::string& my_name, const int nparticles, const int norbitals); - - std::unique_ptr makeClone() const override; - - std::string getClassName() const override; - - void checkOutVariables(const opt_variables_type& active) override; - - void setOrbitalSetSize(int norbs) override; - - /** - * @brief Setter method to set \phi_j(r_i). Stores input matrix in ref_psi_. - * @param Nelec x Nion ValueType matrix of \phi_j(r_i) - * @return void - */ - void setRefVals(const ValueMatrix& vals); - /** - * @brief Setter method to set \nabla_i \phi_j(r_i). Stores input matrix in ref_egrad_. - * @param Nelec x Nion GradType matrix of \grad_i \phi_j(r_i) - * @return void - */ - void setRefEGrads(const GradMatrix& grads); - /** - * @brief Setter method to set \nabla^2_i \phi_j(r_i). Stores input matrix in ref_elapl_. - * @param Nelec x Nion GradType matrix of \grad^2_i \phi_j(r_i) - * @return void - */ - void setRefELapls(const ValueMatrix& lapls); - - void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override; - - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - -private: - const int numparticles_; /// evaluate_notranspose arrays are nparticle x norb matrices. - /// To ensure consistent array sizing and enforcement, - /// we agree at construction how large these matrices will be. - /// norb is stored in SPOSet::OrbitalSetSize. - - //Value, electron gradient, and electron laplacian at "reference configuration". - //i.e. 
before any attempted moves. +using ConstantSPOSet = ConstantSPOSetT; - ValueMatrix ref_psi_; - GradMatrix ref_egrad_; - ValueMatrix ref_elapl_; -}; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp b/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp index ecdb5dd696..5ada9b4f9d 100644 --- a/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp +++ b/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp @@ -43,9 +43,8 @@ ConstantSPOSetT::makeClone() const return myclone; } -template -void -ConstantSPOSetT::checkOutVariables(const OptVariablesType& active) +template +void ConstantSPOSetT::checkOutVariables(const OptVariablesTypeT& active) { APP_ABORT("ConstantSPOSet should not call checkOutVariables"); }; diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSetT.h b/src/QMCWaveFunctions/tests/ConstantSPOSetT.h index d1ee5b24f7..335796df96 100644 --- a/src/QMCWaveFunctions/tests/ConstantSPOSetT.h +++ b/src/QMCWaveFunctions/tests/ConstantSPOSetT.h @@ -49,8 +49,7 @@ class ConstantSPOSetT : public SPOSetT return "ConstantSPOSet"; }; - void - checkOutVariables(const OptVariablesType& active) final; + void checkOutVariables(const OptVariablesTypeT& active) final; void setOrbitalSetSize(int norbs) final; diff --git a/src/QMCWaveFunctions/tests/test_DiracDeterminantBatched.cpp b/src/QMCWaveFunctions/tests/test_DiracDeterminantBatched.cpp index 4ce591df94..aa5149ee9f 100644 --- a/src/QMCWaveFunctions/tests/test_DiracDeterminantBatched.cpp +++ b/src/QMCWaveFunctions/tests/test_DiracDeterminantBatched.cpp @@ -776,7 +776,7 @@ void test_DiracDeterminantBatched_spinor_update(const int delay_rank, DetMatInve //reject move and check for initial values for mw_evalGrad std::fill(grads.begin(), grads.end(), 0); - elec_.mw_accept_rejectMove(p_ref_list, 1, {false, false}); + elec_.mw_accept_rejectMoveT(p_ref_list, 1, {false, false}); dd.mw_evalGrad(dd_ref_list, p_ref_list, 1, grads); for (int iw = 0; iw < grads.size(); iw++) { @@ -798,7 +798,7 @@ void 
test_DiracDeterminantBatched_spinor_update(const int delay_rank, DetMatInve //now make and accept move, checking new values elec_.mw_makeMove(p_ref_list, 1, displs); - elec_.mw_accept_rejectMove(p_ref_list, 1, {true, true}); + elec_.mw_accept_rejectMoveT(p_ref_list, 1, {true, true}); G = 0; L = 0; diff --git a/src/QMCWaveFunctions/tests/test_MO.cpp b/src/QMCWaveFunctions/tests/test_MO.cpp index 95539da705..24f31800a9 100644 --- a/src/QMCWaveFunctions/tests/test_MO.cpp +++ b/src/QMCWaveFunctions/tests/test_MO.cpp @@ -19,7 +19,8 @@ #include "Numerics/GaussianBasisSet.h" #include "QMCWaveFunctions/LCAO/LCAOrbitalBuilder.h" #include "QMCWaveFunctions/SPOSetBuilderFactory.h" -#include +#include "ResourceCollection.h" +#include "QMCWaveFunctions/SPOSet.h" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/tests/test_MO_spinor.cpp b/src/QMCWaveFunctions/tests/test_MO_spinor.cpp index 721a563349..1b8f323dec 100644 --- a/src/QMCWaveFunctions/tests/test_MO_spinor.cpp +++ b/src/QMCWaveFunctions/tests/test_MO_spinor.cpp @@ -20,6 +20,7 @@ #include "QMCWaveFunctions/SPOSetBuilderFactory.h" #include "Utilities/ResourceCollection.h" #include "QMCWaveFunctions/SpinorSet.h" +#include "QMCWaveFunctions/SPOSet.h" namespace qmcplusplus { @@ -114,11 +115,11 @@ void test_lcao_spinor() CHECK(d2psiM[iat][0] == ComplexApprox(vlp).epsilon(eps)); } - /** this is a somewhat simple example. We have an ion at the origin - * and a gaussian basis function centered on the ion as a orbital. - * In this case, the ion derivative is actually just the negative of - * the electron gradient. - */ + // this is a somewhat simple example. We have an ion at the origin + // and a gaussian basis function centered on the ion as a orbital. + // In this case, the ion derivative is actually just the negative of + // the electron gradient. 
+ SPOSet::GradMatrix gradIon(elec_.R.size(), spo->getOrbitalSetSize()); spo->evaluateGradSource(elec_, 0, elec_.R.size(), ions_, 0, gradIon); for (int iat = 0; iat < 1; iat++) @@ -146,9 +147,9 @@ void test_lcao_spinor() d2psi_work.resize(OrbitalSetSize); dspsi_work.resize(OrbitalSetSize); - //We worked hard to generate nice reference data above. Let's generate a test for evaluateV - //and evaluateVGL by perturbing the electronic configuration by dR, and then make - //single particle moves that bring it back to our original R reference values. + // We worked hard to generate nice reference data above. Let's generate a test for evaluateV + // and evaluateVGL by perturbing the electronic configuration by dR, and then make + // single particle moves that bring it back to our original R reference values. //Our perturbation vector. ParticleSet::ParticlePos dR; @@ -164,7 +165,7 @@ void test_lcao_spinor() elec_.R = Rnew; elec_.update(); - //Now we test evaluateValue() + // Now we test evaluateValue() for (unsigned int iat = 0; iat < 1; iat++) { psi_work = 0.0; @@ -175,7 +176,7 @@ void test_lcao_spinor() elec_.rejectMove(iat); } - //Now we test evaluateVGL() + // Now we test evaluateVGL() for (unsigned int iat = 0; iat < 1; iat++) { psi_work = 0.0; @@ -195,7 +196,7 @@ void test_lcao_spinor() elec_.rejectMove(iat); } - //Now we test evaluateSpin: + // Now we test evaluateSpin: for (unsigned int iat = 0; iat < 1; iat++) { @@ -217,11 +218,11 @@ void test_lcao_spinor() elec_.R = Rnew; elec_.update(); - //make a spin displacement, just set to zero for the test + // make a spin displacement, just set to zero for the test ParticleSet::ParticleScalar dS; dS.resize(1); - //now create second walker + // now create second walker ParticleSet elec_2(elec_); elec_2.R[0] = {-0.4, 1.5, -0.2}; elec_2.spins[0] = -1.3; @@ -248,8 +249,8 @@ void test_lcao_spinor() spo_list.push_back(*spo); spo_list.push_back(*spo_2); - //test resource APIs - //First resource is created, and then passed to the 
colleciton so it should be null + // test resource APIs + // First resource is created, and then passed to the colleciton so it should be null ResourceCollection spo_res("test_spo_res"); spo->createResource(spo_res); SpinorSet& spinor = spo_list.getCastedLeader(); @@ -275,13 +276,13 @@ void test_lcao_spinor() spo->mw_evaluate_notranspose(spo_list, p_list, 0, 1, logdet_list, dlogdet_list, d2logdet_list); for (unsigned int iat = 0; iat < 1; iat++) { - //walker 0 + // walker 0 CHECK(logdet_list[0].get()[iat][0] == ComplexApprox(val).epsilon(eps)); CHECK(dlogdet_list[0].get()[iat][0][0] == ComplexApprox(vdx).epsilon(eps)); CHECK(dlogdet_list[0].get()[iat][0][1] == ComplexApprox(vdy).epsilon(eps)); CHECK(dlogdet_list[0].get()[iat][0][2] == ComplexApprox(vdz).epsilon(eps)); CHECK(d2logdet_list[0].get()[iat][0] == ComplexApprox(vlp).epsilon(eps)); - //walker 1 + // walker 1 CHECK(logdet_list[1].get()[iat][0] == ComplexApprox(val2).epsilon(eps)); CHECK(dlogdet_list[1].get()[iat][0][0] == ComplexApprox(vdx2).epsilon(eps)); CHECK(dlogdet_list[1].get()[iat][0][1] == ComplexApprox(vdy2).epsilon(eps)); @@ -289,7 +290,7 @@ void test_lcao_spinor() CHECK(d2logdet_list[1].get()[iat][0] == ComplexApprox(vlp2).epsilon(eps)); } - //first, lets displace all the elec in each walker + // first, lets displace all the elec in each walker for (int iat = 0; iat < 1; iat++) { MCCoords displs(2); @@ -297,7 +298,7 @@ void test_lcao_spinor() displs.spins = {dS[iat], dS[iat]}; elec_.mw_makeMove(p_list, iat, displs); std::vector accept = {true, true}; - elec_.mw_accept_rejectMove(p_list, iat, accept); + elec_.mw_accept_rejectMoveT(p_list, iat, accept); } elec_.mw_update(p_list); @@ -310,10 +311,10 @@ void test_lcao_spinor() RefVector d2psi_v_list = {d2psi_work, d2psi_work_2}; SPOSet::OffloadMatrix mw_dspin; mw_dspin.resize(2, OrbitalSetSize); - //check mw_evaluateVGLWithSpin + // check mw_evaluateVGLWithSpin for (int iat = 0; iat < 1; iat++) { - //reset values to zero, updates the ref vectors to 
zero as well + // reset values to zero, updates the ref vectors to zero as well psi_work = 0.0; dpsi_work = 0.0; d2psi_work = 0.0; @@ -327,14 +328,14 @@ void test_lcao_spinor() displs.spins = {-dS[iat], -dS[iat]}; elec_.mw_makeMove(p_list, iat, displs); spo->mw_evaluateVGLWithSpin(spo_list, p_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list, mw_dspin); - //walker 0 + // walker 0 CHECK(psi_v_list[0].get()[0] == ComplexApprox(val).epsilon(eps)); CHECK(dpsi_v_list[0].get()[0][0] == ComplexApprox(vdx).epsilon(eps)); CHECK(dpsi_v_list[0].get()[0][1] == ComplexApprox(vdy).epsilon(eps)); CHECK(dpsi_v_list[0].get()[0][2] == ComplexApprox(vdz).epsilon(eps)); CHECK(d2psi_v_list[0].get()[0] == ComplexApprox(vlp).epsilon(eps)); CHECK(mw_dspin(0, 0) == ComplexApprox(vds).epsilon(eps)); - //walker 1 + // walker 1 CHECK(psi_v_list[1].get()[0] == ComplexApprox(val2).epsilon(eps)); CHECK(dpsi_v_list[1].get()[0][0] == ComplexApprox(vdx2).epsilon(eps)); CHECK(dpsi_v_list[1].get()[0][1] == ComplexApprox(vdy2).epsilon(eps)); @@ -343,7 +344,7 @@ void test_lcao_spinor() CHECK(mw_dspin(1, 0) == ComplexApprox(vds2).epsilon(eps)); std::vector accept = {false, false}; - elec_.mw_accept_rejectMove(p_list, iat, accept); + elec_.mw_accept_rejectMoveT(p_list, iat, accept); } } @@ -621,7 +622,7 @@ void test_lcao_spinor_excited() displs.spins = {dS[iat], dS[iat]}; elec_.mw_makeMove(p_list, iat, displs); std::vector accept = {true, true}; - elec_.mw_accept_rejectMove(p_list, iat, accept); + elec_.mw_accept_rejectMoveT(p_list, iat, accept); } elec_.mw_update(p_list); @@ -666,7 +667,7 @@ void test_lcao_spinor_excited() CHECK(d2psi_v_list[1].get()[0] == ComplexApprox(vlp2).epsilon(eps)); CHECK(mw_dspin(1, 0) == ComplexApprox(vds2).epsilon(eps)); std::vector accept = {false, false}; - elec_.mw_accept_rejectMove(p_list, iat, accept); + elec_.mw_accept_rejectMoveT(p_list, iat, accept); } } diff --git a/src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp b/src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp 
deleted file mode 100644 index af6f5b9cf0..0000000000 --- a/src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp +++ /dev/null @@ -1,868 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2022 QMCPACK developers. -// -// File developed by: Joshua Townsend, jptowns@sandia.gov, Sandia National Laboratories -// -// File created by: Joshua Townsend, jptowns@sandia.gov, Sandia National Laboratories -////////////////////////////////////////////////////////////////////////////////////// - -#include "catch.hpp" - -#include "type_traits/template_types.hpp" -#include "type_traits/ConvertToReal.h" -#include "OhmmsData/Libxml2Doc.h" -#include "OhmmsPETE/OhmmsMatrix.h" -#include "Particle/ParticleSet.h" -#include "Particle/ParticleSetPool.h" -#include "QMCWaveFunctions/WaveFunctionComponent.h" -#include "BsplineFactory/EinsplineSetBuilder.h" -#include "QMCWaveFunctions/RotatedSPOs.h" -#include "checkMatrix.hpp" -#include "FakeSPO.h" -#include - -#include -#include -#include - -using std::string; - -namespace qmcplusplus -{ -/* - JPT 04.01.2022: Adapted from test_einset.cpp - Test the spline rotated machinery for SplineR2R (extend to others later). -*/ -TEST_CASE("RotatedSPOs via SplineR2R", "[wavefunction]") -{ - using RealType = QMCTraits::RealType; - - /* - BEGIN Boilerplate stuff to make a simple SPOSet. Copied from test_einset.cpp - */ - - Communicate* c = OHMMS::Controller; - - // We get a "Mismatched supercell lattices" error due to default ctor? - ParticleSet::ParticleLayout lattice; - - // diamondC_1x1x1 - lattice.R = {3.37316115, 3.37316115, 0.0, 0.0, 3.37316115, 3.37316115, 3.37316115, 0.0, 3.37316115}; - - ParticleSetPool ptcl = ParticleSetPool(c); - ptcl.setSimulationCell(lattice); - // LAttice seems fine after this point... 
- - auto ions_uptr = std::make_unique(ptcl.getSimulationCell()); - auto elec_uptr = std::make_unique(ptcl.getSimulationCell()); - ParticleSet& ions_(*ions_uptr); - ParticleSet& elec_(*elec_uptr); - - ions_.setName("ion"); - ptcl.addParticleSet(std::move(ions_uptr)); - ions_.create({2}); - ions_.R[0] = {0.0, 0.0, 0.0}; - ions_.R[1] = {1.68658058, 1.68658058, 1.68658058}; - elec_.setName("elec"); - ptcl.addParticleSet(std::move(elec_uptr)); - elec_.create({2}); - elec_.R[0] = {0.0, 0.0, 0.0}; - elec_.R[1] = {0.0, 1.0, 0.0}; - SpeciesSet& tspecies = elec_.getSpeciesSet(); - int upIdx = tspecies.addSpecies("u"); - int chargeIdx = tspecies.addAttribute("charge"); - tspecies(chargeIdx, upIdx) = -1; - - //diamondC_1x1x1 - 8 bands available - const char* particles = R"( - - -)"; - - Libxml2Document doc; - bool okay = doc.parseFromString(particles); - REQUIRE(okay); - - xmlNodePtr root = doc.getRoot(); - - xmlNodePtr ein1 = xmlFirstElementChild(root); - - EinsplineSetBuilder einSet(elec_, ptcl.getPool(), c, ein1); - auto spo = einSet.createSPOSetFromXML(ein1); - REQUIRE(spo); - - /* - END Boilerplate stuff. Now we have a SplineR2R wavefunction - ready for rotation. What follows is the actual test. - */ - - // SplineR2R only for the moment, so skip if QMC_COMPLEX is set -#if !defined(QMC_COMPLEX) - - spo->storeParamsBeforeRotation(); - // 1.) Make a RotatedSPOs object so that we can use the rotation routines - auto rot_spo = std::make_unique("one_rotated_set", std::move(spo)); - - // Sanity check for orbs. Expect 2 electrons, 8 orbitals, & 79507 coefs/orb. - const auto orbitalsetsize = rot_spo->getOrbitalSetSize(); - REQUIRE(orbitalsetsize == 8); - - // 2.) Get data for unrotated orbitals. 
Check that there's no rotation - rot_spo->buildOptVariables(elec_.R.size()); - SPOSet::ValueMatrix psiM_bare(elec_.R.size(), orbitalsetsize); - SPOSet::GradMatrix dpsiM_bare(elec_.R.size(), orbitalsetsize); - SPOSet::ValueMatrix d2psiM_bare(elec_.R.size(), orbitalsetsize); - rot_spo->evaluate_notranspose(elec_, 0, elec_.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare); - - // This stuff checks that no rotation was applied. Copied from test_einset.cpp. - // value - CHECK(std::real(psiM_bare[1][0]) == Approx(-0.8886948824)); - CHECK(std::real(psiM_bare[1][1]) == Approx(1.4194120169)); - // grad - CHECK(std::real(dpsiM_bare[1][0][0]) == Approx(-0.0000183403)); - CHECK(std::real(dpsiM_bare[1][0][1]) == Approx(0.1655139178)); - CHECK(std::real(dpsiM_bare[1][0][2]) == Approx(-0.0000193077)); - CHECK(std::real(dpsiM_bare[1][1][0]) == Approx(-1.3131694794)); - CHECK(std::real(dpsiM_bare[1][1][1]) == Approx(-1.1174004078)); - CHECK(std::real(dpsiM_bare[1][1][2]) == Approx(-0.8462534547)); - // lapl - CHECK(std::real(d2psiM_bare[1][0]) == Approx(1.3313053846)); - CHECK(std::real(d2psiM_bare[1][1]) == Approx(-4.712583065)); - - /* - 3.) Apply a rotation to the orbitals - To do this, construct a params vector and call the - RotatedSPOs::apply_rotation(params) method. That should do the - right thing for this particular spline class. - - For 2 electrons in 8 orbs, we expect 2*(8-2) = 12 params. - */ - const auto rot_size = rot_spo->m_act_rot_inds.size(); - REQUIRE(rot_size == 12); // = Nelec*(Norbs - Nelec) = 2*(8-2) = 12 - std::vector param(rot_size); - for (auto i = 0; i < rot_size; i++) - { - param[i] = 0.01 * static_cast(i); - } - rot_spo->apply_rotation(param, false); // Expect this to call SplineR2R::applyRotation() - - // 4.) Get data for rotated orbitals. 
- SPOSet::ValueMatrix psiM_rot(elec_.R.size(), orbitalsetsize); - SPOSet::GradMatrix dpsiM_rot(elec_.R.size(), orbitalsetsize); - SPOSet::ValueMatrix d2psiM_rot(elec_.R.size(), orbitalsetsize); - rot_spo->evaluate_notranspose(elec_, 0, elec_.R.size(), psiM_rot, dpsiM_rot, d2psiM_rot); - - /* - Manually encode the unitary transformation. Ugly, but it works. - @TODO: Use the total rotation machinery when it's implemented - - NB: This is truncated to 5 sig-figs, so there is some slop here as - compared to what is done in the splines via apply_rotation(). - So below we reduce the threshold for comparison. This can - probably be ditched once we have a way to grab the actual - rotation matrix... - */ - SPOSet::ValueMatrix rot_mat(orbitalsetsize, orbitalsetsize); - rot_mat[0][0] = 0.99726; - rot_mat[0][1] = -0.00722; - rot_mat[0][2] = 0.00014; - rot_mat[0][3] = -0.00982; - rot_mat[0][4] = -0.01979; - rot_mat[0][5] = -0.02976; - rot_mat[0][6] = -0.03972; - rot_mat[0][7] = -0.04969; - rot_mat[1][0] = -0.00722; - rot_mat[1][1] = 0.97754; - rot_mat[1][2] = -0.05955; - rot_mat[1][3] = -0.06945; - rot_mat[1][4] = -0.07935; - rot_mat[1][5] = -0.08925; - rot_mat[1][6] = -0.09915; - rot_mat[1][7] = -0.10905; - rot_mat[2][0] = -0.00014; - rot_mat[2][1] = 0.05955; - rot_mat[2][2] = 0.99821; - rot_mat[2][3] = -0.00209; - rot_mat[2][4] = -0.00239; - rot_mat[2][5] = -0.00269; - rot_mat[2][6] = -0.00299; - rot_mat[2][7] = -0.00329; - rot_mat[3][0] = 0.00982; - rot_mat[3][1] = 0.06945; - rot_mat[3][2] = -0.00209; - rot_mat[3][3] = 0.99751; - rot_mat[3][4] = -0.00289; - rot_mat[3][5] = -0.00329; - rot_mat[3][6] = -0.00368; - rot_mat[3][7] = -0.00408; - rot_mat[4][0] = 0.01979; - rot_mat[4][1] = 0.07935; - rot_mat[4][2] = -0.00239; - rot_mat[4][3] = -0.00289; - rot_mat[4][4] = 0.99661; - rot_mat[4][5] = -0.00388; - rot_mat[4][6] = -0.00438; - rot_mat[4][7] = -0.00488; - rot_mat[5][0] = 0.02976; - rot_mat[5][1] = 0.08925; - rot_mat[5][2] = -0.00269; - rot_mat[5][3] = -0.00329; - 
rot_mat[5][4] = -0.00388; - rot_mat[5][5] = 0.99552; - rot_mat[5][6] = -0.00508; - rot_mat[5][7] = -0.00568; - rot_mat[6][0] = 0.03972; - rot_mat[6][1] = 0.09915; - rot_mat[6][2] = -0.00299; - rot_mat[6][3] = -0.00368; - rot_mat[6][4] = -0.00438; - rot_mat[6][5] = -0.00508; - rot_mat[6][6] = 0.99422; - rot_mat[6][7] = -0.00647; - rot_mat[7][0] = 0.04969; - rot_mat[7][1] = 0.10905; - rot_mat[7][2] = -0.00329; - rot_mat[7][3] = -0.00408; - rot_mat[7][4] = -0.00488; - rot_mat[7][5] = -0.00568; - rot_mat[7][6] = -0.00647; - rot_mat[7][7] = 0.99273; - - // Now compute the expected values by hand using the transformation above - double val1 = 0.; - double val2 = 0.; - for (auto i = 0; i < rot_mat.size1(); i++) - { - val1 += psiM_bare[0][i] * rot_mat[i][0]; - val2 += psiM_bare[1][i] * rot_mat[i][0]; - } - - // value - CHECK(std::real(psiM_rot[0][0]) == Approx(val1)); - CHECK(std::real(psiM_rot[1][0]) == Approx(val2)); - - std::vector grad1(3); - std::vector grad2(3); - for (auto j = 0; j < grad1.size(); j++) - { - for (auto i = 0; i < rot_mat.size1(); i++) - { - grad1[j] += dpsiM_bare[0][i][j] * rot_mat[i][0]; - grad2[j] += dpsiM_bare[1][i][j] * rot_mat[i][0]; - } - } - - // grad - CHECK(dpsiM_rot[0][0][0] == Approx(grad1[0]).epsilon(0.0001)); - CHECK(dpsiM_rot[0][0][1] == Approx(grad1[1]).epsilon(0.0001)); - CHECK(dpsiM_rot[0][0][2] == Approx(grad1[2]).epsilon(0.0001)); - CHECK(dpsiM_rot[1][0][0] == Approx(grad2[0]).epsilon(0.0001)); - CHECK(dpsiM_rot[1][0][1] == Approx(grad2[1]).epsilon(0.0001)); - CHECK(dpsiM_rot[1][0][2] == Approx(grad2[2]).epsilon(0.0001)); - - double lap1 = 0.; - double lap2 = 0.; - for (auto i = 0; i < rot_mat.size1(); i++) - { - lap1 += d2psiM_bare[0][i] * rot_mat[i][0]; - lap2 += d2psiM_bare[1][i] * rot_mat[i][0]; - } - - // Lapl - CHECK(std::real(d2psiM_rot[0][0]) == Approx(lap1).epsilon(0.0001)); - CHECK(std::real(d2psiM_rot[1][0]) == Approx(lap2).epsilon(0.0001)); - -#endif -} - -TEST_CASE("RotatedSPOs createRotationIndices", "[wavefunction]") 
-{ - // No active-active or virtual-virtual rotations - // Only active-virtual - RotatedSPOs::RotationIndices rot_ind; - int nel = 1; - int nmo = 3; - RotatedSPOs::createRotationIndices(nel, nmo, rot_ind); - CHECK(rot_ind.size() == 2); - - // Full rotation contains all rotations - // Size should be number of pairs of orbitals: nmo*(nmo-1)/2 - RotatedSPOs::RotationIndices full_rot_ind; - RotatedSPOs::createRotationIndicesFull(nel, nmo, full_rot_ind); - CHECK(full_rot_ind.size() == 3); - - nel = 2; - RotatedSPOs::RotationIndices rot_ind2; - RotatedSPOs::createRotationIndices(nel, nmo, rot_ind2); - CHECK(rot_ind2.size() == 2); - - RotatedSPOs::RotationIndices full_rot_ind2; - RotatedSPOs::createRotationIndicesFull(nel, nmo, full_rot_ind2); - CHECK(full_rot_ind2.size() == 3); - - nmo = 4; - RotatedSPOs::RotationIndices rot_ind3; - RotatedSPOs::createRotationIndices(nel, nmo, rot_ind3); - CHECK(rot_ind3.size() == 4); - - RotatedSPOs::RotationIndices full_rot_ind3; - RotatedSPOs::createRotationIndicesFull(nel, nmo, full_rot_ind3); - CHECK(full_rot_ind3.size() == 6); -} - -TEST_CASE("RotatedSPOs constructAntiSymmetricMatrix", "[wavefunction]") -{ - using ValueType = SPOSet::ValueType; - using ValueMatrix = SPOSet::ValueMatrix; - - RotatedSPOs::RotationIndices rot_ind; - int nel = 1; - int nmo = 3; - RotatedSPOs::createRotationIndices(nel, nmo, rot_ind); - - ValueMatrix m3(nmo, nmo); - m3 = ValueType(0); - std::vector params = {0.1, 0.2}; - - RotatedSPOs::constructAntiSymmetricMatrix(rot_ind, params, m3); - - // clang-format off - std::vector expected_data = { 0.0, -0.1, -0.2, - 0.1, 0.0, 0.0, - 0.2, 0.0, 0.0 }; - // clang-format on - - ValueMatrix expected_m3(expected_data.data(), 3, 3); - - CheckMatrixResult check_matrix_result = checkMatrix(m3, expected_m3, true); - CHECKED_ELSE(check_matrix_result.result) { FAIL(check_matrix_result.result_message); } - - std::vector params_out(2); - RotatedSPOs::extractParamsFromAntiSymmetricMatrix(rot_ind, m3, params_out); - 
CHECK(params_out[0] == Approx(0.1)); - CHECK(params_out[1] == Approx(0.2)); -} - -// Expected values of the matrix exponential come from gen_matrix_ops.py -TEST_CASE("RotatedSPOs exponentiate matrix", "[wavefunction]") -{ - using ValueType = SPOSet::ValueType; - using ValueMatrix = SPOSet::ValueMatrix; - - std::vector mat1_data = {0.0}; - SPOSet::ValueMatrix m1(mat1_data.data(), 1, 1); - RotatedSPOs::exponentiate_antisym_matrix(m1); - // Always return 1.0 (the only possible anti-symmetric 1x1 matrix is 0) - CHECK(m1(0, 0) == ValueApprox(1.0)); - - // clang-format off - std::vector mat2_data = { 0.0, -0.1, - 0.1, 0.0 }; - // clang-format on - - SPOSet::ValueMatrix m2(mat2_data.data(), 2, 2); - RotatedSPOs::exponentiate_antisym_matrix(m2); - - // clang-format off - std::vector expected_rot2 = { 0.995004165278026, -0.0998334166468282, - 0.0998334166468282, 0.995004165278026 }; - // clang-format on - - ValueMatrix expected_m2(expected_rot2.data(), 2, 2); - CheckMatrixResult check_matrix_result2 = checkMatrix(m2, expected_m2, true); - CHECKED_ELSE(check_matrix_result2.result) { FAIL(check_matrix_result2.result_message); } - - - // clang-format off - std::vector m3_input_data = { 0.0, -0.3, -0.1, - 0.3, 0.0, -0.2, - 0.1, 0.2, 0.0 }; - - - std::vector expected_rot3 = { 0.950580617906092, -0.302932713402637, -0.0680313164049401, - 0.283164960565074, 0.935754803277919, -0.210191705950743, - 0.127334574917630, 0.180540076694398, 0.975290308953046 }; - - // clang-format on - - ValueMatrix m3(m3_input_data.data(), 3, 3); - ValueMatrix expected_m3(expected_rot3.data(), 3, 3); - - RotatedSPOs::exponentiate_antisym_matrix(m3); - - CheckMatrixResult check_matrix_result3 = checkMatrix(m3, expected_m3, true); - CHECKED_ELSE(check_matrix_result3.result) { FAIL(check_matrix_result3.result_message); } -} - -TEST_CASE("RotatedSPOs log matrix", "[wavefunction]") -{ - using ValueType = SPOSet::ValueType; - using ValueMatrix = SPOSet::ValueMatrix; - - std::vector mat1_data = {1.0}; - 
SPOSet::ValueMatrix m1(mat1_data.data(), 1, 1); - SPOSet::ValueMatrix out_m1(1, 1); - RotatedSPOs::log_antisym_matrix(m1, out_m1); - // Should always be 1.0 (the only possible anti-symmetric 1x1 matrix is 0) - CHECK(out_m1(0, 0) == ValueApprox(0.0)); - - // clang-format off - std::vector start_rot2 = { 0.995004165278026, -0.0998334166468282, - 0.0998334166468282, 0.995004165278026 }; - - std::vector mat2_data = { 0.0, -0.1, - 0.1, 0.0 }; - // clang-format on - - ValueMatrix rot_m2(start_rot2.data(), 2, 2); - ValueMatrix out_m2(2, 2); - RotatedSPOs::log_antisym_matrix(rot_m2, out_m2); - - SPOSet::ValueMatrix m2(mat2_data.data(), 2, 2); - CheckMatrixResult check_matrix_result2 = checkMatrix(m2, out_m2, true); - CHECKED_ELSE(check_matrix_result2.result) { FAIL(check_matrix_result2.result_message); } - - // clang-format off - std::vector start_rot3 = { 0.950580617906092, -0.302932713402637, -0.0680313164049401, - 0.283164960565074, 0.935754803277919, -0.210191705950743, - 0.127334574917630, 0.180540076694398, 0.975290308953046 }; - - std::vector m3_input_data = { 0.0, -0.3, -0.1, - 0.3, 0.0, -0.2, - 0.1, 0.2, 0.0 }; - // clang-format on - ValueMatrix rot_m3(start_rot3.data(), 3, 3); - ValueMatrix out_m3(3, 3); - RotatedSPOs::log_antisym_matrix(rot_m3, out_m3); - - SPOSet::ValueMatrix m3(m3_input_data.data(), 3, 3); - CheckMatrixResult check_matrix_result3 = checkMatrix(m3, out_m3, true); - CHECKED_ELSE(check_matrix_result3.result) { FAIL(check_matrix_result3.result_message); } -} - -// Test round trip A -> exp(A) -> log(exp(A)) -// The log is multi-valued so this test may fail if the rotation parameters are too large. 
-// The exponentials will be the same, though -// exp(log(exp(A))) == exp(A) -TEST_CASE("RotatedSPOs exp-log matrix", "[wavefunction]") -{ - using ValueType = SPOSet::ValueType; - using ValueMatrix = SPOSet::ValueMatrix; - - RotatedSPOs::RotationIndices rot_ind; - int nel = 2; - int nmo = 4; - RotatedSPOs::createRotationIndices(nel, nmo, rot_ind); - - ValueMatrix rot_m4(nmo, nmo); - rot_m4 = ValueType(0); - - std::vector params4 = {-1.1, 1.5, 0.2, -0.15}; - - RotatedSPOs::constructAntiSymmetricMatrix(rot_ind, params4, rot_m4); - ValueMatrix orig_rot_m4 = rot_m4; - ValueMatrix out_m4(nmo, nmo); - - RotatedSPOs::exponentiate_antisym_matrix(rot_m4); - - RotatedSPOs::log_antisym_matrix(rot_m4, out_m4); - - CheckMatrixResult check_matrix_result4 = checkMatrix(out_m4, orig_rot_m4, true); - CHECKED_ELSE(check_matrix_result4.result) { FAIL(check_matrix_result4.result_message); } - - std::vector params4out(4); - RotatedSPOs::extractParamsFromAntiSymmetricMatrix(rot_ind, out_m4, params4out); - for (int i = 0; i < params4.size(); i++) - { - CHECK(params4[i] == Approx(params4out[i])); - } -} - -TEST_CASE("RotatedSPOs hcpBe", "[wavefunction]") -{ - using RealType = QMCTraits::RealType; - Communicate* c = OHMMS::Controller; - - ParticleSet::ParticleLayout lattice; - lattice.R = {4.32747284, 0.00000000, 0.00000000, -2.16373642, 3.74770142, - 0.00000000, 0.00000000, 0.00000000, 6.78114995}; - - ParticleSetPool ptcl = ParticleSetPool(c); - ptcl.setSimulationCell(lattice); - auto ions_uptr = std::make_unique(ptcl.getSimulationCell()); - auto elec_uptr = std::make_unique(ptcl.getSimulationCell()); - ParticleSet& ions(*ions_uptr); - ParticleSet& elec(*elec_uptr); - - ions.setName("ion"); - ptcl.addParticleSet(std::move(ions_uptr)); - ions.create({1}); - ions.R[0] = {0.0, 0.0, 0.0}; - - elec.setName("elec"); - ptcl.addParticleSet(std::move(elec_uptr)); - elec.create({1}); - elec.R[0] = {0.0, 0.0, 0.0}; - - SpeciesSet& tspecies = elec.getSpeciesSet(); - int upIdx = 
tspecies.addSpecies("u"); - int chargeIdx = tspecies.addAttribute("charge"); - tspecies(chargeIdx, upIdx) = -1; - - // Add the attribute save_coefs="yes" to the sposet_builder tag to generate the - // spline file for use in eval_bspline_spo.py - - const char* particles = R"( - - - -)"; - - Libxml2Document doc; - bool okay = doc.parseFromString(particles); - REQUIRE(okay); - - xmlNodePtr root = doc.getRoot(); - - xmlNodePtr sposet_builder = xmlFirstElementChild(root); - xmlNodePtr sposet_ptr = xmlFirstElementChild(sposet_builder); - - EinsplineSetBuilder einSet(elec, ptcl.getPool(), c, sposet_builder); - auto spo = einSet.createSPOSetFromXML(sposet_ptr); - REQUIRE(spo); - - spo->storeParamsBeforeRotation(); - auto rot_spo = std::make_unique("one_rotated_set", std::move(spo)); - - // Sanity check for orbs. Expect 1 electron, 2 orbitals - const auto orbitalsetsize = rot_spo->getOrbitalSetSize(); - REQUIRE(orbitalsetsize == 2); - - rot_spo->buildOptVariables(elec.R.size()); - - SPOSet::ValueMatrix psiM_bare(elec.R.size(), orbitalsetsize); - SPOSet::GradMatrix dpsiM_bare(elec.R.size(), orbitalsetsize); - SPOSet::ValueMatrix d2psiM_bare(elec.R.size(), orbitalsetsize); - rot_spo->evaluate_notranspose(elec, 0, elec.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare); - - // Values generated from eval_bspline_spo.py, the generate_point_values_hcpBe function - CHECK(std::real(psiM_bare[0][0]) == Approx(0.210221765375514)); - CHECK(std::real(psiM_bare[0][1]) == Approx(-2.984345024542937e-06)); - - CHECK(std::real(d2psiM_bare[0][0]) == Approx(5.303848362116568)); - - opt_variables_type opt_vars; - rot_spo->checkInVariablesExclusive(opt_vars); - opt_vars.resetIndex(); - rot_spo->checkOutVariables(opt_vars); - rot_spo->resetParametersExclusive(opt_vars); - - using ValueType = QMCTraits::ValueType; - Vector dlogpsi(1); - Vector dhpsioverpsi(1); - rot_spo->evaluateDerivatives(elec, opt_vars, dlogpsi, dhpsioverpsi, 0, 1); - - CHECK(dlogpsi[0] == ValueApprox(-1.41961753e-05)); - 
CHECK(dhpsioverpsi[0] == ValueApprox(-0.00060853)); - - std::vector params = {0.1}; - rot_spo->apply_rotation(params, false); - - rot_spo->evaluate_notranspose(elec, 0, elec.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare); - CHECK(std::real(psiM_bare[0][0]) == Approx(0.20917123424337608)); - CHECK(std::real(psiM_bare[0][1]) == Approx(-0.02099012652669549)); - - CHECK(std::real(d2psiM_bare[0][0]) == Approx(5.277362065087747)); - - dlogpsi[0] = 0.0; - dhpsioverpsi[0] = 0.0; - - rot_spo->evaluateDerivatives(elec, opt_vars, dlogpsi, dhpsioverpsi, 0, 1); - CHECK(dlogpsi[0] == ValueApprox(-0.10034901119468914)); - CHECK(dhpsioverpsi[0] == ValueApprox(32.96939041498753)); -} - -// Test construction of delta rotation -TEST_CASE("RotatedSPOs construct delta matrix", "[wavefunction]") -{ - using ValueType = SPOSet::ValueType; - using ValueMatrix = SPOSet::ValueMatrix; - - int nel = 2; - int nmo = 4; - RotatedSPOs::RotationIndices rot_ind; - RotatedSPOs::createRotationIndices(nel, nmo, rot_ind); - RotatedSPOs::RotationIndices full_rot_ind; - RotatedSPOs::createRotationIndicesFull(nel, nmo, full_rot_ind); - // rot_ind size is 4 and full rot_ind size is 6 - - ValueMatrix rot_m4(nmo, nmo); - rot_m4 = ValueType(0); - - // When comparing with gen_matrix_ops.py, be aware of the order of indices - // in full_rot - // rot_ind is (0,2) (0,3) (1,2) (1,3) - // full_rot_ind is (0,2) (0,3) (1,2) (1,3) (0,1) (2,3) - // The extra indices go at the back - std::vector old_params = {1.5, 0.2, -0.15, 0.03, -1.1, 0.05}; - std::vector delta_params = {0.1, 0.3, 0.2, -0.1}; - std::vector new_params(6); - - RotatedSPOs::constructDeltaRotation(delta_params, old_params, rot_ind, full_rot_ind, new_params, rot_m4); - - // clang-format off - std::vector rot_data4 = - { -0.371126931484737, 0.491586564957393, -0.784780958819798, 0.0687480658200083, - -0.373372784561548, 0.66111547793048, 0.610450337985578, 0.225542620014052, - 0.751270334458895, 0.566737323353515, -0.0297901110611425, -0.336918744155143, - 
0.398058348785074, 0.00881931472604944, -0.102867783149713, 0.911531672428406 }; - // clang-format on - - ValueMatrix new_rot_m4(rot_data4.data(), 4, 4); - - CheckMatrixResult check_matrix_result4 = checkMatrix(rot_m4, new_rot_m4, true); - CHECKED_ELSE(check_matrix_result4.result) { FAIL(check_matrix_result4.result_message); } - - // Reminder: Ordering! - std::vector expected_new_param = {1.6813965019790489, 0.3623564254653294, -0.05486544454559908, - -0.20574472941408453, -0.9542513302873077, 0.27497788909911774}; - for (int i = 0; i < new_params.size(); i++) - CHECK(new_params[i] == Approx(expected_new_param[i])); - - - // Rotated back to original position - - std::vector new_params2(6); - std::vector reverse_delta_params = {-0.1, -0.3, -0.2, 0.1}; - RotatedSPOs::constructDeltaRotation(reverse_delta_params, new_params, rot_ind, full_rot_ind, new_params2, rot_m4); - for (int i = 0; i < new_params2.size(); i++) - CHECK(new_params2[i] == Approx(old_params[i])); -} - -namespace testing -{ -opt_variables_type& getMyVars(SPOSet& rot) { return rot.myVars; } -opt_variables_type& getMyVarsFull(RotatedSPOs& rot) { return rot.myVarsFull; } -std::vector>& getHistoryParams(RotatedSPOs& rot) { return rot.history_params_; } -} // namespace testing - -// Test using global rotation -TEST_CASE("RotatedSPOs read and write parameters", "[wavefunction]") -{ - auto fake_spo = std::make_unique(); - fake_spo->setOrbitalSetSize(4); - RotatedSPOs rot("fake_rot", std::move(fake_spo)); - int nel = 2; - rot.buildOptVariables(nel); - - optimize::VariableSet vs; - rot.checkInVariablesExclusive(vs); - vs[0] = 0.1; - vs[1] = 0.15; - vs[2] = 0.2; - vs[3] = 0.25; - rot.resetParametersExclusive(vs); - - { - hdf_archive hout; - vs.writeToHDF("rot_vp.h5", hout); - - rot.writeVariationalParameters(hout); - } - - auto fake_spo2 = std::make_unique(); - fake_spo2->setOrbitalSetSize(4); - - RotatedSPOs rot2("fake_rot", std::move(fake_spo2)); - rot2.buildOptVariables(nel); - - optimize::VariableSet vs2; - 
rot2.checkInVariablesExclusive(vs2); - - hdf_archive hin; - vs2.readFromHDF("rot_vp.h5", hin); - rot2.readVariationalParameters(hin); - - opt_variables_type& var = testing::getMyVars(rot2); - CHECK(var[0] == Approx(vs[0])); - CHECK(var[1] == Approx(vs[1])); - CHECK(var[2] == Approx(vs[2])); - CHECK(var[3] == Approx(vs[3])); - - opt_variables_type& full_var = testing::getMyVarsFull(rot2); - CHECK(full_var[0] == Approx(vs[0])); - CHECK(full_var[1] == Approx(vs[1])); - CHECK(full_var[2] == Approx(vs[2])); - CHECK(full_var[3] == Approx(vs[3])); - CHECK(full_var[4] == Approx(0.0)); - CHECK(full_var[5] == Approx(0.0)); -} - -// Test using history list. -TEST_CASE("RotatedSPOs read and write parameters history", "[wavefunction]") -{ - auto fake_spo = std::make_unique(); - fake_spo->setOrbitalSetSize(4); - RotatedSPOs rot("fake_rot", std::move(fake_spo)); - rot.set_use_global_rotation(false); - int nel = 2; - rot.buildOptVariables(nel); - - optimize::VariableSet vs; - rot.checkInVariablesExclusive(vs); - vs[0] = 0.1; - vs[1] = 0.15; - vs[2] = 0.2; - vs[3] = 0.25; - rot.resetParametersExclusive(vs); - - { - hdf_archive hout; - vs.writeToHDF("rot_vp_hist.h5", hout); - - rot.writeVariationalParameters(hout); - } - - auto fake_spo2 = std::make_unique(); - fake_spo2->setOrbitalSetSize(4); - - RotatedSPOs rot2("fake_rot", std::move(fake_spo2)); - rot2.buildOptVariables(nel); - - optimize::VariableSet vs2; - rot2.checkInVariablesExclusive(vs2); - - hdf_archive hin; - vs2.readFromHDF("rot_vp_hist.h5", hin); - rot2.readVariationalParameters(hin); - - opt_variables_type& var = testing::getMyVars(rot2); - CHECK(var[0] == Approx(vs[0])); - CHECK(var[1] == Approx(vs[1])); - CHECK(var[2] == Approx(vs[2])); - CHECK(var[3] == Approx(vs[3])); - - auto hist = testing::getHistoryParams(rot2); - REQUIRE(hist.size() == 1); - REQUIRE(hist[0].size() == 4); -} - -class DummySPOSetWithoutMW : public SPOSet -{ -public: - DummySPOSetWithoutMW(const std::string& my_name) : SPOSet(my_name) {} - void 
setOrbitalSetSize(int norbs) override {} - void evaluateValue(const ParticleSet& P, int iat, SPOSet::ValueVector& psi) override - { - assert(psi.size() == 3); - psi[0] = 123; - psi[1] = 456; - psi[2] = 789; - } - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override {} - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override - {} - std::string getClassName() const override { return my_name_; } -}; - -class DummySPOSetWithMW : public DummySPOSetWithoutMW -{ -public: - DummySPOSetWithMW(const std::string& my_name) : DummySPOSetWithoutMW(my_name) {} - void mw_evaluateValue(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list) const override - { - for (auto& psi : psi_v_list) - { - assert(psi.get().size() == 3); - psi.get()[0] = 321; - psi.get()[1] = 654; - psi.get()[2] = 987; - } - } -}; - -TEST_CASE("RotatedSPOs mw_ APIs", "[wavefunction]") -{ - //checking that mw_ API works in RotatedSPOs and is not defaulting to - //SPOSet default implementation - { - //First check calling the mw_ APIs for RotatedSPOs, for which the - //underlying implementation just calls the underlying SPOSet mw_ API - //In the case that the underlying SPOSet doesn't specialize the mw_ API, - //the underlying SPOSet will fall back to the default SPOSet mw_, which is - //just a loop over the single walker API. 
- RotatedSPOs rot_spo0("rotated0", std::make_unique("no mw 0")); - RotatedSPOs rot_spo1("rotated1", std::make_unique("no mw 1")); - RefVectorWithLeader spo_list(rot_spo0, {rot_spo0, rot_spo1}); - - ResourceCollection spo_res("test_rot_res"); - rot_spo0.createResource(spo_res); - ResourceCollectionTeamLock mw_sposet_lock(spo_res, spo_list); - - const SimulationCell simulation_cell; - ParticleSet elec0(simulation_cell); - ParticleSet elec1(simulation_cell); - RefVectorWithLeader p_list(elec0, {elec0, elec1}); - - SPOSet::ValueVector psi0(3); - SPOSet::ValueVector psi1(3); - RefVector psi_v_list{psi0, psi1}; - - rot_spo0.mw_evaluateValue(spo_list, p_list, 0, psi_v_list); - for (int iw = 0; iw < spo_list.size(); iw++) - { - CHECK(psi_v_list[iw].get()[0] == Approx(123)); - CHECK(psi_v_list[iw].get()[1] == Approx(456)); - CHECK(psi_v_list[iw].get()[2] == Approx(789)); - } - } - { - //In the case that the underlying SPOSet DOES have mw_ specializations, - //we want to make sure that RotatedSPOs are triggering that appropriately - //This will mean that the underlying SPOSets will do the appropriate offloading - //To check this, DummySPOSetWithMW has an explicit mw_evaluateValue which sets - //different values than what gets set in evaluateValue. 
By doing this, - //we are ensuring that RotatedSPOs->mw_evaluaeValue is calling the specialization - //in the underlying SPO and not using the default SPOSet implementation which - //loops over single walker APIs (which have different values enforced in - // DummySPOSetWithoutMW - - RotatedSPOs rot_spo0("rotated0", std::make_unique("mw 0")); - RotatedSPOs rot_spo1("rotated1", std::make_unique("mw 1")); - RefVectorWithLeader spo_list(rot_spo0, {rot_spo0, rot_spo1}); - - ResourceCollection spo_res("test_rot_res"); - rot_spo0.createResource(spo_res); - ResourceCollectionTeamLock mw_sposet_lock(spo_res, spo_list); - - const SimulationCell simulation_cell; - ParticleSet elec0(simulation_cell); - ParticleSet elec1(simulation_cell); - RefVectorWithLeader p_list(elec0, {elec0, elec1}); - - SPOSet::ValueVector psi0(3); - SPOSet::ValueVector psi1(3); - RefVector psi_v_list{psi0, psi1}; - - rot_spo0.mw_evaluateValue(spo_list, p_list, 0, psi_v_list); - for (int iw = 0; iw < spo_list.size(); iw++) - { - CHECK(psi_v_list[iw].get()[0] == Approx(321)); - CHECK(psi_v_list[iw].get()[1] == Approx(654)); - CHECK(psi_v_list[iw].get()[2] == Approx(987)); - } - } -} - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp b/src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp index e5c04d205f..708e478053 100644 --- a/src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp +++ b/src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp @@ -28,6 +28,7 @@ #include #include +#include using std::string; @@ -49,22 +50,22 @@ using ValueApprox = typename ValueApproxHelper::Type; namespace testing { -OptVariablesType& +OptVariablesTypeT& getMyVars(SPOSetT& rot) { return rot.myVars; } -OptVariablesType& +OptVariablesTypeT& getMyVars(SPOSetT& rot) { return rot.myVars; } -OptVariablesType& +OptVariablesTypeT& getMyVarsFull(RotatedSPOsT& rot) { return rot.myVarsFull; } -OptVariablesType& +OptVariablesTypeT& getMyVarsFull(RotatedSPOsT& rot) { return rot.myVarsFull; @@ -82,12 +83,22 @@ 
getHistoryParams(RotatedSPOsT& rot) } } // namespace testing +#ifndef QMC_COMPLEX +#ifndef MIXED_PRECISION +using TestTypeList = std::tuple; +#else +using TestTypeList = std::tuple; +#endif +#else +using TestTypeList = std::tuple<>; +#endif + /* JPT 04.01.2022: Adapted from test_einset.cpp Test the spline rotated machinery for SplineR2R (extend to others later). */ -TEMPLATE_TEST_CASE( - "RotatedSPOs via SplineR2R", "[wavefunction][template]", double, float) +TEMPLATE_LIST_TEST_CASE( + "RotatedSPOs via SplineR2R", "[wavefunction][template]", TestTypeList) { using RealType = typename SPOSetT::RealType; @@ -353,8 +364,8 @@ TEMPLATE_TEST_CASE( #endif } -TEMPLATE_TEST_CASE("RotatedSPOs createRotationIndices", - "[wavefunction][template]", double, float) +TEMPLATE_LIST_TEST_CASE("RotatedSPOs createRotationIndices", + "[wavefunction][template]", TestTypeList) { // No active-active or virtual-virtual rotations // Only active-virtual @@ -389,8 +400,8 @@ TEMPLATE_TEST_CASE("RotatedSPOs createRotationIndices", CHECK(full_rot_ind3.size() == 6); } -TEMPLATE_TEST_CASE("RotatedSPOs constructAntiSymmetricMatrix", - "[wavefunction][template]", double, float) +TEMPLATE_LIST_TEST_CASE("RotatedSPOs constructAntiSymmetricMatrix", + "[wavefunction][template]", TestTypeList) { using ValueType = typename SPOSetT::ValueType; using ValueMatrix = typename SPOSetT::ValueMatrix; @@ -428,8 +439,8 @@ TEMPLATE_TEST_CASE("RotatedSPOs constructAntiSymmetricMatrix", } // Expected values of the matrix exponential come from gen_matrix_ops.py -TEMPLATE_TEST_CASE("RotatedSPOs exponentiate matrix", - "[wavefunction][template]", double, float) +TEMPLATE_LIST_TEST_CASE( + "RotatedSPOs exponentiate matrix", "[wavefunction][template]", TestTypeList) { using ValueType = typename SPOSetT::ValueType; using ValueMatrix = typename SPOSetT::ValueMatrix; @@ -484,8 +495,8 @@ TEMPLATE_TEST_CASE("RotatedSPOs exponentiate matrix", } } -TEMPLATE_TEST_CASE( - "RotatedSPOs log matrix", "[wavefunction][template]", double, 
float) +TEMPLATE_LIST_TEST_CASE( + "RotatedSPOs log matrix", "[wavefunction][template]", TestTypeList) { using ValueType = typename SPOSetT::ValueType; using ValueMatrix = typename SPOSetT::ValueMatrix; @@ -541,8 +552,8 @@ TEMPLATE_TEST_CASE( // The log is multi-valued so this test may fail if the rotation parameters are // too large. The exponentials will be the same, though // exp(log(exp(A))) == exp(A) -TEMPLATE_TEST_CASE( - "RotatedSPOs exp-log matrix", "[wavefunction][template]", double, float) +TEMPLATE_LIST_TEST_CASE( + "RotatedSPOs exp-log matrix", "[wavefunction][template]", TestTypeList) { using ValueType = typename SPOSetT::ValueType; using ValueMatrix = typename SPOSetT::ValueMatrix; @@ -581,8 +592,8 @@ TEMPLATE_TEST_CASE( } } -TEMPLATE_TEST_CASE( - "RotatedSPOs hcpBe", "[wavefunction][template]", double, float) +TEMPLATE_LIST_TEST_CASE( + "RotatedSPOs hcpBe", "[wavefunction][template]", TestTypeList) { using RealType = typename OrbitalSetTraits::RealType; Communicate* c = OHMMS::Controller; @@ -667,7 +678,7 @@ TEMPLATE_TEST_CASE( CHECK(std::real(d2psiM_bare[0][0]) == ValueApprox(5.303848362116568)); - OptVariablesType opt_vars; + OptVariablesTypeT opt_vars; rot_spo->checkInVariablesExclusive(opt_vars); opt_vars.resetIndex(); rot_spo->checkOutVariables(opt_vars); @@ -703,8 +714,8 @@ TEMPLATE_TEST_CASE( } // Test construction of delta rotation -TEMPLATE_TEST_CASE("RotatedSPOs construct delta matrix", - "[wavefunction][template]", double, float) +TEMPLATE_LIST_TEST_CASE("RotatedSPOs construct delta matrix", + "[wavefunction][template]", TestTypeList) { using ValueType = typename SPOSetT::ValueType; using ValueMatrix = typename SPOSetT::ValueMatrix; @@ -767,8 +778,8 @@ TEMPLATE_TEST_CASE("RotatedSPOs construct delta matrix", } // Test using global rotation -TEMPLATE_TEST_CASE("RotatedSPOs read and write parameters", - "[wavefunction][template]", double, float) +TEMPLATE_LIST_TEST_CASE("RotatedSPOs read and write parameters", + "[wavefunction][template]", 
TestTypeList) { auto fake_spo = std::make_unique>(); fake_spo->setOrbitalSetSize(4); @@ -820,8 +831,8 @@ TEMPLATE_TEST_CASE("RotatedSPOs read and write parameters", } // Test using history list. -TEMPLATE_TEST_CASE("RotatedSPOs read and write parameters history", - "[wavefunction][template]", double, float) +TEMPLATE_LIST_TEST_CASE("RotatedSPOs read and write parameters history", + "[wavefunction][template]", TestTypeList) { auto fake_spo = std::make_unique>(); fake_spo->setOrbitalSetSize(4); @@ -936,8 +947,8 @@ class DummySPOSetWithMWT : public DummySPOSetWithoutMWT } }; -TEMPLATE_TEST_CASE( - "RotatedSPOs mw_ APIs", "[wavefunction][template]", double, float) +TEMPLATE_LIST_TEST_CASE( + "RotatedSPOs mw_ APIs", "[wavefunction][template]", TestTypeList) { // checking that mw_ API works in RotatedSPOs and is not defaulting to // SPOSet default implementation diff --git a/src/QMCWaveFunctions/tests/test_TrialWaveFunction_He.cpp b/src/QMCWaveFunctions/tests/test_TrialWaveFunction_He.cpp index ed969b392c..463b0e8374 100644 --- a/src/QMCWaveFunctions/tests/test_TrialWaveFunction_He.cpp +++ b/src/QMCWaveFunctions/tests/test_TrialWaveFunction_He.cpp @@ -24,6 +24,7 @@ #include "QMCWaveFunctions/WaveFunctionFactory.h" #include "Utilities/RuntimeOptions.h" #include +#include "QMCWaveFunctions/VariableSet.h" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/tests/test_cartesian_ao.cpp b/src/QMCWaveFunctions/tests/test_cartesian_ao.cpp index 1eebcabd0b..12c3208437 100644 --- a/src/QMCWaveFunctions/tests/test_cartesian_ao.cpp +++ b/src/QMCWaveFunctions/tests/test_cartesian_ao.cpp @@ -19,6 +19,7 @@ #include "Numerics/GaussianBasisSet.h" #include "QMCWaveFunctions/LCAO/LCAOrbitalBuilder.h" #include "QMCWaveFunctions/SPOSetBuilderFactory.h" +#include "QMCWaveFunctions/SPOSet.h" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/tests/test_einset.cpp b/src/QMCWaveFunctions/tests/test_einset.cpp index 8dd00c2621..7cc6a6a8f0 100644 --- 
a/src/QMCWaveFunctions/tests/test_einset.cpp +++ b/src/QMCWaveFunctions/tests/test_einset.cpp @@ -19,6 +19,7 @@ #include "QMCWaveFunctions/WaveFunctionComponent.h" #include "BsplineFactory/EinsplineSetBuilder.h" #include "BsplineFactory/EinsplineSpinorSetBuilder.h" +#include "QMCWaveFunctions/SPOSet.h" #include #include diff --git a/src/QMCWaveFunctions/tests/test_einset_spinor.cpp b/src/QMCWaveFunctions/tests/test_einset_spinor.cpp index 83693e52b9..6fb31c1867 100644 --- a/src/QMCWaveFunctions/tests/test_einset_spinor.cpp +++ b/src/QMCWaveFunctions/tests/test_einset_spinor.cpp @@ -21,7 +21,7 @@ #include "QMCWaveFunctions/SPOSetBuilderFactory.h" #include "Utilities/ResourceCollection.h" #include "QMCWaveFunctions/SpinorSet.h" - +#include "QMCWaveFunctions/SPOSet.h" #include #include #include @@ -531,7 +531,7 @@ TEST_CASE("Einspline SpinorSet from HDF", "[wavefunction]") elec_.mw_makeMove(p_list, iat, displs); std::vector accept = {true, true}; - elec_.mw_accept_rejectMove(p_list, iat, accept); + elec_.mw_accept_rejectMoveT(p_list, iat, accept); } elec_.mw_update(p_list); @@ -612,7 +612,7 @@ TEST_CASE("Einspline SpinorSet from HDF", "[wavefunction]") CHECK(mw_dspin[1][2] == ComplexApprox(dspsiM_ref[(iat + 1) % 3][2]).epsilon(h)); std::vector accept = {false, false}; - elec_.mw_accept_rejectMove(p_list, iat, accept); + elec_.mw_accept_rejectMoveT(p_list, iat, accept); } } diff --git a/src/QMCWaveFunctions/tests/test_example_he.cpp b/src/QMCWaveFunctions/tests/test_example_he.cpp index a8f980f63a..5ab56f0d71 100644 --- a/src/QMCWaveFunctions/tests/test_example_he.cpp +++ b/src/QMCWaveFunctions/tests/test_example_he.cpp @@ -19,6 +19,7 @@ #include "QMCWaveFunctions/WaveFunctionFactory.h" #include "QMCWaveFunctions/ExampleHeComponent.h" #include "Utilities/RuntimeOptions.h" +#include "QMCWaveFunctions/VariableSet.h" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/tests/test_hybridrep.cpp b/src/QMCWaveFunctions/tests/test_hybridrep.cpp index 
956756fc48..2106553a83 100644 --- a/src/QMCWaveFunctions/tests/test_hybridrep.cpp +++ b/src/QMCWaveFunctions/tests/test_hybridrep.cpp @@ -24,6 +24,7 @@ #include "QMCWaveFunctions/WaveFunctionComponent.h" #include "BsplineFactory/EinsplineSetBuilder.h" #include "BsplineFactory/EinsplineSpinorSetBuilder.h" +#include "QMCWaveFunctions/SPOSet.h" #include using std::string; diff --git a/src/QMCWaveFunctions/tests/test_pyscf_complex_MO.cpp b/src/QMCWaveFunctions/tests/test_pyscf_complex_MO.cpp index e5f7f68364..d880d36b86 100644 --- a/src/QMCWaveFunctions/tests/test_pyscf_complex_MO.cpp +++ b/src/QMCWaveFunctions/tests/test_pyscf_complex_MO.cpp @@ -26,7 +26,7 @@ #include "Numerics/GaussianBasisSet.h" #include "QMCWaveFunctions/LCAO/LCAOrbitalBuilder.h" #include "QMCWaveFunctions/SPOSetBuilderFactory.h" - +#include "QMCWaveFunctions/SPOSet.h" namespace qmcplusplus { void test_C_diamond() diff --git a/src/QMCWaveFunctions/tests/test_soa_cusp_corr.cpp b/src/QMCWaveFunctions/tests/test_soa_cusp_corr.cpp index 1c7e02bff9..166df18c29 100644 --- a/src/QMCWaveFunctions/tests/test_soa_cusp_corr.cpp +++ b/src/QMCWaveFunctions/tests/test_soa_cusp_corr.cpp @@ -21,8 +21,8 @@ #include "Numerics/GaussianBasisSet.h" #include "QMCWaveFunctions/LCAO/LCAOrbitalSet.h" -#include "QMCWaveFunctions/LCAO/CuspCorrectionConstruction.h" - +#include "QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h" +#include "QMCWaveFunctions/SPOSet.h" #include "QMCWaveFunctions/SPOSetBuilderFactory.h" namespace qmcplusplus @@ -33,12 +33,13 @@ TEST_CASE("readCuspInfo", "[wavefunction]") using GridType = OneDimGridBase; - Matrix info; + Matrix> info; int num_center = 3; int orbital_set_size = 7; info.resize(num_center, orbital_set_size); - bool okay = readCuspInfo("hcn_downdet.cuspInfo.xml", "downdet", orbital_set_size, info); + bool okay = + CuspCorrectionConstructionT::readCuspInfo("hcn_downdet.cuspInfo.xml", "downdet", orbital_set_size, info); REQUIRE(okay); // N @@ -61,7 +62,6 @@ TEST_CASE("readCuspInfo", 
"[wavefunction]") CHECK(info(2, 4).alpha[4] == Approx(-404.733151049101)); // a5 } - TEST_CASE("applyCuspInfo", "[wavefunction]") { Communicate* c = OHMMS::Controller; @@ -138,16 +138,17 @@ TEST_CASE("applyCuspInfo", "[wavefunction]") using RealType = QMCTraits::RealType; - splitPhiEta(center_idx, corrCenter, phi, eta); + CuspCorrectionConstructionT::splitPhiEta(center_idx, corrCenter, phi, eta); // 1S orbital on N CHECK((*phi.C)(0, 0) == Approx(1.00180500)); CHECK((*eta.C)(0, 0) == Approx(0.0)); int orbital_set_size = 7; - Matrix info; + Matrix> info; info.resize(num_center, orbital_set_size); - okay = readCuspInfo("hcn_downdet.cuspInfo.xml", "downdet", orbital_set_size, info); + okay = + CuspCorrectionConstructionT::readCuspInfo("hcn_downdet.cuspInfo.xml", "downdet", orbital_set_size, info); REQUIRE(okay); Vector xgrid; @@ -162,7 +163,8 @@ TEST_CASE("applyCuspInfo", "[wavefunction]") rad_orb.resize(ngrid); int mo_idx = 0; - computeRadialPhiBar(&elec, &ions, mo_idx, center_idx, &phi, xgrid, rad_orb, info(center_idx, mo_idx)); + CuspCorrectionConstructionT::computeRadialPhiBar(&elec, &ions, mo_idx, center_idx, &phi, xgrid, rad_orb, + info(center_idx, mo_idx)); // Comparisons generated from gen_cusp_corr.py // Center 0 MO 0 rc = 0.07691307008 @@ -179,7 +181,8 @@ TEST_CASE("applyCuspInfo", "[wavefunction]") mo_idx = 1; - computeRadialPhiBar(&elec, &ions, mo_idx, center_idx, &phi, xgrid, rad_orb, info(center_idx, mo_idx)); + CuspCorrectionConstructionT::computeRadialPhiBar(&elec, &ions, mo_idx, center_idx, &phi, xgrid, rad_orb, + info(center_idx, mo_idx)); // Center 0 MO 1 rc = 0.060909477888 CHECK(rad_orb[0] == Approx(-0.0099816961)); // x = 0.012 @@ -202,14 +205,15 @@ TEST_CASE("applyCuspInfo", "[wavefunction]") // C is second atom center_idx = 1; - splitPhiEta(center_idx, corrCenter, phi, eta); + CuspCorrectionConstructionT::splitPhiEta(center_idx, corrCenter, phi, eta); // 1S orbital on N CHECK((*phi.C)(0, 0) == Approx(0.0)); CHECK((*eta.C)(0, 0) == 
Approx(1.00180500)); mo_idx = 0; - computeRadialPhiBar(&elec, &ions, mo_idx, center_idx, &phi, xgrid, rad_orb, info(center_idx, mo_idx)); + CuspCorrectionConstructionT::computeRadialPhiBar(&elec, &ions, mo_idx, center_idx, &phi, xgrid, rad_orb, + info(center_idx, mo_idx)); // Center 1 MO 0 rc = 0.105 CHECK(rad_orb[0] == Approx(0.0017535517)); // x = 0.012 @@ -224,7 +228,7 @@ TEST_CASE("applyCuspInfo", "[wavefunction]") CHECK(rad_orb[9] == Approx(0.0010837868)); // x = 0.12 - removeSTypeOrbitals(corrCenter, lcob); + CuspCorrectionConstructionT::removeSTypeOrbitals(corrCenter, lcob); CHECK((*lcob.C)(0, 0) == Approx(0.0)); CHECK((*lcob.C)(0, 1) == Approx(0.0)); @@ -551,7 +555,7 @@ TEST_CASE("Ethanol MO with cusp", "[wavefunction]") TEST_CASE("broadcastCuspInfo", "[wavefunction]") { Communicate* c = OHMMS::Controller; - CuspCorrectionParameters cp; + CuspCorrectionParametersT cp; int root = 0; if (c->rank() == root) { @@ -566,7 +570,7 @@ TEST_CASE("broadcastCuspInfo", "[wavefunction]") cp.redo = 1; } - broadcastCuspInfo(cp, *c, root); + CuspCorrectionConstructionT::broadcastCuspInfo(cp, *c, root); CHECK(cp.Rc == Approx(2.0)); CHECK(cp.C == Approx(3.0)); diff --git a/src/QMCWaveFunctions/tests/test_spline_applyrotation.cpp b/src/QMCWaveFunctions/tests/test_spline_applyrotation.cpp index b0741c2074..ac0f6f960c 100644 --- a/src/QMCWaveFunctions/tests/test_spline_applyrotation.cpp +++ b/src/QMCWaveFunctions/tests/test_spline_applyrotation.cpp @@ -20,6 +20,7 @@ #include "BsplineFactory/EinsplineSpinorSetBuilder.h" #include "QMCWaveFunctions/BsplineFactory/SplineC2C.h" #include "Utilities/for_testing/checkMatrix.hpp" +#include "QMCWaveFunctions/SPOSet.h" #include #include diff --git a/src/spline/test_bspline.h b/src/spline/test_bspline.h index e61fb8bdad..a480b1815e 100644 --- a/src/spline/test_bspline.h +++ b/src/spline/test_bspline.h @@ -86,7 +86,6 @@ void test_bspline(ParticleSet& TargetPtcl, SPE1& a, SPE2& b) { int N = a.OrbitalSetSize; SPOSet::RealType eps = 
static_cast(numeric_limits::epsilon()); - //SPOSet::RealType eps=1e-6; SPOSet::ValueVector psi_0(N); SPOSet::ValueVector psi_1(N); SPOSet::GradVector dpsi_0(N); diff --git a/src/type_traits/complex_help.hpp b/src/type_traits/complex_help.hpp index 83aecc96d4..76ebeddcbb 100644 --- a/src/type_traits/complex_help.hpp +++ b/src/type_traits/complex_help.hpp @@ -38,6 +38,22 @@ struct RealAlias_impl> { using value_type = T; }; template struct RealAlias_impl> { using value_type = typename T::value_type; }; +template +struct FullPrec_impl +{}; + +template +struct FullPrec_impl> +{ + using value_type = double; +}; + +template +struct FullPrec_impl> +{ + using value_type = std::complex; +}; + /** If you have a function templated on a value that can be real or complex * and you need to get the base Real type if its complex or just the real. * @@ -47,6 +63,9 @@ struct RealAlias_impl> { using value_type = typename T::value_ty template using RealAlias = typename RealAlias_impl::value_type; +template +using FullPrec = typename FullPrec_impl::value_type; + ///real part of a scalar. Cannot be replaced by std::real due to AFQMC specific needs. inline float real(const float& c) { return c; } inline double real(const double& c) { return c; }