From d1d77594bee6b3fecb3ae9aeb418d30f2d1742dc Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Thu, 21 Sep 2023 14:13:23 -0400 Subject: [PATCH] checkpoint Signed-off-by: Steven Hahn --- src/Particle/ParticleSet.BC.cpp | 6 + src/Particle/ParticleSetT.h | 4 + src/Particle/SoaDistanceTableAAOMPTarget.h | 484 +---- src/Particle/SoaDistanceTableABOMPTarget.h | 394 +--- src/QMCDrivers/MCPopulation.h | 1 - .../BsplineFactory/SplineC2C.cpp | 810 -------- .../BsplineFactory/SplineC2C.h | 207 +- .../BsplineFactory/SplineC2R.cpp | 1188 ------------ .../BsplineFactory/SplineC2R.h | 191 +- .../BsplineFactory/SplineC2ROMPTarget.cpp | 1704 ---------------- .../BsplineFactory/SplineC2ROMPTarget.h | 299 +-- .../BsplineFactory/SplineR2R.cpp | 569 ------ .../BsplineFactory/SplineR2R.h | 202 +- .../BsplineFactory/SplineR2RT.h | 4 +- src/QMCWaveFunctions/CMakeLists.txt | 10 +- src/QMCWaveFunctions/ExampleHeComponent.h | 2 +- .../Fermion/Backflow_ee_kSpace.h | 4 - .../Fermion/SlaterDetBuilder.h | 2 +- .../Jastrow/CountingGaussian.h | 1 - .../Jastrow/CountingGaussianRegion.h | 1 - src/QMCWaveFunctions/OptimizableFunctorBase.h | 4 +- src/QMCWaveFunctions/OptimizableObject.h | 94 +- src/QMCWaveFunctions/OptimizableObjectT.h | 35 +- src/QMCWaveFunctions/RotatedSPOs.cpp | 1727 ----------------- src/QMCWaveFunctions/RotatedSPOsT.cpp | 112 +- src/QMCWaveFunctions/RotatedSPOsT.h | 128 +- src/QMCWaveFunctions/SPOSet.h | 1 + src/QMCWaveFunctions/SPOSetT.cpp | 112 +- src/QMCWaveFunctions/SPOSetT.h | 127 +- src/QMCWaveFunctions/VariableSet.cpp | 314 --- src/QMCWaveFunctions/VariableSet.h | 314 +-- src/QMCWaveFunctions/VariableSetT.h | 24 +- src/QMCWaveFunctions/tests/CMakeLists.txt | 2 +- src/QMCWaveFunctions/tests/ConstantSPOSet.cpp | 100 - src/QMCWaveFunctions/tests/ConstantSPOSet.h | 69 +- .../tests/ConstantSPOSetT.cpp | 5 +- src/QMCWaveFunctions/tests/ConstantSPOSetT.h | 3 +- 37 files changed, 352 insertions(+), 8902 deletions(-) delete mode 100644 src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp delete mode 100644 src/QMCWaveFunctions/BsplineFactory/SplineC2R.cpp delete mode 100644 src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.cpp delete mode 100644 src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp delete mode 100644 src/QMCWaveFunctions/RotatedSPOs.cpp delete mode 100644 src/QMCWaveFunctions/VariableSet.cpp delete mode 100644 src/QMCWaveFunctions/tests/ConstantSPOSet.cpp diff --git a/src/Particle/ParticleSet.BC.cpp b/src/Particle/ParticleSet.BC.cpp index 1a7440881ad..9f7195dce54 100644 --- a/src/Particle/ParticleSet.BC.cpp +++ b/src/Particle/ParticleSet.BC.cpp @@ -209,4 +209,10 @@ void ParticleSetT::convert2CartInBox(const ParticlePos& pin, ParticlePos& pou convert2UnitInBox(pin, pout); // convert to crystalline unit convert2Cart(pout); } + +template class ParticleSetT; +//template class ParticleSetT; +//template class ParticleSetT>; +//template class ParticleSetT>; + } // namespace qmcplusplus diff --git a/src/Particle/ParticleSetT.h b/src/Particle/ParticleSetT.h index 57eccea2631..545b23a77fa 100644 --- a/src/Particle/ParticleSetT.h +++ b/src/Particle/ParticleSetT.h @@ -75,6 +75,7 @@ class ParticleSetT : public OhmmsElementBase using Index_t = typename LatticeParticleTraits::Index_t; using Scalar_t = typename LatticeParticleTraits::Scalar_t; using Tensor_t = typename LatticeParticleTraits::Tensor_t; + using GradType = typename ParticleSetTraits::GradType; using ParticleLayout = typename LatticeParticleTraits::ParticleLayout; using SingleParticlePos = @@ -94,6 +95,9 @@ class ParticleSetT : public OhmmsElementBase using PropertyContainer_t = typename Walker_t::PropertyContainer_t; /// buffer type for a serialized buffer using Buffer_t = PooledData; + + using SingleParticleValue = typename LatticeParticleTraits::SingleParticleValue; + enum quantum_domains { no_quantum_domain = 0, diff --git a/src/Particle/SoaDistanceTableAAOMPTarget.h b/src/Particle/SoaDistanceTableAAOMPTarget.h index 7bafcd33c18..5eb91e236ef 100644 --- a/src/Particle/SoaDistanceTableAAOMPTarget.h +++ b/src/Particle/SoaDistanceTableAAOMPTarget.h @@ -14,14 +14,7 @@ #ifndef QMCPLUSPLUS_DTDIMPL_AA_OMPTARGET_H #define QMCPLUSPLUS_DTDIMPL_AA_OMPTARGET_H -#include "Lattice/ParticleBConds3DSoa.h" -#include "DistanceTable.h" -#include "CPU/SIMD/algorithm.hpp" -#include "OMPTarget/OMPallocator.hpp" -#include "Platforms/PinnedAllocator.h" -#include "Particle/RealSpacePositionsOMPTarget.h" -#include "ResourceCollection.h" -#include "OMPTarget/OMPTargetMath.hpp" +#include "Particle/SoaDistanceTableAATOMPTarget.h" namespace qmcplusplus { @@ -29,480 +22,7 @@ namespace qmcplusplus * @brief A derived classe from DistacneTableData, specialized for dense case */ template -struct SoaDistanceTableAAOMPTarget : public DTD_BConds, public DistanceTableAA -{ - /// actual memory for dist and displacements_ - aligned_vector memory_pool_; - - /// actual memory for temp_r_ - DistRow temp_r_mem_; - /// actual memory for temp_dr_ - DisplRow temp_dr_mem_; - /// actual memory for old_r_ - DistRow old_r_mem_; - /// actual memory for old_dr_ - DisplRow old_dr_mem_; - - ///multi walker shared memory buffer - struct DTAAMultiWalkerMem : public Resource - { - ///dist displ for temporary and old pairs - Vector>> mw_new_old_dist_displ; - - /** distances from a range of indics to the source. - * for original particle index i (row) and source particle id j (col) - * j < i, the element data is dist(r_i - r_j) - * j > i, the element data is dist(r_(n - 1 - i) - r_(n - 1 - j)) - */ - Vector>> mw_distances_subset; - - DTAAMultiWalkerMem() : Resource("DTAAMultiWalkerMem") {} - - DTAAMultiWalkerMem(const DTAAMultiWalkerMem&) : DTAAMultiWalkerMem() {} - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - }; - - ResourceHandle mw_mem_handle_; - - SoaDistanceTableAAOMPTarget(ParticleSet& target) - : DTD_BConds(target.getLattice()), - DistanceTableAA(target, DTModes::ALL_OFF), - num_targets_padded_(getAlignedSize(num_targets_)), -#if !defined(NDEBUG) - old_prepared_elec_id_(-1), -#endif - offload_timer_(createGlobalTimer(std::string("DTAAOMPTarget::offload_") + name_, timer_level_fine)), - evaluate_timer_(createGlobalTimer(std::string("DTAAOMPTarget::evaluate_") + name_, timer_level_fine)), - move_timer_(createGlobalTimer(std::string("DTAAOMPTarget::move_") + name_, timer_level_fine)), - update_timer_(createGlobalTimer(std::string("DTAAOMPTarget::update_") + name_, timer_level_fine)) - - { - auto* coordinates_soa = dynamic_cast(&target.getCoordinates()); - if (!coordinates_soa) - throw std::runtime_error("Source particle set doesn't have OpenMP offload. Contact developers!"); - resize(); - PRAGMA_OFFLOAD("omp target enter data map(to : this[:1])") - } - - SoaDistanceTableAAOMPTarget() = delete; - SoaDistanceTableAAOMPTarget(const SoaDistanceTableAAOMPTarget&) = delete; - ~SoaDistanceTableAAOMPTarget(){PRAGMA_OFFLOAD("omp target exit data map(delete : this[:1])")} - - size_t compute_size(int N) const - { - const size_t num_padded = getAlignedSize(N); - const size_t Alignment = getAlignment(); - return (num_padded * (2 * N - num_padded + 1) + (Alignment - 1) * num_padded) / 2; - } - - void resize() - { - // initialize memory containers and views - const size_t total_size = compute_size(num_targets_); - memory_pool_.resize(total_size * (1 + D)); - distances_.resize(num_targets_); - displacements_.resize(num_targets_); - for (int i = 0; i < num_targets_; ++i) - { - distances_[i].attachReference(memory_pool_.data() + compute_size(i), i); - displacements_[i].attachReference(i, total_size, memory_pool_.data() + total_size + compute_size(i)); - } - - old_r_mem_.resize(num_targets_); - old_dr_mem_.resize(num_targets_); - temp_r_mem_.resize(num_targets_); - temp_dr_mem_.resize(num_targets_); - } - - const RealType* getMultiWalkerTempDataPtr() const override - { - return mw_mem_handle_.getResource().mw_new_old_dist_displ.data(); - } - - void createResource(ResourceCollection& collection) const override - { - auto resource_index = collection.addResource(std::make_unique()); - } - - void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const override - { - assert(this == &dt_list.getLeader()); - auto& dt_leader = dt_list.getCastedLeader(); - dt_leader.mw_mem_handle_ = collection.lendResource(); - const size_t nw = dt_list.size(); - const size_t stride_size = num_targets_padded_ * (D + 1); - - for (int iw = 0; iw < nw; iw++) - { - auto& dt = dt_list.getCastedElement(iw); - dt.temp_r_.free(); - dt.temp_dr_.free(); - dt.old_r_.free(); - dt.old_dr_.free(); - } - - auto& mw_new_old_dist_displ = dt_leader.mw_mem_handle_.getResource().mw_new_old_dist_displ; - mw_new_old_dist_displ.resize(nw * 2 * stride_size); - for (int iw = 0; iw < nw; iw++) - { - auto& dt = dt_list.getCastedElement(iw); - dt.temp_r_.attachReference(mw_new_old_dist_displ.data() + stride_size * iw, num_targets_padded_); - dt.temp_dr_.attachReference(num_targets_, num_targets_padded_, - mw_new_old_dist_displ.data() + stride_size * iw + num_targets_padded_); - dt.old_r_.attachReference(mw_new_old_dist_displ.data() + stride_size * (iw + nw), num_targets_padded_); - dt.old_dr_.attachReference(num_targets_, num_targets_padded_, - mw_new_old_dist_displ.data() + stride_size * (iw + nw) + num_targets_padded_); - } - } - - void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const override - { - collection.takebackResource(dt_list.getCastedLeader().mw_mem_handle_); - const size_t nw = dt_list.size(); - for (int iw = 0; iw < nw; iw++) - { - auto& dt = dt_list.getCastedElement(iw); - dt.temp_r_.free(); - dt.temp_dr_.free(); - dt.old_r_.free(); - dt.old_dr_.free(); - } - } - - inline void evaluate(ParticleSet& P) override - { - ScopedTimer local_timer(evaluate_timer_); - - constexpr T BigR = std::numeric_limits::max(); - for (int iat = 1; iat < num_targets_; ++iat) - DTD_BConds::computeDistances(P.R[iat], P.getCoordinates().getAllParticlePos(), distances_[iat].data(), - displacements_[iat], 0, iat, iat); - } - - /** compute distances from particles in [range_begin, range_end) to all the particles. - * Although [range_begin, range_end) and be any particle [0, num_sources), it is only necessary to compute - * half of the table due to the symmetry of AA table. See note of the output data object mw_distances_subset - * To keep resident memory minimal on the device, range_end - range_begin < num_particls_stored is required. - */ - const RealType* mw_evalDistsInRange(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list, - size_t range_begin, - size_t range_end) const override - { - auto& dt_leader = dt_list.getCastedLeader(); - const size_t subset_size = range_end - range_begin; - if (subset_size > dt_leader.num_particls_stored) - throw std::runtime_error("not enough internal buffer"); - - ScopedTimer local_timer(dt_leader.evaluate_timer_); - - DTAAMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_; - auto& pset_leader = p_list.getLeader(); - - const size_t nw = dt_list.size(); - const auto num_sources_local = dt_leader.num_targets_; - const auto num_padded = dt_leader.num_targets_padded_; - mw_mem.mw_distances_subset.resize(nw * subset_size * num_padded); - - const int ChunkSizePerTeam = 512; - const size_t num_teams = (num_sources_local + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - - auto& coordinates_leader = static_cast(pset_leader.getCoordinates()); - - auto* rsoa_dev_list_ptr = coordinates_leader.getMultiWalkerRSoADevicePtrs().data(); - auto* dist_ranged = mw_mem.mw_distances_subset.data(); - { - ScopedTimer offload(dt_leader.offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(nw * num_teams)") - for (int iw = 0; iw < nw; ++iw) - for (int team_id = 0; team_id < num_teams; team_id++) - { - auto* source_pos_ptr = rsoa_dev_list_ptr[iw]; - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, num_sources_local); - - PRAGMA_OFFLOAD("omp parallel for") - for (int iel = first; iel < last; iel++) - { - for (int irow = 0; irow < subset_size; irow++) - { - T* dist = dist_ranged + (irow + subset_size * iw) * num_padded; - size_t id_target = irow + range_begin; +using SoaDistanceTableAAOMPTarget = SoaDistanceTableAATOMPTarget; - T dx, dy, dz; - if (id_target < iel) - { - dx = source_pos_ptr[id_target] - source_pos_ptr[iel]; - dy = source_pos_ptr[id_target + num_padded] - source_pos_ptr[iel + num_padded]; - dz = source_pos_ptr[id_target + num_padded * 2] - source_pos_ptr[iel + num_padded * 2]; - } - else - { - const size_t id_target_reverse = num_sources_local - 1 - id_target; - const size_t iel_reverse = num_sources_local - 1 - iel; - dx = source_pos_ptr[id_target_reverse] - source_pos_ptr[iel_reverse]; - dy = source_pos_ptr[id_target_reverse + num_padded] - source_pos_ptr[iel_reverse + num_padded]; - dz = source_pos_ptr[id_target_reverse + num_padded * 2] - source_pos_ptr[iel_reverse + num_padded * 2]; - } - - dist[iel] = DTD_BConds::computeDist(dx, dy, dz); - } - } - } - } - return mw_mem.mw_distances_subset.data(); - } - - ///evaluate the temporary pair relations - inline void move(const ParticleSet& P, const PosType& rnew, const IndexType iat, bool prepare_old) override - { - ScopedTimer local_timer(move_timer_); - -#if !defined(NDEBUG) - old_prepared_elec_id_ = prepare_old ? iat : -1; -#endif - temp_r_.attachReference(temp_r_mem_.data(), temp_r_mem_.size()); - temp_dr_.attachReference(temp_dr_mem_.size(), temp_dr_mem_.capacity(), temp_dr_mem_.data()); - - assert((prepare_old && iat >= 0 && iat < num_targets_) || !prepare_old); - DTD_BConds::computeDistances(rnew, P.getCoordinates().getAllParticlePos(), temp_r_.data(), temp_dr_, 0, - num_targets_, iat); - // set up old_r_ and old_dr_ for moves may get accepted. - if (prepare_old) - { - old_r_.attachReference(old_r_mem_.data(), old_r_mem_.size()); - old_dr_.attachReference(old_dr_mem_.size(), old_dr_mem_.capacity(), old_dr_mem_.data()); - //recompute from scratch - DTD_BConds::computeDistances(P.R[iat], P.getCoordinates().getAllParticlePos(), old_r_.data(), old_dr_, - 0, num_targets_, iat); - old_r_[iat] = std::numeric_limits::max(); //assign a big number - } - } - - /** evaluate the temporary pair relations when a move is proposed - * this implementation is asynchronous and the synchronization is managed at ParticleSet. - * Transferring results to host depends on DTModes::NEED_TEMP_DATA_ON_HOST. - * If the temporary pair distance are consumed on the device directly, the device to host data transfer can be - * skipped as an optimization. - */ - void mw_move(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list, - const std::vector& rnew_list, - const IndexType iat, - bool prepare_old = true) const override - { - assert(this == &dt_list.getLeader()); - auto& dt_leader = dt_list.getCastedLeader(); - DTAAMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_; - auto& pset_leader = p_list.getLeader(); - - ScopedTimer local_timer(move_timer_); - const size_t nw = dt_list.size(); - const size_t stride_size = num_targets_padded_ * (D + 1); - - auto& mw_new_old_dist_displ = mw_mem.mw_new_old_dist_displ; - - for (int iw = 0; iw < nw; iw++) - { - auto& dt = dt_list.getCastedElement(iw); -#if !defined(NDEBUG) - dt.old_prepared_elec_id_ = prepare_old ? iat : -1; -#endif - auto& coordinates_soa = static_cast(p_list[iw].getCoordinates()); - } - - const int ChunkSizePerTeam = 512; - const size_t num_teams = (num_targets_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - - auto& coordinates_leader = static_cast(pset_leader.getCoordinates()); - - const auto num_sources_local = num_targets_; - const auto num_padded = num_targets_padded_; - auto* rsoa_dev_list_ptr = coordinates_leader.getMultiWalkerRSoADevicePtrs().data(); - auto* r_dr_ptr = mw_new_old_dist_displ.data(); - auto* new_pos_ptr = coordinates_leader.getFusedNewPosBuffer().data(); - const size_t new_pos_stride = coordinates_leader.getFusedNewPosBuffer().capacity(); - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(nw * num_teams) \ - nowait depend(out: r_dr_ptr[:mw_new_old_dist_displ.size()])") - for (int iw = 0; iw < nw; ++iw) - for (int team_id = 0; team_id < num_teams; team_id++) - { - auto* source_pos_ptr = rsoa_dev_list_ptr[iw]; - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, num_sources_local); - - { // temp - auto* r_iw_ptr = r_dr_ptr + iw * stride_size; - auto* dr_iw_ptr = r_dr_ptr + iw * stride_size + num_padded; - - T pos[D]; - for (int idim = 0; idim < D; idim++) - pos[idim] = new_pos_ptr[idim * new_pos_stride + iw]; - - PRAGMA_OFFLOAD("omp parallel for") - for (int iel = first; iel < last; iel++) - DTD_BConds::computeDistancesOffload(pos, source_pos_ptr, num_padded, r_iw_ptr, dr_iw_ptr, - num_padded, iel, iat); - } - - if (prepare_old) - { // old - auto* r_iw_ptr = r_dr_ptr + (iw + nw) * stride_size; - auto* dr_iw_ptr = r_dr_ptr + (iw + nw) * stride_size + num_padded; - - T pos[D]; - for (int idim = 0; idim < D; idim++) - pos[idim] = source_pos_ptr[idim * num_padded + iat]; - - PRAGMA_OFFLOAD("omp parallel for") - for (int iel = first; iel < last; iel++) - DTD_BConds::computeDistancesOffload(pos, source_pos_ptr, num_padded, r_iw_ptr, dr_iw_ptr, - num_padded, iel, iat); - r_iw_ptr[iat] = std::numeric_limits::max(); //assign a big number - } - } - } - - if (modes_ & DTModes::NEED_TEMP_DATA_ON_HOST) - { - PRAGMA_OFFLOAD("omp target update nowait depend(inout: r_dr_ptr[:mw_new_old_dist_displ.size()]) \ - from(r_dr_ptr[:mw_new_old_dist_displ.size()])") - } - } - - int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const override - { - //ensure there are neighbors - assert(num_targets_ > 1); - RealType min_dist = std::numeric_limits::max(); - int index = -1; - if (newpos) - { - for (int jat = 0; jat < num_targets_; ++jat) - if (temp_r_[jat] < min_dist && jat != iat) - { - min_dist = temp_r_[jat]; - index = jat; - } - assert(index >= 0); - dr = temp_dr_[index]; - } - else - { - for (int jat = 0; jat < iat; ++jat) - if (distances_[iat][jat] < min_dist) - { - min_dist = distances_[iat][jat]; - index = jat; - } - for (int jat = iat + 1; jat < num_targets_; ++jat) - if (distances_[jat][iat] < min_dist) - { - min_dist = distances_[jat][iat]; - index = jat; - } - assert(index != iat && index >= 0); - if (index < iat) - dr = displacements_[iat][index]; - else - dr = displacements_[index][iat]; - } - r = min_dist; - return index; - } - - /** After accepting the iat-th particle, update the iat-th row of distances_ and displacements_. - * Upper triangle is not needed in the later computation and thus not updated - */ - inline void update(IndexType iat) override - { - ScopedTimer local_timer(update_timer_); - //update [0, iat) columns - const int nupdate = iat; - //copy row - assert(nupdate <= temp_r_.size()); - std::copy_n(temp_r_.data(), nupdate, distances_[iat].data()); - for (int idim = 0; idim < D; ++idim) - std::copy_n(temp_dr_.data(idim), nupdate, displacements_[iat].data(idim)); - //copy column - for (size_t i = iat + 1; i < num_targets_; ++i) - { - distances_[i][iat] = temp_r_[i]; - displacements_[i](iat) = -temp_dr_[i]; - } - } - - void updatePartial(IndexType jat, bool from_temp) override - { - ScopedTimer local_timer(update_timer_); - - //update [0, jat) - const int nupdate = jat; - if (from_temp) - { - //copy row - assert(nupdate <= temp_r_.size()); - std::copy_n(temp_r_.data(), nupdate, distances_[jat].data()); - for (int idim = 0; idim < D; ++idim) - std::copy_n(temp_dr_.data(idim), nupdate, displacements_[jat].data(idim)); - } - else - { - assert(old_prepared_elec_id_ == jat); - //copy row - assert(nupdate <= old_r_.size()); - std::copy_n(old_r_.data(), nupdate, distances_[jat].data()); - for (int idim = 0; idim < D; ++idim) - std::copy_n(old_dr_.data(idim), nupdate, displacements_[jat].data(idim)); - } - } - - void mw_updatePartial(const RefVectorWithLeader& dt_list, - IndexType jat, - const std::vector& from_temp) override - { - // if temp data on host is not updated by mw_move during p-by-p moves, there is no need to update distance table - if (!(modes_ & DTModes::NEED_TEMP_DATA_ON_HOST)) - return; - - for (int iw = 0; iw < dt_list.size(); iw++) - dt_list[iw].updatePartial(jat, from_temp[iw]); - } - - void mw_finalizePbyP(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list) const override - { - // if the distance table is not updated by mw_move during p-by-p, needs to recompute the whole table - // before being used by Hamiltonian if requested - if (!(modes_ & DTModes::NEED_TEMP_DATA_ON_HOST) && (modes_ & DTModes::NEED_FULL_TABLE_ON_HOST_AFTER_DONEPBYP)) - mw_evaluate(dt_list, p_list); - } - - size_t get_num_particls_stored() const override { return num_particls_stored; } - -private: - ///number of targets with padding - const size_t num_targets_padded_; -#if !defined(NDEBUG) - /** set to particle id after move() with prepare_old = true. -1 means not prepared. - * It is intended only for safety checks, not for codepath selection. - */ - int old_prepared_elec_id_; -#endif - /// timer for offload portion - NewTimer& offload_timer_; - /// timer for evaluate() - NewTimer& evaluate_timer_; - /// timer for move() - NewTimer& move_timer_; - /// timer for update() - NewTimer& update_timer_; - /// the particle count of the internal stored distances. - const size_t num_particls_stored = 64; -}; } // namespace qmcplusplus #endif diff --git a/src/Particle/SoaDistanceTableABOMPTarget.h b/src/Particle/SoaDistanceTableABOMPTarget.h index 05dac1eaf13..c31b673a47b 100644 --- a/src/Particle/SoaDistanceTableABOMPTarget.h +++ b/src/Particle/SoaDistanceTableABOMPTarget.h @@ -14,403 +14,15 @@ #ifndef QMCPLUSPLUS_DTDIMPL_AB_OMPTARGET_H #define QMCPLUSPLUS_DTDIMPL_AB_OMPTARGET_H -#include "Lattice/ParticleBConds3DSoa.h" -#include "DistanceTable.h" -#include "OMPTarget/OMPallocator.hpp" -#include "Platforms/PinnedAllocator.h" -#include "Particle/RealSpacePositionsOMPTarget.h" -#include "ResourceCollection.h" -#include "OMPTarget/OMPTargetMath.hpp" +#include "Particle/SoaDistanceTableABTOMPTarget.h" namespace qmcplusplus { /**@ingroup nnlist - * @brief A derived classe from DistacneTableData, specialized for AB using a transposed form + * @brief A derived classe from DistacneTableData, specialized for dense case */ template -class SoaDistanceTableABOMPTarget : public DTD_BConds, public DistanceTableAB -{ -private: - template - using OffloadPinnedVector = Vector>>; - - ///accelerator output buffer for r and dr - OffloadPinnedVector r_dr_memorypool_; - ///accelerator input array for a list of target particle positions, num_targets_ x D - OffloadPinnedVector target_pos; - - ///multi walker shared memory buffer - struct DTABMultiWalkerMem : public Resource - { - ///accelerator output array for multiple walkers, [1+D][num_targets_][num_padded] (distances, displacements) - OffloadPinnedVector mw_r_dr; - ///accelerator input buffer for multiple data set - OffloadPinnedVector offload_input; - - DTABMultiWalkerMem() : Resource("DTABMultiWalkerMem") {} - - DTABMultiWalkerMem(const DTABMultiWalkerMem&) : DTABMultiWalkerMem() {} - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - }; - - ResourceHandle mw_mem_handle_; - - void resize() - { - if (num_sources_ * num_targets_ == 0) - return; - if (distances_.size()) - return; - - // initialize memory containers and views - const size_t num_padded = getAlignedSize(num_sources_); - const size_t stride_size = getPerTargetPctlStrideSize(); - r_dr_memorypool_.resize(stride_size * num_targets_); - - distances_.resize(num_targets_); - displacements_.resize(num_targets_); - for (int i = 0; i < num_targets_; ++i) - { - distances_[i].attachReference(r_dr_memorypool_.data() + i * stride_size, num_sources_); - displacements_[i].attachReference(num_sources_, num_padded, - r_dr_memorypool_.data() + i * stride_size + num_padded); - } - } - - static void associateResource(const RefVectorWithLeader& dt_list) - { - auto& dt_leader = dt_list.getCastedLeader(); - - // initialize memory containers and views - size_t count_targets = 0; - for (size_t iw = 0; iw < dt_list.size(); iw++) - { - auto& dt = dt_list.getCastedElement(iw); - count_targets += dt.targets(); - dt.r_dr_memorypool_.free(); - } - - const size_t num_sources = dt_leader.num_sources_; - const size_t num_padded = getAlignedSize(dt_leader.num_sources_); - const size_t stride_size = num_padded * (D + 1); - const size_t total_targets = count_targets; - auto& mw_r_dr = dt_leader.mw_mem_handle_.getResource().mw_r_dr; - mw_r_dr.resize(total_targets * stride_size); - - count_targets = 0; - for (size_t iw = 0; iw < dt_list.size(); iw++) - { - auto& dt = dt_list.getCastedElement(iw); - assert(num_sources == dt.num_sources_); - - dt.distances_.resize(dt.targets()); - dt.displacements_.resize(dt.targets()); - - for (int i = 0; i < dt.targets(); ++i) - { - dt.distances_[i].attachReference(mw_r_dr.data() + (i + count_targets) * stride_size, num_sources); - dt.displacements_[i].attachReference(num_sources, num_padded, - mw_r_dr.data() + (i + count_targets) * stride_size + num_padded); - } - count_targets += dt.targets(); - } - } - -public: - SoaDistanceTableABOMPTarget(const ParticleSet& source, ParticleSet& target) - : DTD_BConds(source.getLattice()), - DistanceTableAB(source, target, DTModes::ALL_OFF), - offload_timer_(createGlobalTimer(std::string("DTABOMPTarget::offload_") + name_, timer_level_fine)), - evaluate_timer_(createGlobalTimer(std::string("DTABOMPTarget::evaluate_") + name_, timer_level_fine)), - move_timer_(createGlobalTimer(std::string("DTABOMPTarget::move_") + name_, timer_level_fine)), - update_timer_(createGlobalTimer(std::string("DTABOMPTarget::update_") + name_, timer_level_fine)) - - { - auto* coordinates_soa = dynamic_cast(&source.getCoordinates()); - if (!coordinates_soa) - throw std::runtime_error("Source particle set doesn't have OpenMP offload. Contact developers!"); - PRAGMA_OFFLOAD("omp target enter data map(to : this[:1])") - - // The padding of temp_r_ and temp_dr_ is necessary for the memory copy in the update function - // temp_r_ is padded explicitly while temp_dr_ is padded internally - const int num_padded = getAlignedSize(num_sources_); - temp_r_.resize(num_padded); - temp_dr_.resize(num_sources_); - } - - SoaDistanceTableABOMPTarget() = delete; - SoaDistanceTableABOMPTarget(const SoaDistanceTableABOMPTarget&) = delete; - - ~SoaDistanceTableABOMPTarget() { PRAGMA_OFFLOAD("omp target exit data map(delete : this[:1])") } - - void createResource(ResourceCollection& collection) const override - { - auto resource_index = collection.addResource(std::make_unique()); - } - - void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const override - { - auto& dt_leader = dt_list.getCastedLeader(); - dt_leader.mw_mem_handle_ = collection.lendResource(); - associateResource(dt_list); - } - - void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const override - { - collection.takebackResource(dt_list.getCastedLeader().mw_mem_handle_); - for (size_t iw = 0; iw < dt_list.size(); iw++) - { - auto& dt = dt_list.getCastedElement(iw); - dt.distances_.clear(); - dt.displacements_.clear(); - } - } - - const T* getMultiWalkerDataPtr() const override { return mw_mem_handle_.getResource().mw_r_dr.data(); } - - size_t getPerTargetPctlStrideSize() const override { return getAlignedSize(num_sources_) * (D + 1); } - - /** evaluate the full table */ - inline void evaluate(ParticleSet& P) override - { - resize(); - - ScopedTimer local_timer(evaluate_timer_); - // be aware of the sign of Displacement - const int num_targets_local = num_targets_; - const int num_sources_local = num_sources_; - const int num_padded = getAlignedSize(num_sources_); - - target_pos.resize(num_targets_ * D); - for (size_t iat = 0; iat < num_targets_; iat++) - for (size_t idim = 0; idim < D; idim++) - target_pos[iat * D + idim] = P.R[iat][idim]; - - auto* target_pos_ptr = target_pos.data(); - auto* source_pos_ptr = origin_.getCoordinates().getAllParticlePos().data(); - auto* r_dr_ptr = distances_[0].data(); - assert(distances_[0].data() + num_padded == displacements_[0].data()); - - // To maximize thread usage, the loop over electrons is chunked. Each chunk is sent to an OpenMP offload thread team. - const int ChunkSizePerTeam = 512; - const size_t num_teams = (num_sources_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - const size_t stride_size = getPerTargetPctlStrideSize(); - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(num_targets_*num_teams) \ - map(to: source_pos_ptr[:num_padded*D]) \ - map(always, to: target_pos_ptr[:num_targets_*D]) \ - map(always, from: r_dr_ptr[:num_targets_*stride_size])") - for (int iat = 0; iat < num_targets_local; ++iat) - for (int team_id = 0; team_id < num_teams; team_id++) - { - const int first = ChunkSizePerTeam * team_id; - const int last = omptarget::min(first + ChunkSizePerTeam, num_sources_local); - - T pos[D]; - for (int idim = 0; idim < D; idim++) - pos[idim] = target_pos_ptr[iat * D + idim]; - - auto* r_iat_ptr = r_dr_ptr + iat * stride_size; - auto* dr_iat_ptr = r_iat_ptr + num_padded; - - PRAGMA_OFFLOAD("omp parallel for") - for (int iel = first; iel < last; iel++) - DTD_BConds::computeDistancesOffload(pos, source_pos_ptr, num_padded, r_iat_ptr, dr_iat_ptr, - num_padded, iel); - } - } - } - - inline void mw_evaluate(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list) const override - { - assert(this == &dt_list.getLeader()); - auto& dt_leader = dt_list.getCastedLeader(); - - ScopedTimer local_timer(evaluate_timer_); - - const size_t nw = dt_list.size(); - DTABMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_; - auto& mw_r_dr = mw_mem.mw_r_dr; - - size_t count_targets = 0; - for (ParticleSet& p : p_list) - count_targets += p.getTotalNum(); - const size_t total_targets = count_targets; - - const int num_padded = getAlignedSize(num_sources_); - -#ifndef NDEBUG - const int stride_size = getPerTargetPctlStrideSize(); - count_targets = 0; - for (size_t iw = 0; iw < dt_list.size(); iw++) - { - auto& dt = dt_list.getCastedElement(iw); - - for (int i = 0; i < dt.targets(); ++i) - { - assert(dt.distances_[i].data() == mw_r_dr.data() + (i + count_targets) * stride_size); - assert(dt.displacements_[i].data() == mw_r_dr.data() + (i + count_targets) * stride_size + num_padded); - } - count_targets += dt.targets(); - } -#endif - - // This is horrible optimization putting different data types in a single buffer but allows a single H2D transfer - const size_t realtype_size = sizeof(RealType); - const size_t int_size = sizeof(int); - const size_t ptr_size = sizeof(RealType*); - auto& offload_input = mw_mem.offload_input; - offload_input.resize(total_targets * D * realtype_size + total_targets * int_size + nw * ptr_size); - auto source_ptrs = reinterpret_cast(offload_input.data()); - auto target_positions = reinterpret_cast(offload_input.data() + ptr_size * nw); - auto walker_id_ptr = - reinterpret_cast(offload_input.data() + ptr_size * nw + total_targets * D * realtype_size); - - count_targets = 0; - for (size_t iw = 0; iw < nw; iw++) - { - auto& dt = dt_list.getCastedElement(iw); - ParticleSet& pset(p_list[iw]); - - assert(dt.targets() == pset.getTotalNum()); - assert(num_sources_ == dt.num_sources_); - - auto& RSoA_OMPTarget = static_cast(dt.origin_.getCoordinates()); - source_ptrs[iw] = const_cast(RSoA_OMPTarget.getDevicePtr()); - - for (size_t iat = 0; iat < pset.getTotalNum(); ++iat, ++count_targets) - { - walker_id_ptr[count_targets] = iw; - for (size_t idim = 0; idim < D; idim++) - target_positions[count_targets * D + idim] = pset.R[iat][idim]; - } - } - - // To maximize thread usage, the loop over electrons is chunked. Each chunk is sent to an OpenMP offload thread team. - const int ChunkSizePerTeam = 512; - const size_t num_teams = (num_sources_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - - auto* r_dr_ptr = mw_r_dr.data(); - auto* input_ptr = offload_input.data(); - const int num_sources_local = num_sources_; - - { - ScopedTimer offload(dt_leader.offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(total_targets*num_teams) \ - map(always, to: input_ptr[:offload_input.size()]) \ - depend(out:r_dr_ptr[:mw_r_dr.size()]) nowait") - for (int iat = 0; iat < total_targets; ++iat) - for (int team_id = 0; team_id < num_teams; team_id++) - { - auto* target_pos_ptr = reinterpret_cast(input_ptr + ptr_size * nw); - const int walker_id = - reinterpret_cast(input_ptr + ptr_size * nw + total_targets * D * realtype_size)[iat]; - auto* source_pos_ptr = reinterpret_cast(input_ptr)[walker_id]; - auto* r_iat_ptr = r_dr_ptr + iat * num_padded * (D + 1); - auto* dr_iat_ptr = r_dr_ptr + iat * num_padded * (D + 1) + num_padded; - - const int first = ChunkSizePerTeam * team_id; - const int last = omptarget::min(first + ChunkSizePerTeam, num_sources_local); - - T pos[D]; - for (int idim = 0; idim < D; idim++) - pos[idim] = target_pos_ptr[iat * D + idim]; - - PRAGMA_OFFLOAD("omp parallel for") - for (int iel = first; iel < last; iel++) - DTD_BConds::computeDistancesOffload(pos, source_pos_ptr, num_padded, r_iat_ptr, dr_iat_ptr, - num_padded, iel); - } - - if (!(modes_ & DTModes::MW_EVALUATE_RESULT_NO_TRANSFER_TO_HOST)) - { - PRAGMA_OFFLOAD( - "omp target update from(r_dr_ptr[:mw_r_dr.size()]) depend(inout:r_dr_ptr[:mw_r_dr.size()]) nowait") - } - // wait for computing and (optional) transferring back to host. - // It can potentially be moved to ParticleSet to fuse multiple similar taskwait - PRAGMA_OFFLOAD("omp taskwait") - } - } - - inline void mw_recompute(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list, - const std::vector& recompute) const override - { - mw_evaluate(dt_list, p_list); - } - - ///evaluate the temporary pair relations - inline void move(const ParticleSet& P, const PosType& rnew, const IndexType iat, bool prepare_old) override - { - ScopedTimer local_timer(move_timer_); - DTD_BConds::computeDistances(rnew, origin_.getCoordinates().getAllParticlePos(), temp_r_.data(), temp_dr_, - 0, num_sources_); - // If the full table is not ready all the time, overwrite the current value. - // If this step is missing, DT values can be undefined in case a move is rejected. - if (!(modes_ & DTModes::NEED_FULL_TABLE_ANYTIME) && prepare_old) - DTD_BConds::computeDistances(P.R[iat], origin_.getCoordinates().getAllParticlePos(), - distances_[iat].data(), displacements_[iat], 0, num_sources_); - } - - ///update the stripe for jat-th particle - inline void update(IndexType iat) override - { - ScopedTimer local_timer(update_timer_); - std::copy_n(temp_r_.data(), num_sources_, distances_[iat].data()); - for (int idim = 0; idim < D; ++idim) - std::copy_n(temp_dr_.data(idim), num_sources_, displacements_[iat].data(idim)); - } - - int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const override - { - RealType min_dist = std::numeric_limits::max(); - int index = -1; - if (newpos) - { - for (int jat = 0; jat < num_sources_; ++jat) - if (temp_r_[jat] < min_dist) - { - min_dist = temp_r_[jat]; - index = jat; - } - if (index >= 0) - { - r = min_dist; - dr = temp_dr_[index]; - } - } - else - { - for (int jat = 0; jat < num_sources_; ++jat) - if (distances_[iat][jat] < min_dist) - { - min_dist = distances_[iat][jat]; - index = jat; - } - if (index >= 0) - { - r = min_dist; - dr = displacements_[iat][index]; - } - } - assert(index >= 0 && index < num_sources_); - return index; - } +using SoaDistanceTableABOMPTarget = SoaDistanceTableABTOMPTarget; -private: - /// timer for offload portion - NewTimer& offload_timer_; - /// timer for evaluate() - NewTimer& evaluate_timer_; - /// timer for move() - NewTimer& move_timer_; - /// timer for update() - NewTimer& update_timer_; -}; } // namespace qmcplusplus #endif diff --git a/src/QMCDrivers/MCPopulation.h b/src/QMCDrivers/MCPopulation.h index b19f043aa72..1d876030be6 100644 --- a/src/QMCDrivers/MCPopulation.h +++ b/src/QMCDrivers/MCPopulation.h @@ -44,7 +44,6 @@ class MCPopulation using Properties = MCPWalker::PropertyContainer_t; using IndexType = QMCTraits::IndexType; using FullPrecRealType = QMCTraits::FullPrecRealType; - using opt_variables_type = optimize::VariableSet; private: // Potential thread safety issue diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp deleted file mode 100644 index 8a3cd77c60d..00000000000 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp +++ /dev/null @@ -1,810 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2019 QMCPACK developers. -// -// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. -// Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. -////////////////////////////////////////////////////////////////////////////////////// - - -#include -#include "Concurrency/OpenMP.h" -#include "SplineC2C.h" -#include "spline2/MultiBsplineEval.hpp" -#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" -#include "CPU/math.hpp" -#include "CPU/BLAS.hpp" - -namespace qmcplusplus -{ -template -SplineC2C::SplineC2C(const SplineC2C& in) = default; - -template -inline void SplineC2C::set_spline(SingleSplineType* spline_r, - SingleSplineType* spline_i, - int twist, - int ispline, - int level) -{ - SplineInst->copy_spline(spline_r, 2 * ispline); - SplineInst->copy_spline(spline_i, 2 * ispline + 1); -} - -template -bool SplineC2C::read_splines(hdf_archive& h5f) -{ - std::ostringstream o; - o << "spline_" << MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); -} - -template -bool SplineC2C::write_splines(hdf_archive& h5f) -{ - std::ostringstream o; - o << "spline_" << MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); -} - -template -void SplineC2C::storeParamsBeforeRotation() -{ - const auto spline_ptr = SplineInst->getSplinePtr(); - const auto coefs_tot_size = spline_ptr->coefs_size; - coef_copy_ = std::make_shared>(coefs_tot_size); - - std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin()); -} - -/* - ~~ Notes for rotation ~~ - spl_coefs = Raw pointer to spline coefficients - basis_set_size = Number of spline coefs per orbital - OrbitalSetSize = Number of orbitals (excluding padding) - - spl_coefs has a complicated layout depending on dimensionality of splines. - Luckily, for our purposes, we can think of spl_coefs as pointing to a - matrix of size BasisSetSize x (OrbitalSetSize + padding), with the spline - index adjacent in memory. The orbital index is SIMD aligned and therefore - may include padding. - - As a result, due to SIMD alignment, Nsplines may be larger than the - actual number of splined orbitals. This means that in practice rot_mat - may be smaller than the number of 'columns' in the coefs array! - - SplineR2R spl_coef layout: - ^ | sp1 | ... | spN | pad | - | |=====|=====|=====|=====| - | | c11 | ... | c1N | 0 | - basis_set_size | c21 | ... | c2N | 0 | - | | ... | ... | ... | 0 | - | | cM1 | ... | cMN | 0 | - v |=====|=====|=====|=====| - <------ Nsplines ------> - - SplineC2C spl_coef layout: - ^ | sp1_r | sp1_i | ... | spN_r | spN_i | pad | - | |=======|=======|=======|=======|=======|=======| - | | c11_r | c11_i | ... | c1N_r | c1N_i | 0 | - basis_set_size | c21_r | c21_i | ... | c2N_r | c2N_i | 0 | - | | ... | ... | ... | ... | ... | ... | - | | cM1_r | cM1_i | ... | cMN_r | cMN_i | 0 | - v |=======|=======|=======|=======|=======|=======| - <------------------ Nsplines ------------------> - - NB: For splines (typically) BasisSetSize >> OrbitalSetSize, so the spl_coefs - "matrix" is very tall and skinny. -*/ -template -void SplineC2C::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) -{ - // SplineInst is a MultiBspline. See src/spline2/MultiBspline.hpp - const auto spline_ptr = SplineInst->getSplinePtr(); - assert(spline_ptr != nullptr); - const auto spl_coefs = spline_ptr->coefs; - const auto Nsplines = spline_ptr->num_splines; // May include padding - const auto coefs_tot_size = spline_ptr->coefs_size; - const auto basis_set_size = coefs_tot_size / Nsplines; - assert(OrbitalSetSize == rot_mat.rows()); - assert(OrbitalSetSize == rot_mat.cols()); - - if (!use_stored_copy) - { - assert(coef_copy_ != nullptr); - std::copy_n(spl_coefs, coefs_tot_size, coef_copy_->begin()); - } - - if constexpr (std::is_same_v) - { - //if ST is double, go ahead and use blas to make things faster - //Note that Nsplines needs to be divided by 2 since spl_coefs and coef_copy_ are stored as reals. - //Also casting them as ValueType so they are complex to do the correct gemm - BLAS::gemm('N', 'N', OrbitalSetSize, basis_set_size, OrbitalSetSize, ValueType(1.0, 0.0), rot_mat.data(), - OrbitalSetSize, (ValueType*)coef_copy_->data(), Nsplines / 2, ValueType(0.0, 0.0), - (ValueType*)spl_coefs, Nsplines / 2); - } - else - { - // if ST is float, RealType is double and ValueType is std::complex for C2C - // Just use naive matrix multiplication in order to avoid losing precision on rotation matrix - for (IndexType i = 0; i < basis_set_size; i++) - for (IndexType j = 0; j < OrbitalSetSize; j++) - { - // cur_elem points to the real componend of the coefficient. - // Imag component is adjacent in memory. - const auto cur_elem = Nsplines * i + 2 * j; - ST newval_r{0.}; - ST newval_i{0.}; - for (IndexType k = 0; k < OrbitalSetSize; k++) - { - const auto index = Nsplines * i + 2 * k; - ST zr = (*coef_copy_)[index]; - ST zi = (*coef_copy_)[index + 1]; - ST wr = rot_mat[k][j].real(); - ST wi = rot_mat[k][j].imag(); - newval_r += zr * wr - zi * wi; - newval_i += zr * wi + zi * wr; - } - spl_coefs[cur_elem] = newval_r; - spl_coefs[cur_elem + 1] = newval_i; - } - } -} - -template -inline void SplineC2C::assign_v(const PointType& r, - const vContainer_type& myV, - ValueVector& psi, - int first, - int last) const -{ - // protect last - last = last > kPoints.size() ? kPoints.size() : last; - - const ST x = r[0], y = r[1], z = r[2]; - const ST* restrict kx = myKcart.data(0); - const ST* restrict ky = myKcart.data(1); - const ST* restrict kz = myKcart.data(2); -#pragma omp simd - for (size_t j = first; j < last; ++j) - { - ST s, c; - const ST val_r = myV[2 * j]; - const ST val_i = myV[2 * j + 1]; - qmcplusplus::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); - psi[j + first_spo] = ComplexT(val_r * c - val_i * s, val_i * c + val_r * s); - } -} - -template -void SplineC2C::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - -#pragma omp parallel - { - int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type - FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(r, myV, psi, first / 2, last / 2); - } -} - -template -void SplineC2C::evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) -{ - const bool need_resize = ratios_private.rows() < VP.getTotalNum(); - -#pragma omp parallel - { - int tid = omp_get_thread_num(); - // initialize thread private ratios - if (need_resize) - { - if (tid == 0) // just like #pragma omp master, but one fewer call to the runtime - ratios_private.resize(VP.getTotalNum(), omp_get_num_threads()); -#pragma omp barrier - } - int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type - FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), tid, first, last); - const int first_cplx = first / 2; - const int last_cplx = kPoints.size() < last / 2 ? kPoints.size() : last / 2; - - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - const PointType& r = VP.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(r, myV, psi, first_cplx, last_cplx); - ratios_private[iat][tid] = simd::dot(psi.data() + first_cplx, psiinv.data() + first_cplx, last_cplx - first_cplx); - } - } - - // do the reduction manually - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - ratios[iat] = ComplexT(0); - for (int tid = 0; tid < ratios_private.cols(); tid++) - ratios[iat] += ratios_private[iat][tid]; - } -} - -/** assign_vgl - */ -template -inline void SplineC2C::assign_vgl(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi, - int first, - int last) const -{ - // protect last - last = last > kPoints.size() ? kPoints.size() : last; - - constexpr ST zero(0); - constexpr ST two(2); - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - const ST symGG[6] = {GGt[0], GGt[1] + GGt[3], GGt[2] + GGt[6], GGt[4], GGt[5] + GGt[7], GGt[8]}; - - const ST* restrict k0 = myKcart.data(0); - const ST* restrict k1 = myKcart.data(1); - const ST* restrict k2 = myKcart.data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - -#pragma omp simd - for (size_t j = first; j < last; ++j) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const ST lcart_r = SymTrace(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], symGG); - const ST lcart_i = SymTrace(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], symGG); - const ST lap_r = lcart_r + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = lcart_i + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - const size_t psiIndex = j + first_spo; - psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); - dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); - dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); - dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - d2psi[psiIndex] = ComplexT(c * lap_r - s * lap_i, c * lap_i + s * lap_r); - } -} - -/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ -template -inline void SplineC2C::assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) -{ - constexpr ST two(2); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart.data(0); - const ST* restrict k1 = myKcart.data(1); - const ST* restrict k2 = myKcart.data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - - const size_t N = last_spo - first_spo; -#pragma omp simd - for (size_t j = 0; j < N; ++j) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g0[jr]; - const ST dY_r = g1[jr]; - const ST dZ_r = g2[jr]; - - const ST dX_i = g0[ji]; - const ST dY_i = g1[ji]; - const ST dZ_i = g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const ST lap_r = myL[jr] + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = myL[ji] + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - - const size_t psiIndex = j + first_spo; - psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); - dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); - dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); - dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - d2psi[psiIndex] = ComplexT(c * lap_r - s * lap_i, c * lap_i + s * lap_r); - } -} - -template -void SplineC2C::evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - -#pragma omp parallel - { - int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type - FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgl(r, psi, dpsi, d2psi, first / 2, last / 2); - } -} - -template -void SplineC2C::assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const -{ - // protect last - last = last > kPoints.size() ? kPoints.size() : last; - - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart.data(0); - const ST* restrict k1 = myKcart.data(1); - const ST* restrict k2 = myKcart.data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - -#pragma omp simd - for (size_t j = first; j < last; ++j) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = j + first_spo; - psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); - dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); - dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); - dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - - const ST h_xx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i); - const ST h_xy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i); - const ST h_xz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i); - const ST h_yx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i); - const ST h_yy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i); - const ST h_yz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i); - const ST h_zx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i); - const ST h_zy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i); - const ST h_zz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i); - - const ST h_xx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r); - const ST h_xy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r); - const ST h_xz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r); - const ST h_yx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r); - const ST h_yy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r); - const ST h_yz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r); - const ST h_zx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r); - const ST h_zy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r); - const ST h_zz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r); - - grad_grad_psi[psiIndex][0] = ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r); - grad_grad_psi[psiIndex][1] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); - grad_grad_psi[psiIndex][2] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); - grad_grad_psi[psiIndex][3] = ComplexT(c * h_yx_r - s * h_yx_i, c * h_yx_i + s * h_yx_r); - grad_grad_psi[psiIndex][4] = ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r); - grad_grad_psi[psiIndex][5] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); - grad_grad_psi[psiIndex][6] = ComplexT(c * h_zx_r - s * h_zx_i, c * h_zx_i + s * h_zx_r); - grad_grad_psi[psiIndex][7] = ComplexT(c * h_zy_r - s * h_zy_i, c * h_zy_i + s * h_zy_r); - grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r); - } -} - -template -void SplineC2C::evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - -#pragma omp parallel - { - int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type - FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2); - } -} - -template -void SplineC2C::assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first, - int last) const -{ - // protect last - last = last < 0 ? kPoints.size() : (last > kPoints.size() ? kPoints.size() : last); - - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart.data(0); - const ST* restrict k1 = myKcart.data(1); - const ST* restrict k2 = myKcart.data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - - const ST* restrict gh000 = mygH.data(0); - const ST* restrict gh001 = mygH.data(1); - const ST* restrict gh002 = mygH.data(2); - const ST* restrict gh011 = mygH.data(3); - const ST* restrict gh012 = mygH.data(4); - const ST* restrict gh022 = mygH.data(5); - const ST* restrict gh111 = mygH.data(6); - const ST* restrict gh112 = mygH.data(7); - const ST* restrict gh122 = mygH.data(8); - const ST* restrict gh222 = mygH.data(9); - -//SIMD doesn't work quite right yet. Comment out until further debugging. -#pragma omp simd - for (size_t j = first; j < last; ++j) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = j + first_spo; - psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); - dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); - dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); - dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. - const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); - const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); - const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); - const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12); - const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22); - const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22); - - const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02); - const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12); - const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22); - const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12); - const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22); - const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22); - - const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; - const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; - const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; - const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; - const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; - const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; - - const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; - const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; - const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; - const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i; - const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; - const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; - - grad_grad_psi[psiIndex][0] = ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r); - grad_grad_psi[psiIndex][1] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); - grad_grad_psi[psiIndex][2] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); - grad_grad_psi[psiIndex][3] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r); - grad_grad_psi[psiIndex][4] = ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r); - grad_grad_psi[psiIndex][5] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); - grad_grad_psi[psiIndex][6] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r); - grad_grad_psi[psiIndex][7] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); - grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r); - - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. - - const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) - const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; - const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; - const ST gh_xxy_r = - f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; - const ST gh_xxy_i = - f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; - const ST gh_xxz_r = - f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; - const ST gh_xxz_i = - f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; - const ST gh_xyy_r = - f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; - const ST gh_xyy_i = - f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; - const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - - (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i; - const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - - (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r; - const ST gh_xzz_r = - f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; - const ST gh_xzz_i = - f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; - const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i; - const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r; - const ST gh_yyz_r = - f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; - const ST gh_yyz_i = - f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r; - const ST gh_yzz_r = - f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; - const ST gh_yzz_i = - f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; - const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i; - const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r; - - grad_grad_grad_psi[psiIndex][0][0] = ComplexT(c * gh_xxx_r - s * gh_xxx_i, c * gh_xxx_i + s * gh_xxx_r); - grad_grad_grad_psi[psiIndex][0][1] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); - grad_grad_grad_psi[psiIndex][0][2] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); - grad_grad_grad_psi[psiIndex][0][3] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); - grad_grad_grad_psi[psiIndex][0][4] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); - grad_grad_grad_psi[psiIndex][0][5] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][0][6] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); - grad_grad_grad_psi[psiIndex][0][7] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][0][8] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); - - grad_grad_grad_psi[psiIndex][1][0] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r); - grad_grad_grad_psi[psiIndex][1][1] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); - grad_grad_grad_psi[psiIndex][1][2] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][1][3] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r); - grad_grad_grad_psi[psiIndex][1][4] = ComplexT(c * gh_yyy_r - s * gh_yyy_i, c * gh_yyy_i + s * gh_yyy_r); - grad_grad_grad_psi[psiIndex][1][5] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); - grad_grad_grad_psi[psiIndex][1][6] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][1][7] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); - grad_grad_grad_psi[psiIndex][1][8] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); - - - grad_grad_grad_psi[psiIndex][2][0] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); - grad_grad_grad_psi[psiIndex][2][1] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][2][2] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); - grad_grad_grad_psi[psiIndex][2][3] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); - grad_grad_grad_psi[psiIndex][2][4] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); - grad_grad_grad_psi[psiIndex][2][5] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); - grad_grad_grad_psi[psiIndex][2][6] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); - grad_grad_grad_psi[psiIndex][2][7] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); - grad_grad_grad_psi[psiIndex][2][8] = ComplexT(c * gh_zzz_r - s * gh_zzz_i, c * gh_zzz_i + s * gh_zzz_r); - } -} - -template -void SplineC2C::evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); -#pragma omp parallel - { - int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type - FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); - assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, last / 2); - } -} - -template class SplineC2C; -template class SplineC2C; - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2C.h index 9410e80cfb8..d716e2d97f2 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2C.h @@ -18,212 +18,13 @@ #ifndef QMCPLUSPLUS_SPLINE_C2C_H #define QMCPLUSPLUS_SPLINE_C2C_H -#include -#include "QMCWaveFunctions/BsplineFactory/BsplineSet.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "spline2/MultiBspline.hpp" -#include "Utilities/FairDivide.h" +#include "Configuration.h" +#include "QMCWaveFunctions/BsplineFactory/SplineC2CT.h" namespace qmcplusplus { -/** class to match std::complex spline with BsplineSet::ValueType (complex) SPOs - * @tparam ST precision of spline - * - * Requires temporage storage and multiplication of phase vectors - * The internal storage of complex spline coefficients uses double sized real arrays of ST type, aligned and padded. - * All the output orbitals are complex. - */ -template -class SplineC2C : public BsplineSet -{ -public: - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = ST; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - - // types for evaluation results - using ComplexT = typename BsplineSet::ValueType; - using BsplineSet::GGGVector; - using BsplineSet::GradVector; - using BsplineSet::HessVector; - using BsplineSet::ValueVector; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - using ghContainer_type = VectorSoaContainer; - -private: - ///primitive cell - CrystalLattice PrimLattice; - ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian - Tensor GGt; - ///multi bspline set - std::shared_ptr> SplineInst; - - ///Copy of original splines for orbital rotation - std::shared_ptr> coef_copy_; - - vContainer_type mKK; - VectorSoaContainer myKcart; - - ///thread private ratios for reduction when using nested threading, numVP x numThread - Matrix ratios_private; - -protected: - /// intermediate result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; - -public: - SplineC2C(const std::string& my_name) : BsplineSet(my_name) {} - - SplineC2C(const SplineC2C& in); - virtual std::string getClassName() const override { return "SplineC2C"; } - virtual std::string getKeyword() const override { return "SplineC2C"; } - bool isComplex() const override { return true; }; - - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - - bool isRotationSupported() const override { return true; } - - /// Store an original copy of the spline coefficients for orbital rotation - void storeParamsBeforeRotation() override; - - /* - Implements orbital rotations via [1,2]. - Should be called by RotatedSPOs::apply_rotation() - This implementation requires that NSPOs > Nelec. In other words, - if you want to run a orbopt wfn, you must include some virtual orbitals! - Some results (using older Berkeley branch) were published in [3]. - [1] Filippi & Fahy, JCP 112, (2000) - [2] Toulouse & Umrigar, JCP 126, (2007) - [3] Townsend et al., PRB 102, (2020) - */ - void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) override; - - inline void resizeStorage(size_t n, size_t nvals) - { - init_base(n); - size_t npad = getAlignedSize(2 * n); - myV.resize(npad); - myG.resize(npad); - myL.resize(npad); - myH.resize(npad); - mygH.resize(npad); - } - - void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } - - void gather_tables(Communicate* comm) - { - if (comm->size() == 1) - return; - const int Nbands = kPoints.size(); - const int Nbandgroups = comm->size(); - offset.resize(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, offset); - for (size_t ib = 0; ib < offset.size(); ib++) - offset[ib] *= 2; - gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, offset); - } - - template - void create_spline(GT& xyz_g, BCT& xyz_bc) - { - resize_kpoints(); - SplineInst = std::make_shared>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " - << "for the coefficients in 3D spline orbital representation" << std::endl; - } - - inline void flush_zero() { SplineInst->flush_zero(); } - - /** remap kPoints to pack the double copy */ - inline void resize_kpoints() - { - const size_t nk = kPoints.size(); - mKK.resize(nk); - myKcart.resize(nk); - for (size_t i = 0; i < nk; ++i) - { - mKK[i] = -dot(kPoints[i], kPoints[i]); - myKcart(i) = kPoints[i]; - } - } - - void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); - - bool read_splines(hdf_archive& h5f); - - bool write_splines(hdf_archive& h5f); - - void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const; - - void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override; - - void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override; - - /** assign_vgl - */ - void assign_vgl(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last) - const; - - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ - void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - void evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) override; - - void assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const; - - void evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override; - - void assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first = 0, - int last = -1) const; - - void evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; -}; - -extern template class SplineC2C; -extern template class SplineC2C; +template +using SplineC2C = SplineC2CT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2R.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2R.cpp deleted file mode 100644 index a1e1649b638..00000000000 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2R.cpp +++ /dev/null @@ -1,1188 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2019 QMCPACK developers. -// -// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@intel.com, University of Illinois at Urbana-Champaign -// Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// Anouar Benali, benali@anl.gov, Argonne National Laboratory -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "Concurrency/OpenMP.h" -#include "SplineC2R.h" -#include "spline2/MultiBsplineEval.hpp" -#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" -#include "CPU/math.hpp" - -namespace qmcplusplus -{ -template -SplineC2R::SplineC2R(const SplineC2R& in) = default; - -template -inline void SplineC2R::set_spline(SingleSplineType* spline_r, - SingleSplineType* spline_i, - int twist, - int ispline, - int level) -{ - SplineInst->copy_spline(spline_r, 2 * ispline); - SplineInst->copy_spline(spline_i, 2 * ispline + 1); -} - -template -bool SplineC2R::read_splines(hdf_archive& h5f) -{ - std::ostringstream o; - o << "spline_" << MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); -} - -template -bool SplineC2R::write_splines(hdf_archive& h5f) -{ - std::ostringstream o; - o << "spline_" << MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); -} - -template -inline void SplineC2R::assign_v(const PointType& r, - const vContainer_type& myV, - ValueVector& psi, - int first, - int last) const -{ - // protect last - last = last > kPoints.size() ? kPoints.size() : last; - - const ST x = r[0], y = r[1], z = r[2]; - const ST* restrict kx = myKcart.data(0); - const ST* restrict ky = myKcart.data(1); - const ST* restrict kz = myKcart.data(2); - - TT* restrict psi_s = psi.data() + first_spo; - const size_t requested_orb_size = psi.size(); -#pragma omp simd - for (size_t j = first; j < std::min(nComplexBands, last); j++) - { - ST s, c; - const size_t jr = j << 1; - const size_t ji = jr + 1; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - qmcplusplus::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); - if (jr < requested_orb_size) - psi_s[jr] = val_r * c - val_i * s; - if (ji < requested_orb_size) - psi_s[ji] = val_i * c + val_r * s; - } - - psi_s += nComplexBands; -#pragma omp simd - for (size_t j = std::max(nComplexBands, first); j < last; j++) - { - ST s, c; - const ST val_r = myV[2 * j]; - const ST val_i = myV[2 * j + 1]; - qmcplusplus::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); - if (j < requested_orb_size) - psi_s[j] = val_r * c - val_i * s; - } -} - -template -void SplineC2R::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - -#pragma omp parallel - { - int first, last; - FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(r, myV, psi, first / 2, last / 2); - } -} - -template -void SplineC2R::evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) -{ - const bool need_resize = ratios_private.rows() < VP.getTotalNum(); - -#pragma omp parallel - { - int tid = omp_get_thread_num(); - // initialize thread private ratios - if (need_resize) - { - if (tid == 0) // just like #pragma omp master, but one fewer call to the runtime - ratios_private.resize(VP.getTotalNum(), omp_get_num_threads()); -#pragma omp barrier - } - int first, last; - FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), tid, first, last); - const int first_cplx = first / 2; - const int last_cplx = kPoints.size() < last / 2 ? kPoints.size() : last / 2; - - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - const PointType& r = VP.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(r, myV, psi, first_cplx, last_cplx); - - const int first_real = first_cplx + std::min(nComplexBands, first_cplx); - const int last_real = last_cplx + std::min(nComplexBands, last_cplx); - ratios_private[iat][tid] = simd::dot(psi.data() + first_real, psiinv.data() + first_real, last_real - first_real); - } - } - - // do the reduction manually - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - ratios[iat] = TT(0); - for (int tid = 0; tid < ratios_private.cols(); tid++) - ratios[iat] += ratios_private[iat][tid]; - } -} - -/** assign_vgl - */ -template -inline void SplineC2R::assign_vgl(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi, - int first, - int last) const -{ - // protect last - last = last > kPoints.size() ? kPoints.size() : last; - - constexpr ST two(2); - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - const ST symGG[6] = {GGt[0], GGt[1] + GGt[3], GGt[2] + GGt[6], GGt[4], GGt[5] + GGt[7], GGt[8]}; - - const ST* restrict k0 = myKcart.data(0); - ASSUME_ALIGNED(k0); - const ST* restrict k1 = myKcart.data(1); - ASSUME_ALIGNED(k1); - const ST* restrict k2 = myKcart.data(2); - ASSUME_ALIGNED(k2); - - const ST* restrict g0 = myG.data(0); - ASSUME_ALIGNED(g0); - const ST* restrict g1 = myG.data(1); - ASSUME_ALIGNED(g1); - const ST* restrict g2 = myG.data(2); - ASSUME_ALIGNED(g2); - const ST* restrict h00 = myH.data(0); - ASSUME_ALIGNED(h00); - const ST* restrict h01 = myH.data(1); - ASSUME_ALIGNED(h01); - const ST* restrict h02 = myH.data(2); - ASSUME_ALIGNED(h02); - const ST* restrict h11 = myH.data(3); - ASSUME_ALIGNED(h11); - const ST* restrict h12 = myH.data(4); - ASSUME_ALIGNED(h12); - const ST* restrict h22 = myH.data(5); - ASSUME_ALIGNED(h22); - - const size_t requested_orb_size = psi.size(); -#pragma omp simd - for (size_t j = first; j < std::min(nComplexBands, last); j++) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const ST lcart_r = SymTrace(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], symGG); - const ST lcart_i = SymTrace(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], symGG); - const ST lap_r = lcart_r + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = lcart_i + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - - const size_t psiIndex = first_spo + jr; - if (psiIndex < requested_orb_size) - { - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - d2psi[psiIndex] = c * lap_r - s * lap_i; - } - if (psiIndex + 1 < requested_orb_size) - { - psi[psiIndex + 1] = c * val_i + s * val_r; - dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; - dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; - dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - d2psi[psiIndex + 1] = c * lap_i + s * lap_r; - } - } - -#pragma omp simd - for (size_t j = std::max(nComplexBands, first); j < last; j++) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - if (const size_t psiIndex = first_spo + nComplexBands + j; psiIndex < requested_orb_size) - { - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - const ST lcart_r = SymTrace(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], symGG); - const ST lcart_i = SymTrace(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], symGG); - const ST lap_r = lcart_r + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = lcart_i + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - d2psi[psiIndex] = c * lap_r - s * lap_i; - } - } -} - -/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ -template -inline void SplineC2R::assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) -{ - constexpr ST two(2); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart.data(0); - ASSUME_ALIGNED(k0); - const ST* restrict k1 = myKcart.data(1); - ASSUME_ALIGNED(k1); - const ST* restrict k2 = myKcart.data(2); - ASSUME_ALIGNED(k2); - - const ST* restrict g0 = myG.data(0); - ASSUME_ALIGNED(g0); - const ST* restrict g1 = myG.data(1); - ASSUME_ALIGNED(g1); - const ST* restrict g2 = myG.data(2); - ASSUME_ALIGNED(g2); - - const size_t N = kPoints.size(); - -#pragma omp simd - for (size_t j = 0; j < nComplexBands; j++) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g0[jr]; - const ST dY_r = g1[jr]; - const ST dZ_r = g2[jr]; - - const ST dX_i = g0[ji]; - const ST dY_i = g1[ji]; - const ST dZ_i = g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const ST lap_r = myL[jr] + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = myL[ji] + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - - const size_t psiIndex = first_spo + jr; - psi[psiIndex] = c * val_r - s * val_i; - psi[psiIndex + 1] = c * val_i + s * val_r; - d2psi[psiIndex] = c * lap_r - s * lap_i; - d2psi[psiIndex + 1] = c * lap_i + s * lap_r; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; - dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; - dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - } - -#pragma omp simd - for (size_t j = nComplexBands; j < N; j++) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g0[jr]; - const ST dY_r = g1[jr]; - const ST dZ_r = g2[jr]; - - const ST dX_i = g0[ji]; - const ST dY_i = g1[ji]; - const ST dZ_i = g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - const size_t psiIndex = first_spo + nComplexBands + j; - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - const ST lap_r = myL[jr] + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = myL[ji] + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - d2psi[psiIndex] = c * lap_r - s * lap_i; - } -} - -template -void SplineC2R::evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - -#pragma omp parallel - { - int first, last; - FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgl(r, psi, dpsi, d2psi, first / 2, last / 2); - } -} - -template -void SplineC2R::assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const -{ - // protect last - last = last > kPoints.size() ? kPoints.size() : last; - - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart.data(0); - const ST* restrict k1 = myKcart.data(1); - const ST* restrict k2 = myKcart.data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - -#pragma omp simd - for (size_t j = first; j < std::min(nComplexBands, last); j++) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = first_spo + jr; - - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - psi[psiIndex + 1] = c * val_i + s * val_r; - dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; - dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; - dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - - const ST h_xx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i); - const ST h_xy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i); - const ST h_xz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i); - const ST h_yx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i); - const ST h_yy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i); - const ST h_yz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i); - const ST h_zx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i); - const ST h_zy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i); - const ST h_zz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i); - - const ST h_xx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r); - const ST h_xy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r); - const ST h_xz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r); - const ST h_yx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r); - const ST h_yy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r); - const ST h_yz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r); - const ST h_zx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r); - const ST h_zy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r); - const ST h_zz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r); - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i; - grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - - grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r; - grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r; - grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r; - grad_grad_psi[psiIndex + 1][3] = c * h_yx_i + s * h_yx_r; - grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r; - grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r; - grad_grad_psi[psiIndex + 1][6] = c * h_zx_i + s * h_zx_r; - grad_grad_psi[psiIndex + 1][7] = c * h_zy_i + s * h_zy_r; - grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r; - } - -#pragma omp simd - for (size_t j = std::max(nComplexBands, first); j < last; j++) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = first_spo + nComplexBands + j; - - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - const ST h_xx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i); - const ST h_xy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i); - const ST h_xz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i); - const ST h_yx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i); - const ST h_yy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i); - const ST h_yz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i); - const ST h_zx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i); - const ST h_zy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i); - const ST h_zz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i); - - const ST h_xx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r); - const ST h_xy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r); - const ST h_xz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r); - const ST h_yx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r); - const ST h_yy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r); - const ST h_yz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r); - const ST h_zx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r); - const ST h_zy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r); - const ST h_zz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r); - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i; - grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - } -} - -template -void SplineC2R::evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); -#pragma omp parallel - { - int first, last; - FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2); - } -} - -template -void SplineC2R::assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first, - int last) const -{ - // protect last - last = last < 0 ? kPoints.size() : (last > kPoints.size() ? kPoints.size() : last); - - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart.data(0); - const ST* restrict k1 = myKcart.data(1); - const ST* restrict k2 = myKcart.data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - - const ST* restrict gh000 = mygH.data(0); - const ST* restrict gh001 = mygH.data(1); - const ST* restrict gh002 = mygH.data(2); - const ST* restrict gh011 = mygH.data(3); - const ST* restrict gh012 = mygH.data(4); - const ST* restrict gh022 = mygH.data(5); - const ST* restrict gh111 = mygH.data(6); - const ST* restrict gh112 = mygH.data(7); - const ST* restrict gh122 = mygH.data(8); - const ST* restrict gh222 = mygH.data(9); - -//SIMD doesn't work quite right yet. Comment out until further debugging. -#pragma omp simd - for (size_t j = first; j < std::min(nComplexBands, last); j++) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = first_spo + jr; - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - psi[psiIndex + 1] = c * val_i + s * val_r; - dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; - dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; - dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. - const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); - const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); - const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); - const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12); - const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22); - const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22); - - const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02); - const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12); - const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22); - const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12); - const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22); - const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22); - - const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; - const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; - const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; - const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; - const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; - const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; - - const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; - const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; - const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; - const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i; - const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; - const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - - grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r; - grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r; - grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r; - grad_grad_psi[psiIndex + 1][3] = c * h_xy_i + s * h_xy_r; - grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r; - grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r; - grad_grad_psi[psiIndex + 1][6] = c * h_xz_i + s * h_xz_r; - grad_grad_psi[psiIndex + 1][7] = c * h_yz_i + s * h_yz_r; - grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r; - - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. - - const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) - const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; - const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; - const ST gh_xxy_r = - f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; - const ST gh_xxy_i = - f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; - const ST gh_xxz_r = - f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; - const ST gh_xxz_i = - f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; - const ST gh_xyy_r = - f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; - const ST gh_xyy_i = - f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; - const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - - (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i; - const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - - (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r; - const ST gh_xzz_r = - f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; - const ST gh_xzz_i = - f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; - const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i; - const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r; - const ST gh_yyz_r = - f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; - const ST gh_yyz_i = - f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r; - const ST gh_yzz_r = - f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; - const ST gh_yzz_i = - f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; - const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i; - const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r; - - grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i; - grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i; - - grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i; - grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i; - - grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i; - - grad_grad_grad_psi[psiIndex + 1][0][0] = c * gh_xxx_i + s * gh_xxx_r; - grad_grad_grad_psi[psiIndex + 1][0][1] = c * gh_xxy_i + s * gh_xxy_r; - grad_grad_grad_psi[psiIndex + 1][0][2] = c * gh_xxz_i + s * gh_xxz_r; - grad_grad_grad_psi[psiIndex + 1][0][3] = c * gh_xxy_i + s * gh_xxy_r; - grad_grad_grad_psi[psiIndex + 1][0][4] = c * gh_xyy_i + s * gh_xyy_r; - grad_grad_grad_psi[psiIndex + 1][0][5] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][0][6] = c * gh_xxz_i + s * gh_xxz_r; - grad_grad_grad_psi[psiIndex + 1][0][7] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][0][8] = c * gh_xzz_i + s * gh_xzz_r; - - grad_grad_grad_psi[psiIndex + 1][1][0] = c * gh_xxy_i + s * gh_xxy_r; - grad_grad_grad_psi[psiIndex + 1][1][1] = c * gh_xyy_i + s * gh_xyy_r; - grad_grad_grad_psi[psiIndex + 1][1][2] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][1][3] = c * gh_xyy_i + s * gh_xyy_r; - grad_grad_grad_psi[psiIndex + 1][1][4] = c * gh_yyy_i + s * gh_yyy_r; - grad_grad_grad_psi[psiIndex + 1][1][5] = c * gh_yyz_i + s * gh_yyz_r; - grad_grad_grad_psi[psiIndex + 1][1][6] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][1][7] = c * gh_yyz_i + s * gh_yyz_r; - grad_grad_grad_psi[psiIndex + 1][1][8] = c * gh_yzz_i + s * gh_yzz_r; - - grad_grad_grad_psi[psiIndex + 1][2][0] = c * gh_xxz_i + s * gh_xxz_r; - grad_grad_grad_psi[psiIndex + 1][2][1] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][2][2] = c * gh_xzz_i + s * gh_xzz_r; - grad_grad_grad_psi[psiIndex + 1][2][3] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][2][4] = c * gh_yyz_i + s * gh_yyz_r; - grad_grad_grad_psi[psiIndex + 1][2][5] = c * gh_yzz_i + s * gh_yzz_r; - grad_grad_grad_psi[psiIndex + 1][2][6] = c * gh_xzz_i + s * gh_xzz_r; - grad_grad_grad_psi[psiIndex + 1][2][7] = c * gh_yzz_i + s * gh_yzz_r; - grad_grad_grad_psi[psiIndex + 1][2][8] = c * gh_zzz_i + s * gh_zzz_r; - } -#pragma omp simd - for (size_t j = std::max(nComplexBands, first); j < last; j++) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = first_spo + nComplexBands + j; - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. - const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); - const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); - const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); - const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12); - const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22); - const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22); - - const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02); - const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12); - const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22); - const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12); - const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22); - const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22); - - const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; - const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; - const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; - const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; - const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; - const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; - - const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; - const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; - const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; - const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i; - const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; - const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. - - const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) - const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; - const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; - const ST gh_xxy_r = - f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; - const ST gh_xxy_i = - f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; - const ST gh_xxz_r = - f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; - const ST gh_xxz_i = - f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; - const ST gh_xyy_r = - f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; - const ST gh_xyy_i = - f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; - const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - - (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i; - const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - - (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r; - const ST gh_xzz_r = - f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; - const ST gh_xzz_i = - f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; - const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i; - const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r; - const ST gh_yyz_r = - f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; - const ST gh_yyz_i = - f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r; - const ST gh_yzz_r = - f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; - const ST gh_yzz_i = - f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; - const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i; - const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r; - //[x][xx] //These are the unique entries - grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i; - grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i; - - grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i; - grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i; - - grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i; - } -} - -template -void SplineC2R::evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); -#pragma omp parallel - { - int first, last; - FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); - assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, last / 2); - } -} - -template class SplineC2R; -template class SplineC2R; - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2R.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2R.h index 05b8c4a0b34..c0d5af67261 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2R.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2R.h @@ -21,197 +21,12 @@ #ifndef QMCPLUSPLUS_SPLINE_C2R_H #define QMCPLUSPLUS_SPLINE_C2R_H -#include -#include "QMCWaveFunctions/BsplineFactory/BsplineSet.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "spline2/MultiBspline.hpp" -#include "Utilities/FairDivide.h" +#include "QMCWaveFunctions/BsplineFactory/SplineC2RT.h" namespace qmcplusplus { -/** class to match std::complex spline with BsplineSet::ValueType (real) SPOs - * @tparam ST precision of spline - * - * Requires temporage storage and multiplication of phase vectors - * The internal storage of complex spline coefficients uses double sized real arrays of ST type, aligned and padded. - * The first nComplexBands complex splines produce 2 real orbitals. - * The rest complex splines produce 1 real orbital. - * All the output orbitals are real (C2R). The maximal number of output orbitals is OrbitalSetSize. - */ -template -class SplineC2R : public BsplineSet -{ -public: - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = ST; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - // types for evaluation results - using TT = typename BsplineSet::ValueType; - using BsplineSet::GGGVector; - using BsplineSet::GradVector; - using BsplineSet::HessVector; - using BsplineSet::ValueVector; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - using ghContainer_type = VectorSoaContainer; - -private: - ///primitive cell - CrystalLattice PrimLattice; - ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian - Tensor GGt; - ///number of complex bands - int nComplexBands; - ///multi bspline set - std::shared_ptr> SplineInst; - - vContainer_type mKK; - VectorSoaContainer myKcart; - - ///thread private ratios for reduction when using nested threading, numVP x numThread - Matrix ratios_private; - -protected: - /// intermediate result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; - -public: - SplineC2R(const std::string& my_name) : BsplineSet(my_name), nComplexBands(0) {} - - SplineC2R(const SplineC2R& in); - virtual std::string getClassName() const override { return "SplineC2R"; } - virtual std::string getKeyword() const override { return "SplineC2R"; } - bool isComplex() const override { return true; }; - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - - inline void resizeStorage(size_t n, size_t nvals) - { - init_base(n); - size_t npad = getAlignedSize(2 * n); - myV.resize(npad); - myG.resize(npad); - myL.resize(npad); - myH.resize(npad); - mygH.resize(npad); - } - - void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } - - void gather_tables(Communicate* comm) - { - if (comm->size() == 1) - return; - const int Nbands = kPoints.size(); - const int Nbandgroups = comm->size(); - offset.resize(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, offset); - - for (size_t ib = 0; ib < offset.size(); ib++) - offset[ib] = offset[ib] * 2; - gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, offset); - } - - template - void create_spline(GT& xyz_g, BCT& xyz_bc) - { - resize_kpoints(); - SplineInst = std::make_shared>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - - app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " - << "for the coefficients in 3D spline orbital representation" << std::endl; - } - - inline void flush_zero() { SplineInst->flush_zero(); } - - /** remap kPoints to pack the double copy */ - inline void resize_kpoints() - { - nComplexBands = this->remap_kpoints(); - const int nk = kPoints.size(); - mKK.resize(nk); - myKcart.resize(nk); - for (size_t i = 0; i < nk; ++i) - { - mKK[i] = -dot(kPoints[i], kPoints[i]); - myKcart(i) = kPoints[i]; - } - } - - void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); - - bool read_splines(hdf_archive& h5f); - - bool write_splines(hdf_archive& h5f); - - void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const; - - void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override; - - void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override; - - /** assign_vgl - */ - void assign_vgl(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last) - const; - - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ - void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - void evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) override; - - void assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const; - - void evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override; - - void assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first = 0, - int last = -1) const; - - void evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; -}; - -extern template class SplineC2R; -extern template class SplineC2R; +template +using SplineC2R = SplineC2RT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.cpp deleted file mode 100644 index ea262bf8cc2..00000000000 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.cpp +++ /dev/null @@ -1,1704 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2019 QMCPACK developers. -// -// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// -// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory -////////////////////////////////////////////////////////////////////////////////////// - - -#include "SplineC2ROMPTarget.h" -#include "spline2/MultiBsplineEval.hpp" -#include "spline2/MultiBsplineEval_OMPoffload.hpp" -#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" -#include "ApplyPhaseC2R.hpp" -#include "Concurrency/OpenMP.h" - -namespace qmcplusplus -{ -template -SplineC2ROMPTarget::SplineC2ROMPTarget(const SplineC2ROMPTarget& in) = default; - -template -inline void SplineC2ROMPTarget::set_spline(SingleSplineType* spline_r, - SingleSplineType* spline_i, - int twist, - int ispline, - int level) -{ - SplineInst->copy_spline(spline_r, 2 * ispline); - SplineInst->copy_spline(spline_i, 2 * ispline + 1); -} - -template -bool SplineC2ROMPTarget::read_splines(hdf_archive& h5f) -{ - std::ostringstream o; - o << "spline_" << MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); -} - -template -bool SplineC2ROMPTarget::write_splines(hdf_archive& h5f) -{ - std::ostringstream o; - o << "spline_" << MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); -} - -template -inline void SplineC2ROMPTarget::assign_v(const PointType& r, - const vContainer_type& myV, - ValueVector& psi, - int first, - int last) const -{ - // protect last - last = last > kPoints.size() ? kPoints.size() : last; - - const ST x = r[0], y = r[1], z = r[2]; - const ST* restrict kx = myKcart->data(0); - const ST* restrict ky = myKcart->data(1); - const ST* restrict kz = myKcart->data(2); - - TT* restrict psi_s = psi.data() + first_spo; -#pragma omp simd - for (size_t j = first; j < std::min(nComplexBands, last); j++) - { - ST s, c; - const size_t jr = j << 1; - const size_t ji = jr + 1; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); - psi_s[jr] = val_r * c - val_i * s; - psi_s[ji] = val_i * c + val_r * s; - } - - psi_s += nComplexBands; -#pragma omp simd - for (size_t j = std::max(nComplexBands, first); j < last; j++) - { - ST s, c; - const ST val_r = myV[2 * j]; - const ST val_i = myV[2 * j + 1]; - omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); - psi_s[j] = val_r * c - val_i * s; - } -} - -template -void SplineC2ROMPTarget::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - - if (true) - { -#pragma omp parallel - { - int first, last; - FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(r, myV, psi, first / 2, last / 2); - } - } - else - { - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - - const auto spline_padded_size = myV.size(); - const auto sposet_padded_size = getAlignedSize(OrbitalSetSize); - offload_scratch.resize(spline_padded_size); - results_scratch.resize(sposet_padded_size); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* offload_scratch_ptr = offload_scratch.data(); - auto* results_scratch_ptr = results_scratch.data(); - auto* psi_ptr = psi.data(); - const auto x = r[0], y = r[1], z = r[2]; - const auto rux = ru[0], ruy = ru[1], ruz = ru[2]; - const auto myKcart_padded_size = myKcart->capacity(); - auto* myKcart_ptr = myKcart->data(); - const size_t first_spo_local = first_spo; - const size_t nComplexBands_local = nComplexBands; - const auto requested_orb_size = psi.size(); - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \ - map(always, from: results_scratch_ptr[0:sposet_padded_size])") - for (int team_id = 0; team_id < NumTeams; team_id++) - { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size); - - int ix, iy, iz; - ST a[4], b[4], c[4]; - spline2::computeLocationAndFractional(spline_ptr, rux, ruy, ruz, ix, iy, iz, a, b, c); - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, - offload_scratch_ptr + first + index); - const size_t first_cplx = first / 2; - const size_t last_cplx = last / 2; - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2R::assign_v(x, y, z, results_scratch_ptr, offload_scratch_ptr, myKcart_ptr, myKcart_padded_size, - first_spo_local, nComplexBands_local, index); - } - - for (size_t i = 0; i < requested_orb_size; i++) - psi[i] = results_scratch[i]; - } - } -} - -template -void SplineC2ROMPTarget::evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) -{ - const int nVP = VP.getTotalNum(); - psiinv_pos_copy.resize(psiinv.size() + nVP * 6); - - // stage psiinv to psiinv_pos_copy - std::copy_n(psiinv.data(), psiinv.size(), psiinv_pos_copy.data()); - - // pack particle positions - auto* restrict pos_scratch = psiinv_pos_copy.data() + psiinv.size(); - for (int iat = 0; iat < nVP; ++iat) - { - const PointType& r = VP.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - pos_scratch[iat * 6] = r[0]; - pos_scratch[iat * 6 + 1] = r[1]; - pos_scratch[iat * 6 + 2] = r[2]; - pos_scratch[iat * 6 + 3] = ru[0]; - pos_scratch[iat * 6 + 4] = ru[1]; - pos_scratch[iat * 6 + 5] = ru[2]; - } - - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - ratios_private.resize(nVP, NumTeams); - const auto spline_padded_size = myV.size(); - const auto sposet_padded_size = getAlignedSize(OrbitalSetSize); - offload_scratch.resize(spline_padded_size * nVP); - results_scratch.resize(sposet_padded_size * nVP); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* offload_scratch_ptr = offload_scratch.data(); - auto* results_scratch_ptr = results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* myKcart_ptr = myKcart->data(); - auto* psiinv_ptr = psiinv_pos_copy.data(); - auto* ratios_private_ptr = ratios_private.data(); - const size_t first_spo_local = first_spo; - const size_t nComplexBands_local = nComplexBands; - const auto requested_orb_size = psiinv.size(); - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*nVP) \ - map(always, to: psiinv_ptr[0:psiinv_pos_copy.size()]) \ - map(always, from: ratios_private_ptr[0:NumTeams*nVP])") - for (int iat = 0; iat < nVP; iat++) - for (int team_id = 0; team_id < NumTeams; team_id++) - { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size); - - auto* restrict offload_scratch_iat_ptr = offload_scratch_ptr + spline_padded_size * iat; - auto* restrict psi_iat_ptr = results_scratch_ptr + sposet_padded_size * iat; - auto* restrict pos_scratch = psiinv_ptr + requested_orb_size; - - int ix, iy, iz; - ST a[4], b[4], c[4]; - spline2::computeLocationAndFractional(spline_ptr, ST(pos_scratch[iat * 6 + 3]), ST(pos_scratch[iat * 6 + 4]), - ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c); - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, - offload_scratch_iat_ptr + first + index); - const size_t first_cplx = first / 2; - const size_t last_cplx = last / 2; - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2R::assign_v(ST(pos_scratch[iat * 6]), ST(pos_scratch[iat * 6 + 1]), ST(pos_scratch[iat * 6 + 2]), - psi_iat_ptr, offload_scratch_iat_ptr, myKcart_ptr, myKcart_padded_size, first_spo_local, - nComplexBands_local, index); - - const size_t first_real = first_cplx + omptarget::min(nComplexBands_local, first_cplx); - const size_t last_real = - omptarget::min(last_cplx + omptarget::min(nComplexBands_local, last_cplx), requested_orb_size); - TT sum(0); - PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)") - for (int i = first_real; i < last_real; i++) - sum += psi_iat_ptr[i] * psiinv_ptr[i]; - ratios_private_ptr[iat * NumTeams + team_id] = sum; - } - } - - // do the reduction manually - for (int iat = 0; iat < nVP; ++iat) - { - ratios[iat] = TT(0); - for (int tid = 0; tid < NumTeams; tid++) - ratios[iat] += ratios_private[iat][tid]; - } -} - -template -void SplineC2ROMPTarget::mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const -{ - assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.getCastedLeader>(); - auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); - auto& det_ratios_buffer_H2D = mw_mem.det_ratios_buffer_H2D; - auto& mw_ratios_private = mw_mem.mw_ratios_private; - auto& mw_offload_scratch = mw_mem.mw_offload_scratch; - auto& mw_results_scratch = mw_mem.mw_results_scratch; - const size_t nw = spo_list.size(); - const size_t requested_orb_size = phi_leader.size(); - - size_t mw_nVP = 0; - for (const VirtualParticleSet& VP : vp_list) - mw_nVP += VP.getTotalNum(); - - const size_t packed_size = nw * sizeof(ValueType*) + mw_nVP * (6 * sizeof(TT) + sizeof(int)); - det_ratios_buffer_H2D.resize(packed_size); - - // pack invRow_ptr_list to det_ratios_buffer_H2D - Vector ptr_buffer(reinterpret_cast(det_ratios_buffer_H2D.data()), nw); - for (size_t iw = 0; iw < nw; iw++) - ptr_buffer[iw] = invRow_ptr_list[iw]; - - // pack particle positions - auto* pos_ptr = reinterpret_cast(det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*)); - auto* ref_id_ptr = - reinterpret_cast(det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(TT)); - size_t iVP = 0; - for (size_t iw = 0; iw < nw; iw++) - { - const VirtualParticleSet& VP = vp_list[iw]; - assert(ratios_list[iw].size() == VP.getTotalNum()); - for (size_t iat = 0; iat < VP.getTotalNum(); ++iat, ++iVP) - { - ref_id_ptr[iVP] = iw; - const PointType& r = VP.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - pos_ptr[0] = r[0]; - pos_ptr[1] = r[1]; - pos_ptr[2] = r[2]; - pos_ptr[3] = ru[0]; - pos_ptr[4] = ru[1]; - pos_ptr[5] = ru[2]; - pos_ptr += 6; - } - } - - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - mw_ratios_private.resize(mw_nVP, NumTeams); - const auto spline_padded_size = myV.size(); - const auto sposet_padded_size = getAlignedSize(OrbitalSetSize); - mw_offload_scratch.resize(spline_padded_size * mw_nVP); - mw_results_scratch.resize(sposet_padded_size * mw_nVP); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* offload_scratch_ptr = mw_offload_scratch.data(); - auto* results_scratch_ptr = mw_results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* myKcart_ptr = myKcart->data(); - auto* buffer_H2D_ptr = det_ratios_buffer_H2D.data(); - auto* ratios_private_ptr = mw_ratios_private.data(); - const size_t first_spo_local = first_spo; - const size_t nComplexBands_local = nComplexBands; - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*mw_nVP) \ - map(always, to: buffer_H2D_ptr[0:det_ratios_buffer_H2D.size()]) \ - map(always, from: ratios_private_ptr[0:NumTeams*mw_nVP])") - for (int iat = 0; iat < mw_nVP; iat++) - for (int team_id = 0; team_id < NumTeams; team_id++) - { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size); - - auto* restrict offload_scratch_iat_ptr = offload_scratch_ptr + spline_padded_size * iat; - auto* restrict psi_iat_ptr = results_scratch_ptr + sposet_padded_size * iat; - auto* ref_id_ptr = reinterpret_cast(buffer_H2D_ptr + nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(TT)); - auto* restrict psiinv_ptr = reinterpret_cast(buffer_H2D_ptr)[ref_id_ptr[iat]]; - auto* restrict pos_scratch = reinterpret_cast(buffer_H2D_ptr + nw * sizeof(ValueType*)); - - int ix, iy, iz; - ST a[4], b[4], c[4]; - spline2::computeLocationAndFractional(spline_ptr, ST(pos_scratch[iat * 6 + 3]), ST(pos_scratch[iat * 6 + 4]), - ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c); - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, - offload_scratch_iat_ptr + first + index); - const size_t first_cplx = first / 2; - const size_t last_cplx = last / 2; - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2R::assign_v(ST(pos_scratch[iat * 6]), ST(pos_scratch[iat * 6 + 1]), ST(pos_scratch[iat * 6 + 2]), - psi_iat_ptr, offload_scratch_iat_ptr, myKcart_ptr, myKcart_padded_size, first_spo_local, - nComplexBands_local, index); - - const size_t first_real = first_cplx + omptarget::min(nComplexBands_local, first_cplx); - const size_t last_real = - omptarget::min(last_cplx + omptarget::min(nComplexBands_local, last_cplx), requested_orb_size); - TT sum(0); - PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)") - for (int i = first_real; i < last_real; i++) - sum += psi_iat_ptr[i] * psiinv_ptr[i]; - ratios_private_ptr[iat * NumTeams + team_id] = sum; - } - } - - // do the reduction manually - iVP = 0; - for (size_t iw = 0; iw < nw; iw++) - { - auto& ratios = ratios_list[iw]; - for (size_t iat = 0; iat < ratios.size(); iat++, iVP++) - { - ratios[iat] = TT(0); - for (int tid = 0; tid < NumTeams; ++tid) - ratios[iat] += mw_ratios_private[iVP][tid]; - } - } -} - -/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ -template -inline void SplineC2ROMPTarget::assign_vgl_from_l(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) -{ - constexpr ST two(2); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart->data(0); - ASSUME_ALIGNED(k0); - const ST* restrict k1 = myKcart->data(1); - ASSUME_ALIGNED(k1); - const ST* restrict k2 = myKcart->data(2); - ASSUME_ALIGNED(k2); - - const ST* restrict g0 = myG.data(0); - ASSUME_ALIGNED(g0); - const ST* restrict g1 = myG.data(1); - ASSUME_ALIGNED(g1); - const ST* restrict g2 = myG.data(2); - ASSUME_ALIGNED(g2); - - const size_t N = kPoints.size(); - -#pragma omp simd - for (size_t j = 0; j < nComplexBands; j++) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g0[jr]; - const ST dY_r = g1[jr]; - const ST dZ_r = g2[jr]; - - const ST dX_i = g0[ji]; - const ST dY_i = g1[ji]; - const ST dZ_i = g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const ST lap_r = myL[jr] + (*mKK)[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = myL[ji] + (*mKK)[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - - const size_t psiIndex = first_spo + jr; - psi[psiIndex] = c * val_r - s * val_i; - psi[psiIndex + 1] = c * val_i + s * val_r; - d2psi[psiIndex] = c * lap_r - s * lap_i; - d2psi[psiIndex + 1] = c * lap_i + s * lap_r; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; - dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; - dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - } - -#pragma omp simd - for (size_t j = nComplexBands; j < N; j++) - { - const size_t jr = j << 1; - const size_t ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g0[jr]; - const ST dY_r = g1[jr]; - const ST dZ_r = g2[jr]; - - const ST dX_i = g0[ji]; - const ST dY_i = g1[ji]; - const ST dZ_i = g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - const size_t psiIndex = first_spo + nComplexBands + j; - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - const ST lap_r = myL[jr] + (*mKK)[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); - const ST lap_i = myL[ji] + (*mKK)[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - d2psi[psiIndex] = c * lap_r - s * lap_i; - } -} - -template -void SplineC2ROMPTarget::evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - - const auto spline_padded_size = myV.size(); - const auto sposet_padded_size = getAlignedSize(OrbitalSetSize); - // for V(1)G(3)H(6) intermediate result - offload_scratch.resize(spline_padded_size * SoAFields3D::NUM_FIELDS); - // for V(1)G(3)L(1) final result - results_scratch.resize(sposet_padded_size * 5); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* offload_scratch_ptr = offload_scratch.data(); - auto* results_scratch_ptr = results_scratch.data(); - const auto x = r[0], y = r[1], z = r[2]; - const auto rux = ru[0], ruy = ru[1], ruz = ru[2]; - const auto myKcart_padded_size = myKcart->capacity(); - auto* mKK_ptr = mKK->data(); - auto* GGt_ptr = GGt_offload->data(); - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - auto* myKcart_ptr = myKcart->data(); - const size_t first_spo_local = first_spo; - const size_t nComplexBands_local = nComplexBands; - const auto requested_orb_size = psi.size(); - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \ - map(always, from: results_scratch_ptr[0:sposet_padded_size*5])") - for (int team_id = 0; team_id < NumTeams; team_id++) - { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size); - - int ix, iy, iz; - ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; - spline2::computeLocationAndFractional(spline_ptr, rux, ruy, ruz, ix, iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c); - - const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2], - PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], - PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]}; - const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6], - GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - { - spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b, d2c, - offload_scratch_ptr + first + index, spline_padded_size); - const int output_index = first + index; - offload_scratch_ptr[spline_padded_size * SoAFields3D::LAPL + output_index] = - SymTrace(offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS00 + output_index], - offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS01 + output_index], - offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS02 + output_index], - offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS11 + output_index], - offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS12 + output_index], - offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS22 + output_index], symGGt); - } - const size_t first_cplx = first / 2; - const size_t last_cplx = last / 2; - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2R::assign_vgl(x, y, z, results_scratch_ptr, sposet_padded_size, mKK_ptr, offload_scratch_ptr, - spline_padded_size, G, myKcart_ptr, myKcart_padded_size, first_spo_local, nComplexBands_local, - index); - } - } - - for (size_t i = 0; i < requested_orb_size; i++) - { - psi[i] = results_scratch[i]; - dpsi[i][0] = results_scratch[i + sposet_padded_size * 1]; - dpsi[i][1] = results_scratch[i + sposet_padded_size * 2]; - dpsi[i][2] = results_scratch[i + sposet_padded_size * 3]; - d2psi[i] = results_scratch[i + sposet_padded_size * 4]; - } -} - -template -void SplineC2ROMPTarget::evaluateVGLMultiPos(const Vector>& multi_pos, - Vector>& offload_scratch, - Vector>& results_scratch, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const -{ - const size_t num_pos = psi_v_list.size(); - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - const auto spline_padded_size = myV.size(); - const auto sposet_padded_size = getAlignedSize(OrbitalSetSize); - // for V(1)G(3)H(6) intermediate result - offload_scratch.resize(spline_padded_size * num_pos * SoAFields3D::NUM_FIELDS); - // for V(1)G(3)L(1) final result - results_scratch.resize(sposet_padded_size * num_pos * 5); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* pos_copy_ptr = multi_pos.data(); - auto* offload_scratch_ptr = offload_scratch.data(); - auto* results_scratch_ptr = results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* mKK_ptr = mKK->data(); - auto* GGt_ptr = GGt_offload->data(); - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - auto* myKcart_ptr = myKcart->data(); - const size_t first_spo_local = first_spo; - const size_t nComplexBands_local = nComplexBands; - const auto requested_orb_size = psi_v_list[0].get().size(); - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \ - map(always, to: pos_copy_ptr[0:num_pos*6]) \ - map(always, from: results_scratch_ptr[0:sposet_padded_size*num_pos*5])") - for (int iw = 0; iw < num_pos; iw++) - for (int team_id = 0; team_id < NumTeams; team_id++) - { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size); - - auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + spline_padded_size * iw * SoAFields3D::NUM_FIELDS; - auto* restrict psi_iw_ptr = results_scratch_ptr + sposet_padded_size * iw * 5; - - int ix, iy, iz; - ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; - spline2::computeLocationAndFractional(spline_ptr, pos_copy_ptr[iw * 6 + 3], pos_copy_ptr[iw * 6 + 4], - pos_copy_ptr[iw * 6 + 5], ix, iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c); - - const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2], - PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], - PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]}; - const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6], - GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - { - spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b, - d2c, offload_scratch_iw_ptr + first + index, spline_padded_size); - const int output_index = first + index; - offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::LAPL + output_index] = - SymTrace(offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS00 + output_index], - offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS01 + output_index], - offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS02 + output_index], - offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS11 + output_index], - offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS12 + output_index], - offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS22 + output_index], symGGt); - } - const size_t first_cplx = first / 2; - const size_t last_cplx = last / 2; - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2R::assign_vgl(pos_copy_ptr[iw * 6], pos_copy_ptr[iw * 6 + 1], pos_copy_ptr[iw * 6 + 2], psi_iw_ptr, - sposet_padded_size, mKK_ptr, offload_scratch_iw_ptr, spline_padded_size, G, myKcart_ptr, - myKcart_padded_size, first_spo_local, nComplexBands_local, index); - } - } - - for (int iw = 0; iw < num_pos; ++iw) - { - auto* restrict results_iw_ptr = results_scratch_ptr + sposet_padded_size * iw * 5; - ValueVector& psi_v(psi_v_list[iw]); - GradVector& dpsi_v(dpsi_v_list[iw]); - ValueVector& d2psi_v(d2psi_v_list[iw]); - for (size_t i = 0; i < requested_orb_size; i++) - { - psi_v[i] = results_iw_ptr[i]; - dpsi_v[i][0] = results_iw_ptr[i + sposet_padded_size]; - dpsi_v[i][1] = results_iw_ptr[i + sposet_padded_size * 2]; - dpsi_v[i][2] = results_iw_ptr[i + sposet_padded_size * 3]; - d2psi_v[i] = results_iw_ptr[i + sposet_padded_size * 4]; - } - } -} - -template -void SplineC2ROMPTarget::mw_evaluateVGL(const RefVectorWithLeader& sa_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const -{ - assert(this == &sa_list.getLeader()); - auto& phi_leader = sa_list.getCastedLeader>(); - auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); - auto& mw_pos_copy = mw_mem.mw_pos_copy; - auto& mw_offload_scratch = mw_mem.mw_offload_scratch; - auto& mw_results_scratch = mw_mem.mw_results_scratch; - const int nwalkers = sa_list.size(); - mw_pos_copy.resize(nwalkers * 6); - - // pack particle positions - for (int iw = 0; iw < nwalkers; ++iw) - { - const PointType& r = P_list[iw].activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - mw_pos_copy[iw * 6] = r[0]; - mw_pos_copy[iw * 6 + 1] = r[1]; - mw_pos_copy[iw * 6 + 2] = r[2]; - mw_pos_copy[iw * 6 + 3] = ru[0]; - mw_pos_copy[iw * 6 + 4] = ru[1]; - mw_pos_copy[iw * 6 + 5] = ru[2]; - } - - phi_leader.evaluateVGLMultiPos(mw_pos_copy, mw_offload_scratch, mw_results_scratch, psi_v_list, dpsi_v_list, - d2psi_v_list); -} - -template -void SplineC2ROMPTarget::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const -{ - assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.getCastedLeader>(); - auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); - auto& buffer_H2D = mw_mem.buffer_H2D; - auto& rg_private = mw_mem.rg_private; - auto& mw_offload_scratch = mw_mem.mw_offload_scratch; - auto& mw_results_scratch = mw_mem.mw_results_scratch; - const int nwalkers = spo_list.size(); - buffer_H2D.resize(nwalkers, sizeof(ST) * 6 + sizeof(ValueType*)); - - // pack particle positions and invRow pointers. - for (int iw = 0; iw < nwalkers; ++iw) - { - const PointType& r = P_list[iw].activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); - Vector pos_copy(reinterpret_cast(buffer_H2D[iw]), 6); - - pos_copy[0] = r[0]; - pos_copy[1] = r[1]; - pos_copy[2] = r[2]; - pos_copy[3] = ru[0]; - pos_copy[4] = ru[1]; - pos_copy[5] = ru[2]; - - auto& invRow_ptr = *reinterpret_cast(buffer_H2D[iw] + sizeof(ST) * 6); - invRow_ptr = invRow_ptr_list[iw]; - } - - const size_t num_pos = nwalkers; - const auto spline_padded_size = myV.size(); - const auto sposet_padded_size = getAlignedSize(OrbitalSetSize); - const size_t ChunkSizePerTeam = 512; - const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - - mw_offload_scratch.resize(spline_padded_size * num_pos * SoAFields3D::NUM_FIELDS); - // for V(1)G(3)L(1) final result - mw_results_scratch.resize(sposet_padded_size * num_pos * 5); - // per team ratio and grads - rg_private.resize(num_pos, NumTeams * 4); - - // Ye: need to extract sizes and pointers before entering target region - const auto* spline_ptr = SplineInst->getSplinePtr(); - auto* buffer_H2D_ptr = buffer_H2D.data(); - auto* offload_scratch_ptr = mw_offload_scratch.data(); - auto* results_scratch_ptr = mw_results_scratch.data(); - const auto myKcart_padded_size = myKcart->capacity(); - auto* mKK_ptr = mKK->data(); - auto* GGt_ptr = GGt_offload->data(); - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - auto* myKcart_ptr = myKcart->data(); - auto* phi_vgl_ptr = phi_vgl_v.data(); - auto* rg_private_ptr = rg_private.data(); - const size_t buffer_H2D_stride = buffer_H2D.cols(); - const size_t first_spo_local = first_spo; - const auto requested_orb_size = phi_vgl_v.size(2); - const size_t phi_vgl_stride = num_pos * requested_orb_size; - const size_t nComplexBands_local = nComplexBands; - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \ - map(always, to: buffer_H2D_ptr[:buffer_H2D.size()]) \ - map(always, from: rg_private_ptr[0:rg_private.size()])") - for (int iw = 0; iw < num_pos; iw++) - for (int team_id = 0; team_id < NumTeams; team_id++) - { - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size); - - auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + spline_padded_size * iw * SoAFields3D::NUM_FIELDS; - auto* restrict psi_iw_ptr = results_scratch_ptr + sposet_padded_size * iw * 5; - const auto* restrict pos_iw_ptr = reinterpret_cast(buffer_H2D_ptr + buffer_H2D_stride * iw); - const auto* restrict invRow_iw_ptr = - *reinterpret_cast(buffer_H2D_ptr + buffer_H2D_stride * iw + sizeof(ST) * 6); - - int ix, iy, iz; - ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; - spline2::computeLocationAndFractional(spline_ptr, pos_iw_ptr[3], pos_iw_ptr[4], pos_iw_ptr[5], ix, iy, iz, a, b, - c, da, db, dc, d2a, d2b, d2c); - - const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2], - PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5], - PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]}; - const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6], - GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]}; - - PRAGMA_OFFLOAD("omp parallel for") - for (int index = 0; index < last - first; index++) - { - spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b, - d2c, offload_scratch_iw_ptr + first + index, spline_padded_size); - const int output_index = first + index; - offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::LAPL + output_index] = - SymTrace(offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS00 + output_index], - offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS01 + output_index], - offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS02 + output_index], - offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS11 + output_index], - offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS12 + output_index], - offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS22 + output_index], symGGt); - } - const size_t first_cplx = first / 2; - const size_t last_cplx = last / 2; - PRAGMA_OFFLOAD("omp parallel for") - for (int index = first_cplx; index < last_cplx; index++) - C2R::assign_vgl(pos_iw_ptr[0], pos_iw_ptr[1], pos_iw_ptr[2], psi_iw_ptr, sposet_padded_size, mKK_ptr, - offload_scratch_iw_ptr, spline_padded_size, G, myKcart_ptr, myKcart_padded_size, - first_spo_local, nComplexBands_local, index); - - ValueType* restrict psi = psi_iw_ptr; - ValueType* restrict dpsi_x = psi_iw_ptr + sposet_padded_size; - ValueType* restrict dpsi_y = psi_iw_ptr + sposet_padded_size * 2; - ValueType* restrict dpsi_z = psi_iw_ptr + sposet_padded_size * 3; - ValueType* restrict d2psi = psi_iw_ptr + sposet_padded_size * 4; - - ValueType* restrict out_phi = phi_vgl_ptr + iw * requested_orb_size; - ValueType* restrict out_dphi_x = out_phi + phi_vgl_stride; - ValueType* restrict out_dphi_y = out_dphi_x + phi_vgl_stride; - ValueType* restrict out_dphi_z = out_dphi_y + phi_vgl_stride; - ValueType* restrict out_d2phi = out_dphi_z + phi_vgl_stride; - - const size_t first_real = first_cplx + omptarget::min(nComplexBands_local, first_cplx); - const size_t last_real = - omptarget::min(last_cplx + omptarget::min(nComplexBands_local, last_cplx), requested_orb_size); - ValueType ratio(0), grad_x(0), grad_y(0), grad_z(0); - PRAGMA_OFFLOAD("omp parallel for reduction(+: ratio, grad_x, grad_y, grad_z)") - for (size_t j = first_real; j < last_real; j++) - { - out_phi[j] = psi[j]; - out_dphi_x[j] = dpsi_x[j]; - out_dphi_y[j] = dpsi_y[j]; - out_dphi_z[j] = dpsi_z[j]; - out_d2phi[j] = d2psi[j]; - - ratio += psi[j] * invRow_iw_ptr[j]; - grad_x += dpsi_x[j] * invRow_iw_ptr[j]; - grad_y += dpsi_y[j] * invRow_iw_ptr[j]; - grad_z += dpsi_z[j] * invRow_iw_ptr[j]; - } - - rg_private_ptr[(iw * NumTeams + team_id) * 4] = ratio; - rg_private_ptr[(iw * NumTeams + team_id) * 4 + 1] = grad_x; - rg_private_ptr[(iw * NumTeams + team_id) * 4 + 2] = grad_y; - rg_private_ptr[(iw * NumTeams + team_id) * 4 + 3] = grad_z; - } - } - - for (int iw = 0; iw < num_pos; iw++) - { - ValueType ratio(0); - for (int team_id = 0; team_id < NumTeams; team_id++) - ratio += rg_private[iw][team_id * 4]; - ratios[iw] = ratio; - - ValueType grad_x(0), grad_y(0), grad_z(0); - for (int team_id = 0; team_id < NumTeams; team_id++) - { - grad_x += rg_private[iw][team_id * 4 + 1]; - grad_y += rg_private[iw][team_id * 4 + 2]; - grad_z += rg_private[iw][team_id * 4 + 3]; - } - grads[iw] = GradType{grad_x / ratio, grad_y / ratio, grad_z / ratio}; - } -} - -template -void SplineC2ROMPTarget::assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const -{ - // protect last - last = last > kPoints.size() ? kPoints.size() : last; - - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart->data(0); - const ST* restrict k1 = myKcart->data(1); - const ST* restrict k2 = myKcart->data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - -#pragma omp simd - for (size_t j = first; j < std::min(nComplexBands, last); j++) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = first_spo + jr; - - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - psi[psiIndex + 1] = c * val_i + s * val_r; - dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; - dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; - dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - - const ST h_xx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i); - const ST h_xy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i); - const ST h_xz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i); - const ST h_yx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i); - const ST h_yy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i); - const ST h_yz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i); - const ST h_zx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i); - const ST h_zy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i); - const ST h_zz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i); - - const ST h_xx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r); - const ST h_xy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r); - const ST h_xz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r); - const ST h_yx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r); - const ST h_yy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r); - const ST h_yz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r); - const ST h_zx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r); - const ST h_zy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r); - const ST h_zz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r); - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i; - grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - - grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r; - grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r; - grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r; - grad_grad_psi[psiIndex + 1][3] = c * h_yx_i + s * h_yx_r; - grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r; - grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r; - grad_grad_psi[psiIndex + 1][6] = c * h_zx_i + s * h_zx_r; - grad_grad_psi[psiIndex + 1][7] = c * h_zy_i + s * h_zy_r; - grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r; - } - -#pragma omp simd - for (size_t j = std::max(nComplexBands, first); j < last; j++) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = first_spo + nComplexBands + j; - - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - const ST h_xx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i); - const ST h_xy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i); - const ST h_xz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i); - const ST h_yx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i); - const ST h_yy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i); - const ST h_yz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i); - const ST h_zx_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i); - const ST h_zy_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i); - const ST h_zz_r = - v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i); - - const ST h_xx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r); - const ST h_xy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r); - const ST h_xz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r); - const ST h_yx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r); - const ST h_yy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r); - const ST h_yz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r); - const ST h_zx_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r); - const ST h_zy_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r); - const ST h_zz_i = - v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r); - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i; - grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - } -} - -template -void SplineC2ROMPTarget::evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); -#pragma omp parallel - { - int first, last; - FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2); - } -} - -template -void SplineC2ROMPTarget::assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first, - int last) const -{ - // protect last - last = last < 0 ? kPoints.size() : (last > kPoints.size() ? kPoints.size() : last); - - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST x = r[0], y = r[1], z = r[2]; - - const ST* restrict k0 = myKcart->data(0); - const ST* restrict k1 = myKcart->data(1); - const ST* restrict k2 = myKcart->data(2); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - - const ST* restrict gh000 = mygH.data(0); - const ST* restrict gh001 = mygH.data(1); - const ST* restrict gh002 = mygH.data(2); - const ST* restrict gh011 = mygH.data(3); - const ST* restrict gh012 = mygH.data(4); - const ST* restrict gh022 = mygH.data(5); - const ST* restrict gh111 = mygH.data(6); - const ST* restrict gh112 = mygH.data(7); - const ST* restrict gh122 = mygH.data(8); - const ST* restrict gh222 = mygH.data(9); - -//SIMD doesn't work quite right yet. Comment out until further debugging. -#pragma omp simd - for (size_t j = first; j < std::min(nComplexBands, last); j++) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = first_spo + jr; - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - psi[psiIndex + 1] = c * val_i + s * val_r; - dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r; - dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; - dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. - const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); - const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); - const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); - const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12); - const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22); - const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22); - - const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02); - const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12); - const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22); - const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12); - const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22); - const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22); - - const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; - const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; - const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; - const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; - const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; - const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; - - const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; - const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; - const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; - const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i; - const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; - const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - - grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r; - grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r; - grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r; - grad_grad_psi[psiIndex + 1][3] = c * h_xy_i + s * h_xy_r; - grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r; - grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r; - grad_grad_psi[psiIndex + 1][6] = c * h_xz_i + s * h_xz_r; - grad_grad_psi[psiIndex + 1][7] = c * h_yz_i + s * h_yz_r; - grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r; - - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. - - const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) - const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; - const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; - const ST gh_xxy_r = - f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; - const ST gh_xxy_i = - f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; - const ST gh_xxz_r = - f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; - const ST gh_xxz_i = - f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; - const ST gh_xyy_r = - f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; - const ST gh_xyy_i = - f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; - const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - - (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i; - const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - - (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r; - const ST gh_xzz_r = - f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; - const ST gh_xzz_i = - f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; - const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i; - const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r; - const ST gh_yyz_r = - f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; - const ST gh_yyz_i = - f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r; - const ST gh_yzz_r = - f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; - const ST gh_yzz_i = - f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; - const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i; - const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r; - - grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i; - grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i; - - grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i; - grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i; - - grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i; - - grad_grad_grad_psi[psiIndex + 1][0][0] = c * gh_xxx_i + s * gh_xxx_r; - grad_grad_grad_psi[psiIndex + 1][0][1] = c * gh_xxy_i + s * gh_xxy_r; - grad_grad_grad_psi[psiIndex + 1][0][2] = c * gh_xxz_i + s * gh_xxz_r; - grad_grad_grad_psi[psiIndex + 1][0][3] = c * gh_xxy_i + s * gh_xxy_r; - grad_grad_grad_psi[psiIndex + 1][0][4] = c * gh_xyy_i + s * gh_xyy_r; - grad_grad_grad_psi[psiIndex + 1][0][5] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][0][6] = c * gh_xxz_i + s * gh_xxz_r; - grad_grad_grad_psi[psiIndex + 1][0][7] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][0][8] = c * gh_xzz_i + s * gh_xzz_r; - - grad_grad_grad_psi[psiIndex + 1][1][0] = c * gh_xxy_i + s * gh_xxy_r; - grad_grad_grad_psi[psiIndex + 1][1][1] = c * gh_xyy_i + s * gh_xyy_r; - grad_grad_grad_psi[psiIndex + 1][1][2] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][1][3] = c * gh_xyy_i + s * gh_xyy_r; - grad_grad_grad_psi[psiIndex + 1][1][4] = c * gh_yyy_i + s * gh_yyy_r; - grad_grad_grad_psi[psiIndex + 1][1][5] = c * gh_yyz_i + s * gh_yyz_r; - grad_grad_grad_psi[psiIndex + 1][1][6] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][1][7] = c * gh_yyz_i + s * gh_yyz_r; - grad_grad_grad_psi[psiIndex + 1][1][8] = c * gh_yzz_i + s * gh_yzz_r; - - grad_grad_grad_psi[psiIndex + 1][2][0] = c * gh_xxz_i + s * gh_xxz_r; - grad_grad_grad_psi[psiIndex + 1][2][1] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][2][2] = c * gh_xzz_i + s * gh_xzz_r; - grad_grad_grad_psi[psiIndex + 1][2][3] = c * gh_xyz_i + s * gh_xyz_r; - grad_grad_grad_psi[psiIndex + 1][2][4] = c * gh_yyz_i + s * gh_yyz_r; - grad_grad_grad_psi[psiIndex + 1][2][5] = c * gh_yzz_i + s * gh_yzz_r; - grad_grad_grad_psi[psiIndex + 1][2][6] = c * gh_xzz_i + s * gh_xzz_r; - grad_grad_grad_psi[psiIndex + 1][2][7] = c * gh_yzz_i + s * gh_yzz_r; - grad_grad_grad_psi[psiIndex + 1][2][8] = c * gh_zzz_i + s * gh_zzz_r; - } -#pragma omp simd - for (size_t j = std::max(nComplexBands, first); j < last; j++) - { - int jr = j << 1; - int ji = jr + 1; - - const ST kX = k0[j]; - const ST kY = k1[j]; - const ST kZ = k2[j]; - const ST val_r = myV[jr]; - const ST val_i = myV[ji]; - - //phase - ST s, c; - omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; - const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; - const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; - - const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji]; - const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji]; - const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji]; - - // \f$\nabla \psi_r + {\bf k}\psi_i\f$ - const ST gX_r = dX_r + val_i * kX; - const ST gY_r = dY_r + val_i * kY; - const ST gZ_r = dZ_r + val_i * kZ; - const ST gX_i = dX_i - val_r * kX; - const ST gY_i = dY_i - val_r * kY; - const ST gZ_i = dZ_i - val_r * kZ; - - const size_t psiIndex = first_spo + nComplexBands + j; - psi[psiIndex] = c * val_r - s * val_i; - dpsi[psiIndex][0] = c * gX_r - s * gX_i; - dpsi[psiIndex][1] = c * gY_r - s * gY_i; - dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. - const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); - const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); - const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); - const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12); - const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22); - const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22); - - const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02); - const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12); - const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22); - const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12); - const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22); - const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22); - - const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r; - const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r; - const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r; - const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r; - const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r; - const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r; - - const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i; - const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i; - const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i; - const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i; - const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i; - const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i; - - grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i; - grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i; - grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i; - grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i; - grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i; - grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. - - const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], - gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], - gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) - const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; - const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; - const ST gh_xxy_r = - f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i; - const ST gh_xxy_i = - f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r; - const ST gh_xxz_r = - f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i; - const ST gh_xxz_i = - f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r; - const ST gh_xyy_r = - f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i; - const ST gh_xyy_i = - f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r; - const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) - - (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i; - const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) - - (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r; - const ST gh_xzz_r = - f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i; - const ST gh_xzz_i = - f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r; - const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i; - const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r; - const ST gh_yyz_r = - f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i; - const ST gh_yyz_i = - f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r; - const ST gh_yzz_r = - f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i; - const ST gh_yzz_i = - f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r; - const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i; - const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r; - //[x][xx] //These are the unique entries - grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i; - grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i; - - grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i; - grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i; - grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i; - grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i; - - grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i; - grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i; - grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i; - grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i; - grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i; - grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i; - } -} - -template -void SplineC2ROMPTarget::evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) -{ - const PointType& r = P.activeR(iat); - PointType ru(PrimLattice.toUnit_floor(r)); -#pragma omp parallel - { - int first, last; - FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); - assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, last / 2); - } -} - -template -void SplineC2ROMPTarget::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) -{ - // chunk the [first, last) loop into blocks to save temporary memory usage - const int block_size = 16; - - // reference vectors refer to the rows of matrices - std::vector multi_psi_v; - std::vector multi_dpsi_v; - std::vector multi_d2psi_v; - RefVector psi_v_list; - RefVector dpsi_v_list; - RefVector d2psi_v_list; - - multi_psi_v.reserve(block_size); - multi_dpsi_v.reserve(block_size); - multi_d2psi_v.reserve(block_size); - psi_v_list.reserve(block_size); - dpsi_v_list.reserve(block_size); - d2psi_v_list.reserve(block_size); - - for (int iat = first, i = 0; iat < last; iat += block_size, i += block_size) - { - const int actual_block_size = std::min(last - iat, block_size); - multi_pos_copy.resize(actual_block_size * 6); - multi_psi_v.clear(); - multi_dpsi_v.clear(); - multi_d2psi_v.clear(); - psi_v_list.clear(); - dpsi_v_list.clear(); - d2psi_v_list.clear(); - - for (int ipos = 0; ipos < actual_block_size; ++ipos) - { - // pack particle positions - const PointType& r = P.activeR(iat + ipos); - PointType ru(PrimLattice.toUnit_floor(r)); - multi_pos_copy[ipos * 6] = r[0]; - multi_pos_copy[ipos * 6 + 1] = r[1]; - multi_pos_copy[ipos * 6 + 2] = r[2]; - multi_pos_copy[ipos * 6 + 3] = ru[0]; - multi_pos_copy[ipos * 6 + 4] = ru[1]; - multi_pos_copy[ipos * 6 + 5] = ru[2]; - - multi_psi_v.emplace_back(logdet[i + ipos], OrbitalSetSize); - multi_dpsi_v.emplace_back(dlogdet[i + ipos], OrbitalSetSize); - multi_d2psi_v.emplace_back(d2logdet[i + ipos], OrbitalSetSize); - - psi_v_list.push_back(multi_psi_v[ipos]); - dpsi_v_list.push_back(multi_dpsi_v[ipos]); - d2psi_v_list.push_back(multi_d2psi_v[ipos]); - } - - evaluateVGLMultiPos(multi_pos_copy, offload_scratch, results_scratch, psi_v_list, dpsi_v_list, d2psi_v_list); - } -} - -template class SplineC2ROMPTarget; -template class SplineC2ROMPTarget; - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.h index 624e4ef9acc..bcd20e53065 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.h @@ -18,305 +18,12 @@ #ifndef QMCPLUSPLUS_SPLINE_C2R_OMPTARGET_H #define QMCPLUSPLUS_SPLINE_C2R_OMPTARGET_H -#include -#include "QMCWaveFunctions/BsplineFactory/BsplineSet.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "spline2/MultiBspline.hpp" -#include "OMPTarget/OffloadAlignedAllocators.hpp" -#include "Utilities/FairDivide.h" -#include "Utilities/TimerManager.h" -#include -#include "SplineOMPTargetMultiWalkerMem.h" +#include "QMCWaveFunctions/BsplineFactory/SplineC2RTOMPTarget.h" namespace qmcplusplus { -/** class to match std::complex spline with BsplineSet::ValueType (real) SPOs with OpenMP offload - * @tparam ST precision of spline - * - * Requires temporage storage and multiplication of phase vectors - * The internal storage of complex spline coefficients uses double sized real arrays of ST type, aligned and padded. - * The first nComplexBands complex splines produce 2 real orbitals. - * The rest complex splines produce 1 real orbital. - * All the output orbitals are real (C2R). The maximal number of output orbitals is OrbitalSetSize. - */ -template -class SplineC2ROMPTarget : public BsplineSet -{ -public: - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = ST; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - // types for evaluation results - using TT = typename BsplineSet::ValueType; - using BsplineSet::GGGVector; - using BsplineSet::GradVector; - using BsplineSet::HessVector; - using BsplineSet::ValueVector; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - using ghContainer_type = VectorSoaContainer; - - template - using OffloadVector = Vector>; - template - using OffloadPosVector = VectorSoaContainer>; - -private: - /// timer for offload portion - NewTimer& offload_timer_; - ///primitive cell - CrystalLattice PrimLattice; - ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian - Tensor GGt; - ///number of complex bands - int nComplexBands; - ///multi bspline set - std::shared_ptr, OffloadAllocator>> SplineInst; - - std::shared_ptr> mKK; - std::shared_ptr> myKcart; - std::shared_ptr> GGt_offload; - std::shared_ptr> PrimLattice_G_offload; - - ResourceHandle> mw_mem_handle_; - - ///team private ratios for reduction, numVP x numTeams - Matrix> ratios_private; - ///offload scratch space, dynamically resized to the maximal need - Vector> offload_scratch; - ///result scratch space, dynamically resized to the maximal need - Vector> results_scratch; - ///psiinv and position scratch space, used to avoid allocation on the fly and faster transfer - Vector> psiinv_pos_copy; - ///position scratch space, used to avoid allocation on the fly and faster transfer - Vector> multi_pos_copy; - - void evaluateVGLMultiPos(const Vector>& multi_pos_copy, - Vector>& offload_scratch, - Vector>& results_scratch, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const; - -protected: - /// intermediate result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; - -public: - SplineC2ROMPTarget(const std::string& my_name) - : BsplineSet(my_name), - offload_timer_(createGlobalTimer("SplineC2ROMPTarget::offload", timer_level_fine)), - nComplexBands(0), - GGt_offload(std::make_shared>(9)), - PrimLattice_G_offload(std::make_shared>(9)) - {} - - SplineC2ROMPTarget(const SplineC2ROMPTarget& in); - - virtual std::string getClassName() const override { return "SplineC2ROMPTarget"; } - virtual std::string getKeyword() const override { return "SplineC2R"; } - bool isComplex() const override { return true; }; - virtual bool isOMPoffload() const override { return true; } - - void createResource(ResourceCollection& collection) const override - { - auto resource_index = collection.addResource(std::make_unique>()); - } - - void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const override - { - assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.getCastedLeader>(); - phi_leader.mw_mem_handle_ = collection.lendResource>(); - } - - void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const override - { - assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.getCastedLeader>(); - collection.takebackResource(phi_leader.mw_mem_handle_); - } - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - - inline void resizeStorage(size_t n, size_t nvals) - { - init_base(n); - size_t npad = getAlignedSize(2 * n); - myV.resize(npad); - myG.resize(npad); - myL.resize(npad); - myH.resize(npad); - mygH.resize(npad); - } - - void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } - - void gather_tables(Communicate* comm) - { - if (comm->size() == 1) - return; - const int Nbands = kPoints.size(); - const int Nbandgroups = comm->size(); - offset.resize(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, offset); - - for (size_t ib = 0; ib < offset.size(); ib++) - offset[ib] = offset[ib] * 2; - gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, offset); - } - - template - void create_spline(GT& xyz_g, BCT& xyz_bc) - { - resize_kpoints(); - SplineInst = std::make_shared, OffloadAllocator>>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - - app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " - << "for the coefficients in 3D spline orbital representation" << std::endl; - } - - /// this routine can not be called from threaded region - void finalizeConstruction() override - { - // map the SplineInst->getSplinePtr() structure to GPU - auto* MultiSpline = SplineInst->getSplinePtr(); - auto* restrict coefs = MultiSpline->coefs; - // attach pointers on the device to achieve deep copy - PRAGMA_OFFLOAD("omp target map(always, to: MultiSpline[0:1], coefs[0:MultiSpline->coefs_size])") - { - MultiSpline->coefs = coefs; - } - - // transfer static data to GPU - auto* mKK_ptr = mKK->data(); - PRAGMA_OFFLOAD("omp target update to(mKK_ptr[0:mKK->size()])") - auto* myKcart_ptr = myKcart->data(); - PRAGMA_OFFLOAD("omp target update to(myKcart_ptr[0:myKcart->capacity()*3])") - for (size_t i = 0; i < 9; i++) - { - (*GGt_offload)[i] = GGt[i]; - (*PrimLattice_G_offload)[i] = PrimLattice.G[i]; - } - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - PRAGMA_OFFLOAD("omp target update to(PrimLattice_G_ptr[0:9])") - auto* GGt_ptr = GGt_offload->data(); - PRAGMA_OFFLOAD("omp target update to(GGt_ptr[0:9])") - } - - inline void flush_zero() { SplineInst->flush_zero(); } - - /** remap kPoints to pack the double copy */ - inline void resize_kpoints() - { - nComplexBands = this->remap_kpoints(); - const int nk = kPoints.size(); - mKK = std::make_shared>(nk); - myKcart = std::make_shared>(nk); - for (size_t i = 0; i < nk; ++i) - { - (*mKK)[i] = -dot(kPoints[i], kPoints[i]); - (*myKcart)(i) = kPoints[i]; - } - } - - void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); - - bool read_splines(hdf_archive& h5f); - - bool write_splines(hdf_archive& h5f); - - void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const; - - virtual void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override; - - virtual void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override; - - virtual void mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const override; - - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ - void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - virtual void evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) override; - - virtual void mw_evaluateVGL(const RefVectorWithLeader& sa_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const override; - - virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const override; - - void assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const; - - virtual void evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override; - - void assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first = 0, - int last = -1) const; - - virtual void evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - virtual void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; -}; - -extern template class SplineC2ROMPTarget; -extern template class SplineC2ROMPTarget; +template +using SplineC2ROMPTarget = SplineC2RTOMPTarget; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp deleted file mode 100644 index 5b0fa59ed37..00000000000 --- a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp +++ /dev/null @@ -1,569 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2019 QMCPACK developers. -// -// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "Concurrency/OpenMP.h" -#include "SplineR2R.h" -#include "spline2/MultiBsplineEval.hpp" -#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" -#include "Platforms/CPU/BLAS.hpp" - -namespace qmcplusplus -{ -template -SplineR2R::SplineR2R(const SplineR2R& in) = default; - -template -inline void SplineR2R::set_spline(SingleSplineType* spline_r, - SingleSplineType* spline_i, - int twist, - int ispline, - int level) -{ - SplineInst->copy_spline(spline_r, ispline); -} - -template -bool SplineR2R::read_splines(hdf_archive& h5f) -{ - std::ostringstream o; - o << "spline_" << MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); -} - -template -bool SplineR2R::write_splines(hdf_archive& h5f) -{ - std::ostringstream o; - o << "spline_" << MyIndex; - einspline_engine bigtable(SplineInst->getSplinePtr()); - return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); -} - -template -void SplineR2R::storeParamsBeforeRotation() -{ - const auto spline_ptr = SplineInst->getSplinePtr(); - const auto coefs_tot_size = spline_ptr->coefs_size; - coef_copy_ = std::make_shared>(coefs_tot_size); - - std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin()); -} - -/* - ~~ Notes for rotation ~~ - spl_coefs = Raw pointer to spline coefficients - basis_set_size = Number of spline coefs per orbital - OrbitalSetSize = Number of orbitals (excluding padding) - - spl_coefs has a complicated layout depending on dimensionality of splines. - Luckily, for our purposes, we can think of spl_coefs as pointing to a - matrix of size BasisSetSize x (OrbitalSetSize + padding), with the spline - index adjacent in memory. The orbital index is SIMD aligned and therefore - may include padding. - - As a result, due to SIMD alignment, Nsplines may be larger than the - actual number of splined orbitals. This means that in practice rot_mat - may be smaller than the number of 'columns' in the coefs array! - - SplineR2R spl_coef layout: - ^ | sp1 | ... | spN | pad | - | |=====|=====|=====|=====| - | | c11 | ... | c1N | 0 | - basis_set_size | c21 | ... | c2N | 0 | - | | ... | ... | ... | 0 | - | | cM1 | ... | cMN | 0 | - v |=====|=====|=====|=====| - <------ Nsplines ------> - - SplineC2C spl_coef layout: - ^ | sp1_r | sp1_i | ... | spN_r | spN_i | pad | - | |=======|=======|=======|=======|=======|=======| - | | c11_r | c11_i | ... | c1N_r | c1N_i | 0 | - basis_set_size | c21_r | c21_i | ... | c2N_r | c2N_i | 0 | - | | ... | ... | ... | ... | ... | ... | - | | cM1_r | cM1_i | ... | cMN_r | cMN_i | 0 | - v |=======|=======|=======|=======|=======|=======| - <------------------ Nsplines ------------------> - - NB: For splines (typically) BasisSetSize >> OrbitalSetSize, so the spl_coefs - "matrix" is very tall and skinny. -*/ -template -void SplineR2R::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) -{ - // SplineInst is a MultiBspline. See src/spline2/MultiBspline.hpp - const auto spline_ptr = SplineInst->getSplinePtr(); - assert(spline_ptr != nullptr); - const auto spl_coefs = spline_ptr->coefs; - const auto Nsplines = spline_ptr->num_splines; // May include padding - const auto coefs_tot_size = spline_ptr->coefs_size; - const auto BasisSetSize = coefs_tot_size / Nsplines; - const auto TrueNOrbs = rot_mat.size1(); // == Nsplines - padding - assert(OrbitalSetSize == rot_mat.rows()); - assert(OrbitalSetSize == rot_mat.cols()); - - if (!use_stored_copy) - { - assert(coef_copy_ != nullptr); - std::copy_n(spl_coefs, coefs_tot_size, coef_copy_->begin()); - } - - - if constexpr (std::is_same_v) - { - //Here, ST should be equal to ValueType, which will be double for R2R. Using BLAS to make things faster - BLAS::gemm('N', 'N', OrbitalSetSize, BasisSetSize, OrbitalSetSize, ST(1.0), rot_mat.data(), OrbitalSetSize, - coef_copy_->data(), Nsplines, ST(0.0), spl_coefs, Nsplines); - } - else - { - //Here, ST is float but ValueType is double for R2R. Due to issues with type conversions, just doing naive matrix multiplication in this case to not lose precision on rot_mat - for (IndexType i = 0; i < BasisSetSize; i++) - for (IndexType j = 0; j < OrbitalSetSize; j++) - { - const auto cur_elem = Nsplines * i + j; - FullPrecValueType newval{0.}; - for (IndexType k = 0; k < OrbitalSetSize; k++) - { - const auto index = i * Nsplines + k; - newval += (*coef_copy_)[index] * rot_mat[k][j]; - } - spl_coefs[cur_elem] = newval; - } - } -} - - -template -inline void SplineR2R::assign_v(int bc_sign, const vContainer_type& myV, ValueVector& psi, int first, int last) - const -{ - // protect last - last = last > kPoints.size() ? kPoints.size() : last; - - const ST signed_one = (bc_sign & 1) ? -1 : 1; -#pragma omp simd - for (size_t j = first; j < last; ++j) - psi[first_spo + j] = signed_one * myV[j]; -} - -template -void SplineR2R::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) -{ - const PointType& r = P.activeR(iat); - PointType ru; - int bc_sign = convertPos(r, ru); - -#pragma omp parallel - { - int first, last; - FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(bc_sign, myV, psi, first, last); - } -} - -template -void SplineR2R::evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) -{ - const bool need_resize = ratios_private.rows() < VP.getTotalNum(); - -#pragma omp parallel - { - int tid = omp_get_thread_num(); - // initialize thread private ratios - if (need_resize) - { - if (tid == 0) // just like #pragma omp master, but one fewer call to the runtime - ratios_private.resize(VP.getTotalNum(), omp_get_num_threads()); -#pragma omp barrier - } - int first, last; - FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), tid, first, last); - const int last_real = kPoints.size() < last ? kPoints.size() : last; - - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - const PointType& r = VP.activeR(iat); - PointType ru; - int bc_sign = convertPos(r, ru); - - spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); - assign_v(bc_sign, myV, psi, first, last_real); - ratios_private[iat][tid] = simd::dot(psi.data() + first, psiinv.data() + first, last_real - first); - } - } - - // do the reduction manually - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - ratios[iat] = TT(0); - for (int tid = 0; tid < ratios_private.cols(); tid++) - ratios[iat] += ratios_private[iat][tid]; - } -} - -template -inline void SplineR2R::assign_vgl(int bc_sign, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi, - int first, - int last) const -{ - // protect last - last = last > kPoints.size() ? kPoints.size() : last; - - const ST signed_one = (bc_sign & 1) ? -1 : 1; - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - const ST symGG[6] = {GGt[0], GGt[1] + GGt[3], GGt[2] + GGt[6], GGt[4], GGt[5] + GGt[7], GGt[8]}; - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - -#pragma omp simd - for (size_t j = first; j < last; ++j) - { - const size_t psiIndex = first_spo + j; - psi[psiIndex] = signed_one * myV[j]; - dpsi[psiIndex][0] = signed_one * (g00 * g0[j] + g01 * g1[j] + g02 * g2[j]); - dpsi[psiIndex][1] = signed_one * (g10 * g0[j] + g11 * g1[j] + g12 * g2[j]); - dpsi[psiIndex][2] = signed_one * (g20 * g0[j] + g21 * g1[j] + g22 * g2[j]); - d2psi[psiIndex] = signed_one * SymTrace(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], symGG); - } -} - -/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ -template -inline void SplineR2R::assign_vgl_from_l(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) -{ - const ST signed_one = (bc_sign & 1) ? -1 : 1; - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - -#pragma omp simd - for (int psiIndex = first_spo; psiIndex < last_spo; ++psiIndex) - { - const size_t j = psiIndex - first_spo; - psi[psiIndex] = signed_one * myV[j]; - dpsi[psiIndex][0] = signed_one * g0[j]; - dpsi[psiIndex][1] = signed_one * g1[j]; - dpsi[psiIndex][2] = signed_one * g2[j]; - d2psi[psiIndex] = signed_one * myL[j]; - } -} - -template -void SplineR2R::evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) -{ - const PointType& r = P.activeR(iat); - PointType ru; - int bc_sign = convertPos(r, ru); - -#pragma omp parallel - { - int first, last; - FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgl(bc_sign, psi, dpsi, d2psi, first, last); - } -} - -template -void SplineR2R::assign_vgh(int bc_sign, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const -{ - // protect last - last = last > kPoints.size() ? kPoints.size() : last; - - const ST signed_one = (bc_sign & 1) ? -1 : 1; - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - -#pragma omp simd - for (size_t j = first; j < last; ++j) - { - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[j] + g01 * g1[j] + g02 * g2[j]; - const ST dY_r = g10 * g0[j] + g11 * g1[j] + g12 * g2[j]; - const ST dZ_r = g20 * g0[j] + g21 * g1[j] + g22 * g2[j]; - - const size_t psiIndex = j + first_spo; - psi[psiIndex] = signed_one * myV[j]; - dpsi[psiIndex][0] = signed_one * dX_r; - dpsi[psiIndex][1] = signed_one * dY_r; - dpsi[psiIndex][2] = signed_one * dZ_r; - - const ST h_xx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g00, g01, g02); - const ST h_xy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g10, g11, g12); - const ST h_xz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g20, g21, g22); - const ST h_yx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g00, g01, g02); - const ST h_yy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g10, g11, g12); - const ST h_yz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g20, g21, g22); - const ST h_zx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g20, g21, g22, g00, g01, g02); - const ST h_zy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g20, g21, g22, g10, g11, g12); - const ST h_zz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g20, g21, g22, g20, g21, g22); - - grad_grad_psi[psiIndex][0] = signed_one * h_xx_r; - grad_grad_psi[psiIndex][1] = signed_one * h_xy_r; - grad_grad_psi[psiIndex][2] = signed_one * h_xz_r; - grad_grad_psi[psiIndex][3] = signed_one * h_yx_r; - grad_grad_psi[psiIndex][4] = signed_one * h_yy_r; - grad_grad_psi[psiIndex][5] = signed_one * h_yz_r; - grad_grad_psi[psiIndex][6] = signed_one * h_zx_r; - grad_grad_psi[psiIndex][7] = signed_one * h_zy_r; - grad_grad_psi[psiIndex][8] = signed_one * h_zz_r; - } -} - -template -void SplineR2R::evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) -{ - const PointType& r = P.activeR(iat); - PointType ru; - int bc_sign = convertPos(r, ru); - -#pragma omp parallel - { - int first, last; - FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); - assign_vgh(bc_sign, psi, dpsi, grad_grad_psi, first, last); - } -} - -template -void SplineR2R::assign_vghgh(int bc_sign, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first, - int last) const -{ - // protect last - last = last < 0 ? kPoints.size() : (last > kPoints.size() ? kPoints.size() : last); - - const ST signed_one = (bc_sign & 1) ? -1 : 1; - const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), - g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), - g22 = PrimLattice.G(8); - - const ST* restrict g0 = myG.data(0); - const ST* restrict g1 = myG.data(1); - const ST* restrict g2 = myG.data(2); - const ST* restrict h00 = myH.data(0); - const ST* restrict h01 = myH.data(1); - const ST* restrict h02 = myH.data(2); - const ST* restrict h11 = myH.data(3); - const ST* restrict h12 = myH.data(4); - const ST* restrict h22 = myH.data(5); - - const ST* restrict gh000 = mygH.data(0); - const ST* restrict gh001 = mygH.data(1); - const ST* restrict gh002 = mygH.data(2); - const ST* restrict gh011 = mygH.data(3); - const ST* restrict gh012 = mygH.data(4); - const ST* restrict gh022 = mygH.data(5); - const ST* restrict gh111 = mygH.data(6); - const ST* restrict gh112 = mygH.data(7); - const ST* restrict gh122 = mygH.data(8); - const ST* restrict gh222 = mygH.data(9); - - //SIMD doesn't work quite right yet. Comment out until further debugging. - //#pragma omp simd - for (size_t j = first; j < last; ++j) - { - const ST val_r = myV[j]; - - - //dot(PrimLattice.G,myG[j]) - const ST dX_r = g00 * g0[j] + g01 * g1[j] + g02 * g2[j]; - const ST dY_r = g10 * g0[j] + g11 * g1[j] + g12 * g2[j]; - const ST dZ_r = g20 * g0[j] + g21 * g1[j] + g22 * g2[j]; - - const size_t psiIndex = j + first_spo; - psi[psiIndex] = signed_one * val_r; - dpsi[psiIndex][0] = signed_one * dX_r; - dpsi[psiIndex][1] = signed_one * dY_r; - dpsi[psiIndex][2] = signed_one * dZ_r; - - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. - const ST f_xx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g00, g01, g02); - const ST f_xy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g10, g11, g12); - const ST f_xz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g20, g21, g22); - const ST f_yy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g10, g11, g12); - const ST f_yz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g20, g21, g22); - const ST f_zz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g20, g21, g22, g20, g21, g22); - - /* const ST h_xx_r=f_xx_r; - const ST h_xy_r=f_xy_r+(kX*dY_i+kY*dX_i)-kX*kY*val_r; - const ST h_xz_r=f_xz_r+(kX*dZ_i+kZ*dX_i)-kX*kZ*val_r; - const ST h_yy_r=f_yy_r+2*kY*dY_i-kY*kY*val_r; - const ST h_yz_r=f_yz_r+(kY*dZ_i+kZ*dY_i)-kY*kZ*val_r; - const ST h_zz_r=f_zz_r+2*kZ*dZ_i-kZ*kZ*val_r; */ - - grad_grad_psi[psiIndex][0] = f_xx_r * signed_one; - grad_grad_psi[psiIndex][1] = f_xy_r * signed_one; - grad_grad_psi[psiIndex][2] = f_xz_r * signed_one; - grad_grad_psi[psiIndex][4] = f_yy_r * signed_one; - grad_grad_psi[psiIndex][5] = f_yz_r * signed_one; - grad_grad_psi[psiIndex][8] = f_zz_r * signed_one; - - //symmetry: - grad_grad_psi[psiIndex][3] = grad_grad_psi[psiIndex][1]; - grad_grad_psi[psiIndex][6] = grad_grad_psi[psiIndex][2]; - grad_grad_psi[psiIndex][7] = grad_grad_psi[psiIndex][5]; - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. - - const ST f3_xxx_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g00, g01, g02, g00, g01, g02, g00, g01, g02); - const ST f3_xxy_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g00, g01, g02, g00, g01, g02, g10, g11, g12); - const ST f3_xxz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g00, g01, g02, g00, g01, g02, g20, g21, g22); - const ST f3_xyy_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g00, g01, g02, g10, g11, g12, g10, g11, g12); - const ST f3_xyz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g00, g01, g02, g10, g11, g12, g20, g21, g22); - const ST f3_xzz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g00, g01, g02, g20, g21, g22, g20, g21, g22); - const ST f3_yyy_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g10, g11, g12, g10, g11, g12, g10, g11, g12); - const ST f3_yyz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g10, g11, g12, g10, g11, g12, g20, g21, g22); - const ST f3_yzz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g10, g11, g12, g20, g21, g22, g20, g21, g22); - const ST f3_zzz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], - gh122[j], gh222[j], g20, g21, g22, g20, g21, g22, g20, g21, g22); - - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) - /* const ST gh_xxx_r= f3_xxx_r + 3*kX*f_xx_i - 3*kX*kX*dX_r - kX*kX*kX*val_i; - const ST gh_xxy_r= f3_xxy_r +(kY*f_xx_i+2*kX*f_xy_i) - (kX*kX*dY_r+2*kX*kY*dX_r)-kX*kX*kY*val_i; - const ST gh_xxz_r= f3_xxz_r +(kZ*f_xx_i+2*kX*f_xz_i) - (kX*kX*dZ_r+2*kX*kZ*dX_r)-kX*kX*kZ*val_i; - const ST gh_xyy_r= f3_xyy_r +(2*kY*f_xy_i+kX*f_yy_i) - (2*kX*kY*dY_r+kY*kY*dX_r)-kX*kY*kY*val_i; - const ST gh_xyz_r= f3_xyz_r +(kX*f_yz_i+kY*f_xz_i+kZ*f_xy_i)-(kX*kY*dZ_r+kY*kZ*dX_r+kZ*kX*dY_r) - kX*kY*kZ*val_i; - const ST gh_xzz_r= f3_xzz_r +(2*kZ*f_xz_i+kX*f_zz_i) - (2*kX*kZ*dZ_r+kZ*kZ*dX_r)-kX*kZ*kZ*val_i; - const ST gh_yyy_r= f3_yyy_r + 3*kY*f_yy_i - 3*kY*kY*dY_r - kY*kY*kY*val_i; - const ST gh_yyz_r= f3_yyz_r +(kZ*f_yy_i+2*kY*f_yz_i) - (kY*kY*dZ_r+2*kY*kZ*dY_r)-kY*kY*kZ*val_i; - const ST gh_yzz_r= f3_yzz_r +(2*kZ*f_yz_i+kY*f_zz_i) - (2*kY*kZ*dZ_r+kZ*kZ*dY_r)-kY*kZ*kZ*val_i; - const ST gh_zzz_r= f3_zzz_r + 3*kZ*f_zz_i - 3*kZ*kZ*dZ_r - kZ*kZ*kZ*val_i;*/ - //[x][xx] //These are the unique entries - grad_grad_grad_psi[psiIndex][0][0] = signed_one * f3_xxx_r; - grad_grad_grad_psi[psiIndex][0][1] = signed_one * f3_xxy_r; - grad_grad_grad_psi[psiIndex][0][2] = signed_one * f3_xxz_r; - grad_grad_grad_psi[psiIndex][0][4] = signed_one * f3_xyy_r; - grad_grad_grad_psi[psiIndex][0][5] = signed_one * f3_xyz_r; - grad_grad_grad_psi[psiIndex][0][8] = signed_one * f3_xzz_r; - - //filling in the symmetric terms. Filling out the xij terms - grad_grad_grad_psi[psiIndex][0][3] = grad_grad_grad_psi[psiIndex][0][1]; - grad_grad_grad_psi[psiIndex][0][6] = grad_grad_grad_psi[psiIndex][0][2]; - grad_grad_grad_psi[psiIndex][0][7] = grad_grad_grad_psi[psiIndex][0][5]; - - //Now for everything that's a permutation of the above: - grad_grad_grad_psi[psiIndex][1][0] = grad_grad_grad_psi[psiIndex][0][1]; - grad_grad_grad_psi[psiIndex][1][1] = grad_grad_grad_psi[psiIndex][0][4]; - grad_grad_grad_psi[psiIndex][1][2] = grad_grad_grad_psi[psiIndex][0][5]; - grad_grad_grad_psi[psiIndex][1][3] = grad_grad_grad_psi[psiIndex][0][4]; - grad_grad_grad_psi[psiIndex][1][6] = grad_grad_grad_psi[psiIndex][0][5]; - - grad_grad_grad_psi[psiIndex][2][0] = grad_grad_grad_psi[psiIndex][0][2]; - grad_grad_grad_psi[psiIndex][2][1] = grad_grad_grad_psi[psiIndex][0][5]; - grad_grad_grad_psi[psiIndex][2][2] = grad_grad_grad_psi[psiIndex][0][8]; - grad_grad_grad_psi[psiIndex][2][3] = grad_grad_grad_psi[psiIndex][0][5]; - grad_grad_grad_psi[psiIndex][2][6] = grad_grad_grad_psi[psiIndex][0][8]; - - grad_grad_grad_psi[psiIndex][1][4] = signed_one * f3_yyy_r; - grad_grad_grad_psi[psiIndex][1][5] = signed_one * f3_yyz_r; - grad_grad_grad_psi[psiIndex][1][8] = signed_one * f3_yzz_r; - - grad_grad_grad_psi[psiIndex][1][7] = grad_grad_grad_psi[psiIndex][1][5]; - grad_grad_grad_psi[psiIndex][2][4] = grad_grad_grad_psi[psiIndex][1][5]; - grad_grad_grad_psi[psiIndex][2][5] = grad_grad_grad_psi[psiIndex][1][8]; - grad_grad_grad_psi[psiIndex][2][7] = grad_grad_grad_psi[psiIndex][1][8]; - - grad_grad_grad_psi[psiIndex][2][8] = signed_one * f3_zzz_r; - } -} - -template -void SplineR2R::evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) -{ - const PointType& r = P.activeR(iat); - PointType ru; - int bc_sign = convertPos(r, ru); - -#pragma omp parallel - { - int first, last; - FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); - assign_vghgh(bc_sign, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first, last); - } -} - -template class SplineR2R; -template class SplineR2R; - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.h b/src/QMCWaveFunctions/BsplineFactory/SplineR2R.h index 3de6fc33fc2..62e9f058915 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2R.h @@ -16,208 +16,12 @@ #ifndef QMCPLUSPLUS_SPLINE_R2R_H #define QMCPLUSPLUS_SPLINE_R2R_H -#include -#include "QMCWaveFunctions/BsplineFactory/BsplineSet.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "spline2/MultiBspline.hpp" -#include "Utilities/FairDivide.h" +#include "QMCWaveFunctions/BsplineFactory/SplineR2RT.h" namespace qmcplusplus { -/** class to match ST real spline with BsplineSet::ValueType (real) SPOs - * @tparam ST precision of spline - * - * Requires temporage storage and multiplication of the sign of the real part of the phase - * Internal storage ST type arrays are aligned and padded. - */ -template -class SplineR2R : public BsplineSet -{ -public: - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = ST; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - // types for evaluation results - using TT = typename BsplineSet::ValueType; - using BsplineSet::GGGVector; - using BsplineSet::GradVector; - using BsplineSet::HessVector; - using BsplineSet::ValueVector; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - using ghContainer_type = VectorSoaContainer; - -private: - bool IsGamma; - ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian - Tensor GGt; - ///multi bspline set - std::shared_ptr> SplineInst; - - ///Copy of original splines for orbital rotation - std::shared_ptr> coef_copy_; - - ///thread private ratios for reduction when using nested threading, numVP x numThread - Matrix ratios_private; - - -protected: - ///primitive cell - CrystalLattice PrimLattice; - /// intermediate result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; - -public: - SplineR2R(const std::string& my_name) : BsplineSet(my_name) {} - - SplineR2R(const SplineR2R& in); - virtual std::string getClassName() const override { return "SplineR2R"; } - virtual std::string getKeyword() const override { return "SplineR2R"; } - bool isComplex() const override { return false; }; - bool isRotationSupported() const override { return true; } - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - - /// Store an original copy of the spline coefficients for orbital rotation - void storeParamsBeforeRotation() override; - - /* - Implements orbital rotations via [1,2]. - Should be called by RotatedSPOs::apply_rotation() - - This implementation requires that NSPOs > Nelec. In other words, - if you want to run a orbopt wfn, you must include some virtual orbitals! - - Some results (using older Berkeley branch) were published in [3]. - - [1] Filippi & Fahy, JCP 112, (2000) - [2] Toulouse & Umrigar, JCP 126, (2007) - [3] Townsend et al., PRB 102, (2020) - */ - void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) override; - - inline void resizeStorage(size_t n, size_t nvals) - { - init_base(n); - const size_t npad = getAlignedSize(n); - myV.resize(npad); - myG.resize(npad); - myL.resize(npad); - myH.resize(npad); - mygH.resize(npad); - - IsGamma = ((HalfG[0] == 0) && (HalfG[1] == 0) && (HalfG[2] == 0)); - } - - void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } - - void gather_tables(Communicate* comm) - { - if (comm->size() == 1) - return; - const int Nbands = kPoints.size(); - const int Nbandgroups = comm->size(); - offset.resize(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, offset); - gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, offset); - } - - template - void create_spline(GT& xyz_g, BCT& xyz_bc) - { - GGt = dot(transpose(PrimLattice.G), PrimLattice.G); - SplineInst = std::make_shared>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - - app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " - << "for the coefficients in 3D spline orbital representation" << std::endl; - } - - inline void flush_zero() { SplineInst->flush_zero(); } - - void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); - - bool read_splines(hdf_archive& h5f); - - bool write_splines(hdf_archive& h5f); - - /** convert position in PrimLattice unit and return sign */ - inline int convertPos(const PointType& r, PointType& ru) - { - ru = PrimLattice.toUnit(r); - int bc_sign = 0; - for (int i = 0; i < D; i++) - if (-std::numeric_limits::epsilon() < ru[i] && ru[i] < 0) - ru[i] = ST(0.0); - else - { - ST img = std::floor(ru[i]); - ru[i] -= img; - bc_sign += HalfG[i] * (int)img; - } - return bc_sign; - } - - void assign_v(int bc_sign, const vContainer_type& myV, ValueVector& psi, int first, int last) const; - - void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override; - - void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override; - - void assign_vgl(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last) const; - - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ - void assign_vgl_from_l(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - void evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) override; - - void assign_vgh(int bc_sign, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi, int first, int last) - const; - - void evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override; - - void assign_vghgh(int bc_sign, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first = 0, - int last = -1) const; - - void evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; -}; - -extern template class SplineR2R; -extern template class SplineR2R; +template +using SplineR2R = SplineR2RT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h index f265561e18c..dcf9539b83b 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h @@ -15,8 +15,8 @@ // at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// -#ifndef QMCPLUSPLUS_SPLINE_R2R_H -#define QMCPLUSPLUS_SPLINE_R2R_H +#ifndef QMCPLUSPLUS_SPLINE_R2RT_H +#define QMCPLUSPLUS_SPLINE_R2RT_H #include "OhmmsSoA/VectorSoaContainer.h" #include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h" diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt index 3c780c1377b..e186ed67bed 100644 --- a/src/QMCWaveFunctions/CMakeLists.txt +++ b/src/QMCWaveFunctions/CMakeLists.txt @@ -23,7 +23,6 @@ add_subdirectory(detail) set(WFBASE_SRCS OptimizableFunctorBase.cpp - VariableSet.cpp VariableSetT.cpp WaveFunctionPool.cpp WaveFunctionComponent.cpp @@ -45,10 +44,6 @@ set(WFBASE_SRCS RotatedSPOsT.cpp SpinorSetT.cpp) -if(NOT QMC_COMPLEX) - set(WFBASE_SRCS ${WFBASE_SRCS} RotatedSPOs.cpp) -endif(NOT QMC_COMPLEX) - if(QMC_COMPLEX) set(WFBASE_SRCS ${WFBASE_SRCS} SpinorSet.cpp) endif(QMC_COMPLEX) @@ -123,12 +118,11 @@ if(OHMMS_DIM MATCHES 3) BsplineFactory/SplineC2RTOMPTarget.cpp ) if(QMC_COMPLEX) - set(FERMION_SRCS ${FERMION_SRCS} EinsplineSpinorSetBuilder.cpp BsplineFactory/SplineC2C.cpp BsplineFactory/SplineC2CT.cpp) + set(FERMION_SRCS ${FERMION_SRCS} EinsplineSpinorSetBuilder.cpp BsplineFactory/SplineC2CT.cpp) set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS} BsplineFactory/SplineC2COMPTarget.cpp) else(QMC_COMPLEX) set(FERMION_SRCS ${FERMION_SRCS} BsplineFactory/createRealSingle.cpp BsplineFactory/createRealDouble.cpp - BsplineFactory/SplineC2R.cpp BsplineFactory/SplineC2RT.cpp BsplineFactory/SplineR2R.cpp BsplineFactory/SplineR2RT.cpp) - set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS} BsplineFactory/SplineC2ROMPTarget.cpp) + BsplineFactory/SplineC2RT.cpp BsplineFactory/SplineR2RT.cpp) endif(QMC_COMPLEX) endif(HAVE_EINSPLINE) diff --git a/src/QMCWaveFunctions/ExampleHeComponent.h b/src/QMCWaveFunctions/ExampleHeComponent.h index 8e5cce23ff9..b51b63cee2c 100644 --- a/src/QMCWaveFunctions/ExampleHeComponent.h +++ b/src/QMCWaveFunctions/ExampleHeComponent.h @@ -32,7 +32,7 @@ class ExampleHeComponent : public WaveFunctionComponent, OptimizableObject my_table_ee_idx_(els.addTable(els, DTModes::NEED_TEMP_DATA_ON_HOST | DTModes::NEED_VP_FULL_TABLE_ON_HOST)), my_table_ei_idx_(els.addTable(ions, DTModes::NEED_VP_FULL_TABLE_ON_HOST)){}; - using OptVariablesType = optimize::VariableSet; + //using OptVariablesType = optimize::VariableSet; using PtclGrpIndexes = QMCTraits::PtclGrpIndexes; std::string getClassName() const override { return "ExampleHeComponent"; } diff --git a/src/QMCWaveFunctions/Fermion/Backflow_ee_kSpace.h b/src/QMCWaveFunctions/Fermion/Backflow_ee_kSpace.h index 5178de3f5c4..0fb838f063f 100644 --- a/src/QMCWaveFunctions/Fermion/Backflow_ee_kSpace.h +++ b/src/QMCWaveFunctions/Fermion/Backflow_ee_kSpace.h @@ -26,10 +26,6 @@ namespace qmcplusplus class Backflow_ee_kSpace : public BackflowFunctionBase { using ComplexType = QMCTraits::ComplexType; - ///typedef for real values - //using real_type = optimize::VariableSet::real_type; - ///typedef for variableset: this is going to be replaced - using opt_variables_type = optimize::VariableSet; public: //number of groups of the target particleset diff --git a/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.h b/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.h index e7513eeeae1..76804e41281 100644 --- a/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.h +++ b/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.h @@ -19,6 +19,7 @@ #include #include "Configuration.h" #include "WaveFunctionComponentBuilder.h" +#include "QMCWaveFunctions/SPOSet.h" #include namespace qmcplusplus @@ -28,7 +29,6 @@ class BackflowTransformation; class DiracDeterminantBase; class MultiSlaterDetTableMethod; struct CSFData; -class SPOSet; class SPOSetBuilder; class SPOSetBuilderFactory; struct ci_configuration; diff --git a/src/QMCWaveFunctions/Jastrow/CountingGaussian.h b/src/QMCWaveFunctions/Jastrow/CountingGaussian.h index b8b99e451ba..b73cd8928b4 100644 --- a/src/QMCWaveFunctions/Jastrow/CountingGaussian.h +++ b/src/QMCWaveFunctions/Jastrow/CountingGaussian.h @@ -28,7 +28,6 @@ class CountingGaussian using TensorType = QMCTraits::TensorType; using real_type = optimize::VariableSet::real_type; - using opt_variables_type = optimize::VariableSet; // enumerations for axis parameters enum A_vars diff --git a/src/QMCWaveFunctions/Jastrow/CountingGaussianRegion.h b/src/QMCWaveFunctions/Jastrow/CountingGaussianRegion.h index 3543b802705..2a46a3f76dc 100644 --- a/src/QMCWaveFunctions/Jastrow/CountingGaussianRegion.h +++ b/src/QMCWaveFunctions/Jastrow/CountingGaussianRegion.h @@ -30,7 +30,6 @@ class CountingGaussianRegion using TensorType = QMCTraits::TensorType; using real_type = optimize::VariableSet::real_type; - using opt_variables_type = optimize::VariableSet; // counting function pointers std::vector> C; diff --git a/src/QMCWaveFunctions/OptimizableFunctorBase.h b/src/QMCWaveFunctions/OptimizableFunctorBase.h index 33048b3049a..01c470e9a24 100644 --- a/src/QMCWaveFunctions/OptimizableFunctorBase.h +++ b/src/QMCWaveFunctions/OptimizableFunctorBase.h @@ -22,7 +22,7 @@ #include "OptimizableObject.h" #include "OhmmsData/OhmmsElementBase.h" #include "OhmmsPETE/TinyVector.h" -//#include +#include "QMCWaveFunctions/VariableSet.h" #include namespace qmcplusplus @@ -48,8 +48,6 @@ struct OptimizableFunctorBase : public OptimizableObject { ///typedef for real values using real_type = optimize::VariableSet::real_type; - ///typedef for variableset: this is going to be replaced - using opt_variables_type = optimize::VariableSet; ///maximum cutoff real_type cutoff_radius = 0.0; ///set of variables to be optimized diff --git a/src/QMCWaveFunctions/OptimizableObject.h b/src/QMCWaveFunctions/OptimizableObject.h index 17cf4af88e5..2033e89fc9d 100644 --- a/src/QMCWaveFunctions/OptimizableObject.h +++ b/src/QMCWaveFunctions/OptimizableObject.h @@ -13,103 +13,21 @@ #ifndef QMCPLUSPLUS_OPTIMIZABLEOBJECT_H #define QMCPLUSPLUS_OPTIMIZABLEOBJECT_H -#include "VariableSet.h" -#include "type_traits/template_types.hpp" +#include "Configuration.h" +#include "OptimizableObjectT.h" /**@file OptimizableObject.h *@brief Declaration of OptimizableObject */ namespace qmcplusplus { -using opt_variables_type = optimize::VariableSet; +using opt_variables_type = OptVariablesTypeT; -class OptimizableObject -{ -public: - OptimizableObject(const std::string& name) : name_(name) {} - - const std::string& getName() const { return name_; } - bool isOptimized() const { return is_optimized_; } - -private: - /** Name of the optimizable object - */ - const std::string name_; - /** If true, this object is actively modified during WFOpt - */ - bool is_optimized_ = false; - -public: - /** check in variational parameters to the global list of parameters used by the optimizer. - * @param active a super set of optimizable variables - * - * The existing checkInVariables implementation in WFC/SPO/.. are inclusive and it calls checkInVariables of its members - * class A: public SPOSet {} - * class B: public WFC - * { - * A objA; - * checkInVariables() { objA.checkInVariables(); } - * }; - * - * With OptimizableObject, - * class A: public OptimizableObject {} - * class B: public OptimizableObject - * { - * A objA; - * checkInVariablesExclusive() { // should not call objA.checkInVariablesExclusive() if objA has been extracted; } - * }; - * A vector of OptimizableObject, will be created by calling extractOptimizableObjects(). - * All the checkInVariablesExclusive() will be called through this vector and thus - * checkInVariablesExclusive implementation should only handle non-OptimizableObject members. - */ - virtual void checkInVariablesExclusive(opt_variables_type& active) = 0; - - /** reset the parameters during optimizations. Exclusive, see checkInVariablesExclusive - */ - virtual void resetParametersExclusive(const opt_variables_type& active) = 0; +using OptVariablesType = OptVariablesTypeT; - /** print the state, e.g., optimizables */ - virtual void reportStatus(std::ostream& os) {} - - void setOptimization(bool state) { is_optimized_ = state; } - - /** Write the variational parameters for this object to the VP HDF file - * - * The hout parameter should come from VariableSet::writeToHDF - * - * Objects can use this function to store additional information to the file. - * - * By default the parameters are saved in VariableSet::writeToHDF, and objects - * do not need to implement this function (yet). - * - */ - virtual void writeVariationalParameters(hdf_archive& hout){}; - - /** Read the variational parameters for this object from the VP HDF file - * - * The hin parameter should come from VariableSet::readFromHDF - * - * By default the parameters are read in VariableSet::readFromHDF, and objects - * do not need to implement this function (yet). - */ - virtual void readVariationalParameters(hdf_archive& hin){}; -}; - -class UniqueOptObjRefs : public RefVector -{ -public: - OptimizableObject& operator[](size_t i) const { return RefVector::operator[](i); } +using OptimizableObject = OptimizableObjectT; - void push_back(OptimizableObject& obj) - { - if (obj.getName().empty()) - throw std::logic_error("BUG!! Only named OptimizableObject object can be added to UniqueOptObjRefs!"); - auto result = - std::find_if(begin(), end(), [&](OptimizableObject& element) { return element.getName() == obj.getName(); }); - if (result == end()) - RefVector::push_back(obj); - } -}; +using UniqueOptObjRefs = UniqueOptObjRefsT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/OptimizableObjectT.h b/src/QMCWaveFunctions/OptimizableObjectT.h index 111d812ae4d..1ab14979b7e 100644 --- a/src/QMCWaveFunctions/OptimizableObjectT.h +++ b/src/QMCWaveFunctions/OptimizableObjectT.h @@ -20,8 +20,8 @@ */ namespace qmcplusplus { -template -using OptVariablesType = optimize::VariableSetT; +template +using OptVariablesTypeT = optimize::VariableSetT; template class OptimizableObjectT @@ -76,28 +76,19 @@ class OptimizableObjectT * called through this vector and thus checkInVariablesExclusive * implementation should only handle non-OptimizableObject members. */ - virtual void - checkInVariablesExclusive(OptVariablesType& active) = 0; + virtual void checkInVariablesExclusive(OptVariablesTypeT& active) = 0; - /** reset the parameters during optimizations. Exclusive, see + /** reset the parameters during optimizations. Exclusive, see * checkInVariablesExclusive */ - virtual void - resetParametersExclusive(const OptVariablesType& active) = 0; + virtual void resetParametersExclusive(const OptVariablesTypeT& active) = 0; - /** print the state, e.g., optimizables */ - virtual void - reportStatus(std::ostream& os) - { - } + /** print the state, e.g., optimizables */ + virtual void reportStatus(std::ostream& os) {} - void - setOptimization(bool state) - { - is_optimized_ = state; - } + void setOptimization(bool state) { is_optimized_ = state; } - /** Write the variational parameters for this object to the VP HDF file + /** Write the variational parameters for this object to the VP HDF file * * The hout parameter should come from VariableSet::writeToHDF * @@ -108,18 +99,16 @@ class OptimizableObjectT * objects do not need to implement this function (yet). * */ - virtual void - writeVariationalParameters(hdf_archive& hout){}; + virtual void writeVariationalParameters(hdf_archive& hout){}; - /** Read the variational parameters for this object from the VP HDF file + /** Read the variational parameters for this object from the VP HDF file * * The hin parameter should come from VariableSet::readFromHDF * * By default the parameters are read in VariableSet::readFromHDF, and * objects do not need to implement this function (yet). */ - virtual void - readVariationalParameters(hdf_archive& hin){}; + virtual void readVariationalParameters(hdf_archive& hin){}; }; template diff --git a/src/QMCWaveFunctions/RotatedSPOs.cpp b/src/QMCWaveFunctions/RotatedSPOs.cpp deleted file mode 100644 index 0815484c5e1..00000000000 --- a/src/QMCWaveFunctions/RotatedSPOs.cpp +++ /dev/null @@ -1,1727 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -//// This file is distributed under the University of Illinois/NCSA Open Source License. -//// See LICENSE file in top directory for details. -//// -//// Copyright (c) QMCPACK developers. -//// -//// File developed by: Sergio D. Pineda Flores, sergio_pinedaflores@berkeley.edu, University of California, Berkeley -//// Eric Neuscamman, eneuscamman@berkeley.edu, University of California, Berkeley -//// Ye Luo, yeluo@anl.gov, Argonne National Laboratory -//// -//// File created by: Sergio D. Pineda Flores, sergio_pinedaflores@berkeley.edu, University of California, Berkeley -//////////////////////////////////////////////////////////////////////////////////////// -#include "RotatedSPOs.h" -#include "Numerics/MatrixOperators.h" -#include "Numerics/DeterminantOperators.h" -#include "CPU/BLAS.hpp" -#include "io/hdf/hdf_archive.h" - - -namespace qmcplusplus -{ -RotatedSPOs::RotatedSPOs(const std::string& my_name, std::unique_ptr&& spos) - : SPOSet(my_name), - OptimizableObject(my_name), - Phi(std::move(spos)), - nel_major_(0), - params_supplied(false), - apply_rotation_timer_(createGlobalTimer("RotatedSPOs::apply_rotation", timer_level_fine)) -{ - OrbitalSetSize = Phi->getOrbitalSetSize(); -} - -RotatedSPOs::~RotatedSPOs() {} - - -void RotatedSPOs::setRotationParameters(const std::vector& param_list) -{ - params = param_list; - params_supplied = true; -} - -void RotatedSPOs::createRotationIndices(int nel, int nmo, RotationIndices& rot_indices) -{ - for (int i = 0; i < nel; i++) - for (int j = nel; j < nmo; j++) - rot_indices.emplace_back(i, j); -} - -void RotatedSPOs::createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices) -{ - rot_indices.reserve(nmo * (nmo - 1) / 2); - - // start with core-active rotations - put them at the beginning of the list - // so it matches the other list of rotation indices - for (int i = 0; i < nel; i++) - for (int j = nel; j < nmo; j++) - rot_indices.emplace_back(i, j); - - // Add core-core rotations - put them at the end of the list - for (int i = 0; i < nel; i++) - for (int j = i + 1; j < nel; j++) - rot_indices.emplace_back(i, j); - - // Add active-active rotations - put them at the end of the list - for (int i = nel; i < nmo; i++) - for (int j = i + 1; j < nmo; j++) - rot_indices.emplace_back(i, j); -} - -void RotatedSPOs::constructAntiSymmetricMatrix(const RotationIndices& rot_indices, - const std::vector& param, - ValueMatrix& rot_mat) -{ - assert(rot_indices.size() == param.size()); - // Assumes rot_mat is of the correct size - - rot_mat = 0.0; - - for (int i = 0; i < rot_indices.size(); i++) - { - const int p = rot_indices[i].first; - const int q = rot_indices[i].second; - const RealType x = param[i]; - - rot_mat[q][p] = x; - rot_mat[p][q] = -x; - } -} - -void RotatedSPOs::extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices, - const ValueMatrix& rot_mat, - std::vector& param) -{ - assert(rot_indices.size() == param.size()); - // Assumes rot_mat is of the correct size - - for (int i = 0; i < rot_indices.size(); i++) - { - const int p = rot_indices[i].first; - const int q = rot_indices[i].second; - param[i] = rot_mat[q][p]; - } -} - -void RotatedSPOs::resetParametersExclusive(const opt_variables_type& active) -{ - std::vector delta_param(m_act_rot_inds.size()); - - size_t psize = m_act_rot_inds.size(); - - if (use_global_rot_) - { - psize = m_full_rot_inds.size(); - assert(psize >= m_act_rot_inds.size()); - } - - std::vector old_param(psize); - std::vector new_param(psize); - - for (int i = 0; i < m_act_rot_inds.size(); i++) - { - int loc = myVars.where(i); - delta_param[i] = active[loc] - myVars[i]; - myVars[i] = active[loc]; - } - - if (use_global_rot_) - { - for (int i = 0; i < m_full_rot_inds.size(); i++) - old_param[i] = myVarsFull[i]; - - applyDeltaRotation(delta_param, old_param, new_param); - - // Save the the params - for (int i = 0; i < m_full_rot_inds.size(); i++) - myVarsFull[i] = new_param[i]; - } - else - { - apply_rotation(delta_param, false); - - // Save the parameters in the history list - history_params_.push_back(delta_param); - } -} - -void RotatedSPOs::writeVariationalParameters(hdf_archive& hout) -{ - hout.push("RotatedSPOs"); - if (use_global_rot_) - { - hout.push("rotation_global"); - std::string rot_global_name = std::string("rotation_global_") + SPOSet::getName(); - - int nparam_full = myVarsFull.size(); - std::vector full_params(nparam_full); - for (int i = 0; i < nparam_full; i++) - full_params[i] = myVarsFull[i]; - - hout.write(full_params, rot_global_name); - hout.pop(); - } - else - { - hout.push("rotation_history"); - size_t rows = history_params_.size(); - size_t cols = 0; - if (rows > 0) - cols = history_params_[0].size(); - - Matrix tmp(rows, cols); - for (size_t i = 0; i < rows; i++) - for (size_t j = 0; j < cols; j++) - tmp(i, j) = history_params_[i][j]; - - std::string rot_hist_name = std::string("rotation_history_") + SPOSet::getName(); - hout.write(tmp, rot_hist_name); - hout.pop(); - } - - // Save myVars in order to restore object state exactly - // The values aren't meaningful, but they need to match those saved in VariableSet - hout.push("rotation_params"); - std::string rot_params_name = std::string("rotation_params_") + SPOSet::getName(); - - int nparam = myVars.size(); - std::vector params(nparam); - for (int i = 0; i < nparam; i++) - params[i] = myVars[i]; - - hout.write(params, rot_params_name); - hout.pop(); - - hout.pop(); -} - -void RotatedSPOs::readVariationalParameters(hdf_archive& hin) -{ - hin.push("RotatedSPOs", false); - - bool grp_hist_exists = hin.is_group("rotation_history"); - bool grp_global_exists = hin.is_group("rotation_global"); - if (!grp_hist_exists && !grp_global_exists) - app_warning() << "Rotation parameters not found in VP file"; - - - if (grp_global_exists) - { - hin.push("rotation_global", false); - std::string rot_global_name = std::string("rotation_global_") + SPOSet::getName(); - - std::vector sizes(1); - if (!hin.getShape(rot_global_name, sizes)) - throw std::runtime_error("Failed to read rotation_global in VP file"); - - int nparam_full_actual = sizes[0]; - int nparam_full = myVarsFull.size(); - - if (nparam_full != nparam_full_actual) - { - std::ostringstream tmp_err; - tmp_err << "Expected number of full rotation parameters (" << nparam_full << ") does not match number in file (" - << nparam_full_actual << ")"; - throw std::runtime_error(tmp_err.str()); - } - std::vector full_params(nparam_full); - hin.read(full_params, rot_global_name); - for (int i = 0; i < nparam_full; i++) - myVarsFull[i] = full_params[i]; - - hin.pop(); - - applyFullRotation(full_params, true); - } - else if (grp_hist_exists) - { - hin.push("rotation_history", false); - std::string rot_hist_name = std::string("rotation_history_") + SPOSet::getName(); - std::vector sizes(2); - if (!hin.getShape(rot_hist_name, sizes)) - throw std::runtime_error("Failed to read rotation history in VP file"); - - int rows = sizes[0]; - int cols = sizes[1]; - history_params_.resize(rows); - Matrix tmp(rows, cols); - hin.read(tmp, rot_hist_name); - for (size_t i = 0; i < rows; i++) - { - history_params_[i].resize(cols); - for (size_t j = 0; j < cols; j++) - history_params_[i][j] = tmp(i, j); - } - - hin.pop(); - - applyRotationHistory(); - } - - hin.push("rotation_params", false); - std::string rot_param_name = std::string("rotation_params_") + SPOSet::getName(); - - std::vector sizes(1); - if (!hin.getShape(rot_param_name, sizes)) - throw std::runtime_error("Failed to read rotation_params in VP file"); - - int nparam_actual = sizes[0]; - int nparam = myVars.size(); - if (nparam != nparam_actual) - { - std::ostringstream tmp_err; - tmp_err << "Expected number of rotation parameters (" << nparam << ") does not match number in file (" - << nparam_actual << ")"; - throw std::runtime_error(tmp_err.str()); - } - - std::vector params(nparam); - hin.read(params, rot_param_name); - for (int i = 0; i < nparam; i++) - myVars[i] = params[i]; - - hin.pop(); - - hin.pop(); -} - -void RotatedSPOs::buildOptVariables(const size_t nel) -{ -#if !defined(QMC_COMPLEX) - /* Only rebuild optimized variables if more after-rotation orbitals are needed - * Consider ROHF, there is only one set of SPO for both spin up and down Nup > Ndown. - * nel_major_ will be set Nup. - * - * Use the size of myVars as a flag to avoid building the rotation parameters again - * when a clone is made (the DiracDeterminant constructor calls buildOptVariables) - */ - if (nel > nel_major_ && myVars.size() == 0) - { - nel_major_ = nel; - - const size_t nmo = Phi->getOrbitalSetSize(); - - // create active rotation parameter indices - RotationIndices created_m_act_rot_inds; - - RotationIndices created_full_rot_inds; - if (use_global_rot_) - createRotationIndicesFull(nel, nmo, created_full_rot_inds); - - createRotationIndices(nel, nmo, created_m_act_rot_inds); - - buildOptVariables(created_m_act_rot_inds, created_full_rot_inds); - } -#endif -} - -void RotatedSPOs::buildOptVariables(const RotationIndices& rotations, const RotationIndices& full_rotations) -{ -#if !defined(QMC_COMPLEX) - const size_t nmo = Phi->getOrbitalSetSize(); - - // create active rotations - m_act_rot_inds = rotations; - - if (use_global_rot_) - m_full_rot_inds = full_rotations; - - if (use_global_rot_) - app_log() << "Orbital rotation using global rotation" << std::endl; - else - app_log() << "Orbital rotation using history" << std::endl; - - // This will add the orbital rotation parameters to myVars - // and will also read in initial parameter values supplied in input file - int p, q; - int nparams_active = m_act_rot_inds.size(); - - app_log() << "nparams_active: " << nparams_active << " params2.size(): " << params.size() << std::endl; - if (params_supplied) - if (nparams_active != params.size()) - throw std::runtime_error( - "The number of supplied orbital rotation parameters does not match number prdouced by the slater " - "expansion. \n"); - - myVars.clear(); - for (int i = 0; i < nparams_active; i++) - { - p = m_act_rot_inds[i].first; - q = m_act_rot_inds[i].second; - std::stringstream sstr; - sstr << my_name_ << "_orb_rot_" << (p < 10 ? "0" : "") << (p < 100 ? "0" : "") << (p < 1000 ? "0" : "") << p << "_" - << (q < 10 ? "0" : "") << (q < 100 ? "0" : "") << (q < 1000 ? "0" : "") << q; - - // If the user input parameters, use those. Otherwise, initialize the parameters to zero - if (params_supplied) - { - myVars.insert(sstr.str(), params[i]); - } - else - { - myVars.insert(sstr.str(), 0.0); - } - } - - if (use_global_rot_) - { - myVarsFull.clear(); - for (int i = 0; i < m_full_rot_inds.size(); i++) - { - p = m_full_rot_inds[i].first; - q = m_full_rot_inds[i].second; - std::stringstream sstr; - sstr << my_name_ << "_orb_rot_" << (p < 10 ? "0" : "") << (p < 100 ? "0" : "") << (p < 1000 ? "0" : "") << p - << "_" << (q < 10 ? "0" : "") << (q < 100 ? "0" : "") << (q < 1000 ? "0" : "") << q; - - if (params_supplied && i < m_act_rot_inds.size()) - myVarsFull.insert(sstr.str(), params[i]); - else - myVarsFull.insert(sstr.str(), 0.0); - } - } - - - //Printing the parameters - if (true) - { - app_log() << std::string(16, ' ') << "Parameter name" << std::string(15, ' ') << "Value\n"; - myVars.print(app_log()); - } - - if (params_supplied) - { - std::vector param(m_act_rot_inds.size()); - for (int i = 0; i < m_act_rot_inds.size(); i++) - param[i] = myVars[i]; - apply_rotation(param, false); - } -#endif -} - -void RotatedSPOs::apply_rotation(const std::vector& param, bool use_stored_copy) -{ - assert(param.size() == m_act_rot_inds.size()); - - const size_t nmo = Phi->getOrbitalSetSize(); - ValueMatrix rot_mat(nmo, nmo); - - constructAntiSymmetricMatrix(m_act_rot_inds, param, rot_mat); - - /* - rot_mat is now an anti-hermitian matrix. Now we convert - it into a unitary matrix via rot_mat = exp(-rot_mat). - Finally, apply unitary matrix to orbs. - */ - exponentiate_antisym_matrix(rot_mat); - { - ScopedTimer local(apply_rotation_timer_); - Phi->applyRotation(rot_mat, use_stored_copy); - } -} - -void RotatedSPOs::applyDeltaRotation(const std::vector& delta_param, - const std::vector& old_param, - std::vector& new_param) -{ - const size_t nmo = Phi->getOrbitalSetSize(); - ValueMatrix new_rot_mat(nmo, nmo); - constructDeltaRotation(delta_param, old_param, m_act_rot_inds, m_full_rot_inds, new_param, new_rot_mat); - - { - ScopedTimer local(apply_rotation_timer_); - Phi->applyRotation(new_rot_mat, true); - } -} - -void RotatedSPOs::constructDeltaRotation(const std::vector& delta_param, - const std::vector& old_param, - const RotationIndices& act_rot_inds, - const RotationIndices& full_rot_inds, - std::vector& new_param, - ValueMatrix& new_rot_mat) -{ - assert(delta_param.size() == act_rot_inds.size()); - assert(old_param.size() == full_rot_inds.size()); - assert(new_param.size() == full_rot_inds.size()); - - const size_t nmo = new_rot_mat.rows(); - assert(new_rot_mat.rows() == new_rot_mat.cols()); - - ValueMatrix old_rot_mat(nmo, nmo); - - constructAntiSymmetricMatrix(full_rot_inds, old_param, old_rot_mat); - exponentiate_antisym_matrix(old_rot_mat); - - ValueMatrix delta_rot_mat(nmo, nmo); - - constructAntiSymmetricMatrix(act_rot_inds, delta_param, delta_rot_mat); - exponentiate_antisym_matrix(delta_rot_mat); - - // Apply delta rotation to old rotation. - BLAS::gemm('N', 'N', nmo, nmo, nmo, 1.0, delta_rot_mat.data(), nmo, old_rot_mat.data(), nmo, 0.0, new_rot_mat.data(), - nmo); - - ValueMatrix log_rot_mat(nmo, nmo); - log_antisym_matrix(new_rot_mat, log_rot_mat); - extractParamsFromAntiSymmetricMatrix(full_rot_inds, log_rot_mat, new_param); -} - -void RotatedSPOs::applyFullRotation(const std::vector& full_param, bool use_stored_copy) -{ - assert(full_param.size() == m_full_rot_inds.size()); - - const size_t nmo = Phi->getOrbitalSetSize(); - ValueMatrix rot_mat(nmo, nmo); - rot_mat = ValueType(0); - - constructAntiSymmetricMatrix(m_full_rot_inds, full_param, rot_mat); - - /* - rot_mat is now an anti-hermitian matrix. Now we convert - it into a unitary matrix via rot_mat = exp(-rot_mat). - Finally, apply unitary matrix to orbs. - */ - exponentiate_antisym_matrix(rot_mat); - Phi->applyRotation(rot_mat, use_stored_copy); -} - -void RotatedSPOs::applyRotationHistory() -{ - for (auto delta_param : history_params_) - { - apply_rotation(delta_param, false); - } -} - -// compute exponential of a real, antisymmetric matrix by diagonalizing and exponentiating eigenvalues -void RotatedSPOs::exponentiate_antisym_matrix(ValueMatrix& mat) -{ - const int n = mat.rows(); - std::vector> mat_h(n * n, 0); - std::vector eval(n, 0); - std::vector> work(2 * n, 0); - std::vector rwork(3 * n, 0); - std::vector> mat_d(n * n, 0); - std::vector> mat_t(n * n, 0); - // exponentiating e^X = e^iY (Y hermitian) - // i(-iX) = X, so -iX is hermitian - // diagonalize -iX = UDU^T, exponentiate e^iD, and return U e^iD U^T - // construct hermitian analogue of mat by multiplying by -i - for (int i = 0; i < n; ++i) - { - for (int j = i; j < n; ++j) - { - mat_h[i + n * j] = std::complex(0, -1.0 * mat[j][i]); - mat_h[j + n * i] = std::complex(0, 1.0 * mat[j][i]); - } - } - // diagonalize the matrix - char JOBZ('V'); - char UPLO('U'); - int N(n); - int LDA(n); - int LWORK(2 * n); - int info = 0; - LAPACK::heev(JOBZ, UPLO, N, &mat_h.at(0), LDA, &eval.at(0), &work.at(0), LWORK, &rwork.at(0), info); - if (info != 0) - { - std::ostringstream msg; - msg << "heev failed with info = " << info << " in RotatedSPOs::exponentiate_antisym_matrix"; - throw std::runtime_error(msg.str()); - } - // iterate through diagonal matrix, exponentiate terms - for (int i = 0; i < n; ++i) - { - for (int j = 0; j < n; ++j) - { - mat_d[i + j * n] = (i == j) ? std::exp(std::complex(0.0, eval[i])) : std::complex(0.0, 0.0); - } - } - // perform matrix multiplication - // assume row major - BLAS::gemm('N', 'C', n, n, n, std::complex(1.0, 0), &mat_d.at(0), n, &mat_h.at(0), n, - std::complex(0.0, 0.0), &mat_t.at(0), n); - BLAS::gemm('N', 'N', n, n, n, std::complex(1.0, 0), &mat_h.at(0), n, &mat_t.at(0), n, - std::complex(0.0, 0.0), &mat_d.at(0), n); - for (int i = 0; i < n; ++i) - for (int j = 0; j < n; ++j) - { - if (mat_d[i + n * j].imag() > 1e-12) - { - app_log() << "warning: large imaginary value in orbital rotation matrix: (i,j) = (" << i << "," << j - << "), im = " << mat_d[i + n * j].imag() << std::endl; - } - mat[j][i] = mat_d[i + n * j].real(); - } -} - -void RotatedSPOs::log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output) -{ - const int n = mat.rows(); - std::vector mat_h(n * n, 0); - std::vector eval_r(n, 0); - std::vector eval_i(n, 0); - std::vector mat_l(n * n, 0); - std::vector work(4 * n, 0); - - std::vector> mat_cd(n * n, 0); - std::vector> mat_cl(n * n, 0); - std::vector> mat_ch(n * n, 0); - - for (int i = 0; i < n; ++i) - for (int j = 0; j < n; ++j) - mat_h[i + n * j] = mat[i][j]; - - // diagonalize the matrix - char JOBL('V'); - char JOBR('N'); - int N(n); - int LDA(n); - int LWORK(4 * n); - int info = 0; - LAPACK::geev(&JOBL, &JOBR, &N, &mat_h.at(0), &LDA, &eval_r.at(0), &eval_i.at(0), &mat_l.at(0), &LDA, nullptr, &LDA, - &work.at(0), &LWORK, &info); - if (info != 0) - { - std::ostringstream msg; - msg << "heev failed with info = " << info << " in RotatedSPOs::log_antisym_matrix"; - throw std::runtime_error(msg.str()); - } - - // iterate through diagonal matrix, take log - for (int i = 0; i < n; ++i) - { - for (int j = 0; j < n; ++j) - { - auto tmp = (i == j) ? std::log(std::complex(eval_r[i], eval_i[i])) : std::complex(0.0, 0.0); - mat_cd[i + j * n] = tmp; - - if (eval_i[j] > 0.0) - { - mat_cl[i + j * n] = std::complex(mat_l[i + j * n], mat_l[i + (j + 1) * n]); - mat_cl[i + (j + 1) * n] = std::complex(mat_l[i + j * n], -mat_l[i + (j + 1) * n]); - } - else if (!(eval_i[j] < 0.0)) - { - mat_cl[i + j * n] = std::complex(mat_l[i + j * n], 0.0); - } - } - } - - RealType one(1.0); - RealType zero(0.0); - BLAS::gemm('N', 'N', n, n, n, one, &mat_cl.at(0), n, &mat_cd.at(0), n, zero, &mat_ch.at(0), n); - BLAS::gemm('N', 'C', n, n, n, one, &mat_ch.at(0), n, &mat_cl.at(0), n, zero, &mat_cd.at(0), n); - - - for (int i = 0; i < n; ++i) - for (int j = 0; j < n; ++j) - { - if (mat_cd[i + n * j].imag() > 1e-12) - { - app_log() << "warning: large imaginary value in antisymmetric matrix: (i,j) = (" << i << "," << j - << "), im = " << mat_cd[i + n * j].imag() << std::endl; - } - output[i][j] = mat_cd[i + n * j].real(); - } -} - -void RotatedSPOs::evaluateDerivRatios(const VirtualParticleSet& VP, - const opt_variables_type& optvars, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios, - Matrix& dratios, - int FirstIndex, - int LastIndex) -{ - Phi->evaluateDetRatios(VP, psi, psiinv, ratios); - - const size_t nel = LastIndex - FirstIndex; - const size_t nmo = Phi->getOrbitalSetSize(); - - psiM_inv.resize(nel, nel); - psiM_all.resize(nel, nmo); - dpsiM_all.resize(nel, nmo); - d2psiM_all.resize(nel, nmo); - - psiM_inv = 0; - psiM_all = 0; - dpsiM_all = 0; - d2psiM_all = 0; - - const ParticleSet& P = VP.getRefPS(); - int iel = VP.refPtcl; - - Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all); - - for (int i = 0; i < nel; i++) - for (int j = 0; j < nel; j++) - psiM_inv(i, j) = psiM_all(i, j); - - Invert(psiM_inv.data(), nel, nel); - - const ValueType* const A(psiM_all.data()); - const ValueType* const Ainv(psiM_inv.data()); - SPOSet::ValueMatrix T_orig; - T_orig.resize(nel, nmo); - - BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), A, nmo, Ainv, nel, ValueType(0.0), T_orig.data(), nmo); - - SPOSet::ValueMatrix T; - T.resize(nel, nmo); - - ValueVector tmp_psi; - tmp_psi.resize(nmo); - - for (int iat = 0; iat < VP.getTotalNum(); iat++) - { - Phi->evaluateValue(VP, iat, tmp_psi); - - for (int j = 0; j < nmo; j++) - psiM_all(iel - FirstIndex, j) = tmp_psi[j]; - - for (int i = 0; i < nel; i++) - for (int j = 0; j < nel; j++) - psiM_inv(i, j) = psiM_all(i, j); - - Invert(psiM_inv.data(), nel, nel); - - const ValueType* const A(psiM_all.data()); - const ValueType* const Ainv(psiM_inv.data()); - - // The matrix A is rectangular. Ainv is the inverse of the square part of the matrix. - // The multiply of Ainv and the square part of A is just the identity. - // This multiply could be reduced to Ainv and the non-square part of A. - BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), A, nmo, Ainv, nel, ValueType(0.0), T.data(), nmo); - - for (int i = 0; i < m_act_rot_inds.size(); i++) - { - int kk = myVars.where(i); - if (kk >= 0) - { - const int p = m_act_rot_inds.at(i).first; - const int q = m_act_rot_inds.at(i).second; - dratios(iat, kk) = T(p, q) - T_orig(p, q); // dratio size is (nknot, num_vars) - } - } - } -} - -void RotatedSPOs::evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - int FirstIndex, - int LastIndex) -{ - const size_t nel = LastIndex - FirstIndex; - const size_t nmo = Phi->getOrbitalSetSize(); - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1 - - psiM_inv.resize(nel, nel); - psiM_all.resize(nel, nmo); - dpsiM_all.resize(nel, nmo); - d2psiM_all.resize(nel, nmo); - - psiM_inv = 0; - psiM_all = 0; - dpsiM_all = 0; - d2psiM_all = 0; - - Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all); - - for (int i = 0; i < nel; i++) - for (int j = 0; j < nel; j++) - psiM_inv(i, j) = psiM_all(i, j); - - Invert(psiM_inv.data(), nel, nel); - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2 - const ValueType* const A(psiM_all.data()); - const ValueType* const Ainv(psiM_inv.data()); - SPOSet::ValueMatrix T; - T.resize(nel, nmo); - - BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), A, nmo, Ainv, nel, ValueType(0.0), T.data(), nmo); - - for (int i = 0; i < m_act_rot_inds.size(); i++) - { - int kk = myVars.where(i); - if (kk >= 0) - { - const int p = m_act_rot_inds.at(i).first; - const int q = m_act_rot_inds.at(i).second; - dlogpsi[kk] = T(p, q); - } - } -} - -void RotatedSPOs::evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const int& FirstIndex, - const int& LastIndex) -{ - const size_t nel = LastIndex - FirstIndex; - const size_t nmo = Phi->getOrbitalSetSize(); - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1 - myG_temp.resize(nel); - myG_J.resize(nel); - myL_temp.resize(nel); - myL_J.resize(nel); - - myG_temp = 0; - myG_J = 0; - myL_temp = 0; - myL_J = 0; - - Bbar.resize(nel, nmo); - psiM_inv.resize(nel, nel); - psiM_all.resize(nel, nmo); - dpsiM_all.resize(nel, nmo); - d2psiM_all.resize(nel, nmo); - - Bbar = 0; - psiM_inv = 0; - psiM_all = 0; - dpsiM_all = 0; - d2psiM_all = 0; - - - Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all); - - for (int i = 0; i < nel; i++) - for (int j = 0; j < nel; j++) - psiM_inv(i, j) = psiM_all(i, j); - - Invert(psiM_inv.data(), nel, nel); - - //current value of Gradient and Laplacian - // gradient components - for (int a = 0; a < nel; a++) - for (int i = 0; i < nel; i++) - for (int k = 0; k < 3; k++) - myG_temp[a][k] += psiM_inv(i, a) * dpsiM_all(a, i)[k]; - // laplacian components - for (int a = 0; a < nel; a++) - { - for (int i = 0; i < nel; i++) - myL_temp[a] += psiM_inv(i, a) * d2psiM_all(a, i); - } - - // calculation of myG_J which will be used to represent \frac{\nabla\psi_{J}}{\psi_{J}} - // calculation of myL_J will be used to represent \frac{\nabla^2\psi_{J}}{\psi_{J}} - // IMPORTANT NOTE: The value of P.L holds \nabla^2 ln[\psi] but we need \frac{\nabla^2 \psi}{\psi} and this is what myL_J will hold - for (int a = 0, iat = FirstIndex; a < nel; a++, iat++) - { - myG_J[a] = (P.G[iat] - myG_temp[a]); - myL_J[a] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[a]); - } - //possibly replace wit BLAS calls - for (int i = 0; i < nel; i++) - for (int j = 0; j < nmo; j++) - Bbar(i, j) = d2psiM_all(i, j) + 2 * dot(myG_J[i], dpsiM_all(i, j)) + myL_J[i] * psiM_all(i, j); - - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2 - const ValueType* const A(psiM_all.data()); - const ValueType* const Ainv(psiM_inv.data()); - const ValueType* const B(Bbar.data()); - SPOSet::ValueMatrix T; - SPOSet::ValueMatrix Y1; - SPOSet::ValueMatrix Y2; - SPOSet::ValueMatrix Y3; - SPOSet::ValueMatrix Y4; - T.resize(nel, nmo); - Y1.resize(nel, nel); - Y2.resize(nel, nmo); - Y3.resize(nel, nmo); - Y4.resize(nel, nmo); - - - BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), A, nmo, Ainv, nel, ValueType(0.0), T.data(), nmo); - BLAS::gemm('N', 'N', nel, nel, nel, ValueType(1.0), B, nmo, Ainv, nel, ValueType(0.0), Y1.data(), nel); - BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), T.data(), nmo, Y1.data(), nel, ValueType(0.0), Y2.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), B, nmo, Ainv, nel, ValueType(0.0), Y3.data(), nmo); - - //possibly replace with BLAS call - Y4 = Y3 - Y2; - - for (int i = 0; i < m_act_rot_inds.size(); i++) - { - int kk = myVars.where(i); - if (kk >= 0) - { - const int p = m_act_rot_inds.at(i).first; - const int q = m_act_rot_inds.at(i).second; - dlogpsi[kk] += T(p, q); - dhpsioverpsi[kk] += ValueType(-0.5) * Y4(p, q); - } - } -} - -void RotatedSPOs::evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const GradMatrix& grads_up, - const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, - const ValueMatrix& B_lapl, - const std::vector& detData_up, - const size_t N1, - const size_t N2, - const size_t NP1, - const size_t NP2, - const std::vector>& lookup_tbl) -{ - bool recalculate(false); - for (int k = 0; k < myVars.size(); ++k) - { - int kk = myVars.where(k); - if (kk < 0) - continue; - if (optvars.recompute(kk)) - recalculate = true; - } - if (recalculate) - { - ParticleSet::ParticleGradient myG_temp, myG_J; - ParticleSet::ParticleLaplacian myL_temp, myL_J; - const int NP = P.getTotalNum(); - myG_temp.resize(NP); - myG_temp = 0.0; - myL_temp.resize(NP); - myL_temp = 0.0; - myG_J.resize(NP); - myG_J = 0.0; - myL_J.resize(NP); - myL_J = 0.0; - const size_t nmo = Phi->getOrbitalSetSize(); - const size_t nel = P.last(0) - P.first(0); - - const RealType* restrict C_p = Coeff.data(); - for (int i = 0; i < Coeff.size(); i++) - { - const size_t upC = C2node_up[i]; - const size_t dnC = C2node_dn[i]; - const ValueType tmp1 = C_p[i] * detValues_dn[dnC]; - const ValueType tmp2 = C_p[i] * detValues_up[upC]; - for (size_t k = 0, j = N1; k < NP1; k++, j++) - { - myG_temp[j] += tmp1 * grads_up(upC, k); - myL_temp[j] += tmp1 * lapls_up(upC, k); - } - for (size_t k = 0, j = N2; k < NP2; k++, j++) - { - myG_temp[j] += tmp2 * grads_dn(dnC, k); - myL_temp[j] += tmp2 * lapls_dn(dnC, k); - } - } - - myG_temp *= (1 / psiCurrent); - myL_temp *= (1 / psiCurrent); - - // calculation of myG_J which will be used to represent \frac{\nabla\psi_{J}}{\psi_{J}} - // calculation of myL_J will be used to represent \frac{\nabla^2\psi_{J}}{\psi_{J}} - // IMPORTANT NOTE: The value of P.L holds \nabla^2 ln[\psi] but we need \frac{\nabla^2 \psi}{\psi} and this is what myL_J will hold - for (int iat = 0; iat < (myL_temp.size()); iat++) - { - myG_J[iat] = (P.G[iat] - myG_temp[iat]); - myL_J[iat] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[iat]); - } - - - table_method_eval(dlogpsi, dhpsioverpsi, myL_J, myG_J, nel, nmo, psiCurrent, Coeff, C2node_up, C2node_dn, - detValues_up, detValues_dn, grads_up, grads_dn, lapls_up, lapls_dn, M_up, M_dn, Minv_up, Minv_dn, - B_grad, B_lapl, detData_up, N1, N2, NP1, NP2, lookup_tbl); - } -} - - -void RotatedSPOs::evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - const QTFull::ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl) -{ - bool recalculate(false); - for (int k = 0; k < myVars.size(); ++k) - { - int kk = myVars.where(k); - if (kk < 0) - continue; - if (optvars.recompute(kk)) - recalculate = true; - } - if (recalculate) - { - const size_t nmo = Phi->getOrbitalSetSize(); - const size_t nel = P.last(0) - P.first(0); - - table_method_evalWF(dlogpsi, nel, nmo, psiCurrent, Coeff, C2node_up, C2node_dn, detValues_up, detValues_dn, M_up, - M_dn, Minv_up, Minv_dn, detData_up, lookup_tbl); - } -} - -void RotatedSPOs::table_method_eval(Vector& dlogpsi, - Vector& dhpsioverpsi, - const ParticleSet::ParticleLaplacian& myL_J, - const ParticleSet::ParticleGradient& myG_J, - const size_t nel, - const size_t nmo, - const ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const GradMatrix& grads_up, - const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, - const ValueMatrix& B_lapl, - const std::vector& detData_up, - const size_t N1, - const size_t N2, - const size_t NP1, - const size_t NP2, - const std::vector>& lookup_tbl) -/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -GUIDE TO THE MATICES BEING BUILT ----------------------------------------------- -The idea here is that there is a loop over all unique determinants. For each determiant the table method is employed to calculate the contributions to the parameter derivatives (dhpsioverpsi/dlogpsi) - - loop through unquie determinants - loop through parameters - evaluate contributaion to dlogpsi and dhpsioverpsi -\noindent - - BLAS GUIDE for matrix multiplication of [ alpha * A.B + beta * C = C ] - Matrix A is of dimensions a1,a2 and Matrix B is b1,b2 in which a2=b1 - The BLAS command is as follows... - - BLAS::gemm('N','N', b2, a1, a2 ,alpha, B, b2, A, a2, beta, C, b2); - -Below is a human readable format for the matrix multiplications performed below... - -This notation is inspired by http://dx.doi.org/10.1063/1.4948778 -\newline -\hfill\break -$ - A_{i,j}=\phi_j(r_{i}) \\ - T = A^{-1} \widetilde{A} \\ - B_{i,j} =\nabla^2 \phi_{j}(r_i) + \frac{\nabla_{i}J}{J} \cdot \nabla \phi_{j}(r_{i}) + \frac{\nabla^2_i J}{J} \phi_{j}(r_{i}) \\ - \hat{O_{I}} = \hat{O}D_{I} \\ - D_{I}=det(A_{I}) \newline - \psi_{MS} = \sum_{I=0} C_{I} D_{I\uparrow}D_{I\downarrow} \\ - \Psi_{total} = \psi_{J}\psi_{MS} \\ - \alpha_{I} = P^{T}_{I}TQ_{I} \\ - M_{I} = P^{T}_{I} \widetilde{M} Q_{I} = P^{T}_{I} (A^{-1}\widetilde{B} - A^{-1} B A^{-1}\widetilde{A} )Q_{I} \\ -$ -\newline -There are three constants I use in the expressions for dhpsioverpsi and dlogpsi -\newline -\hfill\break -$ - const0 = C_{0}*det(A_{0\downarrow})+\sum_{I=1} C_{I}*det(A_{I\downarrow})* det(\alpha_{I\uparrow}) \\ - const1 = C_{0}*\hat{O} det(A_{0\downarrow})+\sum_{I=1} C_{I}*\hat{O}det(A_{I\downarrow})* det(\alpha_{I\uparrow}) \\ - const2 = \sum_{I=1} C_{I}*det(A_{I\downarrow})* Tr[\alpha_{I}^{-1}M_{I}]*det(\alpha_{I}) \\ -$ -\newline -Below is a translation of the shorthand I use to represent matrices independent of ``excitation matrix". -\newline -\hfill\break -$ - Y_{1} = A^{-1}B \\ - Y_{2} = A^{-1}BA^{-1}\widetilde{A} \\ - Y_{3} = A^{-1}\widetilde{B} \\ - Y_{4} = \widetilde{M} = (A^{-1}\widetilde{B} - A^{-1} B A^{-1}\widetilde{A} )\\ -$ -\newline -Below is a translation of the shorthand I use to represent matrices dependent on ``excitation" with respect to the reference Matrix and sums of matrices. Above this line I have represented these excitation matrices with a subscript ``I" but from this point on The subscript will be omitted and it is clear that whenever a matrix depends on $P^{T}_I$ and $Q_{I}$ that this is an excitation matrix. The reference matrix is always $A_{0}$ and is always the Hartree Fock Matrix. -\newline -\hfill\break -$ - Y_{5} = TQ \\ - Y_{6} = (P^{T}TQ)^{-1} = \alpha_{I}^{-1}\\ - Y_{7} = \alpha_{I}^{-1} P^{T} \\ - Y_{11} = \widetilde{M}Q \\ - Y_{23} = P^{T}\widetilde{M}Q \\ - Y_{24} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q \\ - Y_{25} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q\alpha_{I}^{-1} \\ - Y_{26} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q\alpha_{I}^{-1}P^{T}\\ -$ -\newline -So far you will notice that I have not included up or down arrows to specify what spin the matrices are of. This is because we are calculating the derivative of all up or all down spin orbital rotation parameters at a time. If we are finding the up spin derivatives then any term that is down spin will be constant. The following assumes that we are taking up-spin MO rotation parameter derivatives. Of course the down spin expression can be retrieved by swapping the up and down arrows. I have dubbed any expression with lowercase p prefix as a "precursor" to an expression actually used... -\newline -\hfill\break -$ - \dot{C_{I}} = C_{I}*det(A_{I\downarrow})\\ - \ddot{C_{I}} = C_{I}*\hat{O}det(A_{I\downarrow}) \\ - pK1 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) Tr[\alpha_{I}^{-1}M_{I}] (Q\alpha_{I}^{-1}P^{T}) \\ - pK2 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}) \\ - pK3 = \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}) \\ - pK4 = \sum_{I=1} \dot{C_{I}} det(A_{I}) (Q\alpha_{I}^{-1}P^{T}) \\ - pK5 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1} M_{I} \alpha_{I}^{-1}P^{T}) \\ -$ -\newline -Now these p matrices will be used to make various expressions via BLAS commands. -\newline -\hfill\break -$ - K1T = const0^{-1}*pK1.T =const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) Tr[\alpha_{I}^{-1}M_{I}] (Q\alpha_{I}^{-1}P^{T}T) \\ - TK1T = T.K1T = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) Tr[\alpha_{I}^{-1}M_{I}] (TQ\alpha_{I}^{-1}P^{T}T)\\ \\ - K2AiB = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}A^{-1}\widetilde{B})\\ - TK2AiB = T.K2AiB = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (TQ\alpha_{I}^{-1}P^{T}A^{-1}\widetilde{B})\\ - K2XA = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}X\widetilde{A})\\ - TK2XA = T.K2XA = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (TQ\alpha_{I}^{-1}P^{T}X\widetilde{A})\\ \\ - K2T = \frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}T) \\ - TK2T = T.K2T =\frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (TQ\alpha_{I}^{-1}P^{T}T) \\ - MK2T = \frac{const0}{const1} Y_{4}.K2T= const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (\widetilde{M}Q\alpha_{I}^{-1}P^{T}T)\\ \\ - K3T = const0^{-1} \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}T) \\ - TK3T = T.K3T = const0^{-1} \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) (TQ\alpha_{I}^{-1}P^{T}T)\\ \\ - K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (Q\alpha_{I}^{-1}P^{T}T) \\ - TK4T = T.K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (TQ\alpha_{I}^{-1}P^{T}T) \\ \\ - K5T = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1} M_{I} \alpha_{I}^{-1}P^{T} T) \\ - TK5T = T.K5T = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (T Q\alpha_{I}^{-1} M_{I} \alpha_{I}^{-1}P^{T} T) \\ -$ -\newline -Now with all these matrices and constants the expressions of dhpsioverpsi and dlogpsi can be created. - - - - -In addition I will be using a special generalization of the kinetic operator which I will denote as O. Our Slater matrix with the special O operator applied to each element will be called B_bar - -$ -``Bbar"_{i,j} =\nabla^2 \phi_{j}(r_i) + \frac{\nabla_{i}J}{J} \cdot \nabla \phi_{j}(r_{i}) + \frac{\nabla^2_i J}{J} \phi_{j}(r_{i}) -$ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ -{ - ValueMatrix Table; - ValueMatrix Bbar; - ValueMatrix Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y11, Y23, Y24, Y25, Y26; - ValueMatrix pK1, K1T, TK1T, pK2, K2AiB, TK2AiB, K2XA, TK2XA, K2T, TK2T, MK2T, pK3, K3T, TK3T, pK5, K5T, TK5T; - - Table.resize(nel, nmo); - - Bbar.resize(nel, nmo); - - Y1.resize(nel, nel); - Y2.resize(nel, nmo); - Y3.resize(nel, nmo); - Y4.resize(nel, nmo); - - pK1.resize(nmo, nel); - K1T.resize(nmo, nmo); - TK1T.resize(nel, nmo); - - pK2.resize(nmo, nel); - K2AiB.resize(nmo, nmo); - TK2AiB.resize(nel, nmo); - K2XA.resize(nmo, nmo); - TK2XA.resize(nel, nmo); - K2T.resize(nmo, nmo); - TK2T.resize(nel, nmo); - MK2T.resize(nel, nmo); - - pK3.resize(nmo, nel); - K3T.resize(nmo, nmo); - TK3T.resize(nel, nmo); - - pK5.resize(nmo, nel); - K5T.resize(nmo, nmo); - TK5T.resize(nel, nmo); - - const int parameters_size(m_act_rot_inds.size()); - const int parameter_start_index(0); - - const size_t num_unique_up_dets(detValues_up.size()); - const size_t num_unique_dn_dets(detValues_dn.size()); - - const RealType* restrict cptr = Coeff.data(); - const size_t nc = Coeff.size(); - const size_t* restrict upC(C2node_up.data()); - const size_t* restrict dnC(C2node_dn.data()); - //B_grad holds the gradient operator - //B_lapl holds the laplacian operator - //B_bar will hold our special O operator - - const int offset1(N1); - const int offset2(N2); - const int NPother(NP2); - - RealType* T(Table.data()); - - //possibly replace wit BLAS calls - for (int i = 0; i < nel; i++) - for (int j = 0; j < nmo; j++) - Bbar(i, j) = B_lapl(i, j) + 2 * dot(myG_J[i + offset1], B_grad(i, j)) + myL_J[i + offset1] * M_up(i, j); - - const RealType* restrict B(Bbar.data()); - const RealType* restrict A(M_up.data()); - const RealType* restrict Ainv(Minv_up.data()); - //IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR THIS CASE - // The T matrix should be calculated and stored for use - // T = A^{-1} \widetilde A - //REMINDER: that the ValueMatrix "matrix" stores data in a row major order and that BLAS commands assume column major - BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), A, nmo, Ainv, nel, RealType(0.0), T, nmo); - - BLAS::gemm('N', 'N', nel, nel, nel, RealType(1.0), B, nmo, Ainv, nel, RealType(0.0), Y1.data(), nel); - BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), T, nmo, Y1.data(), nel, RealType(0.0), Y2.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), B, nmo, Ainv, nel, RealType(0.0), Y3.data(), nmo); - - //possibly replace with BLAS call - Y4 = Y3 - Y2; - - //Need to create the constants: (Oi, const0, const1, const2)to take advantage of minimal BLAS commands; - //Oi is the special operator applied to the slater matrix "A subscript i" from the total CI expansion - //\hat{O_{i}} = \hat{O}D_{i} with D_{i}=det(A_{i}) and Multi-Slater component defined as \sum_{i=0} C_{i} D_{i\uparrow}D_{i\downarrow} - std::vector Oi(num_unique_dn_dets); - - for (int index = 0; index < num_unique_dn_dets; index++) - for (int iat = 0; iat < NPother; iat++) - Oi[index] += lapls_dn(index, iat) + 2 * dot(grads_dn(index, iat), myG_J[offset2 + iat]) + - myL_J[offset2 + iat] * detValues_dn[index]; - - //const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1} C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow}) - //const1 = C_{0}*\hat{O} det(A_{0\downarrow})+\sum_{i=1} C_{i}*\hat{O}det(A_{i\downarrow})* det(\alpha_{i\uparrow}) - //const2 = \sum_{i=1} C_{i}*det(A_{i\downarrow})* Tr[\alpha_{i}^{-1}M_{i}]*det(\alpha_{i}) - RealType const0(0.0), const1(0.0), const2(0.0); - for (size_t i = 0; i < nc; ++i) - { - const RealType c = cptr[i]; - const size_t up = upC[i]; - const size_t down = dnC[i]; - - const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]); - const1 += c * Oi[down] * (detValues_up[up] / detValues_up[0]); - } - - std::fill(pK1.begin(), pK1.end(), 0.0); - std::fill(pK2.begin(), pK2.end(), 0.0); - std::fill(pK3.begin(), pK3.end(), 0.0); - std::fill(pK5.begin(), pK5.end(), 0.0); - - //Now we are going to loop through all unique determinants. - //The few lines above are for the reference matrix contribution. - //Although I start the loop below from index 0, the loop only performs actions when the index is >= 1 - //the detData object contains all the information about the P^T and Q matrices (projection matrices) needed in the table method - const int* restrict data_it = detData_up.data(); - for (int index = 0, datum = 0; index < num_unique_up_dets; index++) - { - const int k = data_it[datum]; - - if (k == 0) - { - datum += 3 * k + 1; - } - - else - { - //Number of rows and cols of P^T - const int prows = k; - const int pcols = nel; - //Number of rows and cols of Q - const int qrows = nmo; - const int qcols = k; - - Y5.resize(nel, k); - Y6.resize(k, k); - - //Any matrix multiplication of P^T or Q is simply a projection - //Explicit matrix multiplication can be avoided; instead column or row copying can be done - //BlAS::copy(size of col/row being copied, - // Matrix pointer + place to begin copying, - // storage spacing (number of elements btw next row/col element), - // Pointer to resultant matrix + place to begin pasting, - // storage spacing of resultant matrix) - //For example the next 4 lines is the matrix multiplication of T*Q = Y5 - std::fill(Y5.begin(), Y5.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(nel, T + data_it[datum + 1 + k + i], nmo, Y5.data() + i, k); - } - - std::fill(Y6.begin(), Y6.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, (Y6.data() + i * k), 1); - } - - - Vector WS; - Vector Piv; - WS.resize(k); - Piv.resize(k); - std::complex logdet = 0.0; - InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet); - - Y11.resize(nel, k); - Y23.resize(k, k); - Y24.resize(k, k); - Y25.resize(k, k); - Y26.resize(k, nel); - - std::fill(Y11.begin(), Y11.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(nel, Y4.data() + (data_it[datum + 1 + k + i]), nmo, Y11.data() + i, k); - } - - std::fill(Y23.begin(), Y23.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(k, Y11.data() + (data_it[datum + 1 + i]) * k, 1, (Y23.data() + i * k), 1); - } - - BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y23.data(), k, Y6.data(), k, RealType(0.0), Y24.data(), k); - BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y6.data(), k, Y24.data(), k, RealType(0.0), Y25.data(), k); - - - Y26.resize(k, nel); - - std::fill(Y26.begin(), Y26.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(k, Y25.data() + i, k, Y26.data() + (data_it[datum + 1 + i]), nel); - } - - - Y7.resize(k, nel); - - std::fill(Y7.begin(), Y7.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(k, Y6.data() + i, k, Y7.data() + (data_it[datum + 1 + i]), nel); - } - - // c_Tr_AlphaI_MI is a constant contributing to constant const2 - // c_Tr_AlphaI_MI = Tr[\alpha_{I}^{-1}(P^{T}\widetilde{M} Q)] - RealType c_Tr_AlphaI_MI = 0.0; - for (int i = 0; i < k; i++) - { - c_Tr_AlphaI_MI += Y24(i, i); - } - - for (int p = 0; p < lookup_tbl[index].size(); p++) - { - //el_p is the element position that contains information about the CI coefficient, and det up/dn values associated with the current unique determinant - const int el_p(lookup_tbl[index][p]); - const RealType c = cptr[el_p]; - const size_t up = upC[el_p]; - const size_t down = dnC[el_p]; - - const RealType alpha_1(c * detValues_dn[down] * detValues_up[up] / detValues_up[0] * c_Tr_AlphaI_MI); - const RealType alpha_2(c * detValues_dn[down] * detValues_up[up] / detValues_up[0]); - const RealType alpha_3(c * Oi[down] * detValues_up[up] / detValues_up[0]); - - const2 += alpha_1; - - for (int i = 0; i < k; i++) - { - BLAS::axpy(nel, alpha_1, Y7.data() + i * nel, 1, pK1.data() + (data_it[datum + 1 + k + i]) * nel, 1); - BLAS::axpy(nel, alpha_2, Y7.data() + i * nel, 1, pK2.data() + (data_it[datum + 1 + k + i]) * nel, 1); - BLAS::axpy(nel, alpha_3, Y7.data() + i * nel, 1, pK3.data() + (data_it[datum + 1 + k + i]) * nel, 1); - BLAS::axpy(nel, alpha_2, Y26.data() + i * nel, 1, pK5.data() + (data_it[datum + 1 + k + i]) * nel, 1); - } - } - datum += 3 * k + 1; - } - } - - - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T, nmo, pK1.data(), nel, RealType(0.0), K1T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K1T.data(), nmo, T, nmo, RealType(0.0), TK1T.data(), nmo); - - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y3.data(), nmo, pK2.data(), nel, RealType(0.0), K2AiB.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2AiB.data(), nmo, T, nmo, RealType(0.0), TK2AiB.data(), nmo); - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y2.data(), nmo, pK2.data(), nel, RealType(0.0), K2XA.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2XA.data(), nmo, T, nmo, RealType(0.0), TK2XA.data(), nmo); - - BLAS::gemm('N', 'N', nmo, nmo, nel, const1 / (const0 * const0), T, nmo, pK2.data(), nel, RealType(0.0), K2T.data(), - nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2T.data(), nmo, T, nmo, RealType(0.0), TK2T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, const0 / const1, K2T.data(), nmo, Y4.data(), nmo, RealType(0.0), MK2T.data(), - nmo); - - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T, nmo, pK3.data(), nel, RealType(0.0), K3T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K3T.data(), nmo, T, nmo, RealType(0.0), TK3T.data(), nmo); - - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T, nmo, pK5.data(), nel, RealType(0.0), K5T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K5T.data(), nmo, T, nmo, RealType(0.0), TK5T.data(), nmo); - - - for (int mu = 0, k = parameter_start_index; k < (parameter_start_index + parameters_size); k++, mu++) - { - int kk = myVars.where(k); - if (kk >= 0) - { - const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second); - if (i <= nel - 1 && j > nel - 1) - { - dhpsioverpsi[kk] += - ValueType(-0.5 * Y4(i, j) - - 0.5 * - (-K5T(i, j) + K5T(j, i) + TK5T(i, j) + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) - K2XA(i, j) + - K2XA(j, i) + TK2XA(i, j) - MK2T(i, j) + K1T(i, j) - K1T(j, i) - TK1T(i, j) - - const2 / const1 * K2T(i, j) + const2 / const1 * K2T(j, i) + const2 / const1 * TK2T(i, j) + - K3T(i, j) - K3T(j, i) - TK3T(i, j) - K2T(i, j) + K2T(j, i) + TK2T(i, j))); - } - else if (i <= nel - 1 && j <= nel - 1) - { - dhpsioverpsi[kk] += ValueType( - -0.5 * (Y4(i, j) - Y4(j, i)) - - 0.5 * - (-K5T(i, j) + K5T(j, i) + TK5T(i, j) - TK5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) + - TK2AiB(j, i) - K2XA(i, j) + K2XA(j, i) + TK2XA(i, j) - TK2XA(j, i) - MK2T(i, j) + MK2T(j, i) + - K1T(i, j) - K1T(j, i) - TK1T(i, j) + TK1T(j, i) - const2 / const1 * K2T(i, j) + - const2 / const1 * K2T(j, i) + const2 / const1 * TK2T(i, j) - const2 / const1 * TK2T(j, i) + K3T(i, j) - - K3T(j, i) - TK3T(i, j) + TK3T(j, i) - K2T(i, j) + K2T(j, i) + TK2T(i, j) - TK2T(j, i))); - } - else - { - dhpsioverpsi[kk] += ValueType(-0.5 * - (-K5T(i, j) + K5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - K2XA(i, j) + K2XA(j, i) - - + K1T(i, j) - K1T(j, i) - const2 / const1 * K2T(i, j) + - const2 / const1 * K2T(j, i) + K3T(i, j) - K3T(j, i) - K2T(i, j) + K2T(j, i))); - } - } - } -} - -void RotatedSPOs::table_method_evalWF(Vector& dlogpsi, - const size_t nel, - const size_t nmo, - const ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl) -{ - ValueMatrix Table; - ValueMatrix Y5, Y6, Y7; - ValueMatrix pK4, K4T, TK4T; - - Table.resize(nel, nmo); - - Bbar.resize(nel, nmo); - - pK4.resize(nmo, nel); - K4T.resize(nmo, nmo); - TK4T.resize(nel, nmo); - - const int parameters_size(m_act_rot_inds.size()); - const int parameter_start_index(0); - - const size_t num_unique_up_dets(detValues_up.size()); - const size_t num_unique_dn_dets(detValues_dn.size()); - - const RealType* restrict cptr = Coeff.data(); - const size_t nc = Coeff.size(); - const size_t* restrict upC(C2node_up.data()); - const size_t* restrict dnC(C2node_dn.data()); - - RealType* T(Table.data()); - - const RealType* restrict A(M_up.data()); - const RealType* restrict Ainv(Minv_up.data()); - //IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR THIS CASE - // The T matrix should be calculated and stored for use - // T = A^{-1} \widetilde A - //REMINDER: that the ValueMatrix "matrix" stores data in a row major order and that BLAS commands assume column major - BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), A, nmo, Ainv, nel, RealType(0.0), T, nmo); - - //const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1} C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow}) - RealType const0(0.0), const1(0.0), const2(0.0); - for (size_t i = 0; i < nc; ++i) - { - const RealType c = cptr[i]; - const size_t up = upC[i]; - const size_t down = dnC[i]; - - const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]); - } - - std::fill(pK4.begin(), pK4.end(), 0.0); - - //Now we are going to loop through all unique determinants. - //The few lines above are for the reference matrix contribution. - //Although I start the loop below from index 0, the loop only performs actions when the index is >= 1 - //the detData object contains all the information about the P^T and Q matrices (projection matrices) needed in the table method - const int* restrict data_it = detData_up.data(); - for (int index = 0, datum = 0; index < num_unique_up_dets; index++) - { - const int k = data_it[datum]; - - if (k == 0) - { - datum += 3 * k + 1; - } - - else - { - //Number of rows and cols of P^T - const int prows = k; - const int pcols = nel; - //Number of rows and cols of Q - const int qrows = nmo; - const int qcols = k; - - Y5.resize(nel, k); - Y6.resize(k, k); - - //Any matrix multiplication of P^T or Q is simply a projection - //Explicit matrix multiplication can be avoided; instead column or row copying can be done - //BlAS::copy(size of col/row being copied, - // Matrix pointer + place to begin copying, - // storage spacing (number of elements btw next row/col element), - // Pointer to resultant matrix + place to begin pasting, - // storage spacing of resultant matrix) - //For example the next 4 lines is the matrix multiplication of T*Q = Y5 - std::fill(Y5.begin(), Y5.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(nel, T + data_it[datum + 1 + k + i], nmo, Y5.data() + i, k); - } - - std::fill(Y6.begin(), Y6.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, (Y6.data() + i * k), 1); - } - - Vector WS; - Vector Piv; - WS.resize(k); - Piv.resize(k); - std::complex logdet = 0.0; - InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet); - - Y7.resize(k, nel); - - std::fill(Y7.begin(), Y7.end(), 0.0); - for (int i = 0; i < k; i++) - { - BLAS::copy(k, Y6.data() + i, k, Y7.data() + (data_it[datum + 1 + i]), nel); - } - - for (int p = 0; p < lookup_tbl[index].size(); p++) - { - //el_p is the element position that contains information about the CI coefficient, and det up/dn values associated with the current unique determinant - const int el_p(lookup_tbl[index][p]); - const RealType c = cptr[el_p]; - const size_t up = upC[el_p]; - const size_t down = dnC[el_p]; - - const RealType alpha_4(c * detValues_dn[down] * detValues_up[up] * (1 / psiCurrent)); - - for (int i = 0; i < k; i++) - { - BLAS::axpy(nel, alpha_4, Y7.data() + i * nel, 1, pK4.data() + (data_it[datum + 1 + k + i]) * nel, 1); - } - } - datum += 3 * k + 1; - } - } - - BLAS::gemm('N', 'N', nmo, nmo, nel, RealType(1.0), T, nmo, pK4.data(), nel, RealType(0.0), K4T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K4T.data(), nmo, T, nmo, RealType(0.0), TK4T.data(), nmo); - - for (int mu = 0, k = parameter_start_index; k < (parameter_start_index + parameters_size); k++, mu++) - { - int kk = myVars.where(k); - if (kk >= 0) - { - const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second); - if (i <= nel - 1 && j > nel - 1) - { - dlogpsi[kk] += ValueType(detValues_up[0] * (Table(i, j)) * const0 * (1 / psiCurrent) + - (K4T(i, j) - K4T(j, i) - TK4T(i, j))); - } - else if (i <= nel - 1 && j <= nel - 1) - { - dlogpsi[kk] += ValueType(detValues_up[0] * (Table(i, j) - Table(j, i)) * const0 * (1 / psiCurrent) + - (K4T(i, j) - TK4T(i, j) - K4T(j, i) + TK4T(j, i))); - } - else - { - dlogpsi[kk] += ValueType((K4T(i, j) - K4T(j, i))); - } - } - } -} - - -std::unique_ptr RotatedSPOs::makeClone() const -{ - auto myclone = std::make_unique(my_name_, std::unique_ptr(Phi->makeClone())); - - myclone->params = this->params; - myclone->params_supplied = this->params_supplied; - myclone->m_act_rot_inds = this->m_act_rot_inds; - myclone->m_full_rot_inds = this->m_full_rot_inds; - myclone->myVars = this->myVars; - myclone->myVarsFull = this->myVarsFull; - myclone->history_params_ = this->history_params_; - myclone->use_global_rot_ = this->use_global_rot_; - return myclone; -} - -void RotatedSPOs::mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const -{ - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.mw_evaluateDetRatios(phi_list, vp_list, psi_list, invRow_ptr_list, ratios_list); -} - -void RotatedSPOs::mw_evaluateValue(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list) const -{ - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.mw_evaluateValue(phi_list, P_list, iat, psi_v_list); -} - -void RotatedSPOs::mw_evaluateVGL(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const -{ - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.mw_evaluateVGL(phi_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); -} - -void RotatedSPOs::mw_evaluateVGLWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list, - OffloadMatrix& mw_dspin) const -{ - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.mw_evaluateVGLWithSpin(phi_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list, mw_dspin); -} - -void RotatedSPOs::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const -{ - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.mw_evaluateVGLandDetRatioGrads(phi_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads); -} - -void RotatedSPOs::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads, - std::vector& spingrads) const -{ - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.mw_evaluateVGLandDetRatioGradsWithSpin(phi_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads, - spingrads); -} - -void RotatedSPOs::mw_evaluate_notranspose(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int first, - int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const -{ - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.mw_evaluate_notranspose(phi_list, P_list, first, last, logdet_list, dlogdet_list, d2logdet_list); -} - -void RotatedSPOs::createResource(ResourceCollection& collection) const { Phi->createResource(collection); } - -void RotatedSPOs::acquireResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const -{ - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.acquireResource(collection, phi_list); -} - -void RotatedSPOs::releaseResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const -{ - auto phi_list = extractPhiRefList(spo_list); - auto& leader = phi_list.getLeader(); - leader.releaseResource(collection, phi_list); -} - -RefVectorWithLeader RotatedSPOs::extractPhiRefList(const RefVectorWithLeader& spo_list) -{ - auto& spo_leader = spo_list.getCastedLeader(); - const auto nw = spo_list.size(); - RefVectorWithLeader phi_list(*spo_leader.Phi); - phi_list.reserve(nw); - for (int iw = 0; iw < nw; iw++) - { - RotatedSPOs& rot = spo_list.getCastedElement(iw); - phi_list.emplace_back(*rot.Phi); - } - return phi_list; -} - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/RotatedSPOsT.cpp b/src/QMCWaveFunctions/RotatedSPOsT.cpp index 128bca9798b..b1425f1921a 100644 --- a/src/QMCWaveFunctions/RotatedSPOsT.cpp +++ b/src/QMCWaveFunctions/RotatedSPOsT.cpp @@ -117,9 +117,8 @@ RotatedSPOsT::extractParamsFromAntiSymmetricMatrix( } } -template -void -RotatedSPOsT::resetParametersExclusive(const OptVariablesType& active) +template +void RotatedSPOsT::resetParametersExclusive(const OptVariablesTypeT& active) { std::vector delta_param(m_act_rot_inds.size()); @@ -662,12 +661,15 @@ RotatedSPOsT::log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output) } } -template -void -RotatedSPOsT::evaluateDerivRatios(const VirtualParticleSetT& VP, - const OptVariablesType& optvars, ValueVector& psi, - const ValueVector& psiinv, std::vector& ratios, Matrix& dratios, - int FirstIndex, int LastIndex) +template +void RotatedSPOsT::evaluateDerivRatios(const VirtualParticleSetT& VP, + const OptVariablesTypeT& optvars, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios, + Matrix& dratios, + int FirstIndex, + int LastIndex) { Phi->evaluateDetRatios(VP, psi, psiinv, ratios); @@ -744,11 +746,12 @@ RotatedSPOsT::evaluateDerivRatios(const VirtualParticleSetT& VP, } } -template -void -RotatedSPOsT::evaluateDerivativesWF(ParticleSetT& P, - const OptVariablesType& optvars, Vector& dlogpsi, int FirstIndex, - int LastIndex) +template +void RotatedSPOsT::evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + int FirstIndex, + int LastIndex) { const size_t nel = LastIndex - FirstIndex; const size_t nmo = Phi->getOrbitalSetSize(); @@ -793,11 +796,13 @@ RotatedSPOsT::evaluateDerivativesWF(ParticleSetT& P, } } -template -void -RotatedSPOsT::evaluateDerivatives(ParticleSetT& P, - const OptVariablesType& optvars, Vector& dlogpsi, - Vector& dhpsioverpsi, const int& FirstIndex, const int& LastIndex) +template +void RotatedSPOsT::evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const int& FirstIndex, + const int& LastIndex) { const size_t nel = LastIndex - FirstIndex; const size_t nmo = Phi->getOrbitalSetSize(); @@ -900,21 +905,33 @@ RotatedSPOsT::evaluateDerivatives(ParticleSetT& P, } } -template -void -RotatedSPOsT::evaluateDerivatives(ParticleSetT& P, - const OptVariablesType& optvars, Vector& dlogpsi, - Vector& dhpsioverpsi, const T& psiCurrent, const std::vector& Coeff, - const std::vector& C2node_up, const std::vector& C2node_dn, - const ValueVector& detValues_up, const ValueVector& detValues_dn, - const GradMatrix& grads_up, const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, const ValueMatrix& B_lapl, - const std::vector& detData_up, const size_t N1, const size_t N2, - const size_t NP1, const size_t NP2, - const std::vector>& lookup_tbl) +template +void RotatedSPOsT::evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const T& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const GradMatrix& grads_up, + const GradMatrix& grads_dn, + const ValueMatrix& lapls_up, + const ValueMatrix& lapls_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const GradMatrix& B_grad, + const ValueMatrix& B_lapl, + const std::vector& detData_up, + const size_t N1, + const size_t N2, + const size_t NP1, + const size_t NP2, + const std::vector>& lookup_tbl) { bool recalculate(false); for (int k = 0; k < this->myVars.size(); ++k) { @@ -975,17 +992,22 @@ RotatedSPOsT::evaluateDerivatives(ParticleSetT& P, } } -template -void -RotatedSPOsT::evaluateDerivativesWF(ParticleSetT& P, - const OptVariablesType& optvars, Vector& dlogpsi, - const ValueType& psiCurrent, const std::vector& Coeff, - const std::vector& C2node_up, const std::vector& C2node_dn, - const ValueVector& detValues_up, const ValueVector& detValues_dn, - const ValueMatrix& M_up, const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl) +template +void RotatedSPOsT::evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + const ValueType& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const std::vector& detData_up, + const std::vector>& lookup_tbl) { bool recalculate(false); for (int k = 0; k < this->myVars.size(); ++k) { diff --git a/src/QMCWaveFunctions/RotatedSPOsT.h b/src/QMCWaveFunctions/RotatedSPOsT.h index 971d2528b38..5d524f99e3c 100644 --- a/src/QMCWaveFunctions/RotatedSPOsT.h +++ b/src/QMCWaveFunctions/RotatedSPOsT.h @@ -23,10 +23,8 @@ template class RotatedSPOsT; namespace testing { -OptVariablesType& -getMyVarsFull(RotatedSPOsT& rot); -OptVariablesType& -getMyVarsFull(RotatedSPOsT& rot); +OptVariablesTypeT& getMyVarsFull(RotatedSPOsT& rot); +OptVariablesTypeT& getMyVarsFull(RotatedSPOsT& rot); std::vector>& getHistoryParams(RotatedSPOsT& rot); std::vector>& @@ -194,40 +192,61 @@ class RotatedSPOsT : public SPOSetT, public OptimizableObjectT buildOptVariables(const RotationIndices& rotations, const RotationIndices& full_rotations); - void - evaluateDerivatives(ParticleSetT& P, const OptVariablesType& optvars, - Vector& dlogpsi, Vector& dhpsioverpsi, const int& FirstIndex, - const int& LastIndex) override; - - void - evaluateDerivativesWF(ParticleSetT& P, - const OptVariablesType& optvars, Vector& dlogpsi, int FirstIndex, - int LastIndex) override; - - void - evaluateDerivatives(ParticleSetT& P, const OptVariablesType& optvars, - Vector& dlogpsi, Vector& dhpsioverpsi, const T& psiCurrent, - const std::vector& Coeff, const std::vector& C2node_up, - const std::vector& C2node_dn, const ValueVector& detValues_up, - const ValueVector& detValues_dn, const GradMatrix& grads_up, - const GradMatrix& grads_dn, const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, const ValueMatrix& M_up, - const ValueMatrix& M_dn, const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, const GradMatrix& B_grad, - const ValueMatrix& B_lapl, const std::vector& detData_up, - const size_t N1, const size_t N2, const size_t NP1, const size_t NP2, - const std::vector>& lookup_tbl) override; - - void - evaluateDerivativesWF(ParticleSetT& P, - const OptVariablesType& optvars, Vector& dlogpsi, - const ValueType& psiCurrent, const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, const ValueVector& detValues_up, - const ValueVector& detValues_dn, const ValueMatrix& M_up, - const ValueMatrix& M_dn, const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, const std::vector& detData_up, - const std::vector>& lookup_tbl) override; + void evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const int& FirstIndex, + const int& LastIndex) override; + + void evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + int FirstIndex, + int LastIndex) override; + + void evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const T& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const GradMatrix& grads_up, + const GradMatrix& grads_dn, + const ValueMatrix& lapls_up, + const ValueMatrix& lapls_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const GradMatrix& B_grad, + const ValueMatrix& B_lapl, + const std::vector& detData_up, + const size_t N1, + const size_t N2, + const size_t NP1, + const size_t NP2, + const std::vector>& lookup_tbl) override; + + void evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + const ValueType& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const std::vector& detData_up, + const std::vector>& lookup_tbl) override; // helper function to evaluatederivative; evaluate orbital rotation // parameter derivative using table method @@ -263,22 +282,16 @@ class RotatedSPOsT : public SPOSetT, public OptimizableObjectT opt_obj_refs.push_back(*this); } - void - checkInVariablesExclusive(OptVariablesType& active) override + void checkInVariablesExclusive(OptVariablesTypeT& active) override { if (this->myVars.size()) active.insertFrom(this->myVars); } - void - checkOutVariables(const OptVariablesType& active) override - { - this->myVars.getIndex(active); - } + void checkOutVariables(const OptVariablesTypeT& active) override { this->myVars.getIndex(active); } /// reset - void - resetParametersExclusive(const OptVariablesType& active) override; + void resetParametersExclusive(const OptVariablesTypeT& active) override; void writeVariationalParameters(hdf_archive& hout) override; @@ -322,11 +335,14 @@ class RotatedSPOsT : public SPOSetT, public OptimizableObjectT Phi->evaluateDetRatios(VP, psi, psiinv, ratios); } - void - evaluateDerivRatios(const VirtualParticleSetT& VP, - const OptVariablesType& optvars, ValueVector& psi, - const ValueVector& psiinv, std::vector& ratios, Matrix& dratios, - int FirstIndex, int LastIndex) override; + void evaluateDerivRatios(const VirtualParticleSetT& VP, + const OptVariablesTypeT& optvars, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios, + Matrix& dratios, + int FirstIndex, + int LastIndex) override; void evaluateVGH(const ParticleSetT& P, int iat, ValueVector& psi, @@ -407,7 +423,7 @@ class RotatedSPOsT : public SPOSetT, public OptimizableObjectT std::vector params; /// Full set of rotation matrix parameters for use in global rotation method - OptVariablesType myVarsFull; + OptVariablesTypeT myVarsFull; /// List of previously applied parameters std::vector> history_params_; @@ -415,10 +431,8 @@ class RotatedSPOsT : public SPOSetT, public OptimizableObjectT /// Use global rotation or history list bool use_global_rot_ = true; - friend OptVariablesType& - testing::getMyVarsFull(RotatedSPOsT& rot); - friend OptVariablesType& - testing::getMyVarsFull(RotatedSPOsT& rot); + friend OptVariablesTypeT& testing::getMyVarsFull(RotatedSPOsT& rot); + friend OptVariablesTypeT& testing::getMyVarsFull(RotatedSPOsT& rot); friend std::vector>& testing::getHistoryParams(RotatedSPOsT& rot); friend std::vector>& diff --git a/src/QMCWaveFunctions/SPOSet.h b/src/QMCWaveFunctions/SPOSet.h index 6ba0aa9936c..0313f3ebfcd 100644 --- a/src/QMCWaveFunctions/SPOSet.h +++ b/src/QMCWaveFunctions/SPOSet.h @@ -25,6 +25,7 @@ namespace qmcplusplus { using SPOSet = SPOSetT; +using SPOSetPtr = SPOSet*; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/SPOSetT.cpp b/src/QMCWaveFunctions/SPOSetT.cpp index 2e6521e115e..d330c9e2055 100644 --- a/src/QMCWaveFunctions/SPOSetT.cpp +++ b/src/QMCWaveFunctions/SPOSetT.cpp @@ -46,9 +46,8 @@ SPOSetT::extractOptimizableObjectRefs(UniqueOptObjRefsT&) "must be overloaded when the SPOSet is optimizable."); } -template -void -SPOSetT::checkOutVariables(const OptVariablesType& active) +template +void SPOSetT::checkOutVariables(const OptVariablesTypeT& active) { if (isOptimizable()) throw std::logic_error("Bug!! " + getClassName() + @@ -283,11 +282,13 @@ SPOSetT::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) "must be overloaded when the SPOSet supports rotation."); } -template -void -SPOSetT::evaluateDerivatives(ParticleSetT& P, - const OptVariablesType& optvars, Vector& dlogpsi, - Vector& dhpsioverpsi, const int& FirstIndex, const int& LastIndex) +template +void SPOSetT::evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const int& FirstIndex, + const int& LastIndex) { if (isOptimizable()) throw std::logic_error("Bug!! " + getClassName() + @@ -295,11 +296,12 @@ SPOSetT::evaluateDerivatives(ParticleSetT& P, "must be overloaded when the SPOSet is optimizable."); } -template -void -SPOSetT::evaluateDerivativesWF(ParticleSetT& P, - const OptVariablesType& optvars, Vector& dlogpsi, int FirstIndex, - int LastIndex) +template +void SPOSetT::evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + int FirstIndex, + int LastIndex) { if (isOptimizable()) throw std::logic_error("Bug!! " + getClassName() + @@ -307,12 +309,15 @@ SPOSetT::evaluateDerivativesWF(ParticleSetT& P, "must be overloaded when the SPOSet is optimizable."); } -template -void -SPOSetT::evaluateDerivRatios(const VirtualParticleSetT& VP, - const OptVariablesType& optvars, ValueVector& psi, - const ValueVector& psiinv, std::vector& ratios, Matrix& dratios, - int FirstIndex, int LastIndex) +template +void SPOSetT::evaluateDerivRatios(const VirtualParticleSetT& VP, + const OptVariablesTypeT& optvars, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios, + Matrix& dratios, + int FirstIndex, + int LastIndex) { // Match the fallback in WaveFunctionComponent that evaluates just the // ratios @@ -324,21 +329,33 @@ SPOSetT::evaluateDerivRatios(const VirtualParticleSetT& VP, "must be overloaded when the SPOSet is optimizable."); } -template -void -SPOSetT::evaluateDerivatives(ParticleSetT& P, - const OptVariablesType& optvars, Vector& dlogpsi, - Vector& dhpsioverpsi, const T& psiCurrent, const std::vector& Coeff, - const std::vector& C2node_up, const std::vector& C2node_dn, - const ValueVector& detValues_up, const ValueVector& detValues_dn, - const GradMatrix& grads_up, const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, const ValueMatrix& B_lapl, - const std::vector& detData_up, const size_t N1, const size_t N2, - const size_t NP1, const size_t NP2, - const std::vector>& lookup_tbl) +template +void SPOSetT::evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const T& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const GradMatrix& grads_up, + const GradMatrix& grads_dn, + const ValueMatrix& lapls_up, + const ValueMatrix& lapls_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const GradMatrix& B_grad, + const ValueMatrix& B_lapl, + const std::vector& detData_up, + const size_t N1, + const size_t N2, + const size_t NP1, + const size_t NP2, + const std::vector>& lookup_tbl) { if (isOptimizable()) throw std::logic_error("Bug!! " + getClassName() + @@ -346,17 +363,22 @@ SPOSetT::evaluateDerivatives(ParticleSetT& P, "must be overloaded when the SPOSet is optimizable."); } -template -void -SPOSetT::evaluateDerivativesWF(ParticleSetT& P, - const OptVariablesType& optvars, Vector& dlogpsi, - const ValueType& psiCurrent, const std::vector& Coeff, - const std::vector& C2node_up, const std::vector& C2node_dn, - const ValueVector& detValues_up, const ValueVector& detValues_dn, - const ValueMatrix& M_up, const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl) +template +void SPOSetT::evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + const ValueType& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const std::vector& detData_up, + const std::vector>& lookup_tbl) { if (isOptimizable()) throw std::logic_error("Bug!! " + getClassName() + diff --git a/src/QMCWaveFunctions/SPOSetT.h b/src/QMCWaveFunctions/SPOSetT.h index 2fc6d2508d9..26cd0652f0f 100644 --- a/src/QMCWaveFunctions/SPOSetT.h +++ b/src/QMCWaveFunctions/SPOSetT.h @@ -41,14 +41,10 @@ template class SPOSetT; namespace testing { -OptVariablesType& -getMyVars(SPOSetT& spo); -OptVariablesType& -getMyVars(SPOSetT& spo); -OptVariablesType>& -getMyVars(SPOSetT>& spo); -OptVariablesType>& -getMyVars(SPOSetT>& spo); +OptVariablesTypeT& getMyVars(SPOSetT& spo); +OptVariablesTypeT& getMyVars(SPOSetT& spo); +OptVariablesTypeT>& getMyVars(SPOSetT>& spo); +OptVariablesTypeT>& getMyVars(SPOSetT>& spo); } // namespace testing /** base class for Single-particle orbital sets @@ -139,8 +135,7 @@ class SPOSetT : public QMCTraits /** check out variational optimizable variables * @param active a super set of optimizable variables */ - virtual void - checkOutVariables(const OptVariablesType& active); + virtual void checkOutVariables(const OptVariablesTypeT& active); /// Query if this SPOSet uses OpenMP offload virtual bool @@ -181,49 +176,70 @@ class SPOSetT : public QMCTraits /// Parameter derivatives of the wavefunction and the Laplacian of the /// wavefunction - virtual void - evaluateDerivatives(ParticleSetT& P, const OptVariablesType& optvars, - Vector& dlogpsi, Vector& dhpsioverpsi, const int& FirstIndex, - const int& LastIndex); + virtual void evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const int& FirstIndex, + const int& LastIndex); /// Parameter derivatives of the wavefunction - virtual void - evaluateDerivativesWF(ParticleSetT& P, - const OptVariablesType& optvars, Vector& dlogpsi, int FirstIndex, - int LastIndex); + virtual void evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + int FirstIndex, + int LastIndex); /** Evaluate the derivative of the optimized orbitals with respect to the * parameters this is used only for MSD, to be refined for better serving * both single and multi SD */ - virtual void - evaluateDerivatives(ParticleSetT& P, const OptVariablesType& optvars, - Vector& dlogpsi, Vector& dhpsioverpsi, const T& psiCurrent, - const std::vector& Coeff, const std::vector& C2node_up, - const std::vector& C2node_dn, const ValueVector& detValues_up, - const ValueVector& detValues_dn, const GradMatrix& grads_up, - const GradMatrix& grads_dn, const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, const ValueMatrix& M_up, - const ValueMatrix& M_dn, const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, const GradMatrix& B_grad, - const ValueMatrix& B_lapl, const std::vector& detData_up, - const size_t N1, const size_t N2, const size_t NP1, const size_t NP2, - const std::vector>& lookup_tbl); + virtual void evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const T& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const GradMatrix& grads_up, + const GradMatrix& grads_dn, + const ValueMatrix& lapls_up, + const ValueMatrix& lapls_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const GradMatrix& B_grad, + const ValueMatrix& B_lapl, + const std::vector& detData_up, + const size_t N1, + const size_t N2, + const size_t NP1, + const size_t NP2, + const std::vector>& lookup_tbl); /** Evaluate the derivative of the optimized orbitals with respect to the * parameters this is used only for MSD, to be refined for better serving * both single and multi SD */ - virtual void - evaluateDerivativesWF(ParticleSetT& P, - const OptVariablesType& optvars, Vector& dlogpsi, - const ValueType& psiCurrent, const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, const ValueVector& detValues_up, - const ValueVector& detValues_dn, const ValueMatrix& M_up, - const ValueMatrix& M_dn, const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, const std::vector& detData_up, - const std::vector>& lookup_tbl); + virtual void evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + const ValueType& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const std::vector& detData_up, + const std::vector>& lookup_tbl); /** set the OrbitalSetSize * @param norbs number of single-particle orbitals @@ -258,11 +274,14 @@ class SPOSetT : public QMCTraits /// Determinant ratios and parameter derivatives of the wavefunction for /// virtual moves - virtual void - evaluateDerivRatios(const VirtualParticleSetT& VP, - const OptVariablesType& optvars, ValueVector& psi, - const ValueVector& psiinv, std::vector& ratios, Matrix& dratios, - int FirstIndex, int LastIndex); + virtual void evaluateDerivRatios(const VirtualParticleSetT& VP, + const OptVariablesTypeT& optvars, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios, + Matrix& dratios, + int FirstIndex, + int LastIndex); /** evaluate determinant ratios for virtual moves, e.g., sphere move for * nonlocalPP, of multiple walkers @@ -629,16 +648,12 @@ class SPOSetT : public QMCTraits /// number of Single-particle orbitals IndexType OrbitalSetSize; /// Optimizable variables - OptVariablesType myVars; - - friend OptVariablesType& - testing::getMyVars(SPOSetT& spo); - friend OptVariablesType& - testing::getMyVars(SPOSetT& spo); - friend OptVariablesType>& - testing::getMyVars(SPOSetT>& spo); - friend OptVariablesType>& - testing::getMyVars(SPOSetT>& spo); + OptVariablesTypeT myVars; + + friend OptVariablesTypeT& testing::getMyVars(SPOSetT& spo); + friend OptVariablesTypeT& testing::getMyVars(SPOSetT& spo); + friend OptVariablesTypeT>& testing::getMyVars(SPOSetT>& spo); + friend OptVariablesTypeT>& testing::getMyVars(SPOSetT>& spo); }; template diff --git a/src/QMCWaveFunctions/VariableSet.cpp b/src/QMCWaveFunctions/VariableSet.cpp deleted file mode 100644 index e9ee94daf1f..00000000000 --- a/src/QMCWaveFunctions/VariableSet.cpp +++ /dev/null @@ -1,314 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "VariableSet.h" -#include "io/hdf/hdf_archive.h" -#include "Host/sysutil.h" -#include -#include -#include -#include -#include - -using std::setw; - -namespace optimize -{ -void VariableSet::clear() -{ - num_active_vars = 0; - Index.clear(); - NameAndValue.clear(); - Recompute.clear(); - ParameterType.clear(); -} - -void VariableSet::insertFrom(const VariableSet& input) -{ - for (int i = 0; i < input.size(); ++i) - { - iterator loc = find(input.name(i)); - if (loc == NameAndValue.end()) - { - Index.push_back(input.Index[i]); - NameAndValue.push_back(input.NameAndValue[i]); - ParameterType.push_back(input.ParameterType[i]); - Recompute.push_back(input.Recompute[i]); - } - else - (*loc).second = input.NameAndValue[i].second; - } - num_active_vars = input.num_active_vars; -} - -void VariableSet::insertFromSum(const VariableSet& input_1, const VariableSet& input_2) -{ - value_type sum_val; - std::string vname; - - // Check that objects to be summed together have the same number of active - // variables. - if (input_1.num_active_vars != input_2.num_active_vars) - throw std::runtime_error("Inconsistent number of parameters in two provided " - "variable sets."); - - for (int i = 0; i < input_1.size(); ++i) - { - // Check that each of the equivalent variables in both VariableSet objects - // have the same name - otherwise we certainly shouldn't be adding them. - if (input_1.NameAndValue[i].first != input_2.NameAndValue[i].first) - throw std::runtime_error("Inconsistent parameters exist in the two provided " - "variable sets."); - - sum_val = input_1.NameAndValue[i].second + input_2.NameAndValue[i].second; - - iterator loc = find(input_1.name(i)); - if (loc == NameAndValue.end()) - { - Index.push_back(input_1.Index[i]); - ParameterType.push_back(input_1.ParameterType[i]); - Recompute.push_back(input_1.Recompute[i]); - - // We can reuse the above values, which aren't summed between the - // objects, but the parameter values themselves need to use the summed - // values. - vname = input_1.NameAndValue[i].first; - NameAndValue.push_back(pair_type(vname, sum_val)); - } - else - (*loc).second = sum_val; - } - num_active_vars = input_1.num_active_vars; -} - -void VariableSet::insertFromDiff(const VariableSet& input_1, const VariableSet& input_2) -{ - value_type diff_val; - std::string vname; - - // Check that objects to be subtracted have the same number of active - // variables. - if (input_1.num_active_vars != input_2.num_active_vars) - throw std::runtime_error("Inconsistent number of parameters in two provided " - "variable sets."); - - for (int i = 0; i < input_1.size(); ++i) - { - // Check that each of the equivalent variables in both VariableSet objects - // have the same name - otherwise we certainly shouldn't be subtracting them. - if (input_1.NameAndValue[i].first != input_2.NameAndValue[i].first) - throw std::runtime_error("Inconsistent parameters exist in the two provided " - "variable sets."); - - diff_val = input_1.NameAndValue[i].second - input_2.NameAndValue[i].second; - - iterator loc = find(input_1.name(i)); - if (loc == NameAndValue.end()) - { - Index.push_back(input_1.Index[i]); - ParameterType.push_back(input_1.ParameterType[i]); - Recompute.push_back(input_1.Recompute[i]); - - // We can reuse the above values, which aren't subtracted between the - // objects, but the parameter values themselves need to use the - // subtracted values. - vname = input_1.NameAndValue[i].first; - NameAndValue.push_back(pair_type(vname, diff_val)); - } - else - (*loc).second = diff_val; - } - num_active_vars = input_1.num_active_vars; -} - -void VariableSet::removeInactive() -{ - std::vector valid(Index); - std::vector acopy(NameAndValue); - std::vector bcopy(Recompute), ccopy(ParameterType); - num_active_vars = 0; - Index.clear(); - NameAndValue.clear(); - Recompute.clear(); - ParameterType.clear(); - for (int i = 0; i < valid.size(); ++i) - { - if (valid[i] > -1) - { - Index.push_back(num_active_vars++); - NameAndValue.push_back(acopy[i]); - Recompute.push_back(bcopy[i]); - ParameterType.push_back(ccopy[i]); - } - } -} - -void VariableSet::resetIndex() -{ - num_active_vars = 0; - for (int i = 0; i < Index.size(); ++i) - { - Index[i] = (Index[i] < 0) ? -1 : num_active_vars++; - } -} - -void VariableSet::getIndex(const VariableSet& selected) -{ - num_active_vars = 0; - for (int i = 0; i < NameAndValue.size(); ++i) - { - Index[i] = selected.getIndex(NameAndValue[i].first); - if (Index[i] >= 0) - num_active_vars++; - } -} - -int VariableSet::getIndex(const std::string& vname) const -{ - int loc = 0; - while (loc != NameAndValue.size()) - { - if (NameAndValue[loc].first == vname) - return Index[loc]; - ++loc; - } - return -1; -} - -void VariableSet::setIndexDefault() -{ - for (int i = 0; i < Index.size(); ++i) - Index[i] = i; -} - -void VariableSet::print(std::ostream& os, int leftPadSpaces, bool printHeader) const -{ - std::string pad_str = std::string(leftPadSpaces, ' '); - int max_name_len = 0; - if (NameAndValue.size() > 0) - max_name_len = - std::max_element(NameAndValue.begin(), NameAndValue.end(), [](const pair_type& e1, const pair_type& e2) { - return e1.first.length() < e2.first.length(); - })->first.length(); - - int max_value_len = 28; // 6 for the precision and 7 for minus sign, leading value, period, and exponent. - int max_type_len = 1; - int max_recompute_len = 1; - int max_use_len = 3; - int max_index_len = 1; - if (printHeader) - { - max_name_len = std::max(max_name_len, 4); // size of "Name" header - max_type_len = 4; - max_recompute_len = 9; - max_index_len = 5; - os << pad_str << setw(max_name_len) << "Name" - << " " << setw(max_value_len) << "Value" - << " " << setw(max_type_len) << "Type" - << " " << setw(max_recompute_len) << "Recompute" - << " " << setw(max_use_len) << "Use" - << " " << setw(max_index_len) << "Index" << std::endl; - os << pad_str << std::setfill('-') << setw(max_name_len) << "" - << " " << setw(max_value_len) << "" - << " " << setw(max_type_len) << "" - << " " << setw(max_recompute_len) << "" - << " " << setw(max_use_len) << "" - << " " << setw(max_index_len) << "" << std::endl; - os << std::setfill(' '); - } - - for (int i = 0; i < NameAndValue.size(); ++i) - { - os << pad_str << setw(max_name_len) << NameAndValue[i].first << " " << std::setprecision(6) << std::scientific - << setw(max_value_len) << NameAndValue[i].second << " " << setw(max_type_len) << ParameterType[i].second << " " - << setw(max_recompute_len) << Recompute[i].second << " "; - - os << std::defaultfloat; - - if (Index[i] < 0) - os << setw(max_use_len) << "OFF" << std::endl; - else - os << setw(max_use_len) << "ON" - << " " << setw(max_index_len) << Index[i] << std::endl; - } -} - -void VariableSet::writeToHDF(const std::string& filename, qmcplusplus::hdf_archive& hout) const -{ - hout.create(filename); - - // File Versioning - // 1.0.0 Initial file version - // 1.1.0 Files could have object-specific data from OptimizableObject::read/writeVariationalParameters - std::vector vp_file_version{1, 1, 0}; - hout.write(vp_file_version, "version"); - - std::string timestamp(getDateAndTime("%Y-%m-%d %H:%M:%S %Z")); - hout.write(timestamp, "timestamp"); - - hout.push("name_value_lists"); - - std::vector param_values; - std::vector param_names; - for (auto& pair_it : NameAndValue) - { - param_names.push_back(pair_it.first); - param_values.push_back(pair_it.second); - } - - hout.write(param_names, "parameter_names"); - hout.write(param_values, "parameter_values"); - hout.pop(); -} - -void VariableSet::readFromHDF(const std::string& filename, qmcplusplus::hdf_archive& hin) -{ - if (!hin.open(filename, H5F_ACC_RDONLY)) - { - std::ostringstream err_msg; - err_msg << "Unable to open VP file: " << filename; - throw std::runtime_error(err_msg.str()); - } - - try - { - hin.push("name_value_lists", false); - } - catch (std::runtime_error&) - { - std::ostringstream err_msg; - err_msg << "The group name_value_lists in not present in file: " << filename; - throw std::runtime_error(err_msg.str()); - } - - std::vector param_values; - hin.read(param_values, "parameter_values"); - - std::vector param_names; - hin.read(param_names, "parameter_names"); - - for (int i = 0; i < param_names.size(); i++) - { - std::string& vp_name = param_names[i]; - // Find and set values by name. - // Values that are not present do not get added. - if (find(vp_name) != end()) - (*this)[vp_name] = param_values[i]; - } - - hin.pop(); -} - - -} // namespace optimize diff --git a/src/QMCWaveFunctions/VariableSet.h b/src/QMCWaveFunctions/VariableSet.h index 66ba4da3bfb..c2c88a271c4 100644 --- a/src/QMCWaveFunctions/VariableSet.h +++ b/src/QMCWaveFunctions/VariableSet.h @@ -12,323 +12,15 @@ // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// - #ifndef QMCPLUSPLUS_OPTIMIZE_VARIABLESET_H #define QMCPLUSPLUS_OPTIMIZE_VARIABLESET_H -#include "config.h" -#include -#include -#include -#include -#include "Configuration.h" -namespace qmcplusplus -{ -class hdf_archive; -} +#include "Configuration.h" +#include "QMCWaveFunctions/VariableSetT.h" namespace optimize { -/** An enum useful for determining the type of parameter is being optimized. -* knowing this in the opt routine can reduce the computational load. -*/ -enum -{ - OTHER_P = 0, - LOGLINEAR_P, //B-spline Jastrows - LOGLINEAR_K, //K space Jastrows - LINEAR_P, //Multi-determinant coefficients - SPO_P, //SPO set Parameters - BACKFLOW_P //Backflow parameters -}; - -/** class to handle a set of variables that can be modified during optimizations - * - * A serialized container of named variables. - */ -struct VariableSet -{ - using value_type = qmcplusplus::QMCTraits::ValueType; - using real_type = qmcplusplus::QMCTraits::RealType; - - using pair_type = std::pair; - using index_pair_type = std::pair; - using iterator = std::vector::iterator; - using const_iterator = std::vector::const_iterator; - using size_type = std::vector::size_type; - - ///number of active variables - int num_active_vars; - /** store locator of the named variable - * - * if(Index[i] == -1), the named variable is not active - */ - std::vector Index; - std::vector NameAndValue; - std::vector ParameterType; - std::vector Recompute; - - ///default constructor - inline VariableSet() : num_active_vars(0) {} - ///viturval destructor for safety - virtual ~VariableSet() = default; - /** if any of Index value is not zero, return true - */ - inline bool is_optimizable() const { return num_active_vars > 0; } - ///return the number of active variables - inline int size_of_active() const { return num_active_vars; } - ///return the first const_iterator - inline const_iterator begin() const { return NameAndValue.begin(); } - ///return the last const_iterator - inline const_iterator end() const { return NameAndValue.end(); } - ///return the first iterator - inline iterator begin() { return NameAndValue.begin(); } - ///return the last iterator - inline iterator end() { return NameAndValue.end(); } - ///return the size - inline size_type size() const { return NameAndValue.size(); } - ///return the locator of the i-th Index - inline int where(int i) const { return Index[i]; } - /** return the iterator of a named parameter - * @param vname name of a parameter - * @return the locator of vname - * - * If vname is not found among the Names, return NameAndValue.end() - * so that ::end() member function can be used to validate the iterator. - */ - inline iterator find(const std::string& vname) - { - return std::find_if(NameAndValue.begin(), NameAndValue.end(), - [&vname](const auto& value) { return value.first == vname; }); - } - - /** return the Index vaule for the named parameter - * @param vname name of the variable - * - * If vname is not found in this variables, return -1; - */ - int getIndex(const std::string& vname) const; - - /* return the NameAndValue index for the named parameter - * @ param vname name of the variable - * - * Differs from getIndex by not relying on the indices cached in Index - * myVars[i] will always return the value of the parameter if it is stored - * regardless of whether or not the Index array has been correctly reset - * - * if vname is not found, return -1 - * - */ - inline int getLoc(const std::string& vname) const - { - int loc = 0; - while (loc != NameAndValue.size()) - { - if (NameAndValue[loc].first == vname) - return loc; - ++loc; - } - return -1; - } - - inline void insert(const std::string& vname, value_type v, bool enable = true, int type = OTHER_P) - { - iterator loc = find(vname); - int ind_loc = loc - NameAndValue.begin(); - if (loc == NameAndValue.end()) // && enable==true) - { - Index.push_back(ind_loc); - NameAndValue.push_back(pair_type(vname, v)); - ParameterType.push_back(index_pair_type(vname, type)); - Recompute.push_back(index_pair_type(vname, 1)); - } - //disable it if enable == false - if (!enable) - Index[ind_loc] = -1; - } - - inline void setParameterType(int type) - { - std::vector::iterator PTit(ParameterType.begin()), PTend(ParameterType.end()); - while (PTit != PTend) - { - (*PTit).second = type; - PTit++; - } - } - - inline void getParameterTypeList(std::vector& types) const - { - auto ptit(ParameterType.begin()), ptend(ParameterType.end()); - types.resize(ptend - ptit); - auto tit(types.begin()); - while (ptit != ptend) - (*tit++) = (*ptit++).second; - } - - - /** equivalent to std::map[string] operator - */ - inline value_type& operator[](const std::string& vname) - { - iterator loc = find(vname); - if (loc == NameAndValue.end()) - { - Index.push_back(-1); - NameAndValue.push_back(pair_type(vname, 0)); - ParameterType.push_back(index_pair_type(vname, 0)); - Recompute.push_back(index_pair_type(vname, 1)); - return NameAndValue.back().second; - } - return (*loc).second; - } - - - /** return the name of i-th variable - * @param i index - */ - const std::string& name(int i) const { return NameAndValue[i].first; } - - /** return the i-th value - * @param i index - */ - inline value_type operator[](int i) const { return NameAndValue[i].second; } - - /** assign the i-th value - * @param i index - */ - inline value_type& operator[](int i) { return NameAndValue[i].second; } - - /** get the i-th parameter's type - * @param i index - */ - inline int getType(int i) const { return ParameterType[i].second; } - - inline bool recompute(int i) const { return (Recompute[i].second == 1); } - - inline int& recompute(int i) { return Recompute[i].second; } - - inline void setComputed() - { - for (int i = 0; i < Recompute.size(); i++) - { - if (ParameterType[i].second == LOGLINEAR_P) - Recompute[i].second = 0; - else if (ParameterType[i].second == LOGLINEAR_K) - Recompute[i].second = 0; - else - Recompute[i].second = 1; - } - } - - inline void setRecompute() - { - for (int i = 0; i < Recompute.size(); i++) - Recompute[i].second = 1; - } - - /** clear the variable set - * - * Remove all the data. - */ - void clear(); - - /** insert a VariableSet to the list - * @param input variables - */ - void insertFrom(const VariableSet& input); - - /** sum together the values of the optimizable parameter values in - * two VariableSet objects, and set this object's values to equal them. - * @param first set of input variables - * @param second set of input variables - */ - void insertFromSum(const VariableSet& input_1, const VariableSet& input_2); - - /** take the difference (input_1-input_2) of values of the optimizable - * parameter values in two VariableSet objects, and set this object's - * values to equal them. - * @param first set of input variables - * @param second set of input variables - */ - void insertFromDiff(const VariableSet& input_1, const VariableSet& input_2); - - /** activate variables for optimization - * @param first iterator of the first name - * @param last iterator of the last name - * @param reindex if true, Index is updated - * - * The status of a variable that is not included in the [first,last) - * remains the same. - */ - template - void activate(ForwardIterator first, ForwardIterator last, bool reindex) - { - while (first != last) - { - iterator loc = find(*first++); - if (loc != NameAndValue.end()) - { - int i = loc - NameAndValue.begin(); - if (Index[i] < 0) - Index[i] = num_active_vars++; - } - } - if (reindex) - { - removeInactive(); - resetIndex(); - } - } - - /** deactivate variables for optimization - * @param first iterator of the first name - * @param last iterator of the last name - * @param reindex if true, the variales are removed and Index is updated - */ - template - void disable(ForwardIterator first, ForwardIterator last, bool reindex) - { - while (first != last) - { - int loc = find(*first++) - NameAndValue.begin(); - if (loc < NameAndValue.size()) - Index[loc] = -1; - } - if (reindex) - { - removeInactive(); - resetIndex(); - } - } - - /** reset Index - */ - void resetIndex(); - /** remove inactive variables and trim the internal data - */ - void removeInactive(); - - /** set the index table of this VariableSet - * @param selected input variables - * - * This VariableSet is a subset of selected. - */ - void getIndex(const VariableSet& selected); - - /** set default Indices, namely all the variables are active - */ - void setIndexDefault(); - - void print(std::ostream& os, int leftPadSpaces = 0, bool printHeader = false) const; - - // Save variational parameters to an HDF file - void writeToHDF(const std::string& filename, qmcplusplus::hdf_archive& hout) const; - - /// Read variational parameters from an HDF file. - /// This assumes VariableSet is already set up. - void readFromHDF(const std::string& filename, qmcplusplus::hdf_archive& hin); -}; +using VariableSet = VariableSetT; } // namespace optimize #endif diff --git a/src/QMCWaveFunctions/VariableSetT.h b/src/QMCWaveFunctions/VariableSetT.h index 9a0675a184c..807ede25b1d 100644 --- a/src/QMCWaveFunctions/VariableSetT.h +++ b/src/QMCWaveFunctions/VariableSetT.h @@ -20,7 +20,6 @@ #include #include #include -#include "VariableSet.h" #include "OrbitalSetTraits.h" namespace qmcplusplus @@ -33,23 +32,24 @@ namespace optimize /** An enum useful for determining the type of parameter is being optimized. * knowing this in the opt routine can reduce the computational load. */ -// enum -// { -// OTHER_P = 0, -// LOGLINEAR_P, //B-spline Jastrows -// LOGLINEAR_K, //K space Jastrows -// LINEAR_P, //Multi-determinant coefficients -// SPO_P, //SPO set Parameters -// BACKFLOW_P //Backflow parameters -// }; +enum +{ + OTHER_P = 0, + LOGLINEAR_P, //B-spline Jastrows + LOGLINEAR_K, //K space Jastrows + LINEAR_P, //Multi-determinant coefficients + SPO_P, //SPO set Parameters + BACKFLOW_P //Backflow parameters +}; /** class to handle a set of variables that can be modified during optimizations * * A serialized container of named variables. */ -template -struct VariableSetT +template +class VariableSetT { +public: using value_type = typename qmcplusplus::OrbitalSetTraits::ValueType; using real_type = typename qmcplusplus::OrbitalSetTraits::RealType; diff --git a/src/QMCWaveFunctions/tests/CMakeLists.txt b/src/QMCWaveFunctions/tests/CMakeLists.txt index b414f0158b4..0893abecdf4 100644 --- a/src/QMCWaveFunctions/tests/CMakeLists.txt +++ b/src/QMCWaveFunctions/tests/CMakeLists.txt @@ -140,7 +140,7 @@ set(DETERMINANT_SRC test_ci_configuration.cpp test_multi_slater_determinant.cpp) -add_library(sposets_for_testing FakeSPOT.cpp FakeSPO.cpp ConstantSPOSet.cpp ConstantSPOSetT.cpp) +add_library(sposets_for_testing FakeSPOT.cpp FakeSPO.cpp ConstantSPOSetT.cpp) target_include_directories(sposets_for_testing PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}") target_link_libraries(sposets_for_testing PUBLIC qmcwfs) diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSet.cpp b/src/QMCWaveFunctions/tests/ConstantSPOSet.cpp deleted file mode 100644 index f9ad56e3300..00000000000 --- a/src/QMCWaveFunctions/tests/ConstantSPOSet.cpp +++ /dev/null @@ -1,100 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2023 Raymond Clay and QMCPACK developers. -// -// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories -// -// File created by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories -////////////////////////////////////////////////////////////////////////////////////// - -#include "QMCWaveFunctions/tests/ConstantSPOSet.h" - -namespace qmcplusplus -{ -ConstantSPOSet::ConstantSPOSet(const std::string& my_name, const int nparticles, const int norbitals) - : SPOSet(my_name), numparticles_(nparticles) -{ - OrbitalSetSize = norbitals; - ref_psi_.resize(numparticles_, OrbitalSetSize); - ref_egrad_.resize(numparticles_, OrbitalSetSize); - ref_elapl_.resize(numparticles_, OrbitalSetSize); - - ref_psi_ = 0.0; - ref_egrad_ = 0.0; - ref_elapl_ = 0.0; -}; - -std::unique_ptr ConstantSPOSet::makeClone() const -{ - auto myclone = std::make_unique(my_name_, numparticles_, OrbitalSetSize); - myclone->setRefVals(ref_psi_); - myclone->setRefEGrads(ref_egrad_); - myclone->setRefELapls(ref_elapl_); - return myclone; -}; - -std::string ConstantSPOSet::getClassName() const { return "ConstantSPOSet"; }; - -void ConstantSPOSet::checkOutVariables(const opt_variables_type& active) -{ - APP_ABORT("ConstantSPOSet should not call checkOutVariables"); -}; - -void ConstantSPOSet::setOrbitalSetSize(int norbs) { APP_ABORT("ConstantSPOSet should not call setOrbitalSetSize()"); } - -void ConstantSPOSet::setRefVals(const ValueMatrix& vals) -{ - assert(vals.cols() == OrbitalSetSize); - assert(vals.rows() == numparticles_); - ref_psi_ = vals; -}; -void ConstantSPOSet::setRefEGrads(const GradMatrix& grads) -{ - assert(grads.cols() == OrbitalSetSize); - assert(grads.rows() == numparticles_); - ref_egrad_ = grads; -}; -void ConstantSPOSet::setRefELapls(const ValueMatrix& lapls) -{ - assert(lapls.cols() == OrbitalSetSize); - assert(lapls.rows() == numparticles_); - ref_elapl_ = lapls; -}; - -void ConstantSPOSet::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) -{ - const auto* vp = dynamic_cast(&P); - int ptcl = vp ? vp->refPtcl : iat; - assert(psi.size() == OrbitalSetSize); - for (int iorb = 0; iorb < OrbitalSetSize; iorb++) - psi[iorb] = ref_psi_(ptcl, iorb); -}; - -void ConstantSPOSet::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) -{ - for (int iorb = 0; iorb < OrbitalSetSize; iorb++) - { - psi[iorb] = ref_psi_(iat, iorb); - dpsi[iorb] = ref_egrad_(iat, iorb); - d2psi[iorb] = ref_elapl_(iat, iorb); - } -}; - -void ConstantSPOSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) -{ - for (int iat = first, i = 0; iat < last; ++iat, ++i) - { - ValueVector v(logdet[i], logdet.cols()); - GradVector g(dlogdet[i], dlogdet.cols()); - ValueVector l(d2logdet[i], d2logdet.cols()); - evaluateVGL(P, iat, v, g, l); - } -} -} //namespace qmcplusplus diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSet.h b/src/QMCWaveFunctions/tests/ConstantSPOSet.h index c42461856de..5e139d57bad 100644 --- a/src/QMCWaveFunctions/tests/ConstantSPOSet.h +++ b/src/QMCWaveFunctions/tests/ConstantSPOSet.h @@ -13,75 +13,12 @@ #ifndef QMCPLUSPLUS_CONSTANTSPOSET_H #define QMCPLUSPLUS_CONSTANTSPOSET_H -#include "QMCWaveFunctions/SPOSet.h" +#include "Configuration.h" +#include "QMCWaveFunctions/tests/ConstantSPOSetT.h" namespace qmcplusplus { -/** Constant SPOSet for testing purposes. Fixed N_elec x N_orb matrices storing value, gradients, and laplacians, etc., - * These values are accessed through standard SPOSet calls like evaluateValue, evaluateVGL, etc. - * Exists to provide deterministic and known output to objects requiring SPOSet evaluations. - * - */ -class ConstantSPOSet : public SPOSet -{ -public: - ConstantSPOSet(const std::string& my_name) = delete; - - //Constructor needs number of particles and number of orbitals. This is the minimum - //amount of information needed to sanely construct all data members and perform size - //checks later. - ConstantSPOSet(const std::string& my_name, const int nparticles, const int norbitals); - - std::unique_ptr makeClone() const override; - - std::string getClassName() const override; - - void checkOutVariables(const opt_variables_type& active) override; - - void setOrbitalSetSize(int norbs) override; - - /** - * @brief Setter method to set \phi_j(r_i). Stores input matrix in ref_psi_. - * @param Nelec x Nion ValueType matrix of \phi_j(r_i) - * @return void - */ - void setRefVals(const ValueMatrix& vals); - /** - * @brief Setter method to set \nabla_i \phi_j(r_i). Stores input matrix in ref_egrad_. - * @param Nelec x Nion GradType matrix of \grad_i \phi_j(r_i) - * @return void - */ - void setRefEGrads(const GradMatrix& grads); - /** - * @brief Setter method to set \nabla^2_i \phi_j(r_i). Stores input matrix in ref_elapl_. - * @param Nelec x Nion GradType matrix of \grad^2_i \phi_j(r_i) - * @return void - */ - void setRefELapls(const ValueMatrix& lapls); - - void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override; - - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - -private: - const int numparticles_; /// evaluate_notranspose arrays are nparticle x norb matrices. - /// To ensure consistent array sizing and enforcement, - /// we agree at construction how large these matrices will be. - /// norb is stored in SPOSet::OrbitalSetSize. - - //Value, electron gradient, and electron laplacian at "reference configuration". - //i.e. before any attempted moves. +using ConstantSPOSet = ConstantSPOSetT; - ValueMatrix ref_psi_; - GradMatrix ref_egrad_; - ValueMatrix ref_elapl_; -}; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp b/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp index ecdb5dd696a..5ada9b4f9d3 100644 --- a/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp +++ b/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp @@ -43,9 +43,8 @@ ConstantSPOSetT::makeClone() const return myclone; } -template -void -ConstantSPOSetT::checkOutVariables(const OptVariablesType& active) +template +void ConstantSPOSetT::checkOutVariables(const OptVariablesTypeT& active) { APP_ABORT("ConstantSPOSet should not call checkOutVariables"); }; diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSetT.h b/src/QMCWaveFunctions/tests/ConstantSPOSetT.h index d1ee5b24f76..335796df964 100644 --- a/src/QMCWaveFunctions/tests/ConstantSPOSetT.h +++ b/src/QMCWaveFunctions/tests/ConstantSPOSetT.h @@ -49,8 +49,7 @@ class ConstantSPOSetT : public SPOSetT return "ConstantSPOSet"; }; - void - checkOutVariables(const OptVariablesType& active) final; + void checkOutVariables(const OptVariablesTypeT& active) final; void setOrbitalSetSize(int norbs) final;