diff --git a/src/Particle/ParticleBase/RandomSeqGenerator.h b/src/Particle/ParticleBase/RandomSeqGenerator.h index b79ba65ba7..58ac332c7c 100644 --- a/src/Particle/ParticleBase/RandomSeqGenerator.h +++ b/src/Particle/ParticleBase/RandomSeqGenerator.h @@ -76,6 +76,12 @@ inline void makeGaussRandomWithEngine(std::vector>& a, RG& rng) assignGaussRand(&(a[0][0]), a.size() * D, rng); } +template +inline void makeGaussRandomWithEngine(std::vector& a, RG& rng) +{ + assignGaussRand(&(a[0]), a.size(), rng); +} + template inline void makeGaussRandomWithEngine(ParticleAttrib& a, RG& rng) { diff --git a/src/QMCDrivers/CMakeLists.txt b/src/QMCDrivers/CMakeLists.txt index 83e0ed77ad..1dd51b4008 100644 --- a/src/QMCDrivers/CMakeLists.txt +++ b/src/QMCDrivers/CMakeLists.txt @@ -49,6 +49,7 @@ set(QMCDRIVERS QMCUpdateBase.cpp GreenFunctionModifiers/DriftModifierBuilder.cpp GreenFunctionModifiers/DriftModifierUNR.cpp + MCCoords.cpp VMC/VMCUpdatePbyP.cpp VMC/VMCUpdateAll.cpp VMC/VMCFactory.cpp diff --git a/src/QMCDrivers/ContextForSteps.cpp b/src/QMCDrivers/ContextForSteps.cpp index 8a33760b26..0289994b92 100644 --- a/src/QMCDrivers/ContextForSteps.cpp +++ b/src/QMCDrivers/ContextForSteps.cpp @@ -2,7 +2,7 @@ // This file is distributed under the University of Illinois/NCSA Open Source License. // See LICENSE file in top directory for details. // -// Copyright (c) 2019 developers. +// Copyright (c) 2022 developers. // // File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory // @@ -14,22 +14,12 @@ namespace qmcplusplus { + ContextForSteps::ContextForSteps(int num_walkers, - int num_particles, - std::vector> particle_group_indexes, - RandomGenerator& random_gen) + int num_particles, + std::vector> particle_group_indexes, + RandomGenerator& random_gen) : particle_group_indexes_(particle_group_indexes), random_gen_(random_gen) -{ - /** glambda to create type T with constructor T(int) and put in it unique_ptr - * - * captures num_particles to use as argument to constructor - * gets T for type unique_ptr unique is templated on - */ - auto constructT = [num_particles](auto& unique) { - unique.reset(new typename std::remove_pointer::type(num_particles)); - }; - - walker_deltas_.resize(num_walkers * num_particles); -} +{} } // namespace qmcplusplus diff --git a/src/QMCDrivers/ContextForSteps.h b/src/QMCDrivers/ContextForSteps.h index 8ff6de05fd..cd32b624eb 100644 --- a/src/QMCDrivers/ContextForSteps.h +++ b/src/QMCDrivers/ContextForSteps.h @@ -2,7 +2,7 @@ // This file is distributed under the University of Illinois/NCSA Open Source License. // See LICENSE file in top directory for details. // -// Copyright (c) 2019 developers. +// Copyright (c) 2022 developers. // // File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory // @@ -19,9 +19,11 @@ #include "Particle/Walker.h" #include "QMCDrivers/Crowd.h" #include "ParticleBase/RandomSeqGenerator.h" +#include "MCCoords.hpp" namespace qmcplusplus { + /** Thread local context for moving walkers * * created once per driver per crowd @@ -46,22 +48,9 @@ class ContextForSteps int get_num_groups() const { return particle_group_indexes_.size(); } RandomGenerator& get_random_gen() { return random_gen_; } - void nextDeltaRs(size_t num_rs) - { - // hate to repeat this pattern, this should never resize. - walker_deltas_.resize(num_rs); - makeGaussRandomWithEngine(walker_deltas_, random_gen_); - } - - std::vector& get_walker_deltas() { return walker_deltas_; } - auto deltaRsBegin() { return walker_deltas_.begin(); }; - int getPtclGroupStart(int group) const { return particle_group_indexes_[group].first; } int getPtclGroupEnd(int group) const { return particle_group_indexes_[group].second; } - protected: - std::vector walker_deltas_; - /** indexes of start and stop of each particle group; * * Seems like these should be iterators but haven't thought through the implications. diff --git a/src/QMCDrivers/DMC/DMCBatched.cpp b/src/QMCDrivers/DMC/DMCBatched.cpp index b0123832c1..a621d02640 100644 --- a/src/QMCDrivers/DMC/DMCBatched.cpp +++ b/src/QMCDrivers/DMC/DMCBatched.cpp @@ -25,6 +25,7 @@ #include "Utilities/ProgressReportEngine.h" #include "QMCDrivers/DMC/WalkerControl.h" #include "QMCDrivers/SFNBranch.h" +#include "QMCDrivers/ContextForSteps.h" #include "MemoryUsage.h" namespace qmcplusplus @@ -60,6 +61,7 @@ void DMCBatched::setNonLocalMoveHandler(QMCHamiltonian& golden_hamiltonian) dmcdriver_input_.get_alpha(), dmcdriver_input_.get_gamma()); } +template void DMCBatched::advanceWalkers(const StateForThread& sft, Crowd& crowd, DriverTimers& timers, @@ -103,13 +105,13 @@ void DMCBatched::advanceWalkers(const StateForThread& sft, const int num_walkers = crowd.size(); //This generates an entire steps worth of deltas. - step_context.nextDeltaRs(num_walkers * sft.population.get_num_particles()); - auto it_delta_r = step_context.deltaRsBegin(); + std::size_t num_deltas = num_walkers * sft.population.get_num_particles(); std::vector grads_now(num_walkers, TrialWaveFunction::GradType(0.0)); std::vector grads_new(num_walkers, TrialWaveFunction::GradType(0.0)); std::vector ratios(num_walkers, TrialWaveFunction::PsiValueType(0.0)); - std::vector drifts(num_walkers, 0.0); + auto deltas = generateDeltas(step_context.get_random_gen(), num_deltas); + std::vector log_gf(num_walkers, 0.0); std::vector log_gb(num_walkers, 0.0); std::vector prob(num_walkers, 0.0); @@ -130,18 +132,28 @@ void DMCBatched::advanceWalkers(const StateForThread& sft, ScopedTimer pbyp_local_timer(timers.movepbyp_timer); for (int ig = 0; ig < step_context.get_num_groups(); ++ig) { - RealType tauovermass = sft.qmcdrv_input.get_tau() * sft.population.get_ptclgrp_inv_mass()[ig]; - RealType oneover2tau = 0.5 / (tauovermass); - RealType sqrttau = std::sqrt(tauovermass); + auto createTaus = [&](int ig) -> Taus { + if constexpr (std::is_same>::value) + return Taus(sft.qmcdrv_input.get_tau(), sft.population.get_ptclgrp_inv_mass()[ig], + sft.qmcdrv_input.get_spin_mass()); + else + return Taus(sft.qmcdrv_input.get_tau(), sft.population.get_ptclgrp_inv_mass()[ig]); + }; + + Taus taus = createTaus(ig); twf_dispatcher.flex_prepareGroup(walker_twfs, walker_elecs, ig); - int start_index = step_context.getPtclGroupStart(ig); - int end_index = step_context.getPtclGroupEnd(ig); + int start_index = step_context.getPtclGroupStart(ig); + int end_index = step_context.getPtclGroupEnd(ig); + std::size_t delta_offset = 0; + // end_index is one beyond last index to the group + MCCOORDS drifts; + drifts.resize(end_index - start_index - 1); for (int iat = start_index; iat < end_index; ++iat) { - auto delta_r_start = it_delta_r + iat * num_walkers; - auto delta_r_end = delta_r_start + num_walkers; + auto delta_start = delta_offset + iat * num_walkers; + auto delta_end = delta_start + num_walkers; //This is very useful thing to be able to look at in the debugger #ifndef NDEBUG @@ -154,17 +166,22 @@ void DMCBatched::advanceWalkers(const StateForThread& sft, #endif //get the displacement twf_dispatcher.flex_evalGrad(walker_twfs, walker_elecs, iat, grads_now); - sft.drift_modifier.getDrifts(tauovermass, grads_now, drifts); + sft.drift_modifier.getDrifts(taus.tauovermass, grads_now, drifts); - std::transform(drifts.begin(), drifts.end(), delta_r_start, drifts.begin(), - [sqrttau](PosType& drift, PosType& delta_r) { return drift + (sqrttau * delta_r); }); + + // std::transform(drifts.begin(), drifts.end(), delta_r_start, drifts.begin(), + // [sqrttau](PosType& drift, PosType& delta_r) { return drift + (sqrttau * delta_r); }); // only DMC does this // TODO: rr needs a real name std::vector rr(num_walkers, 0.0); - assert(rr.size() == delta_r_end - delta_r_start); - std::transform(delta_r_start, delta_r_end, rr.begin(), - [tauovermass](auto& delta_r) { return tauovermass * dot(delta_r, delta_r); }); + assert(rr.size() == delta_end - delta_start - 1); + for(std::size_t ip = delta_start; ip < delta_end; ++ip) + { + rr[ip] = taus.tauovermass * dot(deltas.rs[ip], deltas.rs[ip]); + } + //std::transform(delta_start, delta_end, rr.begin(), + // [tauovermass](auto& delta_r) { return tauovermass * dot(delta_r, delta_r); }); // in DMC this was done here, changed to match VMCBatched pending factoring to common source // if (rr > m_r2max) @@ -176,7 +193,7 @@ void DMCBatched::advanceWalkers(const StateForThread& sft, for (int i = 0; i < rr.size(); ++i) assert(std::isfinite(rr[i])); #endif - ps_dispatcher.flex_makeMove(walker_elecs, iat, drifts); + ps_dispatcher.flex_makeMove(walker_elecs, iat, drifts.rs); twf_dispatcher.flex_calcRatioGrad(walker_twfs, walker_elecs, iat, ratios, grads_new); @@ -196,18 +213,25 @@ void DMCBatched::advanceWalkers(const StateForThread& sft, rr_proposed[iw] += rr[iw]; } - std::transform(delta_r_start, delta_r_end, log_gf.begin(), [](auto& delta_r) { + for(std::size_t ip = delta_start; ip < delta_end; ++ip) + { constexpr RealType mhalf(-0.5); - return mhalf * dot(delta_r, delta_r); - }); + log_gf[ip] = mhalf * dot(deltas.rs[ip], deltas.rs[ip]); + } + // std::transform(delta_r_start, delta_r_end, log_gf.begin(), [](auto& delta_r) { + // constexpr RealType mhalf(-0.5); + // return mhalf * dot(delta_r, delta_r); + // }); - sft.drift_modifier.getDrifts(tauovermass, grads_new, drifts); + sft.drift_modifier.getDrifts(taus.tauovermass, grads_new, drifts); - std::transform(crowd.beginElectrons(), crowd.endElectrons(), drifts.begin(), drifts.begin(), - [iat](auto& elecs, auto& drift) { return elecs.get().R[iat] - elecs.get().getActivePos() - drift; }); + std::transform(crowd.beginElectrons(), crowd.endElectrons(), drifts.rs.begin(), drifts.rs.begin(), + [iat](auto& elecs, auto& drift) { + return elecs.get().R[iat] - elecs.get().getActivePos() - drift; + }); - std::transform(drifts.begin(), drifts.end(), log_gb.begin(), - [oneover2tau](auto& drift) { return -oneover2tau * dot(drift, drift); }); + std::transform(drifts.rs.begin(), drifts.rs.end(), log_gb.begin(), + [taus](auto& drift) { return -taus.oneover2tau * dot(drift, drift); }); for (int iw = 0; iw < num_walkers; ++iw) prob[iw] = std::norm(ratios[iw]) * std::exp(log_gb[iw] - log_gf[iw]); @@ -344,7 +368,11 @@ void DMCBatched::runDMCStep(int crowd_id, // Are we entering the the last step of a block to recompute at? const bool recompute_this_step = (sft.is_recomputing_block && (step + 1) == max_steps); const bool accumulate_this_step = true; - advanceWalkers(sft, crowd, timers, dmc_timers, *context_for_steps[crowd_id], recompute_this_step, + if(sft.population.get_golden_electrons()->isSpinor()) + advanceWalkers>(sft, crowd, timers, dmc_timers, *context_for_steps[crowd_id], recompute_this_step, + accumulate_this_step); + else + advanceWalkers>(sft, crowd, timers, dmc_timers, *context_for_steps[crowd_id], recompute_this_step, accumulate_this_step); } @@ -410,7 +438,7 @@ bool DMCBatched::run() { // walker initialization ScopedTimer local_timer(timers_.init_walkers_timer); ParallelExecutor<> section_start_task; - section_start_task(crowds_.size(), initialLogEvaluation, std::ref(crowds_), std::ref(step_contexts_)); + section_start_task(crowds_.size(), initialLogEvaluation, std::ref(crowds_), step_contexts_); } print_mem("DMCBatched after initialLogEvaluation", app_summary()); @@ -444,7 +472,7 @@ bool DMCBatched::run() { ScopedTimer local_timer(timers_.run_steps_timer); dmc_state.step = step; - crowd_task(crowds_.size(), runDMCStep, dmc_state, timers_, dmc_timers_, std::ref(step_contexts_), + crowd_task(crowds_.size(), runDMCStep, dmc_state, timers_, dmc_timers_, step_contexts_, std::ref(crowds_)); { diff --git a/src/QMCDrivers/DMC/DMCBatched.h b/src/QMCDrivers/DMC/DMCBatched.h index bb397a2fda..2b214b569b 100644 --- a/src/QMCDrivers/DMC/DMCBatched.h +++ b/src/QMCDrivers/DMC/DMCBatched.h @@ -104,7 +104,7 @@ class DMCBatched : public QMCDriverNew void process(xmlNodePtr cur) override; bool run() override; - + // This is the task body executed at crowd scope // it does not have access to object members by design static void runDMCStep(int crowd_id, @@ -132,6 +132,7 @@ class DMCBatched : public QMCDriverNew ///walker controller for load-balance std::unique_ptr walker_controller_; + template static void advanceWalkers(const StateForThread& sft, Crowd& crowd, DriverTimers& timers, @@ -142,7 +143,7 @@ class DMCBatched : public QMCDriverNew friend class qmcplusplus::testing::DMCBatchedTest; }; - + } // namespace qmcplusplus #endif diff --git a/src/QMCDrivers/GreenFunctionModifiers/DriftModifierBase.h b/src/QMCDrivers/GreenFunctionModifiers/DriftModifierBase.h index cb70d532cc..4cc12b41f1 100644 --- a/src/QMCDrivers/GreenFunctionModifiers/DriftModifierBase.h +++ b/src/QMCDrivers/GreenFunctionModifiers/DriftModifierBase.h @@ -17,10 +17,13 @@ #include "Particle/ParticleSet.h" #include "QMCWaveFunctions/TrialWaveFunction.h" #include "QMCHamiltonians/QMCHamiltonian.h" +#include "MCCoords.hpp" namespace qmcplusplus { -/// this class implements drift modification +/** this class implements drift modification + * its a completely pointless type erasue. + */ class DriftModifierBase { public: @@ -38,7 +41,8 @@ class DriftModifierBase virtual void getDrift(RealType tau, const ComplexType& qf, ParticleSet::Scalar_t& drift) const = 0; - virtual void getDrifts(RealType tau, const std::vector& qf, std::vector&) const = 0; + virtual void getDrifts(RealType tau, const std::vector& qf, MCCoords&) const = 0; + virtual void getDrifts(RealType tau, const std::vector& qf, MCCoords&) const = 0; virtual bool parseXML(xmlNodePtr cur) { return true; } diff --git a/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.cpp b/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.cpp index 82422eb4c6..d1c0c861d8 100644 --- a/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.cpp +++ b/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.cpp @@ -82,11 +82,19 @@ void DriftModifierUNR::getDrift(RealType tau, const ComplexType& qf, ParticleSet #endif } -void DriftModifierUNR::getDrifts(RealType tau, const std::vector& qf, std::vector& drift) const +void DriftModifierUNR::getDrifts(RealType tau, const std::vector& qf, MCCoords& drift) const { for (int i = 0; i < qf.size(); ++i) { - getDrift(tau, qf[i], drift[i]); + getDrift(tau, qf[i], drift.rs[i]); + } +} + +void DriftModifierUNR::getDrifts(RealType tau, const std::vector& qf, MCCoords& drift) const +{ + for (int i = 0; i < qf.size(); ++i) + { + getDrift(tau, qf[i], drift.rs[i]); } } diff --git a/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.h b/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.h index 64a578007b..807753dfb9 100644 --- a/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.h +++ b/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.h @@ -14,6 +14,7 @@ #define QMCPLUSPLUS_DRIFTMODIFIER_UNR_H #include "QMCDrivers/GreenFunctionModifiers/DriftModifierBase.h" +#include "QMCDrivers/MCCoords.hpp" namespace qmcplusplus { @@ -23,8 +24,9 @@ class DriftModifierUNR : public DriftModifierBase using RealType = QMCTraits::RealType; using PosType = QMCTraits::PosType; - void getDrifts(RealType tau, const std::vector& qf, std::vector&) const final; - + void getDrifts(RealType tau, const std::vector& qf, MCCoords&) const final; + void getDrifts(RealType tau, const std::vector& qf, MCCoords&) const final; + void getDrift(RealType tau, const GradType& qf, PosType& drift) const final; void getDrift(RealType tau, const ComplexType& qf, ParticleSet::Scalar_t& drift) const final; @@ -38,6 +40,7 @@ class DriftModifierUNR : public DriftModifierBase RealType a_; }; + } // namespace qmcplusplus #endif diff --git a/src/QMCDrivers/MCCoords.cpp b/src/QMCDrivers/MCCoords.cpp new file mode 100644 index 0000000000..1de9a4abcb --- /dev/null +++ b/src/QMCDrivers/MCCoords.cpp @@ -0,0 +1,31 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2022 developers. +// +// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#include "MCCoords.hpp" + +namespace qmcplusplus +{ +template +void MCCoords::resize(const std::size_t size) +{ + rs.resize(size); +} + +// template<> +// void MCCoords::resize(const std::size_t size) +// { +// rs.resize(size); +// spins.resize(size); +// } + +template struct MCCoords; +template struct MCCoords; +} diff --git a/src/QMCDrivers/MCCoords.hpp b/src/QMCDrivers/MCCoords.hpp new file mode 100644 index 0000000000..d14e455225 --- /dev/null +++ b/src/QMCDrivers/MCCoords.hpp @@ -0,0 +1,118 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2022 developers. +// +// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_MCCOORDS_HPP +#define QMCPLUSPLUS_MCCOORDS_HPP + +#include "Configuration.h" +#include "type_traits/complex_help.hpp" +#include "ParticleBase/RandomSeqGenerator.h" + +#include + +namespace qmcplusplus +{ + +enum class MCCoordsTypes +{ + RS, + RSSPINS +}; + +template +struct MCCoords; + +template +struct MCCoords +{ + static constexpr MCCoordsTypes mct = MCT; + // This cleans up some other code. + void resize(const std::size_t size); + std::vector rs; +}; + +template<> +struct MCCoords +{ + static constexpr MCCoordsTypes mct = MCCoordsTypes::RSSPINS; + // This cleans up some other code. + void resize(const std::size_t size) + { + rs.resize(size); + spins.resize(size); + } + std::vector rs; + std::vector> spins; +}; + +template +struct MCCIt +{ + std::vector::iterator irs; +}; + +template<> +struct MCCIt +{ + std::vector::iterator irs; + std::vector>::iterator spins; +}; + +template +struct Taus +{ + Real tauovermass; + Real oneover2tau; + Real sqrttau ; + + Taus(Real tau, Real grp_inv_mass) { + Real tauovermass = tau * grp_inv_mass; + Real oneover2tau = 0.5 / (tauovermass); + Real sqrttau = std::sqrt(tauovermass); + } +}; + +template +struct Taus : public Taus +{ + using Base = Taus; + Real spin_tauovermass; + Real spin_oneover2tau; + Real spin_sqrttau ; + Taus(Real tau, Real grp_inv_mass, Real spin_mass) : Base(tau, grp_inv_mass) + { + Real spin_tauovermass = Base::tauovermass / spin_mass; + Real spin_oneover2tau = 0.5 / (spin_tauovermass); + Real spin_sqrttau = std::sqrt(spin_tauovermass); + } +}; + + +template +MCC generateDeltas(RNG rng, size_t num_rs) +{ + MCC mc_coords; + mc_coords.rs.resize(num_rs); + makeGaussRandomWithEngine(mc_coords.rs, rng); + // hate to repeat this pattern, this should never resize. + if constexpr (std::is_same>::value) + { + mc_coords.spins.resize(num_rs); + makeGaussRandomWithEngine(mc_coords.spins, rng); + } + return mc_coords; +} + +extern template struct MCCoords; +extern template struct MCCoords; +} // namespace qmcplusplus + +#endif diff --git a/src/QMCDrivers/QMCDriverInput.h b/src/QMCDrivers/QMCDriverInput.h index b311f12a3e..08c545d97b 100644 --- a/src/QMCDrivers/QMCDriverInput.h +++ b/src/QMCDrivers/QMCDriverInput.h @@ -71,6 +71,7 @@ class QMCDriverInput IndexType steps_between_samples_ = 1; IndexType samples_per_thread_ = 0; RealType tau_ = 0.1; + RealType spin_mass_ = 1.0; // call recompute at the end of each block in the full/mixed precision case. IndexType blocks_between_recompute_ = std::is_same::value ? 0 : 1; bool append_run_ = false; @@ -113,6 +114,7 @@ class QMCDriverInput IndexType get_steps_between_samples() const { return steps_between_samples_; } IndexType get_samples_per_thread() const { return samples_per_thread_; } RealType get_tau() const { return tau_; } + RealType get_spin_mass() const { return spin_mass_; } IndexType get_blocks_between_recompute() const { return blocks_between_recompute_; } bool get_append_run() const { return append_run_; } input::PeriodStride get_walker_dump_period() const { return walker_dump_period_; } diff --git a/src/QMCDrivers/QMCDriverNew.cpp b/src/QMCDrivers/QMCDriverNew.cpp index 0fd313be26..0737c5a5cb 100644 --- a/src/QMCDrivers/QMCDriverNew.cpp +++ b/src/QMCDrivers/QMCDriverNew.cpp @@ -294,20 +294,19 @@ void QMCDriverNew::createRngsStepContexts(int num_crowds) for (int i = 0; i < num_crowds; ++i) { Rng[i].reset(RandomNumberControl::Children[i].release()); - step_contexts_[i] = std::make_unique(crowds_[i]->size(), population_.get_num_particles(), - population_.get_particle_group_indexes(), *(Rng[i])); + step_contexts_[i] = std::make_unique(crowds_[i]->size(), population_.get_num_particles(), population_.get_particle_group_indexes(), *(Rng[i])); } } void QMCDriverNew::initialLogEvaluation(int crowd_id, UPtrVector& crowds, - UPtrVector& context_for_steps) + UPtrVector& step_contexts) { Crowd& crowd = *(crowds[crowd_id]); if (crowd.size() == 0) return; - crowd.setRNGForHamiltonian(context_for_steps[crowd_id]->get_random_gen()); + crowd.setRNGForHamiltonian(step_contexts[crowd_id]->get_random_gen()); auto& ps_dispatcher = crowd.dispatchers_.ps_dispatcher_; auto& twf_dispatcher = crowd.dispatchers_.twf_dispatcher_; auto& ham_dispatcher = crowd.dispatchers_.ham_dispatcher_; diff --git a/src/QMCDrivers/QMCDriverNew.h b/src/QMCDrivers/QMCDriverNew.h index 7042070e8f..8773337c9a 100644 --- a/src/QMCDrivers/QMCDriverNew.h +++ b/src/QMCDrivers/QMCDriverNew.h @@ -27,6 +27,7 @@ #define QMCPLUSPLUS_QMCDRIVERNEW_H #include +#include #include "Configuration.h" #include "Pools/PooledData.h" @@ -74,6 +75,7 @@ class QMCDriverNew : public QMCDriverInterface, public MPIObjectBase using RealType = QMCTraits::RealType; using IndexType = QMCTraits::IndexType; using FullPrecRealType = QMCTraits::FullPrecRealType; + /** separate but similar to QMCModeEnum * * a code smell @@ -227,8 +229,7 @@ class QMCDriverNew : public QMCDriverInterface, public MPIObjectBase */ void startup(xmlNodePtr cur, const QMCDriverNew::AdjustedWalkerCounts& awc); - static void initialLogEvaluation(int crowd_id, UPtrVector& crowds, UPtrVector& step_context); - + static void initialLogEvaluation(int crowd_id, UPtrVector& crowds, UPtrVector& step_contexts); /** should be set in input don't see a reason to set individually * @param pbyp if true, use particle-by-particle update @@ -380,7 +381,7 @@ class QMCDriverNew : public QMCDriverInterface, public MPIObjectBase /** Per crowd move contexts, this is where the DistanceTables etc. reside */ - std::vector> step_contexts_; + UPtrVector step_contexts_; ///Random number generators UPtrVector Rng; @@ -429,6 +430,7 @@ class QMCDriverNew : public QMCDriverInterface, public MPIObjectBase friend class qmcplusplus::testing::DMCBatchedTest; friend class qmcplusplus::testing::QMCDriverNewTestWrapper; }; + } // namespace qmcplusplus #endif diff --git a/src/QMCDrivers/VMC/VMCBatched.cpp b/src/QMCDrivers/VMC/VMCBatched.cpp index 8798cce367..146cdcdc14 100644 --- a/src/QMCDrivers/VMC/VMCBatched.cpp +++ b/src/QMCDrivers/VMC/VMCBatched.cpp @@ -35,6 +35,7 @@ VMCBatched::VMCBatched(const ProjectData& project_data, collect_samples_(false) {} +template void VMCBatched::advanceWalkers(const StateForThread& sft, Crowd& crowd, QMCDriverNew::DriverTimers& timers, @@ -83,60 +84,83 @@ void VMCBatched::advanceWalkers(const StateForThread& sft, for (int sub_step = 0; sub_step < sft.qmcdrv_input.get_sub_steps(); sub_step++) { //This generates an entire steps worth of deltas. - step_context.nextDeltaRs(num_walkers * sft.population.get_num_particles()); + std::size_t num_deltas = num_walkers * sft.population.get_num_particles(); + auto deltas = generateDeltas(step_context.get_random_gen(), num_deltas); // up and down electrons are "species" within qmpack for (int ig = 0; ig < step_context.get_num_groups(); ++ig) //loop over species { - RealType tauovermass = sft.qmcdrv_input.get_tau() * sft.population.get_ptclgrp_inv_mass()[ig]; - RealType oneover2tau = 0.5 / (tauovermass); - RealType sqrttau = std::sqrt(tauovermass); + auto createTaus = [&](int ig) -> Taus { + if constexpr (std::is_same>::value) + return Taus(sft.qmcdrv_input.get_tau(), sft.population.get_ptclgrp_inv_mass()[ig], + sft.qmcdrv_input.get_spin_mass()); + else + return Taus(sft.qmcdrv_input.get_tau(), sft.population.get_ptclgrp_inv_mass()[ig]); + }; + + Taus taus = createTaus(ig); twf_dispatcher.flex_prepareGroup(walker_twfs, walker_elecs, ig); - int start_index = step_context.getPtclGroupStart(ig); - int end_index = step_context.getPtclGroupEnd(ig); + int start_index = step_context.getPtclGroupStart(ig); + int end_index = step_context.getPtclGroupEnd(ig); + std::size_t delta_offset = 0; for (int iat = start_index; iat < end_index; ++iat) { // step_context.deltaRsBegin returns an iterator to a flat series of PosTypes // fastest in walkers then particles - auto delta_r_start = step_context.deltaRsBegin() + iat * num_walkers; - auto delta_r_end = delta_r_start + num_walkers; - + auto delta_start = delta_offset + iat * num_walkers; + auto delta_end = delta_start + num_walkers; + MCCOORDS drifts; + drifts.resize(end_index - start_index - 1); if (use_drift) { twf_dispatcher.flex_evalGrad(walker_twfs, walker_elecs, iat, grads_now); - sft.drift_modifier.getDrifts(tauovermass, grads_now, drifts); + sft.drift_modifier.getDrifts(taus.tauovermass, grads_now, drifts); - std::transform(drifts.begin(), drifts.end(), delta_r_start, drifts.begin(), - [sqrttau](const PosType& drift, const PosType& delta_r) { - return drift + (sqrttau * delta_r); - }); + for (std::size_t ip = delta_start; ip < delta_end; ++ip) + { + drifts.rs[ip] = drifts.rs[ip] + (taus.sqrttau * deltas.rs[ip]); + if constexpr (std::is_same>::value) + { + drifts.spins[ip] = drifts.spins[ip] + taus.spin_sqrttau * deltas.spins[ip]; + } + } } else { - std::transform(delta_r_start, delta_r_end, drifts.begin(), - [sqrttau](const PosType& delta_r) { return sqrttau * delta_r; }); + for (std::size_t ip = delta_start; ip < delta_end; ++ip) + { + drifts.rs[ip] = drifts.rs[ip] + (taus.sqrttau * deltas.rs[ip]); + if constexpr (std::is_same>::value) + { + drifts.spins[ip] = drifts.spins[ip] + taus.spin_sqrttau * deltas.spins[ip]; + } + } } - ps_dispatcher.flex_makeMove(walker_elecs, iat, drifts); + ps_dispatcher.flex_makeMove(walker_elecs, iat, drifts.rs); // This is inelegant if (use_drift) { twf_dispatcher.flex_calcRatioGrad(walker_twfs, walker_elecs, iat, ratios, grads_new); - std::transform(delta_r_start, delta_r_end, log_gf.begin(), - [](const PosType& delta_r) { return mhalf * dot(delta_r, delta_r); }); - sft.drift_modifier.getDrifts(tauovermass, grads_new, drifts); + for (std::size_t ip = delta_start; ip < delta_end; ++ip) + { + constexpr RealType mhalf(-0.5); + log_gf[ip] = mhalf * dot(deltas.rs[ip], deltas.rs[ip]); + } + + sft.drift_modifier.getDrifts(taus.tauovermass, grads_new, drifts); - std::transform(crowd.beginElectrons(), crowd.endElectrons(), drifts.begin(), drifts.begin(), + std::transform(crowd.beginElectrons(), crowd.endElectrons(), drifts.rs.begin(), drifts.rs.begin(), [iat](const ParticleSet& elecs, const PosType& drift) { return elecs.R[iat] - elecs.getActivePos() - drift; }); - std::transform(drifts.begin(), drifts.end(), log_gb.begin(), - [oneover2tau](const PosType& drift) { return -oneover2tau * dot(drift, drift); }); + std::transform(drifts.rs.begin(), drifts.rs.end(), log_gb.begin(), + [taus](const PosType& drift) { return -taus.oneover2tau * dot(drift, drift); }); } else { @@ -222,8 +246,8 @@ void VMCBatched::advanceWalkers(const StateForThread& sft, void VMCBatched::runVMCStep(int crowd_id, const StateForThread& sft, DriverTimers& timers, - std::vector>& context_for_steps, - std::vector>& crowds) + UPtrVector& context_for_steps, + UPtrVector& crowds) { Crowd& crowd = *(crowds[crowd_id]); crowd.setRNGForHamiltonian(context_for_steps[crowd_id]->get_random_gen()); @@ -233,7 +257,12 @@ void VMCBatched::runVMCStep(int crowd_id, const bool recompute_this_step = (sft.is_recomputing_block && (step + 1) == max_steps); // For VMC we don't call this method for warmup steps. const bool accumulate_this_step = true; - advanceWalkers(sft, crowd, timers, *context_for_steps[crowd_id], recompute_this_step, accumulate_this_step); + if (sft.population.get_golden_electrons()->isSpinor()) + advanceWalkers>(sft, crowd, timers, *context_for_steps[crowd_id], + recompute_this_step, accumulate_this_step); + else + advanceWalkers>(sft, crowd, timers, *context_for_steps[crowd_id], recompute_this_step, + accumulate_this_step); } void VMCBatched::process(xmlNodePtr node) @@ -261,7 +290,6 @@ int VMCBatched::compute_samples_per_rank(const QMCDriverInput& qmcdriver_input, return nblocks * nsteps * local_walkers; } - /** Runs the actual VMC section * * Dependent on base class state machine @@ -289,7 +317,7 @@ bool VMCBatched::run() { // walker initialization ScopedTimer local_timer(timers_.init_walkers_timer); ParallelExecutor<> section_start_task; - section_start_task(crowds_.size(), initialLogEvaluation, std::ref(crowds_), std::ref(step_contexts_)); + section_start_task(crowds_.size(), initialLogEvaluation, std::ref(crowds_), step_contexts_); } print_mem("VMCBatched after initialLogEvaluation", app_summary()); @@ -299,19 +327,21 @@ bool VMCBatched::run() if (qmcdriver_input_.get_warmup_steps() > 0) { // Run warm-up steps - auto runWarmupStep = [](int crowd_id, StateForThread& sft, DriverTimers& timers, - UPtrVector& context_for_steps, UPtrVector& crowds) { + auto runWarmupStep = [](int crowd_id, StateForThread& sft, DriverTimers& timers, auto& context_for_steps, + UPtrVector& crowds) { Crowd& crowd = *(crowds[crowd_id]); const bool recompute = false; const bool accumulate_this_step = false; - advanceWalkers(sft, crowd, timers, *context_for_steps[crowd_id], recompute, accumulate_this_step); + if(sft.population.get_golden_electrons()->isSpinor()) + advanceWalkers>(sft, crowd, timers, *context_for_steps[crowd_id], recompute, accumulate_this_step); + else + advanceWalkers>(sft, crowd, timers, *context_for_steps[crowd_id], recompute, accumulate_this_step); }; for (int step = 0; step < qmcdriver_input_.get_warmup_steps(); ++step) { ScopedTimer local_timer(timers_.run_steps_timer); - crowd_task(crowds_.size(), runWarmupStep, vmc_state, std::ref(timers_), std::ref(step_contexts_), - std::ref(crowds_)); + crowd_task(crowds_.size(), runWarmupStep, vmc_state, std::ref(timers_), step_contexts_, std::ref(crowds_)); } app_log() << "Warm-up is completed!" << std::endl; @@ -335,7 +365,7 @@ bool VMCBatched::run() { ScopedTimer local_timer(timers_.run_steps_timer); vmc_state.step = step; - crowd_task(crowds_.size(), runVMCStep, vmc_state, timers_, std::ref(step_contexts_), std::ref(crowds_)); + crowd_task(crowds_.size(), runVMCStep, vmc_state, timers_, step_contexts_, std::ref(crowds_)); if (collect_samples_) { diff --git a/src/QMCDrivers/VMC/VMCBatched.h b/src/QMCDrivers/VMC/VMCBatched.h index 78380eddb4..70098dbe9d 100644 --- a/src/QMCDrivers/VMC/VMCBatched.h +++ b/src/QMCDrivers/VMC/VMCBatched.h @@ -50,6 +50,7 @@ class VMCBatched : public QMCDriverNew const VMCDriverInput& vmcdrv_input; const DriftModifierBase& drift_modifier; const MCPopulation& population; + const MCCoordsTypes mc_coord_type; IndexType recalculate_properties_period; IndexType step = -1; bool is_recomputing_block = false; @@ -58,7 +59,7 @@ class VMCBatched : public QMCDriverNew const VMCDriverInput& vmci, DriftModifierBase& drift_mod, MCPopulation& pop) - : qmcdrv_input(qmci), vmcdrv_input(vmci), drift_modifier(drift_mod), population(pop) + : qmcdrv_input(qmci), vmcdrv_input(vmci), drift_modifier(drift_mod), population(pop), mc_coord_type(population.get_golden_electrons()->isSpinor() ? MCCoordsTypes::RSSPINS : MCCoordsTypes::RS) {} }; @@ -80,6 +81,7 @@ class VMCBatched : public QMCDriverNew * MCWalkerConfiguration layer removed. * Obfuscation of state changes via buffer and MCWalkerconfiguration require this be tested well */ + template static void advanceWalkers(const StateForThread& sft, Crowd& crowd, DriverTimers& timers, @@ -92,8 +94,8 @@ class VMCBatched : public QMCDriverNew static void runVMCStep(int crowd_id, const StateForThread& sft, DriverTimers& timers, - std::vector>& context_for_steps, - std::vector>& crowds); + UPtrVector& context_for_steps, + UPtrVector& crowds); /** transitional interface on the way to better walker count adjustment handling. * returns a closure taking walkers per rank and accomplishing what calc_default_local_walkers does. @@ -108,6 +110,7 @@ class VMCBatched : public QMCDriverNew */ void enable_sample_collection(); + private: int prevSteps; int prevStepsBetweenSamples; @@ -120,7 +123,6 @@ class VMCBatched : public QMCDriverNew /// Copy operator (disabled). VMCBatched& operator=(const VMCBatched&) = delete; - /// Storage for samples (later used in optimizer) SampleStack& samples_; /// Sample collection flag @@ -133,6 +135,7 @@ class VMCBatched : public QMCDriverNew }; extern std::ostream& operator<<(std::ostream& o_stream, const VMCBatched& vmc_batched); + } // namespace qmcplusplus #endif