From 2fdd9b8ff2469a5756fa9b161eb6d6d45cba32cc Mon Sep 17 00:00:00 2001 From: Peter Doak Date: Tue, 25 Jan 2022 15:15:34 -0500 Subject: [PATCH 1/4] Exposing the MCCoord abstraction through the driver classes --- .../ParticleBase/RandomSeqGenerator.h | 6 ++ src/QMCDrivers/CMakeLists.txt | 4 +- src/QMCDrivers/ContextForSteps.cpp | 9 ++- src/QMCDrivers/ContextForSteps.h | 64 +++++++++++++++++-- src/QMCDrivers/DMC/DMCBatched.cpp | 25 ++++++-- src/QMCDrivers/DMC/DMCBatched.h | 11 +++- src/QMCDrivers/QMCDriverNew.cpp | 22 +++++-- src/QMCDrivers/QMCDriverNew.h | 19 ++++-- src/QMCDrivers/VMC/VMCBatched.cpp | 32 ++++++---- src/QMCDrivers/VMC/VMCBatched.h | 13 +++- src/type_traits/template_types.hpp | 11 ++++ 11 files changed, 175 insertions(+), 41 deletions(-) diff --git a/src/Particle/ParticleBase/RandomSeqGenerator.h b/src/Particle/ParticleBase/RandomSeqGenerator.h index b79ba65ba7..58ac332c7c 100644 --- a/src/Particle/ParticleBase/RandomSeqGenerator.h +++ b/src/Particle/ParticleBase/RandomSeqGenerator.h @@ -76,6 +76,12 @@ inline void makeGaussRandomWithEngine(std::vector>& a, RG& rng) assignGaussRand(&(a[0][0]), a.size() * D, rng); } +template +inline void makeGaussRandomWithEngine(std::vector& a, RG& rng) +{ + assignGaussRand(&(a[0]), a.size(), rng); +} + template inline void makeGaussRandomWithEngine(ParticleAttrib& a, RG& rng) { diff --git a/src/QMCDrivers/CMakeLists.txt b/src/QMCDrivers/CMakeLists.txt index 83e0ed77ad..27882b160a 100644 --- a/src/QMCDrivers/CMakeLists.txt +++ b/src/QMCDrivers/CMakeLists.txt @@ -44,7 +44,6 @@ set(QMCDRIVERS WaveFunctionTester.cpp WalkerControlBase.cpp CloneManager.cpp - ContextForSteps.cpp Crowd.cpp QMCUpdateBase.cpp GreenFunctionModifiers/DriftModifierBuilder.cpp @@ -75,7 +74,8 @@ set(QMCDRIVERS CorrelatedSampling/CSVMC.cpp CorrelatedSampling/CSVMCUpdateAll.cpp CorrelatedSampling/CSVMCUpdatePbyP.cpp - CorrelatedSampling/CSUpdateBase.cpp) + CorrelatedSampling/CSUpdateBase.cpp + ContextForSteps.cpp) if(QMC_CUDA) set(QMCDRIVERS 
${QMCDRIVERS} VMC/VMC_CUDA.cpp DMC/DMC_CUDA.cpp WFOpt/QMCCostFunctionCUDA.cpp) diff --git a/src/QMCDrivers/ContextForSteps.cpp b/src/QMCDrivers/ContextForSteps.cpp index 8a33760b26..91a670e35e 100644 --- a/src/QMCDrivers/ContextForSteps.cpp +++ b/src/QMCDrivers/ContextForSteps.cpp @@ -14,7 +14,9 @@ namespace qmcplusplus { -ContextForSteps::ContextForSteps(int num_walkers, + +template +ContextForSteps::ContextForSteps(int num_walkers, int num_particles, std::vector> particle_group_indexes, RandomGenerator& random_gen) @@ -29,7 +31,10 @@ ContextForSteps::ContextForSteps(int num_walkers, unique.reset(new typename std::remove_pointer::type(num_particles)); }; - walker_deltas_.resize(num_walkers * num_particles); + walker_deltas_.rs.resize(num_walkers * num_particles); } +template class ContextForSteps; +template class ContextForSteps; + } // namespace qmcplusplus diff --git a/src/QMCDrivers/ContextForSteps.h b/src/QMCDrivers/ContextForSteps.h index 8ff6de05fd..1cdf5f71b2 100644 --- a/src/QMCDrivers/ContextForSteps.h +++ b/src/QMCDrivers/ContextForSteps.h @@ -22,6 +22,7 @@ namespace qmcplusplus { + /** Thread local context for moving walkers * * created once per driver per crowd @@ -30,6 +31,7 @@ namespace qmcplusplus * * */ +template class ContextForSteps { public: @@ -38,6 +40,40 @@ class ContextForSteps using MCPWalker = Walker; using RealType = QMCTraits::RealType; + enum class MCCoordsTypes + { + RS, + RSSPINS + }; + + static constexpr MCCoordsTypes translated_ct = spinor ? 
MCCoordsTypes::RSSPINS : MCCoordsTypes::RS; + + template + struct MCCoords + { + std::vector rs; + }; + + template<> + struct MCCoords + { + std::vector rs; + std::vector> spins; + }; + + template + struct MCCIt + { + std::vector::iterator irs; + }; + + template<> + struct MCCIt + { + std::vector::iterator irs; + std::vector>::iterator spins; + }; + ContextForSteps(int num_walkers, int num_particles, std::vector> particle_group_indexes, @@ -46,21 +82,34 @@ class ContextForSteps int get_num_groups() const { return particle_group_indexes_.size(); } RandomGenerator& get_random_gen() { return random_gen_; } - void nextDeltaRs(size_t num_rs) + void nextDeltas(size_t num_rs) { + walker_deltas_.rs.resize(num_rs); + makeGaussRandomWithEngine(walker_deltas_.rs, random_gen_); // hate to repeat this pattern, this should never resize. - walker_deltas_.resize(num_rs); - makeGaussRandomWithEngine(walker_deltas_, random_gen_); + if constexpr (std::is_same>::value) + { + walker_deltas_.spins.resize(num_rs); + makeGaussRandomWithEngine(walker_deltas_.spins, random_gen_); + } } - std::vector& get_walker_deltas() { return walker_deltas_; } - auto deltaRsBegin() { return walker_deltas_.begin(); }; + MCCoords& get_walker_deltas() { return walker_deltas_; } + + MCCIt deltasBegin() + { + if constexpr (std::is_same>::value) + return {walker_deltas_.rs.begin()}; + else + return {walker_deltas_.rs.begin(), walker_deltas_.spins.begin()}; + + }; int getPtclGroupStart(int group) const { return particle_group_indexes_[group].first; } int getPtclGroupEnd(int group) const { return particle_group_indexes_[group].second; } protected: - std::vector walker_deltas_; + MCCoords walker_deltas_; /** indexes of start and stop of each particle group; * @@ -71,5 +120,8 @@ class ContextForSteps RandomGenerator& random_gen_; }; +extern template class ContextForSteps; +extern template class ContextForSteps; + } // namespace qmcplusplus #endif diff --git a/src/QMCDrivers/DMC/DMCBatched.cpp 
b/src/QMCDrivers/DMC/DMCBatched.cpp index b0123832c1..11f3899431 100644 --- a/src/QMCDrivers/DMC/DMCBatched.cpp +++ b/src/QMCDrivers/DMC/DMCBatched.cpp @@ -25,6 +25,7 @@ #include "Utilities/ProgressReportEngine.h" #include "QMCDrivers/DMC/WalkerControl.h" #include "QMCDrivers/SFNBranch.h" +#include "QMCDrivers/ContextForSteps.h" #include "MemoryUsage.h" namespace qmcplusplus @@ -64,7 +65,7 @@ void DMCBatched::advanceWalkers(const StateForThread& sft, Crowd& crowd, DriverTimers& timers, DMCTimers& dmc_timers, - ContextForSteps& step_context, + ContextForSteps<>& step_context, bool recompute, bool accumulate_this_step) { @@ -103,8 +104,8 @@ void DMCBatched::advanceWalkers(const StateForThread& sft, const int num_walkers = crowd.size(); //This generates an entire steps worth of deltas. - step_context.nextDeltaRs(num_walkers * sft.population.get_num_particles()); - auto it_delta_r = step_context.deltaRsBegin(); + step_context.nextDeltas(num_walkers * sft.population.get_num_particles()); + auto it_delta_r = step_context.deltasBegin().irs; std::vector grads_now(num_walkers, TrialWaveFunction::GradType(0.0)); std::vector grads_new(num_walkers, TrialWaveFunction::GradType(0.0)); @@ -324,11 +325,12 @@ void DMCBatched::advanceWalkers(const StateForThread& sft, } } +template void DMCBatched::runDMCStep(int crowd_id, const StateForThread& sft, DriverTimers& timers, DMCTimers& dmc_timers, - UPtrVector& context_for_steps, + UPtrVector>& context_for_steps, UPtrVector& crowds) { Crowd& crowd = *(crowds[crowd_id]); @@ -397,6 +399,12 @@ void DMCBatched::process(xmlNodePtr node) } bool DMCBatched::run() +{ + return std::visit([&](auto& var) -> bool { return this->run_impl(var); }, step_contexts_); +} + +template +bool DMCBatched::run_impl(CONTEXTSFORSTEPS& step_contexts) { IndexType num_blocks = qmcdriver_input_.get_max_blocks(); @@ -410,7 +418,8 @@ bool DMCBatched::run() { // walker initialization ScopedTimer local_timer(timers_.init_walkers_timer); ParallelExecutor<> 
section_start_task; - section_start_task(crowds_.size(), initialLogEvaluation, std::ref(crowds_), std::ref(step_contexts_)); + if (step_contexts_.index() == 1) + section_start_task(crowds_.size(), initialLogEvaluation, std::ref(crowds_), step_contexts); } print_mem("DMCBatched after initialLogEvaluation", app_summary()); @@ -444,7 +453,8 @@ bool DMCBatched::run() { ScopedTimer local_timer(timers_.run_steps_timer); dmc_state.step = step; - crowd_task(crowds_.size(), runDMCStep, dmc_state, timers_, dmc_timers_, std::ref(step_contexts_), + if(step_contexts_.index() == 1) + crowd_task(crowds_.size(), runDMCStep, dmc_state, timers_, dmc_timers_, std::ref(std::get<1>(step_contexts_)), std::ref(crowds_)); { @@ -485,4 +495,7 @@ bool DMCBatched::run() return finalize(num_blocks, true); } +template bool DMCBatched::run_impl(QMCDriverNew::SpinSymContexts& ssc); +template bool DMCBatched::run_impl(QMCDriverNew::SpinorContexts& ssc); + } // namespace qmcplusplus diff --git a/src/QMCDrivers/DMC/DMCBatched.h b/src/QMCDrivers/DMC/DMCBatched.h index bb397a2fda..f341c50637 100644 --- a/src/QMCDrivers/DMC/DMCBatched.h +++ b/src/QMCDrivers/DMC/DMCBatched.h @@ -105,13 +105,17 @@ class DMCBatched : public QMCDriverNew bool run() override; + template + bool run_impl(CONTEXTSFORSTEPS& step_contexts); + // This is the task body executed at crowd scope // it does not have access to object members by design + template static void runDMCStep(int crowd_id, const StateForThread& sft, DriverTimers& timers, DMCTimers& dmc_timers, - UPtrVector& move_context, + UPtrVector>& move_context, UPtrVector& crowds); @@ -136,13 +140,16 @@ class DMCBatched : public QMCDriverNew Crowd& crowd, DriverTimers& timers, DMCTimers& dmc_timers, - ContextForSteps& move_context, + ContextForSteps<>& move_context, bool recompute, bool accumulate_this_step); friend class qmcplusplus::testing::DMCBatchedTest; }; +extern template bool DMCBatched::run_impl(QMCDriverNew::SpinSymContexts& ssc); +extern template bool 
DMCBatched::run_impl(QMCDriverNew::SpinorContexts& ssc); + } // namespace qmcplusplus #endif diff --git a/src/QMCDrivers/QMCDriverNew.cpp b/src/QMCDrivers/QMCDriverNew.cpp index 0fd313be26..90fe0d2b5d 100644 --- a/src/QMCDrivers/QMCDriverNew.cpp +++ b/src/QMCDrivers/QMCDriverNew.cpp @@ -159,7 +159,7 @@ void QMCDriverNew::startup(xmlNodePtr cur, const QMCDriverNew::AdjustedWalkerCou population_.redistributeWalkers(crowds_); // Once they are created move contexts can be created. - createRngsStepContexts(crowds_.size()); + createRngsStepContexts(crowds_.size(), population_.get_golden_electrons()->isSpinor()); } /** QMCDriverNew ignores h5name if you want to read and h5 config you have to explicitly @@ -279,9 +279,15 @@ void QMCDriverNew::makeLocalWalkers(IndexType nwalkers, * This is used instead of actually passing number of threads/crowds * controlling threads all over RandomNumberControl. */ -void QMCDriverNew::createRngsStepContexts(int num_crowds) +void QMCDriverNew::createRngsStepContexts(int num_crowds, bool spin_coords) { - step_contexts_.resize(num_crowds); + if (spin_coords) + step_contexts_ = std::vector>>{}; + else + step_contexts_ = std::vector>>{}; + + std::visit([num_crowds](auto& step_con) { step_con.resize(num_crowds); }, step_contexts_); + Rng.resize(num_crowds); if (RandomNumberControl::Children.size() == 0) @@ -294,14 +300,15 @@ void QMCDriverNew::createRngsStepContexts(int num_crowds) for (int i = 0; i < num_crowds; ++i) { Rng[i].reset(RandomNumberControl::Children[i].release()); - step_contexts_[i] = std::make_unique(crowds_[i]->size(), population_.get_num_particles(), - population_.get_particle_group_indexes(), *(Rng[i])); + std::visit([&](auto& step_contexts) { + step_contexts[i] = std::make_unique::type::value_type::element_type>(crowds_[i]->size(), population_.get_num_particles(), population_.get_particle_group_indexes(), *(Rng[i]));}, step_contexts_); } } +template void QMCDriverNew::initialLogEvaluation(int crowd_id, UPtrVector& crowds, - 
UPtrVector& context_for_steps) + CONTEXTSFORSTEPS& context_for_steps) { Crowd& crowd = *(crowds[crowd_id]); if (crowd.size() == 0) @@ -573,4 +580,7 @@ void QMCDriverNew::checkLogAndGL(Crowd& crowd, const std::string_view location) throw std::runtime_error(std::string("checkLogAndGL failed at ") + std::string(location) + std::string("\n")); } +template void QMCDriverNew::initialLogEvaluation(int crowd_id, UPtrVector& crowds, SpinorContexts& step_contexts); +template void QMCDriverNew::initialLogEvaluation(int crowd_id, UPtrVector& crowds, SpinSymContexts& step_contexts); + } // namespace qmcplusplus diff --git a/src/QMCDrivers/QMCDriverNew.h b/src/QMCDrivers/QMCDriverNew.h index 7042070e8f..657033b8a9 100644 --- a/src/QMCDrivers/QMCDriverNew.h +++ b/src/QMCDrivers/QMCDriverNew.h @@ -27,6 +27,7 @@ #define QMCPLUSPLUS_QMCDRIVERNEW_H #include +#include #include "Configuration.h" #include "Pools/PooledData.h" @@ -74,6 +75,12 @@ class QMCDriverNew : public QMCDriverInterface, public MPIObjectBase using RealType = QMCTraits::RealType; using IndexType = QMCTraits::IndexType; using FullPrecRealType = QMCTraits::FullPrecRealType; + using SpinorContext = UPtr>; + using SpinSymContext = UPtr>; + using SpinorContexts = std::vector; + using SpinSymContexts = std::vector; + using ContextsForStepsVar = std::variant; + /** separate but similar to QMCModeEnum * * a code smell @@ -176,7 +183,7 @@ class QMCDriverNew : public QMCDriverInterface, public MPIObjectBase void add_H_and_Psi(QMCHamiltonian* h, TrialWaveFunction* psi) override{}; - void createRngsStepContexts(int num_crowds); + void createRngsStepContexts(int num_crowds, bool spinor_coords); void putWalkers(std::vector& wset) override; @@ -227,8 +234,8 @@ class QMCDriverNew : public QMCDriverInterface, public MPIObjectBase */ void startup(xmlNodePtr cur, const QMCDriverNew::AdjustedWalkerCounts& awc); - static void initialLogEvaluation(int crowd_id, UPtrVector& crowds, UPtrVector& step_context); - + template + static void 
initialLogEvaluation(int crowd_id, UPtrVector& crowds, CONTEXTSFORSTEPS& step_context); /** should be set in input don't see a reason to set individually * @param pbyp if true, use particle-by-particle update @@ -380,7 +387,7 @@ class QMCDriverNew : public QMCDriverInterface, public MPIObjectBase /** Per crowd move contexts, this is where the DistanceTables etc. reside */ - std::vector> step_contexts_; + ContextsForStepsVar step_contexts_; ///Random number generators UPtrVector Rng; @@ -429,6 +436,10 @@ class QMCDriverNew : public QMCDriverInterface, public MPIObjectBase friend class qmcplusplus::testing::DMCBatchedTest; friend class qmcplusplus::testing::QMCDriverNewTestWrapper; }; + +extern template void QMCDriverNew::initialLogEvaluation(int crowd_id, UPtrVector& crowds, SpinorContexts& step_contexts); +extern template void QMCDriverNew::initialLogEvaluation(int crowd_id, UPtrVector& crowds, SpinSymContexts& step_contexts); + } // namespace qmcplusplus #endif diff --git a/src/QMCDrivers/VMC/VMCBatched.cpp b/src/QMCDrivers/VMC/VMCBatched.cpp index 8798cce367..f13570a85b 100644 --- a/src/QMCDrivers/VMC/VMCBatched.cpp +++ b/src/QMCDrivers/VMC/VMCBatched.cpp @@ -35,10 +35,11 @@ VMCBatched::VMCBatched(const ProjectData& project_data, collect_samples_(false) {} +template void VMCBatched::advanceWalkers(const StateForThread& sft, Crowd& crowd, QMCDriverNew::DriverTimers& timers, - ContextForSteps& step_context, + CFS& step_context, bool recompute, bool accumulate_this_step) { @@ -83,7 +84,7 @@ void VMCBatched::advanceWalkers(const StateForThread& sft, for (int sub_step = 0; sub_step < sft.qmcdrv_input.get_sub_steps(); sub_step++) { //This generates an entire steps worth of deltas. 
- step_context.nextDeltaRs(num_walkers * sft.population.get_num_particles()); + step_context.nextDeltas(num_walkers * sft.population.get_num_particles()); // up and down electrons are "species" within qmpack for (int ig = 0; ig < step_context.get_num_groups(); ++ig) //loop over species @@ -100,7 +101,7 @@ void VMCBatched::advanceWalkers(const StateForThread& sft, { // step_context.deltaRsBegin returns an iterator to a flat series of PosTypes // fastest in walkers then particles - auto delta_r_start = step_context.deltaRsBegin() + iat * num_walkers; + auto delta_r_start = step_context.deltasBegin().irs + iat * num_walkers; auto delta_r_end = delta_r_start + num_walkers; if (use_drift) @@ -219,10 +220,11 @@ void VMCBatched::advanceWalkers(const StateForThread& sft, /** Thread body for VMC step * */ +template void VMCBatched::runVMCStep(int crowd_id, const StateForThread& sft, DriverTimers& timers, - std::vector>& context_for_steps, + CFS& context_for_steps, std::vector>& crowds) { Crowd& crowd = *(crowds[crowd_id]); @@ -261,7 +263,10 @@ int VMCBatched::compute_samples_per_rank(const QMCDriverInput& qmcdriver_input, return nblocks * nsteps * local_walkers; } - +bool VMCBatched::run() +{ + return std::visit([&](auto& var) -> bool { return run_impl(var); }, step_contexts_); +} /** Runs the actual VMC section * * Dependent on base class state machine @@ -274,7 +279,8 @@ int VMCBatched::compute_samples_per_rank(const QMCDriverInput& qmcdriver_input, * If does consider giving more to the thread by value that should * end up thread local. 
(I think) */ -bool VMCBatched::run() +template +bool VMCBatched::run_impl(CONTEXTFORSTEPS& step_contexts) { IndexType num_blocks = qmcdriver_input_.get_max_blocks(); //start the main estimator @@ -289,7 +295,8 @@ bool VMCBatched::run() { // walker initialization ScopedTimer local_timer(timers_.init_walkers_timer); ParallelExecutor<> section_start_task; - section_start_task(crowds_.size(), initialLogEvaluation, std::ref(crowds_), std::ref(step_contexts_)); + section_start_task(crowds_.size(), initialLogEvaluation, std::ref(crowds_), + step_contexts); } print_mem("VMCBatched after initialLogEvaluation", app_summary()); @@ -299,8 +306,8 @@ bool VMCBatched::run() if (qmcdriver_input_.get_warmup_steps() > 0) { // Run warm-up steps - auto runWarmupStep = [](int crowd_id, StateForThread& sft, DriverTimers& timers, - UPtrVector& context_for_steps, UPtrVector& crowds) { + auto runWarmupStep = [](int crowd_id, StateForThread& sft, DriverTimers& timers, auto& context_for_steps, + UPtrVector& crowds) { Crowd& crowd = *(crowds[crowd_id]); const bool recompute = false; const bool accumulate_this_step = false; @@ -310,7 +317,7 @@ bool VMCBatched::run() for (int step = 0; step < qmcdriver_input_.get_warmup_steps(); ++step) { ScopedTimer local_timer(timers_.run_steps_timer); - crowd_task(crowds_.size(), runWarmupStep, vmc_state, std::ref(timers_), std::ref(step_contexts_), + crowd_task(crowds_.size(), runWarmupStep, vmc_state, std::ref(timers_), step_contexts, std::ref(crowds_)); } @@ -335,7 +342,7 @@ bool VMCBatched::run() { ScopedTimer local_timer(timers_.run_steps_timer); vmc_state.step = step; - crowd_task(crowds_.size(), runVMCStep, vmc_state, timers_, std::ref(step_contexts_), std::ref(crowds_)); + crowd_task(crowds_.size(), runVMCStep, vmc_state, timers_, step_contexts, std::ref(crowds_)); if (collect_samples_) { @@ -408,4 +415,7 @@ void VMCBatched::enable_sample_collection() app_log() << " total samples = " << total_samples << '\n'; } +template bool 
VMCBatched::run_impl(QMCDriverNew::SpinSymContexts& ssc); +template bool VMCBatched::run_impl(QMCDriverNew::SpinorContexts& ssc); + } // namespace qmcplusplus diff --git a/src/QMCDrivers/VMC/VMCBatched.h b/src/QMCDrivers/VMC/VMCBatched.h index 78380eddb4..a507832aaf 100644 --- a/src/QMCDrivers/VMC/VMCBatched.h +++ b/src/QMCDrivers/VMC/VMCBatched.h @@ -75,24 +75,28 @@ class VMCBatched : public QMCDriverNew bool run() override; + template + bool run_impl(CFS& cfs); /** Refactor of VMCUpdatePbyP in crowd context * * MCWalkerConfiguration layer removed. * Obfuscation of state changes via buffer and MCWalkerconfiguration require this be tested well */ + template static void advanceWalkers(const StateForThread& sft, Crowd& crowd, DriverTimers& timers, - ContextForSteps& move_context, + CFS& move_context, bool recompute, bool accumulate_this_step); // This is the task body executed at crowd scope // it does not have access to object member variables by design + template static void runVMCStep(int crowd_id, const StateForThread& sft, DriverTimers& timers, - std::vector>& context_for_steps, + CFS& context_for_steps, std::vector>& crowds); /** transitional interface on the way to better walker count adjustment handling. 
@@ -108,6 +112,7 @@ class VMCBatched : public QMCDriverNew */ void enable_sample_collection(); + private: int prevSteps; int prevStepsBetweenSamples; @@ -133,6 +138,10 @@ class VMCBatched : public QMCDriverNew }; extern std::ostream& operator<<(std::ostream& o_stream, const VMCBatched& vmc_batched); + +extern template bool VMCBatched::run_impl(QMCDriverNew::SpinSymContexts& ssc); +extern template bool VMCBatched::run_impl(QMCDriverNew::SpinorContexts& ssc); + } // namespace qmcplusplus #endif diff --git a/src/type_traits/template_types.hpp b/src/type_traits/template_types.hpp index 3af0ea5b82..c616367548 100644 --- a/src/type_traits/template_types.hpp +++ b/src/type_traits/template_types.hpp @@ -92,6 +92,17 @@ static RefVector convertUPtrToRefVector(const UPtrVector& ptr_list) return ref_list; } +template +static RefVector convertUPtrVariantToRefVector(const UPtrVector& ptr_list) +{ + RefVector ref_list; + ref_list.reserve(ptr_list.size()); + for (const UPtr& ptr : ptr_list) + ref_list.push_back(std::get(*ptr)); + return ref_list; +} + + // temporary helper function template static std::vector convert_ref_to_ptr_list(const std::vector>& ref_list) From eb08fd36a7600fabc215890b81d7a677b4c7af38 Mon Sep 17 00:00:00 2001 From: Peter Doak Date: Tue, 25 Jan 2022 15:47:45 -0500 Subject: [PATCH 2/4] all unit tests passing --- src/QMCDrivers/CMakeLists.txt | 4 ++-- src/QMCDrivers/ContextForSteps.cpp | 14 +++++++------- src/QMCDrivers/ContextForSteps.h | 31 ++++++++++++++---------------- src/QMCDrivers/DMC/DMCBatched.cpp | 14 ++++++++------ src/QMCDrivers/DMC/DMCBatched.h | 7 ++++--- src/QMCDrivers/QMCDriverNew.cpp | 4 ++-- src/QMCDrivers/QMCDriverNew.h | 4 ++-- src/type_traits/template_types.hpp | 11 ----------- 8 files changed, 39 insertions(+), 50 deletions(-) diff --git a/src/QMCDrivers/CMakeLists.txt b/src/QMCDrivers/CMakeLists.txt index 27882b160a..83e0ed77ad 100644 --- a/src/QMCDrivers/CMakeLists.txt +++ b/src/QMCDrivers/CMakeLists.txt @@ -44,6 +44,7 @@ set(QMCDRIVERS 
WaveFunctionTester.cpp WalkerControlBase.cpp CloneManager.cpp + ContextForSteps.cpp Crowd.cpp QMCUpdateBase.cpp GreenFunctionModifiers/DriftModifierBuilder.cpp @@ -74,8 +75,7 @@ set(QMCDRIVERS CorrelatedSampling/CSVMC.cpp CorrelatedSampling/CSVMCUpdateAll.cpp CorrelatedSampling/CSVMCUpdatePbyP.cpp - CorrelatedSampling/CSUpdateBase.cpp - ContextForSteps.cpp) + CorrelatedSampling/CSUpdateBase.cpp) if(QMC_CUDA) set(QMCDRIVERS ${QMCDRIVERS} VMC/VMC_CUDA.cpp DMC/DMC_CUDA.cpp WFOpt/QMCCostFunctionCUDA.cpp) diff --git a/src/QMCDrivers/ContextForSteps.cpp b/src/QMCDrivers/ContextForSteps.cpp index 91a670e35e..29960e0336 100644 --- a/src/QMCDrivers/ContextForSteps.cpp +++ b/src/QMCDrivers/ContextForSteps.cpp @@ -15,11 +15,11 @@ namespace qmcplusplus { -template -ContextForSteps::ContextForSteps(int num_walkers, - int num_particles, - std::vector> particle_group_indexes, - RandomGenerator& random_gen) +template +ContextForSteps::ContextForSteps(int num_walkers, + int num_particles, + std::vector> particle_group_indexes, + RandomGenerator& random_gen) : particle_group_indexes_(particle_group_indexes), random_gen_(random_gen) { /** glambda to create type T with constructor T(int) and put in it unique_ptr @@ -34,7 +34,7 @@ ContextForSteps::ContextForSteps(int num_walkers, walker_deltas_.rs.resize(num_walkers * num_particles); } -template class ContextForSteps; -template class ContextForSteps; +template class ContextForSteps; +template class ContextForSteps; } // namespace qmcplusplus diff --git a/src/QMCDrivers/ContextForSteps.h b/src/QMCDrivers/ContextForSteps.h index 1cdf5f71b2..f6d6ff129f 100644 --- a/src/QMCDrivers/ContextForSteps.h +++ b/src/QMCDrivers/ContextForSteps.h @@ -23,6 +23,12 @@ namespace qmcplusplus { +enum class MCCoordsTypes +{ + RS, + RSSPINS +}; + /** Thread local context for moving walkers * * created once per driver per crowd @@ -31,7 +37,7 @@ namespace qmcplusplus * * */ -template +template class ContextForSteps { public: @@ -40,14 +46,6 @@ class 
ContextForSteps using MCPWalker = Walker; using RealType = QMCTraits::RealType; - enum class MCCoordsTypes - { - RS, - RSSPINS - }; - - static constexpr MCCoordsTypes translated_ct = spinor ? MCCoordsTypes::RSSPINS : MCCoordsTypes::RS; - template struct MCCoords { @@ -94,22 +92,21 @@ class ContextForSteps } } - MCCoords& get_walker_deltas() { return walker_deltas_; } + MCCoords& get_walker_deltas() { return walker_deltas_; } - MCCIt deltasBegin() + MCCIt deltasBegin() { if constexpr (std::is_same>::value) - return {walker_deltas_.rs.begin()}; + return {walker_deltas_.rs.begin()}; else - return {walker_deltas_.rs.begin(), walker_deltas_.spins.begin()}; - + return {walker_deltas_.rs.begin(), walker_deltas_.spins.begin()}; }; int getPtclGroupStart(int group) const { return particle_group_indexes_[group].first; } int getPtclGroupEnd(int group) const { return particle_group_indexes_[group].second; } protected: - MCCoords walker_deltas_; + MCCoords walker_deltas_; /** indexes of start and stop of each particle group; * @@ -120,8 +117,8 @@ class ContextForSteps RandomGenerator& random_gen_; }; -extern template class ContextForSteps; -extern template class ContextForSteps; +extern template class ContextForSteps; +extern template class ContextForSteps; } // namespace qmcplusplus #endif diff --git a/src/QMCDrivers/DMC/DMCBatched.cpp b/src/QMCDrivers/DMC/DMCBatched.cpp index 11f3899431..8c4288fdca 100644 --- a/src/QMCDrivers/DMC/DMCBatched.cpp +++ b/src/QMCDrivers/DMC/DMCBatched.cpp @@ -61,11 +61,12 @@ void DMCBatched::setNonLocalMoveHandler(QMCHamiltonian& golden_hamiltonian) dmcdriver_input_.get_alpha(), dmcdriver_input_.get_gamma()); } +template void DMCBatched::advanceWalkers(const StateForThread& sft, Crowd& crowd, DriverTimers& timers, DMCTimers& dmc_timers, - ContextForSteps<>& step_context, + CFS& step_context, bool recompute, bool accumulate_this_step) { @@ -205,7 +206,9 @@ void DMCBatched::advanceWalkers(const StateForThread& sft, 
sft.drift_modifier.getDrifts(tauovermass, grads_new, drifts); std::transform(crowd.beginElectrons(), crowd.endElectrons(), drifts.begin(), drifts.begin(), - [iat](auto& elecs, auto& drift) { return elecs.get().R[iat] - elecs.get().getActivePos() - drift; }); + [iat](auto& elecs, auto& drift) { + return elecs.get().R[iat] - elecs.get().getActivePos() - drift; + }); std::transform(drifts.begin(), drifts.end(), log_gb.begin(), [oneover2tau](auto& drift) { return -oneover2tau * dot(drift, drift); }); @@ -325,12 +328,12 @@ void DMCBatched::advanceWalkers(const StateForThread& sft, } } -template +template void DMCBatched::runDMCStep(int crowd_id, const StateForThread& sft, DriverTimers& timers, DMCTimers& dmc_timers, - UPtrVector>& context_for_steps, + CONTEXTSFORSTEPS& context_for_steps, UPtrVector& crowds) { Crowd& crowd = *(crowds[crowd_id]); @@ -453,8 +456,7 @@ bool DMCBatched::run_impl(CONTEXTSFORSTEPS& step_contexts) { ScopedTimer local_timer(timers_.run_steps_timer); dmc_state.step = step; - if(step_contexts_.index() == 1) - crowd_task(crowds_.size(), runDMCStep, dmc_state, timers_, dmc_timers_, std::ref(std::get<1>(step_contexts_)), + crowd_task(crowds_.size(), runDMCStep, dmc_state, timers_, dmc_timers_, step_contexts, std::ref(crowds_)); { diff --git a/src/QMCDrivers/DMC/DMCBatched.h b/src/QMCDrivers/DMC/DMCBatched.h index f341c50637..5a5fac711e 100644 --- a/src/QMCDrivers/DMC/DMCBatched.h +++ b/src/QMCDrivers/DMC/DMCBatched.h @@ -110,12 +110,12 @@ class DMCBatched : public QMCDriverNew // This is the task body executed at crowd scope // it does not have access to object members by design - template + template static void runDMCStep(int crowd_id, const StateForThread& sft, DriverTimers& timers, DMCTimers& dmc_timers, - UPtrVector>& move_context, + CONTEXTSFORSTEPS& move_context, UPtrVector& crowds); @@ -136,11 +136,12 @@ class DMCBatched : public QMCDriverNew ///walker controller for load-balance std::unique_ptr walker_controller_; + template static void 
advanceWalkers(const StateForThread& sft, Crowd& crowd, DriverTimers& timers, DMCTimers& dmc_timers, - ContextForSteps<>& move_context, + CONTEXTSFORSTEPS& move_context, bool recompute, bool accumulate_this_step); diff --git a/src/QMCDrivers/QMCDriverNew.cpp b/src/QMCDrivers/QMCDriverNew.cpp index 90fe0d2b5d..a1216982a4 100644 --- a/src/QMCDrivers/QMCDriverNew.cpp +++ b/src/QMCDrivers/QMCDriverNew.cpp @@ -282,9 +282,9 @@ void QMCDriverNew::makeLocalWalkers(IndexType nwalkers, void QMCDriverNew::createRngsStepContexts(int num_crowds, bool spin_coords) { if (spin_coords) - step_contexts_ = std::vector>>{}; + step_contexts_ = std::vector>>{}; else - step_contexts_ = std::vector>>{}; + step_contexts_ = std::vector>>{}; std::visit([num_crowds](auto& step_con) { step_con.resize(num_crowds); }, step_contexts_); diff --git a/src/QMCDrivers/QMCDriverNew.h b/src/QMCDrivers/QMCDriverNew.h index 657033b8a9..6c5e8c2e46 100644 --- a/src/QMCDrivers/QMCDriverNew.h +++ b/src/QMCDrivers/QMCDriverNew.h @@ -75,8 +75,8 @@ class QMCDriverNew : public QMCDriverInterface, public MPIObjectBase using RealType = QMCTraits::RealType; using IndexType = QMCTraits::IndexType; using FullPrecRealType = QMCTraits::FullPrecRealType; - using SpinorContext = UPtr>; - using SpinSymContext = UPtr>; + using SpinorContext = UPtr>; + using SpinSymContext = UPtr>; using SpinorContexts = std::vector; using SpinSymContexts = std::vector; using ContextsForStepsVar = std::variant; diff --git a/src/type_traits/template_types.hpp b/src/type_traits/template_types.hpp index c616367548..3af0ea5b82 100644 --- a/src/type_traits/template_types.hpp +++ b/src/type_traits/template_types.hpp @@ -92,17 +92,6 @@ static RefVector convertUPtrToRefVector(const UPtrVector& ptr_list) return ref_list; } -template -static RefVector convertUPtrVariantToRefVector(const UPtrVector& ptr_list) -{ - RefVector ref_list; - ref_list.reserve(ptr_list.size()); - for (const UPtr& ptr : ptr_list) - ref_list.push_back(std::get(*ptr)); - return 
ref_list; -} - - // temporary helper function template static std::vector convert_ref_to_ptr_list(const std::vector>& ref_list) From 89ce0098848cb9a545d726ab06e097a2860978b6 Mon Sep 17 00:00:00 2001 From: Peter Doak Date: Wed, 26 Jan 2022 12:07:20 -0500 Subject: [PATCH 3/4] pushing templating on MCCoords down. --- src/QMCDrivers/CMakeLists.txt | 1 + src/QMCDrivers/ContextForSteps.cpp | 21 +--- src/QMCDrivers/ContextForSteps.h | 64 +---------- src/QMCDrivers/DMC/DMCBatched.cpp | 95 ++++++++------- src/QMCDrivers/DMC/DMCBatched.h | 13 +-- .../DriftModifierBase.h | 8 +- .../DriftModifierUNR.cpp | 12 +- .../GreenFunctionModifiers/DriftModifierUNR.h | 7 +- src/QMCDrivers/QMCDriverInput.h | 2 + src/QMCDrivers/QMCDriverNew.cpp | 23 +--- src/QMCDrivers/QMCDriverNew.h | 15 +-- src/QMCDrivers/VMC/VMCBatched.cpp | 108 +++++++++++------- src/QMCDrivers/VMC/VMCBatched.h | 18 +-- 13 files changed, 165 insertions(+), 222 deletions(-) diff --git a/src/QMCDrivers/CMakeLists.txt b/src/QMCDrivers/CMakeLists.txt index 83e0ed77ad..1dd51b4008 100644 --- a/src/QMCDrivers/CMakeLists.txt +++ b/src/QMCDrivers/CMakeLists.txt @@ -49,6 +49,7 @@ set(QMCDRIVERS QMCUpdateBase.cpp GreenFunctionModifiers/DriftModifierBuilder.cpp GreenFunctionModifiers/DriftModifierUNR.cpp + MCCoords.cpp VMC/VMCUpdatePbyP.cpp VMC/VMCUpdateAll.cpp VMC/VMCFactory.cpp diff --git a/src/QMCDrivers/ContextForSteps.cpp b/src/QMCDrivers/ContextForSteps.cpp index 29960e0336..0289994b92 100644 --- a/src/QMCDrivers/ContextForSteps.cpp +++ b/src/QMCDrivers/ContextForSteps.cpp @@ -2,7 +2,7 @@ // This file is distributed under the University of Illinois/NCSA Open Source License. // See LICENSE file in top directory for details. // -// Copyright (c) 2019 developers. +// Copyright (c) 2022 developers. 
// // File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory // @@ -15,26 +15,11 @@ namespace qmcplusplus { -template -ContextForSteps::ContextForSteps(int num_walkers, +ContextForSteps::ContextForSteps(int num_walkers, int num_particles, std::vector> particle_group_indexes, RandomGenerator& random_gen) : particle_group_indexes_(particle_group_indexes), random_gen_(random_gen) -{ - /** glambda to create type T with constructor T(int) and put in it unique_ptr - * - * captures num_particles to use as argument to constructor - * gets T for type unique_ptr unique is templated on - */ - auto constructT = [num_particles](auto& unique) { - unique.reset(new typename std::remove_pointer::type(num_particles)); - }; - - walker_deltas_.rs.resize(num_walkers * num_particles); -} - -template class ContextForSteps; -template class ContextForSteps; +{} } // namespace qmcplusplus diff --git a/src/QMCDrivers/ContextForSteps.h b/src/QMCDrivers/ContextForSteps.h index f6d6ff129f..cd32b624eb 100644 --- a/src/QMCDrivers/ContextForSteps.h +++ b/src/QMCDrivers/ContextForSteps.h @@ -2,7 +2,7 @@ // This file is distributed under the University of Illinois/NCSA Open Source License. // See LICENSE file in top directory for details. // -// Copyright (c) 2019 developers. +// Copyright (c) 2022 developers. 
// // File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory // @@ -19,16 +19,11 @@ #include "Particle/Walker.h" #include "QMCDrivers/Crowd.h" #include "ParticleBase/RandomSeqGenerator.h" +#include "MCCoords.hpp" namespace qmcplusplus { -enum class MCCoordsTypes -{ - RS, - RSSPINS -}; - /** Thread local context for moving walkers * * created once per driver per crowd @@ -37,7 +32,6 @@ enum class MCCoordsTypes * * */ -template class ContextForSteps { public: @@ -46,32 +40,6 @@ class ContextForSteps using MCPWalker = Walker; using RealType = QMCTraits::RealType; - template - struct MCCoords - { - std::vector rs; - }; - - template<> - struct MCCoords - { - std::vector rs; - std::vector> spins; - }; - - template - struct MCCIt - { - std::vector::iterator irs; - }; - - template<> - struct MCCIt - { - std::vector::iterator irs; - std::vector>::iterator spins; - }; - ContextForSteps(int num_walkers, int num_particles, std::vector> particle_group_indexes, @@ -80,34 +48,9 @@ class ContextForSteps int get_num_groups() const { return particle_group_indexes_.size(); } RandomGenerator& get_random_gen() { return random_gen_; } - void nextDeltas(size_t num_rs) - { - walker_deltas_.rs.resize(num_rs); - makeGaussRandomWithEngine(walker_deltas_.rs, random_gen_); - // hate to repeat this pattern, this should never resize. 
- if constexpr (std::is_same>::value) - { - walker_deltas_.spins.resize(num_rs); - makeGaussRandomWithEngine(walker_deltas_.spins, random_gen_); - } - } - - MCCoords& get_walker_deltas() { return walker_deltas_; } - - MCCIt deltasBegin() - { - if constexpr (std::is_same>::value) - return {walker_deltas_.rs.begin()}; - else - return {walker_deltas_.rs.begin(), walker_deltas_.spins.begin()}; - }; - int getPtclGroupStart(int group) const { return particle_group_indexes_[group].first; } int getPtclGroupEnd(int group) const { return particle_group_indexes_[group].second; } - protected: - MCCoords walker_deltas_; - /** indexes of start and stop of each particle group; * * Seems like these should be iterators but haven't thought through the implications. @@ -117,8 +60,5 @@ class ContextForSteps RandomGenerator& random_gen_; }; -extern template class ContextForSteps; -extern template class ContextForSteps; - } // namespace qmcplusplus #endif diff --git a/src/QMCDrivers/DMC/DMCBatched.cpp b/src/QMCDrivers/DMC/DMCBatched.cpp index 8c4288fdca..a621d02640 100644 --- a/src/QMCDrivers/DMC/DMCBatched.cpp +++ b/src/QMCDrivers/DMC/DMCBatched.cpp @@ -61,12 +61,12 @@ void DMCBatched::setNonLocalMoveHandler(QMCHamiltonian& golden_hamiltonian) dmcdriver_input_.get_alpha(), dmcdriver_input_.get_gamma()); } -template +template void DMCBatched::advanceWalkers(const StateForThread& sft, Crowd& crowd, DriverTimers& timers, DMCTimers& dmc_timers, - CFS& step_context, + ContextForSteps& step_context, bool recompute, bool accumulate_this_step) { @@ -105,13 +105,13 @@ void DMCBatched::advanceWalkers(const StateForThread& sft, const int num_walkers = crowd.size(); //This generates an entire steps worth of deltas. 
- step_context.nextDeltas(num_walkers * sft.population.get_num_particles()); - auto it_delta_r = step_context.deltasBegin().irs; + std::size_t num_deltas = num_walkers * sft.population.get_num_particles(); std::vector grads_now(num_walkers, TrialWaveFunction::GradType(0.0)); std::vector grads_new(num_walkers, TrialWaveFunction::GradType(0.0)); std::vector ratios(num_walkers, TrialWaveFunction::PsiValueType(0.0)); - std::vector drifts(num_walkers, 0.0); + auto deltas = generateDeltas(step_context.get_random_gen(), num_deltas); + std::vector log_gf(num_walkers, 0.0); std::vector log_gb(num_walkers, 0.0); std::vector prob(num_walkers, 0.0); @@ -132,18 +132,28 @@ void DMCBatched::advanceWalkers(const StateForThread& sft, ScopedTimer pbyp_local_timer(timers.movepbyp_timer); for (int ig = 0; ig < step_context.get_num_groups(); ++ig) { - RealType tauovermass = sft.qmcdrv_input.get_tau() * sft.population.get_ptclgrp_inv_mass()[ig]; - RealType oneover2tau = 0.5 / (tauovermass); - RealType sqrttau = std::sqrt(tauovermass); + auto createTaus = [&](int ig) -> Taus { + if constexpr (std::is_same>::value) + return Taus(sft.qmcdrv_input.get_tau(), sft.population.get_ptclgrp_inv_mass()[ig], + sft.qmcdrv_input.get_spin_mass()); + else + return Taus(sft.qmcdrv_input.get_tau(), sft.population.get_ptclgrp_inv_mass()[ig]); + }; + + Taus taus = createTaus(ig); twf_dispatcher.flex_prepareGroup(walker_twfs, walker_elecs, ig); - int start_index = step_context.getPtclGroupStart(ig); - int end_index = step_context.getPtclGroupEnd(ig); + int start_index = step_context.getPtclGroupStart(ig); + int end_index = step_context.getPtclGroupEnd(ig); + std::size_t delta_offset = 0; + // drifts holds one entry per walker for the particle being moved (getDrifts fills qf.size() == num_walkers entries) + MCCOORDS drifts; + drifts.resize(num_walkers); for (int iat = start_index; iat < end_index; ++iat) { - auto delta_r_start = it_delta_r + iat * num_walkers; - auto delta_r_end = delta_r_start + num_walkers; + auto delta_start = delta_offset + iat * 
num_walkers; + auto delta_end = delta_start + num_walkers; //This is very useful thing to be able to look at in the debugger #ifndef NDEBUG @@ -156,17 +166,22 @@ void DMCBatched::advanceWalkers(const StateForThread& sft, #endif //get the displacement twf_dispatcher.flex_evalGrad(walker_twfs, walker_elecs, iat, grads_now); - sft.drift_modifier.getDrifts(tauovermass, grads_now, drifts); + sft.drift_modifier.getDrifts(taus.tauovermass, grads_now, drifts); + - std::transform(drifts.begin(), drifts.end(), delta_r_start, drifts.begin(), - [sqrttau](PosType& drift, PosType& delta_r) { return drift + (sqrttau * delta_r); }); + for (std::size_t ip = delta_start; ip < delta_end; ++ip) // add the diffusion term: deltas are indexed globally, drifts per walker + drifts.rs[ip - delta_start] = drifts.rs[ip - delta_start] + (taus.sqrttau * deltas.rs[ip]); // only DMC does this // TODO: rr needs a real name std::vector rr(num_walkers, 0.0); - assert(rr.size() == delta_r_end - delta_r_start); - std::transform(delta_r_start, delta_r_end, rr.begin(), - [tauovermass](auto& delta_r) { return tauovermass * dot(delta_r, delta_r); }); + assert(rr.size() == delta_end - delta_start); + for(std::size_t ip = delta_start; ip < delta_end; ++ip) + { + rr[ip - delta_start] = taus.tauovermass * dot(deltas.rs[ip], deltas.rs[ip]); + } + //std::transform(delta_start, delta_end, rr.begin(), + // [tauovermass](auto& delta_r) { return tauovermass * dot(delta_r, delta_r); }); // in DMC this was done here, changed to match VMCBatched pending factoring to common source // if (rr > m_r2max) @@ -178,7 +193,7 @@ void DMCBatched::advanceWalkers(const StateForThread& sft, for (int i = 0; i < rr.size(); ++i) assert(std::isfinite(rr[i])); #endif - ps_dispatcher.flex_makeMove(walker_elecs, iat, drifts); + ps_dispatcher.flex_makeMove(walker_elecs, iat, drifts.rs); twf_dispatcher.flex_calcRatioGrad(walker_twfs, walker_elecs, iat, ratios, grads_new); @@ -198,20 +213,25 @@ void DMCBatched::advanceWalkers(const StateForThread& sft, rr_proposed[iw] += rr[iw]; } - 
std::transform(delta_r_start, delta_r_end, log_gf.begin(), [](auto& delta_r) { + for(std::size_t ip = delta_start; ip < delta_end; ++ip) + { constexpr RealType mhalf(-0.5); - return mhalf * dot(delta_r, delta_r); - }); + log_gf[ip - delta_start] = mhalf * dot(deltas.rs[ip], deltas.rs[ip]); // log_gf is per walker; ip is the global delta index + } + // std::transform(delta_r_start, delta_r_end, log_gf.begin(), [](auto& delta_r) { + // constexpr RealType mhalf(-0.5); + // return mhalf * dot(delta_r, delta_r); + // }); - sft.drift_modifier.getDrifts(tauovermass, grads_new, drifts); + sft.drift_modifier.getDrifts(taus.tauovermass, grads_new, drifts); - std::transform(crowd.beginElectrons(), crowd.endElectrons(), drifts.begin(), drifts.begin(), + std::transform(crowd.beginElectrons(), crowd.endElectrons(), drifts.rs.begin(), drifts.rs.begin(), [iat](auto& elecs, auto& drift) { return elecs.get().R[iat] - elecs.get().getActivePos() - drift; }); - std::transform(drifts.begin(), drifts.end(), log_gb.begin(), - [oneover2tau](auto& drift) { return -oneover2tau * dot(drift, drift); }); + std::transform(drifts.rs.begin(), drifts.rs.end(), log_gb.begin(), + [taus](auto& drift) { return -taus.oneover2tau * dot(drift, drift); }); for (int iw = 0; iw < num_walkers; ++iw) prob[iw] = std::norm(ratios[iw]) * std::exp(log_gb[iw] - log_gf[iw]); @@ -328,12 +348,11 @@ void DMCBatched::advanceWalkers(const StateForThread& sft, } } -template void DMCBatched::runDMCStep(int crowd_id, const StateForThread& sft, DriverTimers& timers, DMCTimers& dmc_timers, - CONTEXTSFORSTEPS& context_for_steps, + UPtrVector& context_for_steps, UPtrVector& crowds) { Crowd& crowd = *(crowds[crowd_id]); @@ -349,7 +368,11 @@ void DMCBatched::runDMCStep(int crowd_id, // Are we entering the the last step of a block to recompute at? 
const bool recompute_this_step = (sft.is_recomputing_block && (step + 1) == max_steps); const bool accumulate_this_step = true; - advanceWalkers(sft, crowd, timers, dmc_timers, *context_for_steps[crowd_id], recompute_this_step, + if(sft.population.get_golden_electrons()->isSpinor()) + advanceWalkers>(sft, crowd, timers, dmc_timers, *context_for_steps[crowd_id], recompute_this_step, + accumulate_this_step); + else + advanceWalkers>(sft, crowd, timers, dmc_timers, *context_for_steps[crowd_id], recompute_this_step, accumulate_this_step); } @@ -402,12 +425,6 @@ void DMCBatched::process(xmlNodePtr node) } bool DMCBatched::run() -{ - return std::visit([&](auto& var) -> bool { return this->run_impl(var); }, step_contexts_); -} - -template -bool DMCBatched::run_impl(CONTEXTSFORSTEPS& step_contexts) { IndexType num_blocks = qmcdriver_input_.get_max_blocks(); @@ -421,8 +438,7 @@ bool DMCBatched::run_impl(CONTEXTSFORSTEPS& step_contexts) { // walker initialization ScopedTimer local_timer(timers_.init_walkers_timer); ParallelExecutor<> section_start_task; - if (step_contexts_.index() == 1) - section_start_task(crowds_.size(), initialLogEvaluation, std::ref(crowds_), step_contexts); + section_start_task(crowds_.size(), initialLogEvaluation, std::ref(crowds_), step_contexts_); } print_mem("DMCBatched after initialLogEvaluation", app_summary()); @@ -456,7 +472,7 @@ bool DMCBatched::run_impl(CONTEXTSFORSTEPS& step_contexts) { ScopedTimer local_timer(timers_.run_steps_timer); dmc_state.step = step; - crowd_task(crowds_.size(), runDMCStep, dmc_state, timers_, dmc_timers_, step_contexts, + crowd_task(crowds_.size(), runDMCStep, dmc_state, timers_, dmc_timers_, step_contexts_, std::ref(crowds_)); { @@ -497,7 +513,4 @@ bool DMCBatched::run_impl(CONTEXTSFORSTEPS& step_contexts) return finalize(num_blocks, true); } -template bool DMCBatched::run_impl(QMCDriverNew::SpinSymContexts& ssc); -template bool DMCBatched::run_impl(QMCDriverNew::SpinorContexts& ssc); - } // namespace qmcplusplus 
diff --git a/src/QMCDrivers/DMC/DMCBatched.h b/src/QMCDrivers/DMC/DMCBatched.h index 5a5fac711e..2b214b569b 100644 --- a/src/QMCDrivers/DMC/DMCBatched.h +++ b/src/QMCDrivers/DMC/DMCBatched.h @@ -104,18 +104,14 @@ class DMCBatched : public QMCDriverNew void process(xmlNodePtr cur) override; bool run() override; - - template - bool run_impl(CONTEXTSFORSTEPS& step_contexts); // This is the task body executed at crowd scope // it does not have access to object members by design - template static void runDMCStep(int crowd_id, const StateForThread& sft, DriverTimers& timers, DMCTimers& dmc_timers, - CONTEXTSFORSTEPS& move_context, + UPtrVector& move_context, UPtrVector& crowds); @@ -136,20 +132,17 @@ class DMCBatched : public QMCDriverNew ///walker controller for load-balance std::unique_ptr walker_controller_; - template + template static void advanceWalkers(const StateForThread& sft, Crowd& crowd, DriverTimers& timers, DMCTimers& dmc_timers, - CONTEXTSFORSTEPS& move_context, + ContextForSteps& move_context, bool recompute, bool accumulate_this_step); friend class qmcplusplus::testing::DMCBatchedTest; }; - -extern template bool DMCBatched::run_impl(QMCDriverNew::SpinSymContexts& ssc); -extern template bool DMCBatched::run_impl(QMCDriverNew::SpinorContexts& ssc); } // namespace qmcplusplus diff --git a/src/QMCDrivers/GreenFunctionModifiers/DriftModifierBase.h b/src/QMCDrivers/GreenFunctionModifiers/DriftModifierBase.h index cb70d532cc..4cc12b41f1 100644 --- a/src/QMCDrivers/GreenFunctionModifiers/DriftModifierBase.h +++ b/src/QMCDrivers/GreenFunctionModifiers/DriftModifierBase.h @@ -17,10 +17,13 @@ #include "Particle/ParticleSet.h" #include "QMCWaveFunctions/TrialWaveFunction.h" #include "QMCHamiltonians/QMCHamiltonian.h" +#include "MCCoords.hpp" namespace qmcplusplus { -/// this class implements drift modification +/** this class implements drift modification + * its a completely pointless type erasue. 
+ */ class DriftModifierBase { public: @@ -38,7 +41,8 @@ class DriftModifierBase virtual void getDrift(RealType tau, const ComplexType& qf, ParticleSet::Scalar_t& drift) const = 0; - virtual void getDrifts(RealType tau, const std::vector& qf, std::vector&) const = 0; + virtual void getDrifts(RealType tau, const std::vector& qf, MCCoords&) const = 0; + virtual void getDrifts(RealType tau, const std::vector& qf, MCCoords&) const = 0; virtual bool parseXML(xmlNodePtr cur) { return true; } diff --git a/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.cpp b/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.cpp index 82422eb4c6..d1c0c861d8 100644 --- a/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.cpp +++ b/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.cpp @@ -82,11 +82,19 @@ void DriftModifierUNR::getDrift(RealType tau, const ComplexType& qf, ParticleSet #endif } -void DriftModifierUNR::getDrifts(RealType tau, const std::vector& qf, std::vector& drift) const +void DriftModifierUNR::getDrifts(RealType tau, const std::vector& qf, MCCoords& drift) const { for (int i = 0; i < qf.size(); ++i) { - getDrift(tau, qf[i], drift[i]); + getDrift(tau, qf[i], drift.rs[i]); + } +} + +void DriftModifierUNR::getDrifts(RealType tau, const std::vector& qf, MCCoords& drift) const +{ + for (int i = 0; i < qf.size(); ++i) + { + getDrift(tau, qf[i], drift.rs[i]); } } diff --git a/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.h b/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.h index 64a578007b..807753dfb9 100644 --- a/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.h +++ b/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.h @@ -14,6 +14,7 @@ #define QMCPLUSPLUS_DRIFTMODIFIER_UNR_H #include "QMCDrivers/GreenFunctionModifiers/DriftModifierBase.h" +#include "QMCDrivers/MCCoords.hpp" namespace qmcplusplus { @@ -23,8 +24,9 @@ class DriftModifierUNR : public DriftModifierBase using RealType = QMCTraits::RealType; using PosType = 
QMCTraits::PosType; - void getDrifts(RealType tau, const std::vector& qf, std::vector&) const final; - + void getDrifts(RealType tau, const std::vector& qf, MCCoords&) const final; + void getDrifts(RealType tau, const std::vector& qf, MCCoords&) const final; + void getDrift(RealType tau, const GradType& qf, PosType& drift) const final; void getDrift(RealType tau, const ComplexType& qf, ParticleSet::Scalar_t& drift) const final; @@ -38,6 +40,7 @@ class DriftModifierUNR : public DriftModifierBase RealType a_; }; + } // namespace qmcplusplus #endif diff --git a/src/QMCDrivers/QMCDriverInput.h b/src/QMCDrivers/QMCDriverInput.h index b311f12a3e..08c545d97b 100644 --- a/src/QMCDrivers/QMCDriverInput.h +++ b/src/QMCDrivers/QMCDriverInput.h @@ -71,6 +71,7 @@ class QMCDriverInput IndexType steps_between_samples_ = 1; IndexType samples_per_thread_ = 0; RealType tau_ = 0.1; + RealType spin_mass_ = 1.0; // call recompute at the end of each block in the full/mixed precision case. IndexType blocks_between_recompute_ = std::is_same::value ? 0 : 1; bool append_run_ = false; @@ -113,6 +114,7 @@ class QMCDriverInput IndexType get_steps_between_samples() const { return steps_between_samples_; } IndexType get_samples_per_thread() const { return samples_per_thread_; } RealType get_tau() const { return tau_; } + RealType get_spin_mass() const { return spin_mass_; } IndexType get_blocks_between_recompute() const { return blocks_between_recompute_; } bool get_append_run() const { return append_run_; } input::PeriodStride get_walker_dump_period() const { return walker_dump_period_; } diff --git a/src/QMCDrivers/QMCDriverNew.cpp b/src/QMCDrivers/QMCDriverNew.cpp index a1216982a4..0737c5a5cb 100644 --- a/src/QMCDrivers/QMCDriverNew.cpp +++ b/src/QMCDrivers/QMCDriverNew.cpp @@ -159,7 +159,7 @@ void QMCDriverNew::startup(xmlNodePtr cur, const QMCDriverNew::AdjustedWalkerCou population_.redistributeWalkers(crowds_); // Once they are created move contexts can be created. 
- createRngsStepContexts(crowds_.size(), population_.get_golden_electrons()->isSpinor()); + createRngsStepContexts(crowds_.size()); } /** QMCDriverNew ignores h5name if you want to read and h5 config you have to explicitly @@ -279,15 +279,9 @@ void QMCDriverNew::makeLocalWalkers(IndexType nwalkers, * This is used instead of actually passing number of threads/crowds * controlling threads all over RandomNumberControl. */ -void QMCDriverNew::createRngsStepContexts(int num_crowds, bool spin_coords) +void QMCDriverNew::createRngsStepContexts(int num_crowds) { - if (spin_coords) - step_contexts_ = std::vector>>{}; - else - step_contexts_ = std::vector>>{}; - - std::visit([num_crowds](auto& step_con) { step_con.resize(num_crowds); }, step_contexts_); - + step_contexts_.resize(num_crowds); Rng.resize(num_crowds); if (RandomNumberControl::Children.size() == 0) @@ -300,21 +294,19 @@ void QMCDriverNew::createRngsStepContexts(int num_crowds, bool spin_coords) for (int i = 0; i < num_crowds; ++i) { Rng[i].reset(RandomNumberControl::Children[i].release()); - std::visit([&](auto& step_contexts) { - step_contexts[i] = std::make_unique::type::value_type::element_type>(crowds_[i]->size(), population_.get_num_particles(), population_.get_particle_group_indexes(), *(Rng[i]));}, step_contexts_); + step_contexts_[i] = std::make_unique(crowds_[i]->size(), population_.get_num_particles(), population_.get_particle_group_indexes(), *(Rng[i])); } } -template void QMCDriverNew::initialLogEvaluation(int crowd_id, UPtrVector& crowds, - CONTEXTSFORSTEPS& context_for_steps) + UPtrVector& step_contexts) { Crowd& crowd = *(crowds[crowd_id]); if (crowd.size() == 0) return; - crowd.setRNGForHamiltonian(context_for_steps[crowd_id]->get_random_gen()); + crowd.setRNGForHamiltonian(step_contexts[crowd_id]->get_random_gen()); auto& ps_dispatcher = crowd.dispatchers_.ps_dispatcher_; auto& twf_dispatcher = crowd.dispatchers_.twf_dispatcher_; auto& ham_dispatcher = crowd.dispatchers_.ham_dispatcher_; @@ 
-580,7 +572,4 @@ void QMCDriverNew::checkLogAndGL(Crowd& crowd, const std::string_view location) throw std::runtime_error(std::string("checkLogAndGL failed at ") + std::string(location) + std::string("\n")); } -template void QMCDriverNew::initialLogEvaluation(int crowd_id, UPtrVector& crowds, SpinorContexts& step_contexts); -template void QMCDriverNew::initialLogEvaluation(int crowd_id, UPtrVector& crowds, SpinSymContexts& step_contexts); - } // namespace qmcplusplus diff --git a/src/QMCDrivers/QMCDriverNew.h b/src/QMCDrivers/QMCDriverNew.h index 6c5e8c2e46..8773337c9a 100644 --- a/src/QMCDrivers/QMCDriverNew.h +++ b/src/QMCDrivers/QMCDriverNew.h @@ -75,11 +75,6 @@ class QMCDriverNew : public QMCDriverInterface, public MPIObjectBase using RealType = QMCTraits::RealType; using IndexType = QMCTraits::IndexType; using FullPrecRealType = QMCTraits::FullPrecRealType; - using SpinorContext = UPtr>; - using SpinSymContext = UPtr>; - using SpinorContexts = std::vector; - using SpinSymContexts = std::vector; - using ContextsForStepsVar = std::variant; /** separate but similar to QMCModeEnum * @@ -183,7 +178,7 @@ class QMCDriverNew : public QMCDriverInterface, public MPIObjectBase void add_H_and_Psi(QMCHamiltonian* h, TrialWaveFunction* psi) override{}; - void createRngsStepContexts(int num_crowds, bool spinor_coords); + void createRngsStepContexts(int num_crowds); void putWalkers(std::vector& wset) override; @@ -234,8 +229,7 @@ class QMCDriverNew : public QMCDriverInterface, public MPIObjectBase */ void startup(xmlNodePtr cur, const QMCDriverNew::AdjustedWalkerCounts& awc); - template - static void initialLogEvaluation(int crowd_id, UPtrVector& crowds, CONTEXTSFORSTEPS& step_context); + static void initialLogEvaluation(int crowd_id, UPtrVector& crowds, UPtrVector& step_contexts); /** should be set in input don't see a reason to set individually * @param pbyp if true, use particle-by-particle update @@ -387,7 +381,7 @@ class QMCDriverNew : public QMCDriverInterface, public 
MPIObjectBase /** Per crowd move contexts, this is where the DistanceTables etc. reside */ - ContextsForStepsVar step_contexts_; + UPtrVector step_contexts_; ///Random number generators UPtrVector Rng; @@ -437,9 +431,6 @@ class QMCDriverNew : public QMCDriverInterface, public MPIObjectBase friend class qmcplusplus::testing::QMCDriverNewTestWrapper; }; -extern template void QMCDriverNew::initialLogEvaluation(int crowd_id, UPtrVector& crowds, SpinorContexts& step_contexts); -extern template void QMCDriverNew::initialLogEvaluation(int crowd_id, UPtrVector& crowds, SpinSymContexts& step_contexts); - } // namespace qmcplusplus #endif diff --git a/src/QMCDrivers/VMC/VMCBatched.cpp b/src/QMCDrivers/VMC/VMCBatched.cpp index f13570a85b..146cdcdc14 100644 --- a/src/QMCDrivers/VMC/VMCBatched.cpp +++ b/src/QMCDrivers/VMC/VMCBatched.cpp @@ -35,11 +35,11 @@ VMCBatched::VMCBatched(const ProjectData& project_data, collect_samples_(false) {} -template +template void VMCBatched::advanceWalkers(const StateForThread& sft, Crowd& crowd, QMCDriverNew::DriverTimers& timers, - CFS& step_context, + ContextForSteps& step_context, bool recompute, bool accumulate_this_step) { @@ -84,60 +84,83 @@ void VMCBatched::advanceWalkers(const StateForThread& sft, for (int sub_step = 0; sub_step < sft.qmcdrv_input.get_sub_steps(); sub_step++) { //This generates an entire steps worth of deltas. 
- step_context.nextDeltas(num_walkers * sft.population.get_num_particles()); + std::size_t num_deltas = num_walkers * sft.population.get_num_particles(); + auto deltas = generateDeltas(step_context.get_random_gen(), num_deltas); // up and down electrons are "species" within qmpack for (int ig = 0; ig < step_context.get_num_groups(); ++ig) //loop over species { - RealType tauovermass = sft.qmcdrv_input.get_tau() * sft.population.get_ptclgrp_inv_mass()[ig]; - RealType oneover2tau = 0.5 / (tauovermass); - RealType sqrttau = std::sqrt(tauovermass); + auto createTaus = [&](int ig) -> Taus { + if constexpr (std::is_same>::value) + return Taus(sft.qmcdrv_input.get_tau(), sft.population.get_ptclgrp_inv_mass()[ig], + sft.qmcdrv_input.get_spin_mass()); + else + return Taus(sft.qmcdrv_input.get_tau(), sft.population.get_ptclgrp_inv_mass()[ig]); + }; + + Taus taus = createTaus(ig); twf_dispatcher.flex_prepareGroup(walker_twfs, walker_elecs, ig); - int start_index = step_context.getPtclGroupStart(ig); - int end_index = step_context.getPtclGroupEnd(ig); + int start_index = step_context.getPtclGroupStart(ig); + int end_index = step_context.getPtclGroupEnd(ig); + std::size_t delta_offset = 0; for (int iat = start_index; iat < end_index; ++iat) { // step_context.deltaRsBegin returns an iterator to a flat series of PosTypes // fastest in walkers then particles - auto delta_r_start = step_context.deltasBegin().irs + iat * num_walkers; - auto delta_r_end = delta_r_start + num_walkers; - + auto delta_start = delta_offset + iat * num_walkers; + auto delta_end = delta_start + num_walkers; + MCCOORDS drifts; + drifts.resize(num_walkers); // one drift per walker for particle iat; getDrifts fills qf.size() == num_walkers entries if (use_drift) { twf_dispatcher.flex_evalGrad(walker_twfs, walker_elecs, iat, grads_now); - sft.drift_modifier.getDrifts(tauovermass, grads_now, drifts); + sft.drift_modifier.getDrifts(taus.tauovermass, grads_now, drifts); - std::transform(drifts.begin(), drifts.end(), delta_r_start, drifts.begin(), - [sqrttau](const PosType& drift, const 
PosType& delta_r) { - return drift + (sqrttau * delta_r); - }); + for (std::size_t ip = delta_start; ip < delta_end; ++ip) + { + drifts.rs[ip - delta_start] = drifts.rs[ip - delta_start] + (taus.sqrttau * deltas.rs[ip]); // ip indexes the flat deltas; drifts is per walker + if constexpr (std::is_same>::value) + { + drifts.spins[ip - delta_start] = drifts.spins[ip - delta_start] + taus.spin_sqrttau * deltas.spins[ip]; + } + } } else { - std::transform(delta_r_start, delta_r_end, drifts.begin(), - [sqrttau](const PosType& delta_r) { return sqrttau * delta_r; }); + for (std::size_t ip = delta_start; ip < delta_end; ++ip) + { + drifts.rs[ip - delta_start] = taus.sqrttau * deltas.rs[ip]; // no drift: pure diffusion move, as in the replaced transform + if constexpr (std::is_same>::value) + { + drifts.spins[ip - delta_start] = taus.spin_sqrttau * deltas.spins[ip]; + } + } } - ps_dispatcher.flex_makeMove(walker_elecs, iat, drifts); + ps_dispatcher.flex_makeMove(walker_elecs, iat, drifts.rs); // This is inelegant if (use_drift) { twf_dispatcher.flex_calcRatioGrad(walker_twfs, walker_elecs, iat, ratios, grads_new); - std::transform(delta_r_start, delta_r_end, log_gf.begin(), - [](const PosType& delta_r) { return mhalf * dot(delta_r, delta_r); }); - sft.drift_modifier.getDrifts(tauovermass, grads_new, drifts); + for (std::size_t ip = delta_start; ip < delta_end; ++ip) + { + constexpr RealType mhalf(-0.5); + log_gf[ip - delta_start] = mhalf * dot(deltas.rs[ip], deltas.rs[ip]); + } - std::transform(crowd.beginElectrons(), crowd.endElectrons(), drifts.begin(), drifts.begin(), + sft.drift_modifier.getDrifts(taus.tauovermass, grads_new, drifts); + + std::transform(crowd.beginElectrons(), crowd.endElectrons(), drifts.rs.begin(), drifts.rs.begin(), [iat](const ParticleSet& elecs, const PosType& drift) { return elecs.R[iat] - elecs.getActivePos() - drift; }); - std::transform(drifts.begin(), drifts.end(), log_gb.begin(), - [oneover2tau](const PosType& drift) { return -oneover2tau * dot(drift, drift); }); + std::transform(drifts.rs.begin(), drifts.rs.end(), log_gb.begin(), + [taus](const PosType& drift) { return -taus.oneover2tau * dot(drift, drift); }); } 
else { @@ -220,12 +243,11 @@ void VMCBatched::advanceWalkers(const StateForThread& sft, /** Thread body for VMC step * */ -template void VMCBatched::runVMCStep(int crowd_id, const StateForThread& sft, DriverTimers& timers, - CFS& context_for_steps, - std::vector>& crowds) + UPtrVector& context_for_steps, + UPtrVector& crowds) { Crowd& crowd = *(crowds[crowd_id]); crowd.setRNGForHamiltonian(context_for_steps[crowd_id]->get_random_gen()); @@ -235,7 +257,12 @@ void VMCBatched::runVMCStep(int crowd_id, const bool recompute_this_step = (sft.is_recomputing_block && (step + 1) == max_steps); // For VMC we don't call this method for warmup steps. const bool accumulate_this_step = true; - advanceWalkers(sft, crowd, timers, *context_for_steps[crowd_id], recompute_this_step, accumulate_this_step); + if (sft.population.get_golden_electrons()->isSpinor()) + advanceWalkers>(sft, crowd, timers, *context_for_steps[crowd_id], + recompute_this_step, accumulate_this_step); + else + advanceWalkers>(sft, crowd, timers, *context_for_steps[crowd_id], recompute_this_step, + accumulate_this_step); } void VMCBatched::process(xmlNodePtr node) @@ -263,10 +290,6 @@ int VMCBatched::compute_samples_per_rank(const QMCDriverInput& qmcdriver_input, return nblocks * nsteps * local_walkers; } -bool VMCBatched::run() -{ - return std::visit([&](auto& var) -> bool { return run_impl(var); }, step_contexts_); -} /** Runs the actual VMC section * * Dependent on base class state machine @@ -279,8 +302,7 @@ bool VMCBatched::run() * If does consider giving more to the thread by value that should * end up thread local. 
(I think) */ -template -bool VMCBatched::run_impl(CONTEXTFORSTEPS& step_contexts) +bool VMCBatched::run() { IndexType num_blocks = qmcdriver_input_.get_max_blocks(); //start the main estimator @@ -295,8 +317,7 @@ bool VMCBatched::run_impl(CONTEXTFORSTEPS& step_contexts) { // walker initialization ScopedTimer local_timer(timers_.init_walkers_timer); ParallelExecutor<> section_start_task; - section_start_task(crowds_.size(), initialLogEvaluation, std::ref(crowds_), - step_contexts); + section_start_task(crowds_.size(), initialLogEvaluation, std::ref(crowds_), step_contexts_); } print_mem("VMCBatched after initialLogEvaluation", app_summary()); @@ -311,14 +332,16 @@ bool VMCBatched::run_impl(CONTEXTFORSTEPS& step_contexts) Crowd& crowd = *(crowds[crowd_id]); const bool recompute = false; const bool accumulate_this_step = false; - advanceWalkers(sft, crowd, timers, *context_for_steps[crowd_id], recompute, accumulate_this_step); + if(sft.population.get_golden_electrons()->isSpinor()) + advanceWalkers>(sft, crowd, timers, *context_for_steps[crowd_id], recompute, accumulate_this_step); + else + advanceWalkers>(sft, crowd, timers, *context_for_steps[crowd_id], recompute, accumulate_this_step); }; for (int step = 0; step < qmcdriver_input_.get_warmup_steps(); ++step) { ScopedTimer local_timer(timers_.run_steps_timer); - crowd_task(crowds_.size(), runWarmupStep, vmc_state, std::ref(timers_), step_contexts, - std::ref(crowds_)); + crowd_task(crowds_.size(), runWarmupStep, vmc_state, std::ref(timers_), step_contexts_, std::ref(crowds_)); } app_log() << "Warm-up is completed!" 
<< std::endl; @@ -342,7 +365,7 @@ bool VMCBatched::run_impl(CONTEXTFORSTEPS& step_contexts) { ScopedTimer local_timer(timers_.run_steps_timer); vmc_state.step = step; - crowd_task(crowds_.size(), runVMCStep, vmc_state, timers_, step_contexts, std::ref(crowds_)); + crowd_task(crowds_.size(), runVMCStep, vmc_state, timers_, step_contexts_, std::ref(crowds_)); if (collect_samples_) { @@ -415,7 +438,4 @@ void VMCBatched::enable_sample_collection() app_log() << " total samples = " << total_samples << '\n'; } -template bool VMCBatched::run_impl(QMCDriverNew::SpinSymContexts& ssc); -template bool VMCBatched::run_impl(QMCDriverNew::SpinorContexts& ssc); - } // namespace qmcplusplus diff --git a/src/QMCDrivers/VMC/VMCBatched.h b/src/QMCDrivers/VMC/VMCBatched.h index a507832aaf..70098dbe9d 100644 --- a/src/QMCDrivers/VMC/VMCBatched.h +++ b/src/QMCDrivers/VMC/VMCBatched.h @@ -50,6 +50,7 @@ class VMCBatched : public QMCDriverNew const VMCDriverInput& vmcdrv_input; const DriftModifierBase& drift_modifier; const MCPopulation& population; + const MCCoordsTypes mc_coord_type; IndexType recalculate_properties_period; IndexType step = -1; bool is_recomputing_block = false; @@ -58,7 +59,7 @@ class VMCBatched : public QMCDriverNew const VMCDriverInput& vmci, DriftModifierBase& drift_mod, MCPopulation& pop) - : qmcdrv_input(qmci), vmcdrv_input(vmci), drift_modifier(drift_mod), population(pop) + : qmcdrv_input(qmci), vmcdrv_input(vmci), drift_modifier(drift_mod), population(pop), mc_coord_type(population.get_golden_electrons()->isSpinor() ? MCCoordsTypes::RSSPINS : MCCoordsTypes::RS) {} }; @@ -75,29 +76,26 @@ class VMCBatched : public QMCDriverNew bool run() override; - template - bool run_impl(CFS& cfs); /** Refactor of VMCUpdatePbyP in crowd context * * MCWalkerConfiguration layer removed. 
* Obfuscation of state changes via buffer and MCWalkerconfiguration require this be tested well */ - template + template static void advanceWalkers(const StateForThread& sft, Crowd& crowd, DriverTimers& timers, - CFS& move_context, + ContextForSteps& move_context, bool recompute, bool accumulate_this_step); // This is the task body executed at crowd scope // it does not have access to object member variables by design - template static void runVMCStep(int crowd_id, const StateForThread& sft, DriverTimers& timers, - CFS& context_for_steps, - std::vector>& crowds); + UPtrVector& context_for_steps, + UPtrVector& crowds); /** transitional interface on the way to better walker count adjustment handling. * returns a closure taking walkers per rank and accomplishing what calc_default_local_walkers does. @@ -125,7 +123,6 @@ class VMCBatched : public QMCDriverNew /// Copy operator (disabled). VMCBatched& operator=(const VMCBatched&) = delete; - /// Storage for samples (later used in optimizer) SampleStack& samples_; /// Sample collection flag @@ -139,9 +136,6 @@ class VMCBatched : public QMCDriverNew extern std::ostream& operator<<(std::ostream& o_stream, const VMCBatched& vmc_batched); -extern template bool VMCBatched::run_impl(QMCDriverNew::SpinSymContexts& ssc); -extern template bool VMCBatched::run_impl(QMCDriverNew::SpinorContexts& ssc); - } // namespace qmcplusplus #endif From 0492ba2bb991019cdc36f1e9f91c5ce49cb062df Mon Sep 17 00:00:00 2001 From: Peter Doak Date: Wed, 26 Jan 2022 12:16:43 -0500 Subject: [PATCH 4/4] was excited, obligatory missing files --- src/QMCDrivers/MCCoords.cpp | 31 ++++++++++ src/QMCDrivers/MCCoords.hpp | 118 ++++++++++++++++++++++++++++++++++++ 2 files changed, 149 insertions(+) create mode 100644 src/QMCDrivers/MCCoords.cpp create mode 100644 src/QMCDrivers/MCCoords.hpp diff --git a/src/QMCDrivers/MCCoords.cpp b/src/QMCDrivers/MCCoords.cpp new file mode 100644 index 0000000000..1de9a4abcb --- /dev/null +++ b/src/QMCDrivers/MCCoords.cpp @@ 
-0,0 +1,31 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2022 developers. +// +// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#include "MCCoords.hpp" + +namespace qmcplusplus +{ +template +void MCCoords::resize(const std::size_t size) +{ + rs.resize(size); +} + +// template<> +// void MCCoords::resize(const std::size_t size) +// { +// rs.resize(size); +// spins.resize(size); +// } + +template struct MCCoords; +template struct MCCoords; +} diff --git a/src/QMCDrivers/MCCoords.hpp b/src/QMCDrivers/MCCoords.hpp new file mode 100644 index 0000000000..d14e455225 --- /dev/null +++ b/src/QMCDrivers/MCCoords.hpp @@ -0,0 +1,118 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2022 developers. +// +// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_MCCOORDS_HPP +#define QMCPLUSPLUS_MCCOORDS_HPP + +#include "Configuration.h" +#include "type_traits/complex_help.hpp" +#include "ParticleBase/RandomSeqGenerator.h" + +#include + +namespace qmcplusplus +{ + +enum class MCCoordsTypes +{ + RS, + RSSPINS +}; + +template +struct MCCoords; + +template +struct MCCoords +{ + static constexpr MCCoordsTypes mct = MCT; + // This cleans up some other code. 
+ void resize(const std::size_t size); + std::vector rs; +}; + +template<> +struct MCCoords +{ + static constexpr MCCoordsTypes mct = MCCoordsTypes::RSSPINS; + // This cleans up some other code. + void resize(const std::size_t size) + { + rs.resize(size); + spins.resize(size); + } + std::vector rs; + std::vector> spins; +}; + +template +struct MCCIt +{ + std::vector::iterator irs; +}; + +template<> +struct MCCIt +{ + std::vector::iterator irs; + std::vector>::iterator spins; +}; + +template +struct Taus +{ + Real tauovermass; // tau * grp_inv_mass + Real oneover2tau; // 0.5 / tauovermass + Real sqrttau ; // sqrt(tauovermass) + + Taus(Real tau, Real grp_inv_mass) { + tauovermass = tau * grp_inv_mass; // assign the data members; a leading type name here would declare locals that shadow them and leave the members unset + oneover2tau = 0.5 / (tauovermass); + sqrttau = std::sqrt(tauovermass); + } +}; + +template +struct Taus : public Taus +{ + using Base = Taus; + Real spin_tauovermass; // Base::tauovermass / spin_mass + Real spin_oneover2tau; // 0.5 / spin_tauovermass + Real spin_sqrttau ; // sqrt(spin_tauovermass) + Taus(Real tau, Real grp_inv_mass, Real spin_mass) : Base(tau, grp_inv_mass) + { + spin_tauovermass = Base::tauovermass / spin_mass; // reads the value stored by the Base constructor; assigns the member rather than a shadowing local + spin_oneover2tau = 0.5 / (spin_tauovermass); + spin_sqrttau = std::sqrt(spin_tauovermass); + } +}; + + +template +MCC generateDeltas(RNG rng, size_t num_rs) // NOTE(review): rng is received by value; if RNG deduces to a non-reference engine type the caller's generator is not advanced by this call, confirm that is intended +{ + MCC mc_coords; + mc_coords.rs.resize(num_rs); + makeGaussRandomWithEngine(mc_coords.rs, rng); + // hate to repeat this pattern, this should never resize. + if constexpr (std::is_same>::value) + { + mc_coords.spins.resize(num_rs); + makeGaussRandomWithEngine(mc_coords.spins, rng); + } + return mc_coords; +} + +extern template struct MCCoords; +extern template struct MCCoords; +} // namespace qmcplusplus + +#endif