From aa272777f5dd86da274b66c5415a8e13407be041 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Sat, 18 May 2019 18:12:54 -0500 Subject: [PATCH 01/29] Add DiracDeterminant from QMCPACK. --- src/Drivers/CMakeLists.txt | 2 +- src/Numerics/BlasThreadingEnv.h | 59 +++++ src/QMCWaveFunctions/DelayedUpdate.h | 236 ++++++++++++++++++ src/QMCWaveFunctions/DiracDeterminant.cpp | 287 ++++++++++++++++++++++ src/QMCWaveFunctions/DiracDeterminant.h | 155 ++++++++++++ src/QMCWaveFunctions/DiracMatrix.h | 195 +++++++++++++++ src/QMCWaveFunctions/WaveFunction.cpp | 8 +- src/Utilities/Configuration.h | 58 +++-- src/Utilities/QMCTypes.h | 50 ++++ src/Utilities/SIMD/algorithm.hpp | 18 ++ src/Utilities/scalar_traits.h | 50 ++++ 11 files changed, 1094 insertions(+), 24 deletions(-) create mode 100644 src/Numerics/BlasThreadingEnv.h create mode 100644 src/QMCWaveFunctions/DelayedUpdate.h create mode 100644 src/QMCWaveFunctions/DiracDeterminant.cpp create mode 100644 src/QMCWaveFunctions/DiracDeterminant.h create mode 100644 src/QMCWaveFunctions/DiracMatrix.h create mode 100644 src/Utilities/QMCTypes.h create mode 100644 src/Utilities/scalar_traits.h diff --git a/src/Drivers/CMakeLists.txt b/src/Drivers/CMakeLists.txt index 6dd514cf4..d68def337 100644 --- a/src/Drivers/CMakeLists.txt +++ b/src/Drivers/CMakeLists.txt @@ -8,7 +8,7 @@ FOREACH(p ${ESTEST}) TARGET_LINK_LIBRARIES(${p} qmcbase qmcutil ${QMC_UTIL_LIBS} ${MPI_LIBRARY}) ENDFOREACH(p ${ESTEST}) -ADD_LIBRARY(miniwfs ../QMCWaveFunctions/WaveFunction.cpp ../QMCWaveFunctions/SPOSet_builder.cpp) +ADD_LIBRARY(miniwfs ../QMCWaveFunctions/WaveFunction.cpp ../QMCWaveFunctions/SPOSet_builder.cpp ../QMCWaveFunctions/DiracDeterminant.cpp) SET(DRIVERS miniqmc miniqmc_sync_move) diff --git a/src/Numerics/BlasThreadingEnv.h b/src/Numerics/BlasThreadingEnv.h new file mode 100644 index 000000000..f71fa864b --- /dev/null +++ b/src/Numerics/BlasThreadingEnv.h @@ -0,0 +1,59 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2019 QMCPACK developers. +// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_BLAS_THREADING_ENV_H +#define QMCPLUSPLUS_BLAS_THREADING_ENV_H + +#include "config.h" +#include "Utilities/Configuration.h" +#ifdef HAVE_MKL +#include +#endif + +namespace qmcplusplus +{ +/** service class for explicitly managing the threading of BLAS/LAPACK calls from OpenMP parallel region + * + * intended to use only locally around heavy calls. + */ +class BlasThreadingEnv +{ + int old_state; + +public: + /// Constructor, obtains the number of threads at the next level + BlasThreadingEnv(int num_threads) + { +#ifdef HAVE_MKL + old_state = mkl_set_num_threads_local(num_threads); +#endif + } + + ~BlasThreadingEnv() + { +#ifdef HAVE_MKL + mkl_set_num_threads_local(old_state); +#endif + } + + static bool NestedThreadingSupported() + { +#ifdef HAVE_MKL + return true; +#else + return false; +#endif + } +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/DelayedUpdate.h b/src/QMCWaveFunctions/DelayedUpdate.h new file mode 100644 index 000000000..dc2e6ab0b --- /dev/null +++ b/src/QMCWaveFunctions/DelayedUpdate.h @@ -0,0 +1,236 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2019 QMCPACK developers. +// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_DELAYED_UPDATE_H +#define QMCPLUSPLUS_DELAYED_UPDATE_H + +#include "config.h" +#include +#include +#include "Numerics/OhmmsBlas.h" +#include "QMCWaveFunctions/DiracMatrix.h" +#include "Numerics/BlasThreadingEnv.h" + +namespace qmcplusplus +{ + +/** implements delayed update on CPU using BLAS + * @tparam T base precision for most computation + * @tparam T_FP high precision for matrix inversion, T_FP >= T + */ +template +class DelayedUpdate +{ + /// define real type + using real_type = typename scalar_traits::real_type; + /// orbital values of delayed electrons + Matrix U; + /// rows of Ainv corresponding to delayed electrons + Matrix V; + /// Matrix inverse of B, at maximum KxK + Matrix Binv; + /// scratch space, used during inverse update + Matrix tempMat; + /// temporal scratch space used by SM-1 + Vector temp; + /// new column of B + Vector p; + /// list of delayed electrons + std::vector delay_list; + /// current number of delays, increase one for each acceptance, reset to 0 after updating Ainv + int delay_count; + /// matrix inversion engine + DiracMatrix detEng; + +public: + /// default constructor + DelayedUpdate() : delay_count(0) {} + + /** resize the internal storage + * @param norb number of electrons/orbitals + * @param delay, maximum delay 0& logdetT, Matrix& Ainv, real_type& LogValue, real_type& PhaseValue) + { + detEng.invert_transpose(logdetT, Ainv, LogValue, PhaseValue); + // safe mechanism + delay_count = 0; + } + + /** initialize internal objects when Ainv is refreshed + * @param Ainv inverse matrix + */ + inline void initializeInv(const Matrix& Ainv) + { + // safe mechanism + delay_count = 0; + } + + /** compute the row of up-to-date Ainv + * @param Ainv inverse matrix + * @param rowchanged the row id corresponding to the proposed electron + */ + template + inline void getInvRow(const Matrix& Ainv, int rowchanged, VVT& invRow) + { + if (delay_count == 0) + { + // Ainv is fresh, directly access Ainv + std::copy_n(Ainv[rowchanged], invRow.size(), invRow.data()); + return; + } + const T cone(1); + const T czero(0); + const int norb = Ainv.rows(); + const int lda_Binv = Binv.cols(); + // save Ainv[rowchanged] to invRow + std::copy_n(Ainv[rowchanged], norb, invRow.data()); + // multiply V (NxK) Binv(KxK) U(KxN) invRow right to the left + BLAS::gemv('T', norb, delay_count, cone, U.data(), norb, invRow.data(), 1, czero, p.data(), 1); + BLAS::gemv('N', delay_count, delay_count, cone, Binv.data(), lda_Binv, p.data(), 1, czero, Binv[delay_count], 1); + BLAS::gemv('N', norb, delay_count, -cone, V.data(), norb, Binv[delay_count], 1, cone, invRow.data(), 1); + } + + /** accept a move with the update delayed + * @param Ainv inverse matrix + * @param rowchanged the row id corresponding to the proposed electron + * @param psiV new orbital values + * + * Before delay_count reaches the maximum delay, only Binv is updated with a recursive algorithm + */ + template + inline void acceptRow(Matrix& Ainv, int rowchanged, const VVT& psiV) + { + const T cminusone(-1); + const T czero(0); + const int norb = Ainv.rows(); + const int lda_Binv = Binv.cols(); + std::copy_n(Ainv[rowchanged], norb, V[delay_count]); + std::copy_n(psiV.data(), norb, U[delay_count]); + delay_list[delay_count] = rowchanged; + // the new Binv is [[X Y] [Z x]] + BLAS::gemv('T', norb, delay_count + 1, cminusone, V.data(), norb, psiV.data(), 1, czero, p.data(), 1); + // x + T y = -p[delay_count]; + for (int i = 0; i < delay_count; i++) + y += Binv[delay_count][i] * p[i]; + Binv[delay_count][delay_count] = y = T(1) / y; + // Y + BLAS::gemv('T', delay_count, delay_count, y, Binv.data(), lda_Binv, p.data(), 1, czero, Binv.data() + delay_count, + lda_Binv); + // X + BLAS::ger(delay_count, delay_count, cminusone, Binv[delay_count], 1, Binv.data() + delay_count, lda_Binv, + Binv.data(), lda_Binv); + // Z + for (int i = 0; i < delay_count; i++) + Binv[delay_count][i] *= -y; + delay_count++; + // update Ainv when maximal delay is reached + if (delay_count == lda_Binv) + updateInvMat(Ainv); + } + + /** update the full Ainv and reset delay_count + * @param Ainv inverse matrix + */ + inline void updateInvMat(Matrix& Ainv) + { + if (delay_count == 0) + return; + // update the inverse matrix + const T cone(1); + const T czero(0); + const int norb = Ainv.rows(); + if (delay_count == 1) + { + // this is a special case invoking the Fahy's variant of Sherman-Morrison update. + // Only use the first norb elements of tempMat as a temporal array + BLAS::gemv('T', norb, norb, cone, Ainv.data(), norb, U[0], 1, czero, temp.data(), 1); + temp[delay_list[0]] -= cone; + BLAS::ger(norb, norb, -Binv[0][0], V[0], 1, temp.data(), 1, Ainv.data(), norb); + } + else + { + const int lda_Binv = Binv.cols(); + int num_threads_nested = getNumThreadsNested(); + // always use serial when norb is small or only one second level thread + bool use_serial(norb <= 256 || num_threads_nested == 1); + if (use_serial || BlasThreadingEnv::NestedThreadingSupported()) + { + // threading depends on BLAS + BlasThreadingEnv knob(use_serial ? 1 : num_threads_nested); + BLAS::gemm('T', 'N', delay_count, norb, norb, cone, U.data(), norb, Ainv.data(), norb, czero, tempMat.data(), + lda_Binv); + for (int i = 0; i < delay_count; i++) + tempMat(delay_list[i], i) -= cone; + BLAS::gemm('N', 'N', norb, delay_count, delay_count, cone, V.data(), norb, Binv.data(), lda_Binv, czero, + U.data(), norb); + BLAS::gemm('N', 'N', norb, norb, delay_count, -cone, U.data(), norb, tempMat.data(), lda_Binv, cone, + Ainv.data(), norb); + } + else + { + // manually threaded version of the above GEMM calls +#pragma omp parallel + { + const int block_size = getAlignedSize((norb + num_threads_nested - 1) / num_threads_nested); + int num_block = (norb + block_size - 1) / block_size; +#pragma omp for + for (int ix = 0; ix < num_block; ix++) + { + int x_offset = ix * block_size; + BLAS::gemm('T', 'N', delay_count, std::min(norb - x_offset, block_size), norb, cone, U.data(), norb, + Ainv[x_offset], norb, czero, tempMat[x_offset], lda_Binv); + } +#pragma omp master + for (int i = 0; i < delay_count; i++) + tempMat(delay_list[i], i) -= cone; +#pragma omp for + for (int iy = 0; iy < num_block; iy++) + { + int y_offset = iy * block_size; + BLAS::gemm('N', 'N', std::min(norb - y_offset, block_size), delay_count, delay_count, cone, + V.data() + y_offset, norb, Binv.data(), lda_Binv, czero, U.data() + y_offset, norb); + } +#pragma omp for collapse(2) nowait + for (int iy = 0; iy < num_block; iy++) + for (int ix = 0; ix < num_block; ix++) + { + int x_offset = ix * block_size; + int y_offset = iy * block_size; + BLAS::gemm('N', 'N', std::min(norb - y_offset, block_size), std::min(norb - x_offset, block_size), + delay_count, -cone, U.data() + y_offset, norb, tempMat[x_offset], lda_Binv, cone, + Ainv[x_offset] + y_offset, norb); + } + } + } + } + delay_count = 0; + } +}; +} // namespace qmcplusplus + +#endif // QMCPLUSPLUS_DELAYED_UPDATE_H diff --git a/src/QMCWaveFunctions/DiracDeterminant.cpp b/src/QMCWaveFunctions/DiracDeterminant.cpp new file mode 100644 index 000000000..a3215b70f --- /dev/null +++ b/src/QMCWaveFunctions/DiracDeterminant.cpp @@ -0,0 +1,287 @@ +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Bryan Clark, bclark@Princeton.edu, Princeton University +// Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign +// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + + +#include "QMCWaveFunctions/DiracDeterminant.h" +//#include "Numerics/DeterminantOperators.h" +#include "Numerics/OhmmsBlas.h" +//#include "Numerics/MatrixOperators.h" +//#include "simd/simd.hpp" + +namespace qmcplusplus +{ +/** constructor + *@param spos the single-particle orbital set + *@param first index of the first particle + */ +template +DiracDeterminant::DiracDeterminant(SPOSet* const spos, int first, int delay) + : ndelay(1), invRow_id(-1), + Phi(spos), + FirstIndex(first), + LastIndex(first + spos->size()), + ndelay(delay), + NumPtcls(spos->size()), + NumOrbitals(spos->size()), + UpdateTimer("DiracDeterminantBase::update", timer_level_fine), + RatioTimer("DiracDeterminantBase::ratio", timer_level_fine), + InverseTimer("DiracDeterminantBase::inverse", timer_level_fine), + BufferTimer("DiracDeterminantBase::buffer", timer_level_fine), + SPOVTimer("DiracDeterminantBase::spoval", timer_level_fine), + SPOVGLTimer("DiracDeterminantBase::spovgl", timer_level_fine) + { + registerTimers(); + resize(spos->size(), spos->size()); + } + +template +void DiracDeterminant::invertPsiM(const ValueMatrix_t& logdetT, ValueMatrix_t& invMat) +{ + InverseTimer.start(); + updateEng.invert_transpose(logdetT, invMat, LogValue, PhaseValue); + InverseTimer.stop(); +} + + +///reset the size: with the number of particles and number of orbtials +template +void DiracDeterminant::resize(int nel, int morb) +{ + int norb = morb; + if (norb <= 0) + norb = nel; // for morb == -1 (default) + updateEng.resize(norb, ndelay); + psiM.resize(nel, norb); + dpsiM.resize(nel, norb); + d2psiM.resize(nel, norb); + psiV.resize(norb); + invRow.resize(norb); + psiM_temp.resize(nel, norb); + LastIndex = FirstIndex + nel; + NumPtcls = nel; + NumOrbitals = norb; + + dpsiV.resize(NumOrbitals); + d2psiV.resize(NumOrbitals); +} + +template +typename DiracDeterminant::GradType DiracDeterminant::evalGrad(ParticleSet& P, int iat) +{ + const int WorkingIndex = iat - FirstIndex; + RatioTimer.start(); + invRow_id = WorkingIndex; + updateEng.getInvRow(psiM, WorkingIndex, invRow); + GradType g = simd::dot(invRow.data(), dpsiM[WorkingIndex], invRow.size()); + RatioTimer.stop(); + return g; +} + +template +typename DiracDeterminant::ValueType DiracDeterminant::ratioGrad(ParticleSet& P, + int iat, + GradType& grad_iat) +{ + SPOVGLTimer.start(); + Phi->evaluate(P, iat, psiV, dpsiV, d2psiV); + SPOVGLTimer.stop(); + RatioTimer.start(); + const int WorkingIndex = iat - FirstIndex; + UpdateMode = ORB_PBYP_PARTIAL; + GradType rv; + + // This is an optimization. + // check invRow_id against WorkingIndex to see if getInvRow() has been called already + // Some code paths call evalGrad before calling ratioGrad. + if (invRow_id != WorkingIndex) + { + invRow_id = WorkingIndex; + updateEng.getInvRow(psiM, WorkingIndex, invRow); + } + curRatio = simd::dot(invRow.data(), psiV.data(), invRow.size()); + grad_iat += ((RealType)1.0 / curRatio) * simd::dot(invRow.data(), dpsiV.data(), invRow.size()); + RatioTimer.stop(); + return curRatio; +} + +/** move was accepted, update the real container +*/ +template +void DiracDeterminant::acceptMove(ParticleSet& P, int iat) +{ + const int WorkingIndex = iat - FirstIndex; + PhaseValue += evaluatePhase(curRatio); + LogValue += std::log(std::abs(curRatio)); + UpdateTimer.start(); + updateEng.acceptRow(psiM, WorkingIndex, psiV); + // invRow becomes invalid after accepting a move + invRow_id = -1; + if (UpdateMode == ORB_PBYP_PARTIAL) + { + simd::copy(dpsiM[WorkingIndex], dpsiV.data(), NumOrbitals); + simd::copy(d2psiM[WorkingIndex], d2psiV.data(), NumOrbitals); + } + UpdateTimer.stop(); + curRatio = 1.0; +} + +template +void DiracDeterminant::completeUpdates() +{ + UpdateTimer.start(); + // invRow becomes invalid after updating the inverse matrix + invRow_id = -1; + updateEng.updateInvMat(psiM); + UpdateTimer.stop(); +} + +template +void DiracDeterminant::evaluateGL(ParticleSet& P, + ParticleSet::ParticleGradient_t& G, + ParticleSet::ParticleLaplacian_t& L, + bool fromscratch) +{ + if (UpdateMode == ORB_PBYP_RATIO) + { //need to compute dpsiM and d2psiM. Do not touch psiM! + SPOVGLTimer.start(); + Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_temp, dpsiM, d2psiM); + SPOVGLTimer.stop(); + } + + if (NumPtcls == 1) + { + ValueType y = psiM(0, 0); + GradType rv = y * dpsiM(0, 0); + G[FirstIndex] += rv; + L[FirstIndex] += y * d2psiM(0, 0) - dot(rv, rv); + } + else + { + for (size_t i = 0, iat = FirstIndex; i < NumPtcls; ++i, ++iat) + { + mValueType dot_temp = simd::dot(psiM[i], d2psiM[i], NumOrbitals); + mGradType rv = simd::dot(psiM[i], dpsiM[i], NumOrbitals); + G[iat] += rv; + L[iat] += dot_temp - dot(rv, rv); + } + } +} + +/** return the ratio only for the iat-th partcle move + * @param P current configuration + * @param iat the particle thas is being moved + */ +template +typename DiracDeterminant::ValueType DiracDeterminant::ratio(ParticleSet& P, int iat) +{ + UpdateMode = ORB_PBYP_RATIO; + const int WorkingIndex = iat - FirstIndex; + SPOVTimer.start(); + Phi->evaluate(P, iat, psiV); + SPOVTimer.stop(); + RatioTimer.start(); + // This is an optimization. + // check invRow_id against WorkingIndex to see if getInvRow() has been called + // This is intended to save redundant compuation in TM1 and TM3 + if (invRow_id != WorkingIndex) + { + invRow_id = WorkingIndex; + updateEng.getInvRow(psiM, WorkingIndex, invRow); + } + curRatio = simd::dot(invRow.data(), psiV.data(), invRow.size()); + RatioTimer.stop(); + return curRatio; +} + +/* Ye: to be recovered +template +void DiracDeterminant::evaluateRatios(VirtualParticleSet& VP, std::vector& ratios) +{ + SPOVTimer.start(); + const int WorkingIndex = VP.refPtcl - FirstIndex; + invRow_id = WorkingIndex; + updateEng.getInvRow(psiM, WorkingIndex, invRow); + Phi->evaluateDetRatios(VP, psiV, invRow, ratios); + SPOVTimer.stop(); +} +*/ + +/** Calculate the log value of the Dirac determinant for particles + *@param P input configuration containing N particles + *@param G a vector containing N gradients + *@param L a vector containing N laplacians + *@return the value of the determinant + * + *\f$ (first,first+nel). \f$ Add the gradient and laplacian + *contribution of the determinant to G(radient) and L(aplacian) + *for local energy calculations. + */ +template +typename DiracDeterminant::RealType DiracDeterminant::evaluateLog(ParticleSet& P, + ParticleSet::ParticleGradient_t& G, + ParticleSet::ParticleLaplacian_t& L) +{ + recompute(P); + + if (NumPtcls == 1) + { + ValueType y = psiM(0, 0); + GradType rv = y * dpsiM(0, 0); + G[FirstIndex] += rv; + L[FirstIndex] += y * d2psiM(0, 0) - dot(rv, rv); + } + else + { + for (int i = 0, iat = FirstIndex; i < NumPtcls; i++, iat++) + { + mGradType rv = simd::dot(psiM[i], dpsiM[i], NumOrbitals); + mValueType lap = simd::dot(psiM[i], d2psiM[i], NumOrbitals); + G[iat] += rv; + L[iat] += lap - dot(rv, rv); + } + } + return LogValue; +} + +template +void DiracDeterminant::recompute(ParticleSet& P) +{ + SPOVGLTimer.start(); + Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_temp, dpsiM, d2psiM); + SPOVGLTimer.stop(); + if (NumPtcls == 1) + { + //CurrentDet=psiM(0,0); + ValueType det = psiM_temp(0, 0); + psiM(0, 0) = RealType(1) / det; + LogValue = evaluateLogAndPhase(det, PhaseValue); + } + else + { + invertPsiM(psiM_temp, psiM); + } +} + +typedef QMCTraits::ValueType ValueType; +typedef QMCTraits::QTFull::ValueType mValueType; + +template class DiracDeterminant<>; +#if defined(ENABLE_CUDA) +template class DiracDeterminant>; +#endif + +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/DiracDeterminant.h b/src/QMCWaveFunctions/DiracDeterminant.h new file mode 100644 index 000000000..04141d50b --- /dev/null +++ b/src/QMCWaveFunctions/DiracDeterminant.h @@ -0,0 +1,155 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Bryan Clark, bclark@Princeton.edu, Princeton University +// Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign +// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + + +/**@file DiracDeterminant.h + * @brief Declaration of DiracDeterminant with a S(ingle)P(article)O(rbital)Set + */ +#ifndef QMCPLUSPLUS_DIRACDETERMINANT_H +#define QMCPLUSPLUS_DIRACDETERMINANT_H + +#include "QMCWaveFunctions/WaveFunctionComponent.h" +#include "QMCWaveFunctions/SPOSet.h" +#include "Utilities/NewTimer.h" +#include "QMCWaveFunctions/DelayedUpdate.h" +#if defined(ENABLE_CUDA) +#include "QMCWaveFunctions/DelayedUpdateCUDA.h" +#endif + +namespace qmcplusplus +{ +template> +class DiracDeterminant : public WaveFunctionComponent +{ +public: + using ValueVector_t = Vector; + using ValueMatrix_t = Matrix; + using GradVector_t = Vector; + using GradMatrix_t = Matrix; + + using mValueType = QMCTraits::QTFull::ValueType; + using ValueMatrix_hp_t = Matrix; + using mGradType = TinyVector; + + /** constructor + *@param spos the single-particle orbital set + *@param first index of the first particle + */ + DiracDeterminant(SPOSet* const spos, int first = 0, int delay = 1); + + // copy constructor and assign operator disabled + DiracDeterminant(const DiracDeterminant& s) = delete; + DiracDeterminant& operator=(const DiracDeterminant& s) = delete; + + ///invert psiM or its copies + void invertPsiM(const ValueMatrix_t& logdetT, ValueMatrix_t& invMat); + + void evaluateGL(ParticleSet& P, ParticleSet::ParticleGradient_t& G, ParticleSet::ParticleLaplacian_t& L, bool fromscratch = false); + + /** return the ratio only for the iat-th partcle move + * @param P current configuration + * @param iat the particle thas is being moved + */ + ValueType ratio(ParticleSet& P, int iat); + + /** compute multiple ratios for a particle move + void evaluateRatios(VirtualParticleSet& VP, std::vector& ratios); + */ + + ValueType ratioGrad(ParticleSet& P, int iat, GradType& grad_iat); + GradType evalGrad(ParticleSet& P, int iat); + + /** move was accepted, update the real container + */ + void acceptMove(ParticleSet& P, int iat); + void completeUpdates(); + + ///evaluate log of a determinant for a particle set + RealType evaluateLog(ParticleSet& P, ParticleSet::ParticleGradient_t& G, ParticleSet::ParticleLaplacian_t& L); + + void recompute(ParticleSet& P); + + /// psiM(j,i) \f$= \psi_j({\bf r}_i)\f$ + ValueMatrix_t psiM_temp; + + /// inverse transpose of psiM(j,i) \f$= \psi_j({\bf r}_i)\f$ + ValueMatrix_t psiM; + + /// dpsiM(i,j) \f$= \nabla_i \psi_j({\bf r}_i)\f$ + GradMatrix_t dpsiM; + + /// d2psiM(i,j) \f$= \nabla_i^2 \psi_j({\bf r}_i)\f$ + ValueMatrix_t d2psiM; + + /// value of single-particle orbital for particle-by-particle update + ValueVector_t psiV; + GradVector_t dpsiV; + ValueVector_t d2psiV; + + /// delayed update engine + DU_TYPE updateEng; + + /// the row of up-to-date inverse matrix + ValueVector_t invRow; + + /** row id correspond to the up-to-date invRow. [0 norb), invRow is ready; -1, invRow is not valid. + * This id is set after calling getInvRow indicating invRow has been prepared for the invRow_id row + * ratioGrad checks if invRow_id is consistent. If not, invRow needs to be recomputed. + * acceptMove and completeUpdates mark invRow invalid by setting invRow_id to -1 + */ + int invRow_id; + + ValueType curRatio; + +private: + /// Timers + NewTimer UpdateTimer, RatioTimer, InverseTimer, BufferTimer, SPOVTimer, SPOVGLTimer; + /// a set of single-particle orbitals used to fill in the values of the matrix + SPOSet* const Phi; + ///index of the first particle with respect to the particle set + int FirstIndex; + ///index of the last particle with respect to the particle set + int LastIndex; + ///number of single-particle orbitals which belong to this Dirac determinant + int NumOrbitals; + ///number of particles which belong to this Dirac determinant + int NumPtcls; + /// delayed update rank + int ndelay; + + + ///reset the size: with the number of particles and number of orbtials + void resize(int nel, int morb); + + /// register all the timers + void registerTimers() + { + UpdateTimer.reset(); + RatioTimer.reset(); + TimerManager.addTimer(&UpdateTimer); + TimerManager.addTimer(&RatioTimer); + TimerManager.addTimer(&InverseTimer); + TimerManager.addTimer(&BufferTimer); + TimerManager.addTimer(&SPOVTimer); + TimerManager.addTimer(&SPOVGLTimer); + } + +}; + + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/DiracMatrix.h b/src/QMCWaveFunctions/DiracMatrix.h new file mode 100644 index 000000000..60ee5ce5c --- /dev/null +++ b/src/QMCWaveFunctions/DiracMatrix.h @@ -0,0 +1,195 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2019 QMCPACK developers. +// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_DIRAC_MATRIX_H +#define QMCPLUSPLUS_DIRAC_MATRIX_H + +#include "Numerics/Blasf.h" +#include "Numerics/OhmmsPETE/OhmmsMatrix.h" +#include "Numerics/BlasThreadingEnv.h" +#include "Utilities/scalar_traits.h" +#include "Utilities/SIMD/algorithm.hpp" + +namespace qmcplusplus +{ +inline void Xgetrf(int n, int m, float* restrict a, int lda, int* restrict piv) +{ + int status; + sgetrf(n, m, a, lda, piv, status); +} + +inline void Xgetri(int n, float* restrict a, int lda, int* restrict piv, float* restrict work, int& lwork) +{ + int status; + sgetri(n, a, lda, piv, work, lwork, status); +} + +inline void Xgetrf(int n, int m, std::complex* restrict a, int lda, int* restrict piv) +{ + int status; + cgetrf(n, m, a, lda, piv, status); +} + +/** inversion of a float matrix after lu factorization*/ +inline void Xgetri(int n, + std::complex* restrict a, + int lda, + int* restrict piv, + std::complex* restrict work, + int& lwork) +{ + int status; + cgetri(n, a, lda, piv, work, lwork, status); +} + +inline void Xgetrf(int n, int m, double* restrict a, int lda, int* restrict piv) +{ + int status; + dgetrf(n, m, a, lda, piv, status); +} + +inline void Xgetri(int n, double* restrict a, int lda, int* restrict piv, double* restrict work, int& lwork) +{ + int status; + dgetri(n, a, lda, piv, work, lwork, status); +} + +inline void Xgetrf(int n, int m, std::complex* restrict a, int lda, int* restrict piv) +{ + int status; + zgetrf(n, m, a, lda, piv, status); +} + +/** inversion of a std::complex matrix after lu factorization*/ +inline void Xgetri(int n, + std::complex* restrict a, + int lda, + int* restrict piv, + std::complex* restrict work, + int& lwork) +{ + int status; + zgetri(n, a, lda, piv, work, lwork, status); +} + + +template +inline void TansposeSquare(const TIN* restrict in, TOUT* restrict out, size_t n, size_t lda) +{ +#pragma omp simd + for (size_t i = 0; i < n; ++i) + for (size_t j = 0; j < n; ++j) + out[i * lda + j] = in[i + j * lda]; +} + + +template +inline T computeLogDet(const T* restrict diag, int n, const int* restrict pivot, T& phase) +{ + T logdet(0); + int sign_det = 1; + for (size_t i = 0; i < n; i++) + { + sign_det *= (pivot[i] == i + 1) ? 1 : -1; + sign_det *= (diag[i] > 0) ? 1 : -1; + logdet += std::log(std::abs(diag[i])); + } + phase = (sign_det > 0) ? T(0) : M_PI; + return logdet; +} + +template +inline T computeLogDet(const std::complex* restrict diag, int n, const int* restrict pivot, T& phase) +{ + T logdet(0); + phase = T(0); + for (size_t i = 0; i < n; i++) + { + phase += std::arg(diag[i]); + if (pivot[i] != i + 1) + phase += M_PI; + logdet += std::log(diag[i].real() * diag[i].real() + diag[i].imag() * diag[i].imag()); + //slightly smaller error with the following + // logdet+=2.0*std::log(std::abs(x[ii]); + } + constexpr T one_over_2pi = T(1) / TWOPI; + phase -= std::floor(phase * one_over_2pi) * TWOPI; + return 0.5 * logdet; +} + +template +class DiracMatrix +{ + typedef typename scalar_traits::real_type real_type; + typedef typename scalar_traits::real_type real_type_fp; + aligned_vector m_work; + aligned_vector m_pivot; + int Lwork; + /// scratch space used for mixed precision + Matrix psiM_fp; + /// LU diagonal elements + aligned_vector LU_diag; + + /// reset internal work space + inline void reset(T_FP* invMat_ptr, const int lda) + { + m_pivot.resize(lda); + Lwork = -1; + T_FP tmp; + real_type_fp lw; + Xgetri(lda, invMat_ptr, lda, m_pivot.data(), &tmp, Lwork); + convert(tmp, lw); + Lwork = static_cast(lw); + m_work.resize(Lwork); + LU_diag.resize(lda); + } + +public: + + DiracMatrix() : Lwork(0) {} + + /** compute the inverse of the transpose of matrix A + * assume precision T_FP >= T, do the inversion always with T_FP + */ + inline void invert_transpose(const Matrix& amat, + Matrix& invMat, + real_type& LogDet, + real_type& Phase) + { + BlasThreadingEnv knob(getNumThreadsNested()); + const int n = invMat.rows(); + const int lda = invMat.cols(); + T_FP* invMat_ptr(nullptr); +#if !defined(MIXED_PRECISION) + simd::transpose(amat.data(), n, amat.cols(), invMat.data(), n, invMat.cols()); + invMat_ptr = invMat.data(); +#else + psiM_fp.resize(n,lda); + simd::transpose(amat.data(), n, amat.cols(), psiM_fp.data(), n, psiM_fp.cols()); + invMat_ptr = psiM_fp.data(); +#endif + if (Lwork < lda) + reset(invMat_ptr, lda); + Xgetrf(n, n, invMat_ptr, lda, m_pivot.data()); + for(int i=0; i -#include +#include #include #include #include @@ -104,7 +104,7 @@ void build_WaveFunction(bool useRef, using J1OrbType = OneBodyJastrow>; using J2OrbType = TwoBodyJastrow>; using J3OrbType = ThreeBodyJastrow; - using DetType = DiracDeterminant; + using DetType = DiracDeterminant<>; ions.RSoA = ions.R; els.RSoA = els.R; @@ -115,8 +115,8 @@ void build_WaveFunction(bool useRef, // determinant component WF.nelup = nelup; - WF.Det_up = new DetType(nelup, RNG, 0); - WF.Det_dn = new DetType(els.getTotalNum() - nelup, RNG, nelup); + WF.Det_up = new DetType(WF.spo, 0, 1); + WF.Det_dn = new DetType(WF.spo, 0, 1); // J1 component J1OrbType* J1 = new J1OrbType(ions, els); diff --git a/src/Utilities/Configuration.h b/src/Utilities/Configuration.h index 9045bd9d5..fd2ebec09 100644 --- a/src/Utilities/Configuration.h +++ b/src/Utilities/Configuration.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include "Particle/Lattice/CrystalLattice.h" @@ -44,10 +45,26 @@ typedef int omp_int_t; inline omp_int_t omp_get_thread_num() { return 0; } inline omp_int_t omp_get_max_threads() { return 1; } inline omp_int_t omp_get_num_threads() { return 1; } -inline omp_int_t omp_get_active_level() { return 0; } +inline omp_int_t omp_get_level() { return 0; } inline omp_int_t omp_get_ancestor_thread_num(int level) { return 0; } +inline bool omp_get_nested() { return false; } #endif +/// get the number of threads at the next nested level +inline int getNumThreadsNested() +{ + int num_threads = 1; + if (omp_get_nested()) + { +#pragma omp parallel + { +#pragma omp master + num_threads = omp_get_num_threads(); + } + } + return num_threads; +} + // define empty DEBUG_MEMORY #define DEBUG_MEMORY(msg) // uncomment this out to trace the call tree of destructors @@ -76,23 +93,24 @@ struct PtclAttribTraits */ struct QMCTraits { - // clang-format off - enum {DIM = OHMMS_DIM}; - typedef OHMMS_INDEXTYPE IndexType; - typedef OHMMS_PRECISION RealType; - typedef OHMMS_PRECISION_FULL EstimatorRealType; -#if defined(QMC_COMPLEX) - typedef std::complex ValueType; -#else - typedef OHMMS_PRECISION ValueType; -#endif - typedef std::complex ComplexType; - typedef TinyVector PosType; - typedef TinyVector GradType; - typedef Tensor TensorType; - // clang-format on + enum + { + DIM = OHMMS_DIM + }; + using QTBase = QMCTypes; + using QTFull = QMCTypes; + typedef QTBase::RealType RealType; + typedef QTBase::ComplexType ComplexType; + typedef QTBase::ValueType ValueType; + typedef QTBase::PosType PosType; + typedef QTBase::GradType GradType; + typedef QTBase::TensorType TensorType; + ///define other types + typedef OHMMS_INDEXTYPE IndexType; + typedef QTFull::RealType EstimatorRealType; }; + /** Particle traits to use UniformGridLayout for the ParticleLayout. */ struct PtclOnLatticeTraits @@ -100,9 +118,11 @@ struct PtclOnLatticeTraits // clang-format off typedef CrystalLattice ParticleLayout_t; - typedef int Index_t; - typedef OHMMS_PRECISION_FULL Scalar_t; - typedef std::complex Complex_t; + using QTFull = QMCTraits::QTFull; + + typedef int Index_t; + typedef QTFull::RealType Scalar_t; + typedef QTFull::ComplexType Complex_t; typedef ParticleLayout_t::SingleParticleIndex_t SingleParticleIndex_t; typedef ParticleLayout_t::SingleParticlePos_t SingleParticlePos_t; diff --git a/src/Utilities/QMCTypes.h b/src/Utilities/QMCTypes.h new file mode 100644 index 000000000..d3784da66 --- /dev/null +++ b/src/Utilities/QMCTypes.h @@ -0,0 +1,50 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2018 QMCPACK developers +// +// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Lab +// Ye Luo, yeluo@anl.gov, Argonne National Lab +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Lab +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_QMC_TYPES_H +#define QMCPLUSPLUS_QMC_TYPES_H + +#include +#include +#include + +namespace qmcplusplus +{ +/* Facilitates use of full/mixed precision without rebuilding the code + * + * a template class may define its local types + * template + * class Foo + * { + * using FooTypes = QMCTypes; + * ... + * }; + * + */ +template +class QMCTypes final +{ +public: + using RealType = Precision; + using ComplexType = std::complex; +#ifdef QMC_COMPLEX + using ValueType = ComplexType; +#else + using ValueType = RealType; +#endif + using GradType = TinyVector; + using PosType = TinyVector; + using TensorType = Tensor; +}; + +} // namespace qmcplusplus +#endif diff --git a/src/Utilities/SIMD/algorithm.hpp b/src/Utilities/SIMD/algorithm.hpp index 0a3b7a59f..2750acdb8 100644 --- a/src/Utilities/SIMD/algorithm.hpp +++ b/src/Utilities/SIMD/algorithm.hpp @@ -29,6 +29,24 @@ inline T2 accumulate_n(const T1* restrict in, size_t n, T2 res) return res; } +/** transpose of A(m,n) to B(n,m) + * @param A starting address, A(m,lda) + * @param m number of A rows + * @param lda stride of A's row + * @param B starting address B(n,ldb) + * @param n number of B rows + * @param ldb stride of B's row + * + * Blas-like interface + */ +template +inline void transpose(const T* restrict A, size_t m, size_t lda, TO* restrict B, size_t n, size_t ldb) +{ + for (size_t i = 0; i < n; ++i) + for (size_t j = 0; j < m; ++j) + B[i * ldb + j] = A[j * lda + i]; +} + ///inner product template inline T3 inner_product_n(const T1* restrict a, const T2* restrict b, int n, T3 res) diff --git a/src/Utilities/scalar_traits.h b/src/Utilities/scalar_traits.h new file mode 100644 index 000000000..4954f5fa3 --- /dev/null +++ b/src/Utilities/scalar_traits.h @@ -0,0 +1,50 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + + +#ifndef QMCPLUSPLUS_SCLAR_TRAITS_H +#define QMCPLUSPLUS_SCLAR_TRAITS_H + +#include + +namespace qmcplusplus +{ + +template +struct scalar_traits +{ + enum + { + DIM = 1 + }; + typedef T real_type; + typedef T value_type; + static inline T* get_address(T* a) { return a; } +}; + +template +struct scalar_traits> +{ + enum + { + DIM = 2 + }; + typedef T real_type; + typedef std::complex value_type; + static inline T* get_address(std::complex* a) { return reinterpret_cast(a); } +}; + +} // namespace qmcplusplus +#endif From 58c0633ffc58cb7256f98eff27f095ed208ca45b Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Mon, 3 Jun 2019 23:05:08 -0500 Subject: [PATCH 02/29] Now the code compiles. --- src/QMCWaveFunctions/DelayedUpdate.h | 1 + src/QMCWaveFunctions/DeterminantHelper.h | 80 +++++++++++++++++++++++ src/QMCWaveFunctions/DiracDeterminant.cpp | 6 +- src/QMCWaveFunctions/DiracMatrix.h | 1 + src/QMCWaveFunctions/SPOSet.h | 57 +++++++++++++++- src/Utilities/SIMD/algorithm.hpp | 39 +++++++++-- 6 files changed, 175 insertions(+), 9 deletions(-) create mode 100644 src/QMCWaveFunctions/DeterminantHelper.h diff --git a/src/QMCWaveFunctions/DelayedUpdate.h b/src/QMCWaveFunctions/DelayedUpdate.h index dc2e6ab0b..04abe2517 100644 --- a/src/QMCWaveFunctions/DelayedUpdate.h +++ b/src/QMCWaveFunctions/DelayedUpdate.h @@ -16,6 +16,7 @@ #include #include #include "Numerics/OhmmsBlas.h" +#include "QMCWaveFunctions/DeterminantHelper.h" #include "QMCWaveFunctions/DiracMatrix.h" #include "Numerics/BlasThreadingEnv.h" diff --git a/src/QMCWaveFunctions/DeterminantHelper.h b/src/QMCWaveFunctions/DeterminantHelper.h new file mode 100644 index 000000000..47189ae95 --- /dev/null +++ b/src/QMCWaveFunctions/DeterminantHelper.h @@ -0,0 +1,80 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2019 QMCPACK developers. +// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + + +#ifndef QMCPLUSPLUS_DETERMINANT_HELPER_H +#define QMCPLUSPLUS_DETERMINANT_HELPER_H + +#include + +namespace qmcplusplus +{ + +template +inline T evaluatePhase(T sign_v) +{ + return T((sign_v > 0) ? 0.0 : M_PI); +} + +template +inline T evaluatePhase(const std::complex& psi) +{ + return T(std::arg(psi)); +} + +/** evaluate the log(|psi|) and phase + * @param psi real/complex value + * @param phase phase of psi + * @return log(|psi|) + */ +template +inline T evaluateLogAndPhase(const T psi, T& phase) +{ + if (psi < 0.0) + { + phase = M_PI; + return std::log(-psi); + } + else + { + phase = 0.0; + return std::log(psi); + } +} + +template +inline T evaluateLogAndPhase(const std::complex& psi, T& phase) +{ + phase = std::arg(psi); + if (phase < 0.0) + phase += 2.0 * M_PI; + return std::log(std::abs(psi)); +} + +/** generic conversion from type T1 to type T2 using implicit conversion +*/ +template +inline void convert(const T1& in, T2& out) +{ + out = static_cast(in); +} + +/** specialization of conversion from complex to real +*/ +template +inline void convert(const std::complex& in, T2& out) +{ + out = static_cast(in.real()); +} + +} // namespace qmcplusplus + +#endif // QMCPLUSPLUS_DETERMINANT_HELPER_H diff --git a/src/QMCWaveFunctions/DiracDeterminant.cpp b/src/QMCWaveFunctions/DiracDeterminant.cpp index a3215b70f..4c98312e6 100644 --- a/src/QMCWaveFunctions/DiracDeterminant.cpp +++ b/src/QMCWaveFunctions/DiracDeterminant.cpp @@ -17,10 +17,8 @@ #include "QMCWaveFunctions/DiracDeterminant.h" -//#include "Numerics/DeterminantOperators.h" #include "Numerics/OhmmsBlas.h" -//#include "Numerics/MatrixOperators.h" -//#include "simd/simd.hpp" +#include "QMCWaveFunctions/DeterminantHelper.h" namespace qmcplusplus { @@ -30,7 +28,7 @@ namespace qmcplusplus */ template DiracDeterminant::DiracDeterminant(SPOSet* const spos, int first, int delay) - : ndelay(1), invRow_id(-1), + : invRow_id(-1), Phi(spos), FirstIndex(first), LastIndex(first + spos->size()), diff --git a/src/QMCWaveFunctions/DiracMatrix.h b/src/QMCWaveFunctions/DiracMatrix.h index 60ee5ce5c..0e7105698 100644 --- a/src/QMCWaveFunctions/DiracMatrix.h +++ b/src/QMCWaveFunctions/DiracMatrix.h @@ -13,6 +13,7 @@ #define QMCPLUSPLUS_DIRAC_MATRIX_H #include "Numerics/Blasf.h" +#include "Numerics/OhmmsBlas.h" #include "Numerics/OhmmsPETE/OhmmsMatrix.h" #include "Numerics/BlasThreadingEnv.h" #include "Utilities/scalar_traits.h" diff --git a/src/QMCWaveFunctions/SPOSet.h b/src/QMCWaveFunctions/SPOSet.h index 7cac0a9fb..5a12c7a92 100644 --- a/src/QMCWaveFunctions/SPOSet.h +++ b/src/QMCWaveFunctions/SPOSet.h @@ -12,11 +12,15 @@ #ifndef QMCPLUSPLUS_SINGLEPARTICLEORBITALSET_H #define QMCPLUSPLUS_SINGLEPARTICLEORBITALSET_H -#include #include +#include "Utilities/Configuration.h" +#include namespace qmcplusplus { + +class ParticleSet; + /** base class for Single-particle orbital sets * * SPOSet stands for S(ingle)P(article)O(rbital)Set which contains @@ -31,6 +35,12 @@ class SPOSet : public QMCTraits std::string className; public: + + using ValueVector_t = Vector; + using GradVector_t = Vector; + using ValueMatrix_t = Matrix; + using GradMatrix_t = Matrix; + /// return the size of the orbital set inline int size() const { return OrbitalSetSize; } @@ -43,6 +53,51 @@ class SPOSet : public QMCTraits virtual void evaluate_vgl(const PosType& p) = 0; virtual void evaluate_vgh(const PosType& p) = 0; + /** evaluate the values of this single-particle orbital set + * @param P current ParticleSet + * @param iat active particle + * @param psi values of the SPO + */ + virtual void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi) + { + //FIXME + } + + /** evaluate the values, gradients and laplacians of this single-particle orbital set + * @param P current ParticleSet + * @param iat active particle + * @param psi values of the SPO + * @param dpsi gradients of the SPO + * @param d2psi laplacians of the SPO + */ + virtual void evaluate(const ParticleSet& P, + int iat, + ValueVector_t& psi, + GradVector_t& dpsi, + ValueVector_t& d2psi) + { + //FIXME + } + + /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles + * @param P current ParticleSet + * @param first starting index of the particles + * @param last ending index of the particles + * @param logdet determinant matrix to be inverted + * @param dlogdet gradients + * @param d2logdet laplacians + * + */ + virtual void evaluate_notranspose(const ParticleSet& P, + int first, + int last, + ValueMatrix_t& logdet, + GradMatrix_t& dlogdet, + ValueMatrix_t& d2logdet) + { + //FIXME + } + /// operates on multiple walkers virtual void multi_evaluate_v(const std::vector& spo_list, const std::vector& pos_list) diff --git a/src/Utilities/SIMD/algorithm.hpp b/src/Utilities/SIMD/algorithm.hpp index 2750acdb8..40e2f2702 100644 --- a/src/Utilities/SIMD/algorithm.hpp +++ b/src/Utilities/SIMD/algorithm.hpp @@ -47,15 +47,46 @@ inline void transpose(const T* restrict A, size_t m, size_t lda, TO* restrict B, B[i * ldb + j] = A[j * lda + i]; } -///inner product -template -inline T3 inner_product_n(const T1* restrict a, const T2* restrict b, int n, T3 res) +/** dot product + * @param a starting address of an array of type T + * @param b starting address of an array of type T + * @param n size + * @param res initial value, default=0.0 + * @return \f$ res = \sum_i a[i] b[i]\f$ + * + * same as inner_product(a,a+n,b,0.0) + * The arguments of this inline function are different from BLAS::dot + * This is more efficient than BLAS::dot due to the function overhead, + * if a compiler knows how to inline. + */ +template +inline T dot(const T* restrict a, const T* restrict b, int n, T res = T()) { - for (int i = 0; i < n; ++i) + for (int i = 0; i < n; i++) + res += a[i] * b[i]; + return res; +} + +template +inline TinyVector dot(const T* a, const TinyVector* b, int n) +{ + TinyVector res; + for (int i = 0; i < n; i++) res += a[i] * b[i]; return res; } +/** copy function using memcpy + * @param target starting address of the target + * @param source starting address of the source + * @param n size of the data to copy + */ +template +inline void copy(T* restrict target, const T* restrict source, size_t n) +{ + memcpy(target, source, sizeof(T) * n); +} + } // namespace simd } // namespace qmcplusplus #endif From a551369f36cd860c3b5deff749e6a6a33bcba0ac Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Mon, 3 Jun 2019 23:36:17 -0500 Subject: [PATCH 03/29] Add unit tests --- src/CMakeLists.txt | 2 + src/Drivers/CMakeLists.txt | 4 +- src/QMCWaveFunctions/CMakeLists.txt | 185 +------ src/QMCWaveFunctions/DelayedUpdate.h | 1 - src/QMCWaveFunctions/DiracMatrix.h | 1 + src/QMCWaveFunctions/SPOSet.h | 2 +- src/QMCWaveFunctions/tests/CMakeLists.txt | 21 + src/QMCWaveFunctions/tests/test_dirac_det.cpp | 517 ++++++++++++++++++ .../tests/test_dirac_matrix.cpp | 165 ++++++ src/Utilities/Configuration.h | 9 + 10 files changed, 722 insertions(+), 185 deletions(-) create mode 100644 src/QMCWaveFunctions/tests/CMakeLists.txt create mode 100644 src/QMCWaveFunctions/tests/test_dirac_det.cpp create mode 100644 src/QMCWaveFunctions/tests/test_dirac_matrix.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ecd5f6e58..69a55700d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -178,4 +178,6 @@ ENDIF() SUBDIRS(Particle/tests) + SUBDIRS(QMCWaveFunctions) + SUBDIRS(Drivers) diff --git a/src/Drivers/CMakeLists.txt b/src/Drivers/CMakeLists.txt index d68def337..988011725 100644 --- a/src/Drivers/CMakeLists.txt +++ b/src/Drivers/CMakeLists.txt @@ -8,13 +8,11 @@ FOREACH(p ${ESTEST}) TARGET_LINK_LIBRARIES(${p} qmcbase qmcutil ${QMC_UTIL_LIBS} ${MPI_LIBRARY}) ENDFOREACH(p ${ESTEST}) -ADD_LIBRARY(miniwfs ../QMCWaveFunctions/WaveFunction.cpp ../QMCWaveFunctions/SPOSet_builder.cpp ../QMCWaveFunctions/DiracDeterminant.cpp) - SET(DRIVERS miniqmc miniqmc_sync_move) FOREACH(p ${DRIVERS}) ADD_EXECUTABLE( ${p} ${p}.cpp) - TARGET_LINK_LIBRARIES(${p} miniwfs qmcbase qmcutil ${QMC_UTIL_LIBS} ${MPI_LIBRARY}) + TARGET_LINK_LIBRARIES(${p} qmcwfs qmcbase qmcutil ${QMC_UTIL_LIBS} ${MPI_LIBRARY}) ENDFOREACH(p ${ESTEST}) endif() diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt index 0b74ce801..fd93a57e2 100644 --- a/src/QMCWaveFunctions/CMakeLists.txt +++ b/src/QMCWaveFunctions/CMakeLists.txt @@ -2,188 +2,13 @@ #// This file is distributed under the University of Illinois/NCSA Open Source License. #// See LICENSE file in top directory for details. #// -#// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +#// Copyright (c) 2019 QMCPACK developers. #// -#// File developed by: Bryan Clark, bclark@Princeton.edu, Princeton University -#// Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign -#// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory -#// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -#// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -#// Ye Luo, yeluo@anl.gov, Argonne National Laboratory -#// Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory -#// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -#// Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign +#// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory #// -#// File created by: Bryan Clark, bclark@Princeton.edu, Princeton University +#// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory #////////////////////////////////////////////////////////////////////////////////////// - - -SET(WFBASE_SRCS - WaveFunctionComponent.cpp - DiffOrbitalBase.cpp - OrbitalBuilderBase.cpp - BasisSetBuilder.cpp - ProductOrbital.cpp - OrbitalConstraintsBase.cpp - SPOInfo.cpp - SPOSetInfo.cpp - SPOSetInputInfo.cpp - SPOSetBase.cpp - FermionBase.cpp - OptimizableSPOSet.cpp - AFMSPOSet.cpp - CompositeSPOSet.cpp - HarmonicOscillator/SHOSet.cpp - HarmonicOscillator/SHOSetBuilder.cpp - ) +ADD_LIBRARY(qmcwfs ../QMCWaveFunctions/WaveFunction.cpp ../QMCWaveFunctions/SPOSet_builder.cpp ../QMCWaveFunctions/DiracDeterminant.cpp) -######################## -# build jastrows -######################## -#common jastrows -SET(JASTROW_SRCS - Jastrow/LRTwoBodyJastrow.cpp - Jastrow/PadeJastrowBuilder.cpp - Jastrow/JastrowBuilder.cpp - Jastrow/BsplineJastrowBuilder.cpp - Jastrow/kSpaceJastrow.cpp - Jastrow/kSpaceJastrowBuilder.cpp - Jastrow/RPAJastrow.cpp - Jastrow/singleRPAJastrowBuilder.cpp - Jastrow/JAABuilder.cpp - Jastrow/JABBuilder.cpp - IonOrbital.cpp - IonOrbitalBuilder.cpp - OptimizableSPOBuilder.cpp - AFMSPOBuilder.cpp - Fermion/SPOSetProxy.cpp - Fermion/SPOSetProxyForMSD.cpp - ) - -IF(QMC_COMPLEX) - SET(FERMION_SRCS ${FERMION_SRCS} - ElectronGas/ElectronGasComplexOrbitalBuilder.cpp - ) -ELSE(QMC_COMPLEX) - SET(FERMION_SRCS ${FERMION_SRCS} - ElectronGas/ElectronGasOrbitalBuilder.cpp - ) - -ENDIF(QMC_COMPLEX) - - -# wavefunctions only availbale to 3-dim problems -IF(OHMMS_DIM MATCHES 3) - - SET(JASTROW_SRCS ${JASTROW_SRCS} - Jastrow/eeI_JastrowBuilder.cpp - Jastrow/ThreeBodyGeminal.cpp - Jastrow/ThreeBodyBlockSparse.cpp - Jastrow/JastrowBasisBuilder.cpp - Jastrow/CBSOBuilder.cpp - ) - - - SET(FERMION_SRCS ${FERMION_SRCS} - MolecularOrbitals/STOBuilder.cpp - MolecularOrbitals/GTOBuilder.cpp - MolecularOrbitals/NGOBuilder.cpp - MolecularOrbitals/BsplineAOBuilder.cpp - ) - - - IF(HAVE_EINSPLINE) - SET(FERMION_SRCS ${FERMION_SRCS} - EinsplineSetBuilderCommon.cpp - EinsplineSetBuilderOld.cpp - MuffinTin.cpp - AtomicOrbital.cpp - EinsplineSetBuilderReadBands_ESHDF.cpp - EinsplineSetBuilderESHDF.fft.cpp - EinsplineSetBuilder_createSPOs.cpp - BsplineFactory/createComplexDouble.cpp - BsplineFactory/createComplexSingle.cpp - BandInfo.cpp - BsplineReaderBase.cpp - ) - - IF(NOT QMC_COMPLEX) - SET(FERMION_SRCS ${FERMION_SRCS} - BsplineFactory/createRealSingle.cpp - BsplineFactory/createRealDouble.cpp - ) - ENDIF() - - ENDIF(HAVE_EINSPLINE) - - # IF(QMC_BUILD_LEVEL GREATER 1) - # - SET(FERMION_SRCS ${FERMION_SRCS} - # Bspline3DSetBase.cpp - # Bspline3DSet.cpp - # Bspline3DSetTrunc.cpp - # TricubicBsplineSetBuilder.cpp - # TricubicBsplineSetBuilder.1.cpp - # TricubicBsplineSetBuilder.2.cpp - PlaneWave/PWBasis.cpp - PlaneWave/PWParameterSet.cpp - PlaneWave/PWOrbitalBuilder.cpp - ) - IF(QMC_COMPLEX) - SET(FERMION_SRCS ${FERMION_SRCS} - PlaneWave/PWOrbitalSet.cpp - ) - ELSE() - SET(FERMION_SRCS ${FERMION_SRCS} - PlaneWave/PWRealOrbitalSet.cpp - ) - ENDIF(QMC_COMPLEX) - # ENDIF(QMC_BUILD_LEVEL GREATER 1) - - #only experimental version - IF(QMC_BUILD_LEVEL GREATER 2) - IF(NOT QMC_COMPLEX) - SET(FERMION_SRCS ${FERMION_SRCS} - AGPDeterminant.cpp AGPDeterminantBuilder.cpp - ) - ENDIF(NOT QMC_COMPLEX) - ENDIF(QMC_BUILD_LEVEL GREATER 2) - -ENDIF(OHMMS_DIM MATCHES 3) - -SET(FERMION_SRCS ${FERMION_SRCS} - Fermion/DiracDeterminantBase.cpp - Fermion/DiracDeterminantSoA.cpp - Fermion/DiracDeterminantOpt.cpp - Fermion/DiracDeterminantAFM.cpp - Fermion/SlaterDet.cpp - Fermion/SlaterDetBuilder.cpp - Fermion/MultiSlaterDeterminant.cpp - Fermion/MultiSlaterDeterminantFast.cpp - Fermion/MultiDiracDeterminantBase.cpp - Fermion/BackflowBuilder.cpp - Fermion/DiracDeterminantWithBackflow.cpp - Fermion/SlaterDetWithBackflow.cpp - Fermion/MultiSlaterDeterminantWithBackflow.cpp - BasisSetFactory.cpp - TrialWaveFunction.cpp - WaveFunctionFactory.cpp - ) - -IF(NOT QMC_COMPLEX) - SET(FERMION_SRCS ${FERMION_SRCS} - Fermion/RNDiracDeterminantBase.cpp - Fermion/RNDiracDeterminantBaseAlternate.cpp - ) -ENDIF(NOT QMC_COMPLEX) - -#################################### -# create libqmcwfs -#################################### -ADD_LIBRARY(qmcwfs ${WFBASE_SRCS} ${JASTROW_SRCS} ${FERMION_SRCS}) -#IF(QMC_BUILD_STATIC) -# ADD_LIBRARY(qmcwfs STATIC ${WFBASE_SRCS} ${JASTROW_SRCS} ${FERMION_SRCS}) -#ELSE(QMC_BUILD_STATIC) -# ADD_LIBRARY(qmcwfs SHARED ${WFBASE_SRCS} ${JASTROW_SRCS} ${FERMION_SRCS}) -#ENDIF(QMC_BUILD_STATIC) +SUBDIRS(tests) diff --git a/src/QMCWaveFunctions/DelayedUpdate.h b/src/QMCWaveFunctions/DelayedUpdate.h index 04abe2517..dc2e6ab0b 100644 --- a/src/QMCWaveFunctions/DelayedUpdate.h +++ b/src/QMCWaveFunctions/DelayedUpdate.h @@ -16,7 +16,6 @@ #include #include #include "Numerics/OhmmsBlas.h" -#include "QMCWaveFunctions/DeterminantHelper.h" #include "QMCWaveFunctions/DiracMatrix.h" #include "Numerics/BlasThreadingEnv.h" diff --git a/src/QMCWaveFunctions/DiracMatrix.h b/src/QMCWaveFunctions/DiracMatrix.h index 0e7105698..83d7ed3e1 100644 --- a/src/QMCWaveFunctions/DiracMatrix.h +++ b/src/QMCWaveFunctions/DiracMatrix.h @@ -18,6 +18,7 @@ #include "Numerics/BlasThreadingEnv.h" #include "Utilities/scalar_traits.h" #include "Utilities/SIMD/algorithm.hpp" +#include "QMCWaveFunctions/DeterminantHelper.h" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/SPOSet.h b/src/QMCWaveFunctions/SPOSet.h index 5a12c7a92..11f1cacc6 100644 --- a/src/QMCWaveFunctions/SPOSet.h +++ b/src/QMCWaveFunctions/SPOSet.h @@ -28,7 +28,7 @@ class ParticleSet; */ class SPOSet : public QMCTraits { -private: +protected: /// number of SPOs int OrbitalSetSize; /// name of the basis set diff --git a/src/QMCWaveFunctions/tests/CMakeLists.txt b/src/QMCWaveFunctions/tests/CMakeLists.txt new file mode 100644 index 000000000..3655820c8 --- /dev/null +++ b/src/QMCWaveFunctions/tests/CMakeLists.txt @@ -0,0 +1,21 @@ +#////////////////////////////////////////////////////////////////////////////////////// +#// This file is distributed under the University of Illinois/NCSA Open Source License. +#// See LICENSE file in top directory for details. +#// +#// Copyright (c) 2019 QMCPACK developers. +#// +#// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +#// +#// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +#////////////////////////////////////////////////////////////////////////////////////// + +SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${QMCPACK_UNIT_TEST_DIR}) + +SET(SRC_DIR wavefunction) +SET(UTEST_EXE test_${SRC_DIR}) +SET(UTEST_NAME unit_test_${SRC_DIR}) + +ADD_EXECUTABLE(${UTEST_EXE} ../../Utilities/catch-main.cpp test_dirac_det.cpp test_dirac_matrix.cpp) +TARGET_LINK_LIBRARIES(${UTEST_EXE} qmcwfs qmcbase qmcutil ${QMC_UTIL_LIBS} ${MPI_LIBRARY}) + +ADD_UNIT_TEST(${UTEST_NAME} "${QMCPACK_UNIT_TEST_DIR}/${UTEST_EXE}") diff --git a/src/QMCWaveFunctions/tests/test_dirac_det.cpp b/src/QMCWaveFunctions/tests/test_dirac_det.cpp new file mode 100644 index 000000000..3bca2ec99 --- /dev/null +++ b/src/QMCWaveFunctions/tests/test_dirac_det.cpp @@ -0,0 +1,517 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2017 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +// +// File created by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + + +#include "catch.hpp" + +#include +#include +#include "QMCWaveFunctions/DiracDeterminant.h" + +using std::string; + +namespace qmcplusplus +{ + +typedef QMCTraits::RealType RealType; +typedef QMCTraits::ValueType ValueType; +#ifdef ENABLE_CUDA +typedef DiracDeterminant> DetType; +#else +typedef DiracDeterminant<> DetType; +#endif + +template +void check_matrix(Matrix& a, Matrix& b) +{ + REQUIRE(a.size() == b.size()); + for (int i = 0; i < a.rows(); i++) + { + for (int j = 0; j < a.cols(); j++) + { + REQUIRE(a(i, j) == ValueApprox(b(i, j))); + } + } +} + +class FakeSPO : public SPOSet +{ +public: + Matrix a; + Matrix a2; + Vector v; + Matrix v2; + + FakeSPO(); + virtual ~FakeSPO() {} + + virtual void setOrbitalSetSize(int norbs); + + virtual void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi); + + virtual void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi, GradVector_t& dpsi, ValueVector_t& d2psi); + + virtual void evaluate_notranspose(const ParticleSet& P, + int first, + int last, + ValueMatrix_t& logdet, + GradMatrix_t& dlogdet, + ValueMatrix_t& d2logdet); + + void evaluate_v(const PosType& p) {}; + void evaluate_vgl(const PosType& p) {}; + void evaluate_vgh(const PosType& p) {}; +}; + +FakeSPO::FakeSPO() +{ + className = "FakeSPO"; + a.resize(3, 3); + + a(0, 0) = 2.3; + a(0, 1) = 4.5; + a(0, 2) = 2.6; + a(1, 0) = 0.5; + a(1, 1) = 8.5; + a(1, 2) = 3.3; + a(2, 0) = 1.8; + a(2, 1) = 4.4; + a(2, 2) = 4.9; + + v.resize(3); + v[0] = 1.9; + v[1] = 2.0; + v[2] = 3.1; + + + a2.resize(4, 4); + a2(0, 0) = 2.3; + a2(0, 1) = 4.5; + a2(0, 2) = 2.6; + a2(0, 3) = 1.2; + a2(1, 0) = 0.5; + a2(1, 1) = 8.5; + a2(1, 2) = 3.3; + a2(1, 3) = 0.3; + a2(2, 0) = 1.8; + a2(2, 1) = 4.4; + a2(2, 2) = 4.9; + a2(2, 3) = 2.8; + a2(3, 0) = 0.8; + a2(3, 1) = 4.1; + a2(3, 2) = 3.2; + a2(3, 3) = 1.1; + + v2.resize(3, 4); + + v2(0, 0) = 3.2; + v2(0, 1) = 0.5; + v2(0, 2) = 5.9; + v2(0, 3) = 3.7; + v2(1, 0) = 0.3; + v2(1, 1) = 1.4; + v2(1, 2) = 3.9; + v2(1, 3) = 8.2; + v2(2, 0) = 3.3; + v2(2, 1) = 5.4; + v2(2, 2) = 4.9; + v2(2, 3) = 2.2; +} + +void FakeSPO::setOrbitalSetSize(int norbs) { OrbitalSetSize = norbs; } + +void FakeSPO::evaluate(const ParticleSet& P, int iat, ValueVector_t& psi) +{ + if (OrbitalSetSize == 3) + { + for (int i = 0; i < 3; i++) + { + psi[i] = a(iat, i); + } + } + else if (OrbitalSetSize == 4) + { + for (int i = 0; i < 4; i++) + { + psi[i] = a2(iat, i); + } + } +} + +void FakeSPO::evaluate(const ParticleSet& P, int iat, ValueVector_t& psi, GradVector_t& dpsi, ValueVector_t& d2psi) +{ + if (OrbitalSetSize == 3) + { + for (int i = 0; i < 3; i++) + { + psi[i] = v[i]; + } + } + else if (OrbitalSetSize == 4) + { + for (int i = 0; i < 4; i++) + { + psi[i] = v2(iat, i); + } + } +} + +void FakeSPO::evaluate_notranspose(const ParticleSet& P, + int first, + int last, + ValueMatrix_t& logdet, + GradMatrix_t& dlogdet, + ValueMatrix_t& d2logdet) +{ + if (OrbitalSetSize == 3) + { + for (int i = 0; i < 3; i++) + { + for (int j = 0; j < 3; j++) + { + logdet(j, i) = a(i, j); + } + } + } + else if (OrbitalSetSize == 4) + { + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 4; j++) + { + logdet(j, i) = a2(i, j); + } + } + } +} + +TEST_CASE("DiracDeterminant_first", "[wavefunction][fermion]") +{ + FakeSPO* spo = new FakeSPO(); + const int norb = 3; + spo->setOrbitalSetSize(norb); + DetType ddb(spo, 0); + + // occurs in call to registerData + ddb.dpsiV.resize(norb); + ddb.d2psiV.resize(norb); + + + ParticleSet elec; + + elec.create(3); + ddb.recompute(elec); + + Matrix b; + b.resize(3, 3); + + b(0, 0) = 0.6159749342; + b(0, 1) = -0.2408954682; + b(0, 2) = -0.1646081192; + b(1, 0) = 0.07923894288; + b(1, 1) = 0.1496231042; + b(1, 2) = -0.1428117337; + b(2, 0) = -0.2974298429; + b(2, 1) = -0.04586322768; + b(2, 2) = 0.3927890292; + + check_matrix(ddb.psiM, b); + + + ParticleSet::GradType grad; + ValueType det_ratio = ddb.ratioGrad(elec, 0, grad); + ValueType det_ratio1 = 0.178276269185; + REQUIRE(det_ratio1 == ValueApprox(det_ratio)); + + ddb.acceptMove(elec, 0); + ddb.completeUpdates(); + + b(0, 0) = 3.455170657; + b(0, 1) = -1.35124809; + b(0, 2) = -0.9233316353; + b(1, 0) = 0.05476311768; + b(1, 1) = 0.1591951095; + b(1, 2) = -0.1362710138; + b(2, 0) = -2.235099338; + b(2, 1) = 0.7119205298; + b(2, 2) = 0.9105960265; + + check_matrix(ddb.psiM, b); +} + +//#define DUMP_INFO + +TEST_CASE("DiracDeterminant_second", "[wavefunction][fermion]") +{ + FakeSPO* spo = new FakeSPO(); + const int norb = 4; + spo->setOrbitalSetSize(norb); + DetType ddb(spo, 0); + + // occurs in call to registerData + ddb.dpsiV.resize(norb); + ddb.d2psiV.resize(norb); + + + ParticleSet elec; + + elec.create(4); + ddb.recompute(elec); + + Matrix orig_a; + orig_a.resize(4, 4); + orig_a = spo->a2; + + for (int i = 0; i < 3; i++) + { + for (int j = 0; j < norb; j++) + { + orig_a(j, i) = spo->v2(i, j); + } + } + + //check_matrix(ddb.psiM, b); + DiracMatrix dm; + + Matrix a_update1, scratchT; + a_update1.resize(4, 4); + scratchT.resize(4, 4); + a_update1 = spo->a2; + for (int j = 0; j < norb; j++) + { + a_update1(j, 0) = spo->v2(0, j); + } + + Matrix a_update2; + a_update2.resize(4, 4); + a_update2 = spo->a2; + for (int j = 0; j < norb; j++) + { + a_update2(j, 0) = spo->v2(0, j); + a_update2(j, 1) = spo->v2(1, j); + } + + Matrix a_update3; + a_update3.resize(4, 4); + a_update3 = spo->a2; + for (int j = 0; j < norb; j++) + { + a_update3(j, 0) = spo->v2(0, j); + a_update3(j, 1) = spo->v2(1, j); + a_update3(j, 2) = spo->v2(2, j); + } + + ParticleSet::GradType grad; + ValueType det_ratio = ddb.ratioGrad(elec, 0, grad); + + simd::transpose(a_update1.data(), a_update1.rows(), a_update1.cols(), scratchT.data(), scratchT.rows(), scratchT.cols()); + RealType det_update1, phase; + dm.invert_transpose(scratchT, a_update1, det_update1, phase); + ValueType log_ratio1 = det_update1 - ddb.LogValue; + ValueType det_ratio1 = std::exp(log_ratio1); +#ifdef DUMP_INFO + std::cout << "det 0 = " << std::exp(ddb.LogValue) << std::endl; + std::cout << "det 1 = " << std::exp(det_update1) << std::endl; + std::cout << "det ratio 1 = " << det_ratio1 << std::endl; +#endif + //double det_ratio1 = 0.178276269185; + + REQUIRE(det_ratio1 == ValueApprox(det_ratio)); + + ddb.acceptMove(elec, 0); + + + ValueType det_ratio2 = ddb.ratioGrad(elec, 1, grad); + RealType det_update2; + simd::transpose(a_update2.data(), a_update2.rows(), a_update2.cols(), scratchT.data(), scratchT.rows(), scratchT.cols()); + dm.invert_transpose(scratchT, a_update2, det_update2, phase); + ValueType log_ratio2 = det_update2 - det_update1; + ValueType det_ratio2_val = std::exp(log_ratio2); +#ifdef DUMP_INFO + std::cout << "det 1 = " << std::exp(ddb.LogValue) << std::endl; + std::cout << "det 2 = " << std::exp(det_update2) << std::endl; + std::cout << "det ratio 2 = " << det_ratio2 << std::endl; +#endif + //double det_ratio2_val = 0.178276269185; + REQUIRE(std::abs(det_ratio2) == ValueApprox(det_ratio2_val)); + + + ddb.acceptMove(elec, 1); + + ValueType det_ratio3 = ddb.ratioGrad(elec, 2, grad); + RealType det_update3; + simd::transpose(a_update3.data(), a_update3.rows(), a_update3.cols(), scratchT.data(), scratchT.rows(), scratchT.cols()); + dm.invert_transpose(scratchT, a_update3, det_update3, phase); + ValueType log_ratio3 = det_update3 - det_update2; + ValueType det_ratio3_val = std::exp(log_ratio3); +#ifdef DUMP_INFO + std::cout << "det 2 = " << std::exp(ddb.LogValue) << std::endl; + std::cout << "det 3 = " << std::exp(det_update3) << std::endl; + std::cout << "det ratio 3 = " << det_ratio3 << std::endl; +#endif + REQUIRE(det_ratio3 == ValueApprox(det_ratio3_val)); + //check_value(det_ratio3, det_ratio3_val); + + ddb.acceptMove(elec, 2); + ddb.completeUpdates(); + + simd::transpose(orig_a.data(), orig_a.rows(), orig_a.cols(), scratchT.data(), scratchT.rows(), scratchT.cols()); + dm.invert_transpose(scratchT, orig_a, det_update3, phase); + +#ifdef DUMP_INFO + std::cout << "original " << std::endl; + std::cout << orig_a << std::endl; + std::cout << "block update " << std::endl; + std::cout << ddb.psiM << std::endl; +#endif + + check_matrix(orig_a, ddb.psiM); +} + +TEST_CASE("DiracDeterminant_delayed_update", "[wavefunction][fermion]") +{ + FakeSPO* spo = new FakeSPO(); + const int norb = 4; + spo->setOrbitalSetSize(norb); + // maximum delay 2 + DetType ddc(spo, 0, 2); + + // occurs in call to registerData + ddc.dpsiV.resize(norb); + ddc.d2psiV.resize(norb); + + + ParticleSet elec; + + elec.create(4); + ddc.recompute(elec); + + Matrix orig_a; + orig_a.resize(4, 4); + orig_a = spo->a2; + + for (int i = 0; i < 3; i++) + { + for (int j = 0; j < norb; j++) + { + orig_a(j, i) = spo->v2(i, j); + } + } + + //check_matrix(ddc.psiM, b); + DiracMatrix dm; + + Matrix a_update1, scratchT; + scratchT.resize(4, 4); + a_update1.resize(4, 4); + a_update1 = spo->a2; + for (int j = 0; j < norb; j++) + { + a_update1(j, 0) = spo->v2(0, j); + } + + Matrix a_update2; + a_update2.resize(4, 4); + a_update2 = spo->a2; + for (int j = 0; j < norb; j++) + { + a_update2(j, 0) = spo->v2(0, j); + a_update2(j, 1) = spo->v2(1, j); + } + + Matrix a_update3; + a_update3.resize(4, 4); + a_update3 = spo->a2; + for (int j = 0; j < norb; j++) + { + a_update3(j, 0) = spo->v2(0, j); + a_update3(j, 1) = spo->v2(1, j); + a_update3(j, 2) = spo->v2(2, j); + } + + + ParticleSet::GradType grad; + ValueType det_ratio = ddc.ratioGrad(elec, 0, grad); + + simd::transpose(a_update1.data(), a_update1.rows(), a_update1.cols(), scratchT.data(), scratchT.rows(), scratchT.cols()); + RealType det_update1, phase; + dm.invert_transpose(scratchT, a_update1, det_update1, phase); + ValueType log_ratio1 = det_update1 - ddc.LogValue; + ValueType det_ratio1 = std::exp(log_ratio1); +#ifdef DUMP_INFO + std::cout << "det 0 = " << std::exp(ddc.LogValue) << std::endl; + std::cout << "det 1 = " << std::exp(det_update1) << std::endl; + std::cout << "det ratio 1 = " << det_ratio1 << std::endl; +#endif + //double det_ratio1 = 0.178276269185; + + REQUIRE(det_ratio1 == ValueApprox(det_ratio)); + + // update of Ainv in ddc is delayed + ddc.acceptMove(elec, 0); + // force update Ainv in ddc using SM-1 code path + ddc.completeUpdates(); + + grad = ddc.evalGrad(elec, 1); + ValueType det_ratio2 = ddc.ratioGrad(elec, 1, grad); + simd::transpose(a_update2.data(), a_update2.rows(), a_update2.cols(), scratchT.data(), scratchT.rows(), scratchT.cols()); + RealType det_update2; + dm.invert_transpose(scratchT, a_update2, det_update2, phase); + ValueType log_ratio2 = det_update2 - det_update1; + ValueType det_ratio2_val = std::exp(log_ratio2); +#ifdef DUMP_INFO + std::cout << "det 1 = " << std::exp(ddc.LogValue) << std::endl; + std::cout << "det 2 = " << std::exp(det_update2) << std::endl; + std::cout << "det ratio 2 = " << det_ratio2 << std::endl; +#endif + // check ratio computed directly and the one computed by ddc with no delay + //double det_ratio2_val = 0.178276269185; + REQUIRE(std::abs(det_ratio2) == ValueApprox(det_ratio2_val)); + + // update of Ainv in ddc is delayed + ddc.acceptMove(elec, 1); + + grad = ddc.evalGrad(elec, 2); + ValueType det_ratio3 = ddc.ratioGrad(elec, 2, grad); + simd::transpose(a_update3.data(), a_update3.rows(), a_update3.cols(), scratchT.data(), scratchT.rows(), scratchT.cols()); + RealType det_update3; + dm.invert_transpose(scratchT, a_update3, det_update3, phase); + ValueType log_ratio3 = det_update3 - det_update2; + ValueType det_ratio3_val = std::exp(log_ratio3); +#ifdef DUMP_INFO + std::cout << "det 2 = " << std::exp(ddc.LogValue) << std::endl; + std::cout << "det 3 = " << std::exp(det_update3) << std::endl; + std::cout << "det ratio 3 = " << det_ratio3 << std::endl; +#endif + // check ratio computed directly and the one computed by ddc with 1 delay + REQUIRE(det_ratio3 == ValueApprox(det_ratio3_val)); + //check_value(det_ratio3, det_ratio3_val); + + // maximal delay reached and Ainv is updated fully + ddc.acceptMove(elec, 2); + ddc.completeUpdates(); + + // fresh invert orig_a + simd::transpose(orig_a.data(), orig_a.rows(), orig_a.cols(), scratchT.data(), scratchT.rows(), scratchT.cols()); + dm.invert_transpose(scratchT, orig_a, det_update3, phase); + +#ifdef DUMP_INFO + std::cout << "original " << std::endl; + std::cout << orig_a << std::endl; + std::cout << "delayed update " << std::endl; + std::cout << ddc.psiM << std::endl; +#endif + + // compare all the elements of psiM in ddc and orig_a + check_matrix(orig_a, ddc.psiM); +} + +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/tests/test_dirac_matrix.cpp b/src/QMCWaveFunctions/tests/test_dirac_matrix.cpp new file mode 100644 index 000000000..f4f7c784b --- /dev/null +++ b/src/QMCWaveFunctions/tests/test_dirac_matrix.cpp @@ -0,0 +1,165 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2017 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +// +// File created by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + + +#include "catch.hpp" + +#include +#include + +#include "Numerics/OhmmsPETE/OhmmsMatrix.h" +#include "QMCWaveFunctions/WaveFunctionComponent.h" +#include "QMCWaveFunctions/DiracMatrix.h" +#include "QMCWaveFunctions/DelayedUpdate.h" + +using std::string; + +namespace qmcplusplus +{ + +typedef QMCTraits::RealType RealType; +typedef QMCTraits::ValueType ValueType; + +template +void check_matrix(Matrix& a, Matrix& b) +{ + REQUIRE(a.size() == b.size()); + for (int i = 0; i < a.rows(); i++) + { + for (int j = 0; j < a.cols(); j++) + { + REQUIRE(a(i, j) == ValueApprox(b(i, j))); + } + } +} + +TEST_CASE("DiracMatrix_identity", "[wavefunction][fermion]") +{ + DiracMatrix dm; + + Matrix m, m_invT; + RealType LogValue, PhaseValue; + m.resize(3, 3); + m_invT.resize(3, 3); + + m(0, 0) = 1.0; + m(1, 1) = 1.0; + m(2, 2) = 1.0; + + dm.invert_transpose(m, m_invT, LogValue, PhaseValue); + REQUIRE(LogValue == ValueApprox(0.0)); + + Matrix eye; + eye.resize(3, 3); + eye(0, 0) = 1.0; + eye(1, 1) = 1.0; + eye(2, 2) = 1.0; + + check_matrix(m_invT, eye); +} + +TEST_CASE("DiracMatrix_inverse", "[wavefunction][fermion]") +{ + DiracMatrix dm; + + Matrix a, a_T, a_inv; + RealType LogValue, PhaseValue; + a.resize(3, 3); + a_T.resize(3, 3); + a_inv.resize(3, 3); + + a(0, 0) = 2.3; + a(0, 1) = 4.5; + a(0, 2) = 2.6; + a(1, 0) = 0.5; + a(1, 1) = 8.5; + a(1, 2) = 3.3; + a(2, 0) = 1.8; + a(2, 1) = 4.4; + a(2, 2) = 4.9; + + simd::transpose(a.data(), a.rows(), a.cols(), a_T.data(), a_T.rows(), a_T.cols()); + dm.invert_transpose(a_T, a_inv, LogValue, PhaseValue); + REQUIRE(LogValue == ValueApprox(3.78518913425)); + + Matrix b; + b.resize(3, 3); + + b(0, 0) = 0.6159749342; + b(0, 1) = -0.2408954682; + b(0, 2) = -0.1646081192; + b(1, 0) = 0.07923894288; + b(1, 1) = 0.1496231042; + b(1, 2) = -0.1428117337; + b(2, 0) = -0.2974298429; + b(2, 1) = -0.04586322768; + b(2, 2) = 0.3927890292; + + check_matrix(a_inv, b); +} + + +TEST_CASE("DiracMatrix_update_row", "[wavefunction][fermion]") +{ + DiracMatrix dm; + DelayedUpdate updateEng; + updateEng.resize(3, 1); + + Matrix a, a_T, a_inv; + RealType LogValue, PhaseValue; + a.resize(3, 3); + a_T.resize(3, 3); + a_inv.resize(3, 3); + + a(0, 0) = 2.3; + a(0, 1) = 4.5; + a(0, 2) = 2.6; + a(1, 0) = 0.5; + a(1, 1) = 8.5; + a(1, 2) = 3.3; + a(2, 0) = 1.8; + a(2, 1) = 4.4; + a(2, 2) = 4.9; + + simd::transpose(a.data(), a.rows(), a.cols(), a_T.data(), a_T.rows(), a_T.cols()); + dm.invert_transpose(a_T, a_inv, LogValue, PhaseValue); + + // new row + Vector v(3), invRow(3); + v[0] = 1.9; + v[1] = 2.0; + v[2] = 3.1; + + updateEng.getInvRow(a_inv, 0, invRow); + ValueType det_ratio1 = simd::dot(v.data(), invRow.data(), invRow.size()); + + ValueType det_ratio = 0.178276269185; + REQUIRE(det_ratio1 == ValueApprox(det_ratio)); + updateEng.acceptRow(a_inv, 0, v); + + Matrix b; + b.resize(3, 3); + + b(0, 0) = 3.455170657; + b(0, 1) = -1.35124809; + b(0, 2) = -0.9233316353; + b(1, 0) = 0.05476311768; + b(1, 1) = 0.1591951095; + b(1, 2) = -0.1362710138; + b(2, 0) = -2.235099338; + b(2, 1) = 0.7119205298; + b(2, 2) = 0.9105960265; + + check_matrix(a_inv, b); +} + + +} // namespace qmcplusplus diff --git a/src/Utilities/Configuration.h b/src/Utilities/Configuration.h index fd2ebec09..b849c0eb9 100644 --- a/src/Utilities/Configuration.h +++ b/src/Utilities/Configuration.h @@ -147,6 +147,15 @@ struct PtclOnLatticeTraits // clang-format on }; +// For unit tests +// Check if we are compiling with Catch defined. Could use other symbols if needed. +#ifdef TEST_CASE +#ifdef QMC_COMPLEX +typedef ComplexApprox ValueApprox; +#else +typedef Approx ValueApprox; +#endif +#endif } // namespace qmcplusplus From 8078aba72ac5f0656865a1e83cb5e6b1d20c9153 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Mon, 3 Jun 2019 23:42:12 -0500 Subject: [PATCH 04/29] Format codes. --- src/QMCWaveFunctions/DelayedUpdate.h | 1 - src/QMCWaveFunctions/DeterminantHelper.h | 1 - src/QMCWaveFunctions/DiracDeterminant.cpp | 32 +++++++++++------------ src/QMCWaveFunctions/DiracDeterminant.h | 16 +++++++----- src/QMCWaveFunctions/DiracMatrix.h | 14 ++++------ src/QMCWaveFunctions/SPOSet.h | 27 +++++++------------ 6 files changed, 39 insertions(+), 52 deletions(-) diff --git a/src/QMCWaveFunctions/DelayedUpdate.h b/src/QMCWaveFunctions/DelayedUpdate.h index dc2e6ab0b..736afffe1 100644 --- a/src/QMCWaveFunctions/DelayedUpdate.h +++ b/src/QMCWaveFunctions/DelayedUpdate.h @@ -21,7 +21,6 @@ namespace qmcplusplus { - /** implements delayed update on CPU using BLAS * @tparam T base precision for most computation * @tparam T_FP high precision for matrix inversion, T_FP >= T diff --git a/src/QMCWaveFunctions/DeterminantHelper.h b/src/QMCWaveFunctions/DeterminantHelper.h index 47189ae95..99247dd79 100644 --- a/src/QMCWaveFunctions/DeterminantHelper.h +++ b/src/QMCWaveFunctions/DeterminantHelper.h @@ -17,7 +17,6 @@ namespace qmcplusplus { - template inline T evaluatePhase(T sign_v) { diff --git a/src/QMCWaveFunctions/DiracDeterminant.cpp b/src/QMCWaveFunctions/DiracDeterminant.cpp index 4c98312e6..4ba080fd3 100644 --- a/src/QMCWaveFunctions/DiracDeterminant.cpp +++ b/src/QMCWaveFunctions/DiracDeterminant.cpp @@ -29,22 +29,22 @@ namespace qmcplusplus template DiracDeterminant::DiracDeterminant(SPOSet* const spos, int first, int delay) : invRow_id(-1), - Phi(spos), - FirstIndex(first), - LastIndex(first + spos->size()), - ndelay(delay), - NumPtcls(spos->size()), - NumOrbitals(spos->size()), - UpdateTimer("DiracDeterminantBase::update", timer_level_fine), - RatioTimer("DiracDeterminantBase::ratio", timer_level_fine), - InverseTimer("DiracDeterminantBase::inverse", timer_level_fine), - BufferTimer("DiracDeterminantBase::buffer", timer_level_fine), - SPOVTimer("DiracDeterminantBase::spoval", timer_level_fine), - SPOVGLTimer("DiracDeterminantBase::spovgl", timer_level_fine) - { - registerTimers(); - resize(spos->size(), spos->size()); - } + Phi(spos), + FirstIndex(first), + LastIndex(first + spos->size()), + ndelay(delay), + NumPtcls(spos->size()), + NumOrbitals(spos->size()), + UpdateTimer("DiracDeterminantBase::update", timer_level_fine), + RatioTimer("DiracDeterminantBase::ratio", timer_level_fine), + InverseTimer("DiracDeterminantBase::inverse", timer_level_fine), + BufferTimer("DiracDeterminantBase::buffer", timer_level_fine), + SPOVTimer("DiracDeterminantBase::spoval", timer_level_fine), + SPOVGLTimer("DiracDeterminantBase::spovgl", timer_level_fine) +{ + registerTimers(); + resize(spos->size(), spos->size()); +} template void DiracDeterminant::invertPsiM(const ValueMatrix_t& logdetT, ValueMatrix_t& invMat) diff --git a/src/QMCWaveFunctions/DiracDeterminant.h b/src/QMCWaveFunctions/DiracDeterminant.h index 04141d50b..2e6c72d73 100644 --- a/src/QMCWaveFunctions/DiracDeterminant.h +++ b/src/QMCWaveFunctions/DiracDeterminant.h @@ -38,12 +38,12 @@ class DiracDeterminant : public WaveFunctionComponent public: using ValueVector_t = Vector; using ValueMatrix_t = Matrix; - using GradVector_t = Vector; - using GradMatrix_t = Matrix; + using GradVector_t = Vector; + using GradMatrix_t = Matrix; - using mValueType = QMCTraits::QTFull::ValueType; + using mValueType = QMCTraits::QTFull::ValueType; using ValueMatrix_hp_t = Matrix; - using mGradType = TinyVector; + using mGradType = TinyVector; /** constructor *@param spos the single-particle orbital set @@ -58,7 +58,10 @@ class DiracDeterminant : public WaveFunctionComponent ///invert psiM or its copies void invertPsiM(const ValueMatrix_t& logdetT, ValueMatrix_t& invMat); - void evaluateGL(ParticleSet& P, ParticleSet::ParticleGradient_t& G, ParticleSet::ParticleLaplacian_t& L, bool fromscratch = false); + void evaluateGL(ParticleSet& P, + ParticleSet::ParticleGradient_t& G, + ParticleSet::ParticleLaplacian_t& L, + bool fromscratch = false); /** return the ratio only for the iat-th partcle move * @param P current configuration @@ -114,7 +117,7 @@ class DiracDeterminant : public WaveFunctionComponent int invRow_id; ValueType curRatio; - + private: /// Timers NewTimer UpdateTimer, RatioTimer, InverseTimer, BufferTimer, SPOVTimer, SPOVGLTimer; @@ -147,7 +150,6 @@ class DiracDeterminant : public WaveFunctionComponent TimerManager.addTimer(&SPOVTimer); TimerManager.addTimer(&SPOVGLTimer); } - }; diff --git a/src/QMCWaveFunctions/DiracMatrix.h b/src/QMCWaveFunctions/DiracMatrix.h index 83d7ed3e1..04f16c21e 100644 --- a/src/QMCWaveFunctions/DiracMatrix.h +++ b/src/QMCWaveFunctions/DiracMatrix.h @@ -155,16 +155,12 @@ class DiracMatrix } public: - DiracMatrix() : Lwork(0) {} /** compute the inverse of the transpose of matrix A * assume precision T_FP >= T, do the inversion always with T_FP */ - inline void invert_transpose(const Matrix& amat, - Matrix& invMat, - real_type& LogDet, - real_type& Phase) + inline void invert_transpose(const Matrix& amat, Matrix& invMat, real_type& LogDet, real_type& Phase) { BlasThreadingEnv knob(getNumThreadsNested()); const int n = invMat.rows(); @@ -174,18 +170,18 @@ class DiracMatrix simd::transpose(amat.data(), n, amat.cols(), invMat.data(), n, invMat.cols()); invMat_ptr = invMat.data(); #else - psiM_fp.resize(n,lda); + psiM_fp.resize(n, lda); simd::transpose(amat.data(), n, amat.cols(), psiM_fp.data(), n, psiM_fp.cols()); invMat_ptr = psiM_fp.data(); #endif if (Lwork < lda) reset(invMat_ptr, lda); Xgetrf(n, n, invMat_ptr, lda, m_pivot.data()); - for(int i=0; i; - using GradVector_t = Vector; + using GradVector_t = Vector; using ValueMatrix_t = Matrix; - using GradMatrix_t = Matrix; + using GradMatrix_t = Matrix; /// return the size of the orbital set inline int size() const { return OrbitalSetSize; } @@ -70,11 +68,7 @@ class SPOSet : public QMCTraits * @param dpsi gradients of the SPO * @param d2psi laplacians of the SPO */ - virtual void evaluate(const ParticleSet& P, - int iat, - ValueVector_t& psi, - GradVector_t& dpsi, - ValueVector_t& d2psi) + virtual void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi, GradVector_t& dpsi, ValueVector_t& d2psi) { //FIXME } @@ -99,26 +93,23 @@ class SPOSet : public QMCTraits } /// operates on multiple walkers - virtual void - multi_evaluate_v(const std::vector& spo_list, const std::vector& pos_list) + virtual void multi_evaluate_v(const std::vector& spo_list, const std::vector& pos_list) { - #pragma omp parallel for +#pragma omp parallel for for (int iw = 0; iw < spo_list.size(); iw++) spo_list[iw]->evaluate_v(pos_list[iw]); } - virtual void - multi_evaluate_vgl(const std::vector& spo_list, const std::vector& pos_list) + virtual void multi_evaluate_vgl(const std::vector& spo_list, const std::vector& pos_list) { - #pragma omp parallel for +#pragma omp parallel for for (int iw = 0; iw < spo_list.size(); iw++) spo_list[iw]->evaluate_vgl(pos_list[iw]); } - virtual void - multi_evaluate_vgh(const std::vector& spo_list, const std::vector& pos_list) + virtual void multi_evaluate_vgh(const std::vector& spo_list, const std::vector& pos_list) { - #pragma omp parallel for +#pragma omp parallel for for (int iw = 0; iw < spo_list.size(); iw++) spo_list[iw]->evaluate_vgh(pos_list[iw]); } From b18e1803b49f5af08c63408565eb9d852f9d42ff Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Tue, 4 Jun 2019 00:10:50 -0500 Subject: [PATCH 05/29] Fix SPO size --- src/QMCWaveFunctions/einspline_spo.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/QMCWaveFunctions/einspline_spo.hpp b/src/QMCWaveFunctions/einspline_spo.hpp index 70257614f..96f30a783 100644 --- a/src/QMCWaveFunctions/einspline_spo.hpp +++ b/src/QMCWaveFunctions/einspline_spo.hpp @@ -85,6 +85,7 @@ struct einspline_spo : public SPOSet */ einspline_spo(const einspline_spo& in, int team_size, int member_id) : Owner(false), Lattice(in.Lattice) { + OrbitalSetSize = in.OrbitalSetSize; nSplines = in.nSplines; nSplinesPerBlock = in.nSplinesPerBlock; nBlocks = (in.nBlocks + team_size - 1) / team_size; @@ -123,6 +124,9 @@ struct einspline_spo : public SPOSet // fix for general num_splines void set(int nx, int ny, int nz, int num_splines, int nblocks, bool init_random = true) { + // setting OrbitalSetSize to num_splines made artificial only in miniQMC + OrbitalSetSize = num_splines; + nSplines = num_splines; nBlocks = nblocks; nSplinesPerBlock = num_splines / nblocks; From f4fa4c3be6ba316d92e859a310f08a6fd9dc2015 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Tue, 4 Jun 2019 00:38:46 -0500 Subject: [PATCH 06/29] Minor fix. still buggy. --- src/QMCWaveFunctions/DiracDeterminant.cpp | 12 ++++++------ src/QMCWaveFunctions/WaveFunction.cpp | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/QMCWaveFunctions/DiracDeterminant.cpp b/src/QMCWaveFunctions/DiracDeterminant.cpp index 4ba080fd3..6e24b7c4d 100644 --- a/src/QMCWaveFunctions/DiracDeterminant.cpp +++ b/src/QMCWaveFunctions/DiracDeterminant.cpp @@ -35,12 +35,12 @@ DiracDeterminant::DiracDeterminant(SPOSet* const spos, int first, int d ndelay(delay), NumPtcls(spos->size()), NumOrbitals(spos->size()), - UpdateTimer("DiracDeterminantBase::update", timer_level_fine), - RatioTimer("DiracDeterminantBase::ratio", timer_level_fine), - InverseTimer("DiracDeterminantBase::inverse", timer_level_fine), - BufferTimer("DiracDeterminantBase::buffer", timer_level_fine), - SPOVTimer("DiracDeterminantBase::spoval", timer_level_fine), - SPOVGLTimer("DiracDeterminantBase::spovgl", timer_level_fine) + UpdateTimer("DiracDeterminant::update", timer_level_fine), + RatioTimer("DiracDeterminant::ratio", timer_level_fine), + InverseTimer("DiracDeterminant::inverse", timer_level_fine), + BufferTimer("DiracDeterminant::buffer", timer_level_fine), + SPOVTimer("DiracDeterminant::spoval", timer_level_fine), + SPOVGLTimer("DiracDeterminant::spovgl", timer_level_fine) { registerTimers(); resize(spos->size(), spos->size()); diff --git a/src/QMCWaveFunctions/WaveFunction.cpp b/src/QMCWaveFunctions/WaveFunction.cpp index f70b1c0bc..0b418bfdd 100644 --- a/src/QMCWaveFunctions/WaveFunction.cpp +++ b/src/QMCWaveFunctions/WaveFunction.cpp @@ -116,7 +116,7 @@ void build_WaveFunction(bool useRef, // determinant component WF.nelup = nelup; WF.Det_up = new DetType(WF.spo, 0, 1); - WF.Det_dn = new DetType(WF.spo, 0, 1); + WF.Det_dn = new DetType(WF.spo, nelup, 1); // J1 component J1OrbType* J1 = new J1OrbType(ions, els); From 907401f6d02a1f1cfe20f80eac1e31d78c94a47c Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Tue, 4 Jun 2019 13:20:03 -0500 Subject: [PATCH 07/29] Make all timers as pointers. --- src/QMCWaveFunctions/DiracDeterminant.cpp | 59 +++++++++++------------ src/QMCWaveFunctions/DiracDeterminant.h | 21 +++----- 2 files changed, 35 insertions(+), 45 deletions(-) diff --git a/src/QMCWaveFunctions/DiracDeterminant.cpp b/src/QMCWaveFunctions/DiracDeterminant.cpp index 6e24b7c4d..84424f7b9 100644 --- a/src/QMCWaveFunctions/DiracDeterminant.cpp +++ b/src/QMCWaveFunctions/DiracDeterminant.cpp @@ -34,24 +34,23 @@ DiracDeterminant::DiracDeterminant(SPOSet* const spos, int first, int d LastIndex(first + spos->size()), ndelay(delay), NumPtcls(spos->size()), - NumOrbitals(spos->size()), - UpdateTimer("DiracDeterminant::update", timer_level_fine), - RatioTimer("DiracDeterminant::ratio", timer_level_fine), - InverseTimer("DiracDeterminant::inverse", timer_level_fine), - BufferTimer("DiracDeterminant::buffer", timer_level_fine), - SPOVTimer("DiracDeterminant::spoval", timer_level_fine), - SPOVGLTimer("DiracDeterminant::spovgl", timer_level_fine) + NumOrbitals(spos->size()) { - registerTimers(); + UpdateTimer = TimerManager.createTimer("DiracDeterminant::update", timer_level_fine); + RatioTimer = TimerManager.createTimer("DiracDeterminant::ratio", timer_level_fine); + InverseTimer = TimerManager.createTimer("DiracDeterminant::inverse", timer_level_fine); + BufferTimer = TimerManager.createTimer("DiracDeterminant::buffer", timer_level_fine); + SPOVTimer = TimerManager.createTimer("DiracDeterminant::spoval", timer_level_fine); + SPOVGLTimer = TimerManager.createTimer("DiracDeterminant::spovgl", timer_level_fine); resize(spos->size(), spos->size()); } template void DiracDeterminant::invertPsiM(const ValueMatrix_t& logdetT, ValueMatrix_t& invMat) { - InverseTimer.start(); + InverseTimer->start(); updateEng.invert_transpose(logdetT, invMat, LogValue, PhaseValue); - InverseTimer.stop(); + InverseTimer->stop(); } @@ -81,11 +80,11 @@ template typename DiracDeterminant::GradType DiracDeterminant::evalGrad(ParticleSet& P, int iat) { const int WorkingIndex = iat - FirstIndex; - RatioTimer.start(); + RatioTimer->start(); invRow_id = WorkingIndex; updateEng.getInvRow(psiM, WorkingIndex, invRow); GradType g = simd::dot(invRow.data(), dpsiM[WorkingIndex], invRow.size()); - RatioTimer.stop(); + RatioTimer->stop(); return g; } @@ -94,10 +93,10 @@ typename DiracDeterminant::ValueType DiracDeterminant::ratioGr int iat, GradType& grad_iat) { - SPOVGLTimer.start(); + SPOVGLTimer->start(); Phi->evaluate(P, iat, psiV, dpsiV, d2psiV); - SPOVGLTimer.stop(); - RatioTimer.start(); + SPOVGLTimer->stop(); + RatioTimer->start(); const int WorkingIndex = iat - FirstIndex; UpdateMode = ORB_PBYP_PARTIAL; GradType rv; @@ -112,7 +111,7 @@ typename DiracDeterminant::ValueType DiracDeterminant::ratioGr } curRatio = simd::dot(invRow.data(), psiV.data(), invRow.size()); grad_iat += ((RealType)1.0 / curRatio) * simd::dot(invRow.data(), dpsiV.data(), invRow.size()); - RatioTimer.stop(); + RatioTimer->stop(); return curRatio; } @@ -124,7 +123,7 @@ void DiracDeterminant::acceptMove(ParticleSet& P, int iat) const int WorkingIndex = iat - FirstIndex; PhaseValue += evaluatePhase(curRatio); LogValue += std::log(std::abs(curRatio)); - UpdateTimer.start(); + UpdateTimer->start(); updateEng.acceptRow(psiM, WorkingIndex, psiV); // invRow becomes invalid after accepting a move invRow_id = -1; @@ -133,18 +132,18 @@ void DiracDeterminant::acceptMove(ParticleSet& P, int iat) simd::copy(dpsiM[WorkingIndex], dpsiV.data(), NumOrbitals); simd::copy(d2psiM[WorkingIndex], d2psiV.data(), NumOrbitals); } - UpdateTimer.stop(); + UpdateTimer->stop(); curRatio = 1.0; } template void DiracDeterminant::completeUpdates() { - UpdateTimer.start(); + UpdateTimer->start(); // invRow becomes invalid after updating the inverse matrix invRow_id = -1; updateEng.updateInvMat(psiM); - UpdateTimer.stop(); + UpdateTimer->stop(); } template @@ -155,9 +154,9 @@ void DiracDeterminant::evaluateGL(ParticleSet& P, { if (UpdateMode == ORB_PBYP_RATIO) { //need to compute dpsiM and d2psiM. Do not touch psiM! - SPOVGLTimer.start(); + SPOVGLTimer->start(); Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_temp, dpsiM, d2psiM); - SPOVGLTimer.stop(); + SPOVGLTimer->stop(); } if (NumPtcls == 1) @@ -188,10 +187,10 @@ typename DiracDeterminant::ValueType DiracDeterminant::ratio(P { UpdateMode = ORB_PBYP_RATIO; const int WorkingIndex = iat - FirstIndex; - SPOVTimer.start(); + SPOVTimer->start(); Phi->evaluate(P, iat, psiV); - SPOVTimer.stop(); - RatioTimer.start(); + SPOVTimer->stop(); + RatioTimer->start(); // This is an optimization. // check invRow_id against WorkingIndex to see if getInvRow() has been called // This is intended to save redundant compuation in TM1 and TM3 @@ -201,7 +200,7 @@ typename DiracDeterminant::ValueType DiracDeterminant::ratio(P updateEng.getInvRow(psiM, WorkingIndex, invRow); } curRatio = simd::dot(invRow.data(), psiV.data(), invRow.size()); - RatioTimer.stop(); + RatioTimer->stop(); return curRatio; } @@ -209,12 +208,12 @@ typename DiracDeterminant::ValueType DiracDeterminant::ratio(P template void DiracDeterminant::evaluateRatios(VirtualParticleSet& VP, std::vector& ratios) { - SPOVTimer.start(); + SPOVTimer->start(); const int WorkingIndex = VP.refPtcl - FirstIndex; invRow_id = WorkingIndex; updateEng.getInvRow(psiM, WorkingIndex, invRow); Phi->evaluateDetRatios(VP, psiV, invRow, ratios); - SPOVTimer.stop(); + SPOVTimer->stop(); } */ @@ -258,9 +257,9 @@ typename DiracDeterminant::RealType DiracDeterminant::evaluate template void DiracDeterminant::recompute(ParticleSet& P) { - SPOVGLTimer.start(); + SPOVGLTimer->start(); Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_temp, dpsiM, d2psiM); - SPOVGLTimer.stop(); + SPOVGLTimer->stop(); if (NumPtcls == 1) { //CurrentDet=psiM(0,0); diff --git a/src/QMCWaveFunctions/DiracDeterminant.h b/src/QMCWaveFunctions/DiracDeterminant.h index 2e6c72d73..70333f9cc 100644 --- a/src/QMCWaveFunctions/DiracDeterminant.h +++ b/src/QMCWaveFunctions/DiracDeterminant.h @@ -120,7 +120,12 @@ class DiracDeterminant : public WaveFunctionComponent private: /// Timers - NewTimer UpdateTimer, RatioTimer, InverseTimer, BufferTimer, SPOVTimer, SPOVGLTimer; + NewTimer* UpdateTimer; + NewTimer* RatioTimer; + NewTimer* InverseTimer; + NewTimer* BufferTimer; + NewTimer* SPOVTimer; + NewTimer* SPOVGLTimer; /// a set of single-particle orbitals used to fill in the values of the matrix SPOSet* const Phi; ///index of the first particle with respect to the particle set @@ -134,22 +139,8 @@ class DiracDeterminant : public WaveFunctionComponent /// delayed update rank int ndelay; - ///reset the size: with the number of particles and number of orbtials void resize(int nel, int morb); - - /// register all the timers - void registerTimers() - { - UpdateTimer.reset(); - RatioTimer.reset(); - TimerManager.addTimer(&UpdateTimer); - TimerManager.addTimer(&RatioTimer); - TimerManager.addTimer(&InverseTimer); - TimerManager.addTimer(&BufferTimer); - TimerManager.addTimer(&SPOVTimer); - TimerManager.addTimer(&SPOVGLTimer); - } }; From aa0841dc00ed290d2aedeb61eefa06fbe5580bdf Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Tue, 4 Jun 2019 14:38:38 -0500 Subject: [PATCH 08/29] Connect SPO to Det --- src/QMCWaveFunctions/SPOSet.h | 20 +++++++++++++------- src/QMCWaveFunctions/einspline_spo.hpp | 26 ++++++++++++++++++++++---- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/src/QMCWaveFunctions/SPOSet.h b/src/QMCWaveFunctions/SPOSet.h index c58a70f73..90be5b64d 100644 --- a/src/QMCWaveFunctions/SPOSet.h +++ b/src/QMCWaveFunctions/SPOSet.h @@ -14,11 +14,11 @@ #include #include "Utilities/Configuration.h" -#include +#include "Numerics/OhmmsPETE/OhmmsMatrix.h" +#include "Particle/ParticleSet.h" namespace qmcplusplus { -class ParticleSet; /** base class for Single-particle orbital sets * @@ -56,9 +56,9 @@ class SPOSet : public QMCTraits * @param iat active particle * @param psi values of the SPO */ - virtual void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi) + virtual void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v) { - //FIXME + evaluate_v(P.activeR(iat)); } /** evaluate the values, gradients and laplacians of this single-particle orbital set @@ -68,9 +68,9 @@ class SPOSet : public QMCTraits * @param dpsi gradients of the SPO * @param d2psi laplacians of the SPO */ - virtual void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi, GradVector_t& dpsi, ValueVector_t& d2psi) + virtual void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v, GradVector_t& dpsi_v, ValueVector_t& d2psi_v) { - //FIXME + evaluate_vgh(P.activeR(iat)); } /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles @@ -89,7 +89,13 @@ class SPOSet : public QMCTraits GradMatrix_t& dlogdet, ValueMatrix_t& d2logdet) { - //FIXME + for (int iat = first, i = 0; iat < last; ++iat, ++i) + { + ValueVector_t v(logdet[i], OrbitalSetSize); + GradVector_t g(dlogdet[i], OrbitalSetSize); + ValueVector_t l(d2logdet[i], OrbitalSetSize); + evaluate(P, iat, v, g, l); + } } /// operates on multiple walkers diff --git a/src/QMCWaveFunctions/einspline_spo.hpp b/src/QMCWaveFunctions/einspline_spo.hpp index 96f30a783..aa77626c8 100644 --- a/src/QMCWaveFunctions/einspline_spo.hpp +++ b/src/QMCWaveFunctions/einspline_spo.hpp @@ -159,13 +159,19 @@ struct einspline_spo : public SPOSet } /** evaluate psi */ - inline void evaluate_v(const PosType& p) + inline void evaluate_v(const PosType& p) {} + inline void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v) { ScopedTimer local_timer(timer); - auto u = Lattice.toUnit_floor(p); + auto u = Lattice.toUnit_floor(P.activeR(iat)); for (int i = 0; i < nBlocks; ++i) + { MultiBsplineEval::evaluate_v(einsplines[i], u[0], u[1], u[2], psi[i].data(), nSplinesPerBlock); + // in real simulation, phase needs to be applied. Here just fake computation + const int first = i*nBlocks; + std::copy_n(psi[i].data(), std::min((i+1)*nSplinesPerBlock, OrbitalSetSize) - first, psi_v.data()+first); + } } /** evaluate psi, grad and lap */ @@ -178,14 +184,26 @@ struct einspline_spo : public SPOSet } /** evaluate psi, grad and hess */ - inline void evaluate_vgh(const PosType& p) + inline void evaluate_vgh(const PosType& p) {} + inline void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v, GradVector_t& dpsi_v, ValueVector_t& d2psi_v) { ScopedTimer local_timer(timer); - auto u = Lattice.toUnit_floor(p); + auto u = Lattice.toUnit_floor(P.activeR(iat)); for (int i = 0; i < nBlocks; ++i) + { MultiBsplineEval::evaluate_vgh(einsplines[i], u[0], u[1], u[2], psi[i].data(), grad[i].data(), hess[i].data(), nSplinesPerBlock); + // in real simulation, phase needs to be applied. Here just fake computation + const int first = i*nBlocks; + for (int j = first; j < std::min((i+1)*nSplinesPerBlock, OrbitalSetSize); j++) + { + psi_v[j] = psi[i][j-first]; + dpsi_v[j] = grad[i][j-first]; + d2psi_v[j] = hess[i].data(0)[j-first] + hess[i].data(1)[j-first] + hess[i].data(2)[j-first] + + hess[i].data(3)[j-first] + hess[i].data(4)[j-first] + hess[i].data(5)[j-first]; + } + } } void print(std::ostream& os) From 5a4b0e3b54a566f6554b0e820c6f399ab5cf83b8 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Tue, 4 Jun 2019 14:59:28 -0500 Subject: [PATCH 09/29] Add delay rank arguments. --- src/Drivers/miniqmc.cpp | 15 +++++++++++---- src/Drivers/miniqmc_sync_move.cpp | 13 ++++++++++--- src/QMCWaveFunctions/WaveFunction.cpp | 5 +++-- src/QMCWaveFunctions/WaveFunction.h | 2 ++ 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/Drivers/miniqmc.cpp b/src/Drivers/miniqmc.cpp index 26043b4da..2371dbe70 100644 --- a/src/Drivers/miniqmc.cpp +++ b/src/Drivers/miniqmc.cpp @@ -153,7 +153,7 @@ void print_help() app_summary() << " miniqmc [-bhjvV] [-g \"n0 n1 n2\"] [-m meshfactor]" << '\n'; app_summary() << " [-n steps] [-N substeps] [-x rmax]" << '\n'; app_summary() << " [-r AcceptanceRatio] [-s seed] [-w walkers]" << '\n'; - app_summary() << " [-a tile_size] [-t timer_level]" << '\n'; + app_summary() << " [-a tile_size] [-t timer_level] [-u delay_rank]" << '\n'; app_summary() << "options:" << '\n'; app_summary() << " -a size of each spline tile default: num of orbs" << '\n'; app_summary() << " -b use reference implementations default: off" << '\n'; @@ -166,9 +166,10 @@ void print_help() app_summary() << " -r set the acceptance ratio. default: 0.5" << '\n'; app_summary() << " -s set the random seed. default: 11" << '\n'; app_summary() << " -t timer level: coarse or fine default: fine" << '\n'; - app_summary() << " -w number of walker(movers) default: num of threads"<< '\n'; + app_summary() << " -u matrix delayed update rank default: 32" << '\n'; app_summary() << " -v verbose output" << '\n'; app_summary() << " -V print version information and exit" << '\n'; + app_summary() << " -w number of walker(movers) default: num of threads"<< '\n'; app_summary() << " -x set the Rmax. default: 1.7" << '\n'; // clang-format on } @@ -199,6 +200,7 @@ int main(int argc, char** argv) // Set cutoff for NLPP use. RealType Rmax(1.7); RealType accept = 0.5; + int delay_rank = 32; bool useRef = false; bool enableJ3 = false; @@ -215,7 +217,7 @@ int main(int argc, char** argv) int opt; while (optind < argc) { - if ((opt = getopt(argc, argv, "bhjvVa:c:g:m:n:N:r:s:t:w:x:")) != -1) + if ((opt = getopt(argc, argv, "bhjvVa:c:g:m:n:N:r:s:t:u:w:x:")) != -1) { switch (opt) { @@ -261,6 +263,9 @@ int main(int argc, char** argv) case 't': timer_level_name = std::string(optarg); break; + case 'u': + delay_rank = atoi(optarg); + break; case 'v': verbose = true; break; @@ -351,6 +356,8 @@ int main(int argc, char** argv) app_summary() << "\nSPO coefficients size = " << SPO_coeff_size << " bytes (" << SPO_coeff_size_MB << " MB)" << endl; + app_summary() << "delayed update rank = " << delay_rank << endl; + spo_main = build_SPOSet(useRef, nx, ny, nz, norb, nTiles, lattice_b); Timers[Timer_Setup]->stop(); @@ -380,7 +387,7 @@ int main(int argc, char** argv) mover_list[iw] = thiswalker; // create wavefunction per mover - build_WaveFunction(useRef, spo_main, thiswalker->wavefunction, ions, thiswalker->els, thiswalker->rng, enableJ3); + build_WaveFunction(useRef, spo_main, thiswalker->wavefunction, ions, thiswalker->els, thiswalker->rng, delay_rank, enableJ3); // initial computing thiswalker->els.update(); diff --git a/src/Drivers/miniqmc_sync_move.cpp b/src/Drivers/miniqmc_sync_move.cpp index c1941cf70..db027a73d 100644 --- a/src/Drivers/miniqmc_sync_move.cpp +++ b/src/Drivers/miniqmc_sync_move.cpp @@ -154,6 +154,7 @@ void print_help() app_summary() << " [-n steps] [-N substeps] [-x rmax]" << '\n'; app_summary() << " [-r AcceptanceRatio] [-s seed] [-w walkers]" << '\n'; app_summary() << " [-a tile_size] [-t timer_level] [-B nw_b]" << '\n'; + app_summary() << " [-u delay_rank]" << '\n'; app_summary() << "options:" << '\n'; app_summary() << " -a size of each spline tile default: num of orbs" << '\n'; app_summary() << " -b use reference implementations default: off" << '\n'; @@ -168,9 +169,10 @@ void print_help() app_summary() << " -r set the acceptance ratio. default: 0.5" << '\n'; app_summary() << " -s set the random seed. default: 11" << '\n'; app_summary() << " -t timer level: coarse or fine default: fine" << '\n'; - app_summary() << " -w number of walker(movers) default: num of threads"<< '\n'; + app_summary() << " -u matrix delayed update rank default: 32" << '\n'; app_summary() << " -v verbose output" << '\n'; app_summary() << " -V print version information and exit" << '\n'; + app_summary() << " -w number of walker(movers) default: num of threads"<< '\n'; app_summary() << " -x set the Rmax. default: 1.7" << '\n'; // clang-format on } @@ -205,6 +207,7 @@ int main(int argc, char** argv) // Set cutoff for NLPP use. RealType Rmax(1.7); RealType accept = 0.5; + int delay_rank = 32; bool useRef = false; bool enableJ3 = false; bool run_pseudo = true; @@ -222,7 +225,7 @@ int main(int argc, char** argv) int opt; while (optind < argc) { - if ((opt = getopt(argc, argv, "bhjPvVa:B:c:g:m:n:N:r:s:t:w:x:")) != -1) + if ((opt = getopt(argc, argv, "bhjPvVa:B:c:g:m:n:N:r:s:t:u:w:x:")) != -1) { switch (opt) { @@ -274,6 +277,9 @@ int main(int argc, char** argv) case 't': timer_level_name = std::string(optarg); break; + case 'u': + delay_rank = atoi(optarg); + break; case 'v': verbose = true; break; @@ -372,6 +378,7 @@ int main(int argc, char** argv) app_summary() << "\nSPO coefficients size = " << SPO_coeff_size << " bytes (" << SPO_coeff_size_MB << " MB)" << endl; + app_summary() << "delayed update rank = " << delay_rank << endl; spo_main = build_SPOSet(useRef, nx, ny, nz, norb, nTiles, lattice_b); Timers[Timer_Setup]->stop(); @@ -399,7 +406,7 @@ int main(int argc, char** argv) mover_list[iw] = thiswalker; // create wavefunction per mover - build_WaveFunction(useRef, spo_main, thiswalker->wavefunction, ions, thiswalker->els, thiswalker->rng, enableJ3); + build_WaveFunction(useRef, spo_main, thiswalker->wavefunction, ions, thiswalker->els, thiswalker->rng, delay_rank, enableJ3); // initial computing thiswalker->els.update(); diff --git a/src/QMCWaveFunctions/WaveFunction.cpp b/src/QMCWaveFunctions/WaveFunction.cpp index 0b418bfdd..9bcbe58ea 100644 --- a/src/QMCWaveFunctions/WaveFunction.cpp +++ b/src/QMCWaveFunctions/WaveFunction.cpp @@ -46,6 +46,7 @@ void build_WaveFunction(bool useRef, ParticleSet& ions, ParticleSet& els, const RandomGenerator& RNG, + int delay_rank, bool enableJ3) { using valT = WaveFunction::valT; @@ -115,8 +116,8 @@ void build_WaveFunction(bool useRef, // determinant component WF.nelup = nelup; - WF.Det_up = new DetType(WF.spo, 0, 1); - WF.Det_dn = new DetType(WF.spo, nelup, 1); + WF.Det_up = new DetType(WF.spo, 0, delay_rank); + WF.Det_dn = new DetType(WF.spo, nelup, delay_rank); // J1 component J1OrbType* J1 = new J1OrbType(ions, els); diff --git a/src/QMCWaveFunctions/WaveFunction.h b/src/QMCWaveFunctions/WaveFunction.h index b69448957..1f820058c 100644 --- a/src/QMCWaveFunctions/WaveFunction.h +++ b/src/QMCWaveFunctions/WaveFunction.h @@ -100,6 +100,7 @@ class WaveFunction ParticleSet& ions, ParticleSet& els, const RandomGenerator& RNG, + int delay_rank, bool enableJ3); const std::vector extract_spo_list(const std::vector& WF_list) const; const std::vector @@ -116,6 +117,7 @@ void build_WaveFunction(bool useRef, ParticleSet& ions, ParticleSet& els, const RandomGenerator& RNG, + int delay_rank, bool enableJ3); } // namespace qmcplusplus From eaca306dfabc21d37392dc718913a74340311b74 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Tue, 4 Jun 2019 19:48:11 -0500 Subject: [PATCH 10/29] Fix check_spo --- src/Drivers/check_spo.cpp | 16 ++++++----- src/QMCWaveFunctions/SPOSet.h | 22 +++++++-------- src/QMCWaveFunctions/WaveFunction.cpp | 9 ++---- src/QMCWaveFunctions/einspline_spo.hpp | 28 +++++++++++++------ src/QMCWaveFunctions/einspline_spo_ref.hpp | 12 ++++---- src/QMCWaveFunctions/tests/test_dirac_det.cpp | 6 ++-- 6 files changed, 52 insertions(+), 41 deletions(-) diff --git a/src/Drivers/check_spo.cpp b/src/Drivers/check_spo.cpp index e76a42aac..8060a8c9e 100644 --- a/src/Drivers/check_spo.cpp +++ b/src/Drivers/check_spo.cpp @@ -249,9 +249,9 @@ int main(int argc, char** argv) // VMC for (int iel = 0; iel < nels; ++iel) { - PosType pos = els.R[iel] + delta[iel]; - spo.evaluate_vgh(pos); - spo_ref.evaluate_vgh(pos); + els.makeMove(iel, delta[iel]); + spo.evaluate_vgh(els, iel); + spo_ref.evaluate_vgh(els, iel); // accumulate error for (int ib = 0; ib < spo.nBlocks; ib++) for (int n = 0; n < spo.nSplinesPerBlock; n++) @@ -272,7 +272,7 @@ int main(int argc, char** argv) } if (ur[iel] < accept) { - els.R[iel] = pos; + els.acceptMove(iel); my_accepted++; } } @@ -288,11 +288,13 @@ int main(int argc, char** argv) for (int nn = 0; nn < nnF; ++nn) { + int iel = nn + iat * zval; for (int k = 0; k < nknots; k++) { - PosType pos = centerP + r * rOnSphere[k]; - spo.evaluate_v(pos); - spo_ref.evaluate_v(pos); + PosType delta_qp = centerP + r * rOnSphere[k] - els.R[iel]; + els.makeMove(iel, delta_qp); + spo.evaluate_v(els, iel); + spo_ref.evaluate_v(els, iel); // accumulate error for (int ib = 0; ib < spo.nBlocks; ib++) for (int n = 0; n < spo.nSplinesPerBlock; n++) diff --git a/src/QMCWaveFunctions/SPOSet.h b/src/QMCWaveFunctions/SPOSet.h index 90be5b64d..e6e1b2dc8 100644 --- a/src/QMCWaveFunctions/SPOSet.h +++ b/src/QMCWaveFunctions/SPOSet.h @@ -47,9 +47,9 @@ class SPOSet : public QMCTraits /// operates on a single walker /// evaluating SPOs - virtual void evaluate_v(const PosType& p) = 0; - virtual void evaluate_vgl(const PosType& p) = 0; - virtual void evaluate_vgh(const PosType& p) = 0; + virtual void evaluate_v(const ParticleSet& P, int iat) = 0; + virtual void evaluate_vgl(const ParticleSet& P, int iat) = 0; + virtual void evaluate_vgh(const ParticleSet& P, int iat) = 0; /** evaluate the values of this single-particle orbital set * @param P current ParticleSet @@ -58,7 +58,7 @@ class SPOSet : public QMCTraits */ virtual void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v) { - evaluate_v(P.activeR(iat)); + evaluate_v(P, iat); } /** evaluate the values, gradients and laplacians of this single-particle orbital set @@ -70,7 +70,7 @@ class SPOSet : public QMCTraits */ virtual void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v, GradVector_t& dpsi_v, ValueVector_t& d2psi_v) { - evaluate_vgh(P.activeR(iat)); + evaluate_vgh(P, iat); } /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles @@ -99,25 +99,25 @@ class SPOSet : public QMCTraits } /// operates on multiple walkers - virtual void multi_evaluate_v(const std::vector& spo_list, const std::vector& pos_list) + virtual void multi_evaluate_v(const std::vector& spo_list, const std::vector& P_list, int iat) { #pragma omp parallel for for (int iw = 0; iw < spo_list.size(); iw++) - spo_list[iw]->evaluate_v(pos_list[iw]); + spo_list[iw]->evaluate_v(*P_list[iw], iat); } - virtual void multi_evaluate_vgl(const std::vector& spo_list, const std::vector& pos_list) + virtual void multi_evaluate_vgl(const std::vector& spo_list, const std::vector& P_list, int iat) { #pragma omp parallel for for (int iw = 0; iw < spo_list.size(); iw++) - spo_list[iw]->evaluate_vgl(pos_list[iw]); + spo_list[iw]->evaluate_vgl(*P_list[iw], iat); } - virtual void multi_evaluate_vgh(const std::vector& spo_list, const std::vector& pos_list) + virtual void multi_evaluate_vgh(const std::vector& spo_list, const std::vector& P_list, int iat) { #pragma omp parallel for for (int iw = 0; iw < spo_list.size(); iw++) - spo_list[iw]->evaluate_vgh(pos_list[iw]); + spo_list[iw]->evaluate_vgh(*P_list[iw], iat); } }; diff --git a/src/QMCWaveFunctions/WaveFunction.cpp b/src/QMCWaveFunctions/WaveFunction.cpp index 9bcbe58ea..026c9b3cc 100644 --- a/src/QMCWaveFunctions/WaveFunction.cpp +++ b/src/QMCWaveFunctions/WaveFunction.cpp @@ -208,7 +208,7 @@ WaveFunction::posT WaveFunction::evalGrad(ParticleSet& P, int iat) WaveFunction::valT WaveFunction::ratioGrad(ParticleSet& P, int iat, posT& grad) { - spo->evaluate_vgh(P.activePos); + spo->evaluate_vgh(P, iat); timers[Timer_Det]->start(); grad = valT(0); @@ -226,7 +226,7 @@ WaveFunction::valT WaveFunction::ratioGrad(ParticleSet& P, int iat, posT& grad) WaveFunction::valT WaveFunction::ratio(ParticleSet& P, int iat) { - spo->evaluate_v(P.activePos); + spo->evaluate_v(P, iat); timers[Timer_Det]->start(); valT ratio = (iat < nelup ? Det_up->ratio(P, iat) : Det_dn->ratio(P, iat)); @@ -373,11 +373,8 @@ void WaveFunction::flex_ratioGrad(const std::vector& WF_list, { if (P_list.size() > 1) { - std::vector pos_list(P_list.size()); - for (int iw = 0; iw < P_list.size(); iw++) - pos_list[iw] = P_list[iw]->activePos; auto spo_list(extract_spo_list(WF_list)); - spo->multi_evaluate_vgh(spo_list, pos_list); + spo->multi_evaluate_vgh(spo_list, P_list, iat); timers[Timer_Det]->start(); std::vector ratios_det(P_list.size()); diff --git a/src/QMCWaveFunctions/einspline_spo.hpp b/src/QMCWaveFunctions/einspline_spo.hpp index aa77626c8..553e9b20f 100644 --- a/src/QMCWaveFunctions/einspline_spo.hpp +++ b/src/QMCWaveFunctions/einspline_spo.hpp @@ -159,15 +159,21 @@ struct einspline_spo : public SPOSet } /** evaluate psi */ - inline void evaluate_v(const PosType& p) {} - inline void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v) + inline void evaluate_v(const ParticleSet& P, int iat) { ScopedTimer local_timer(timer); auto u = Lattice.toUnit_floor(P.activeR(iat)); for (int i = 0; i < nBlocks; ++i) - { MultiBsplineEval::evaluate_v(einsplines[i], u[0], u[1], u[2], psi[i].data(), nSplinesPerBlock); + } + + inline void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v) + { + evaluate_v(P, iat); + + for (int i = 0; i < nBlocks; ++i) + { // in real simulation, phase needs to be applied. Here just fake computation const int first = i*nBlocks; std::copy_n(psi[i].data(), std::min((i+1)*nSplinesPerBlock, OrbitalSetSize) - first, psi_v.data()+first); @@ -175,25 +181,31 @@ struct einspline_spo : public SPOSet } /** evaluate psi, grad and lap */ - inline void evaluate_vgl(const PosType& p) + inline void evaluate_vgl(const ParticleSet& P, int iat) { - auto u = Lattice.toUnit_floor(p); + auto u = Lattice.toUnit_floor(P.activeR(iat)); for (int i = 0; i < nBlocks; ++i) MultiBsplineEval::evaluate_vgl(einsplines[i], u[0], u[1], u[2], psi[i].data(), grad[i].data(), hess[i].data(), nSplinesPerBlock); } /** evaluate psi, grad and hess */ - inline void evaluate_vgh(const PosType& p) {} - inline void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v, GradVector_t& dpsi_v, ValueVector_t& d2psi_v) + inline void evaluate_vgh(const ParticleSet& P, int iat) { ScopedTimer local_timer(timer); auto u = Lattice.toUnit_floor(P.activeR(iat)); for (int i = 0; i < nBlocks; ++i) - { MultiBsplineEval::evaluate_vgh(einsplines[i], u[0], u[1], u[2], psi[i].data(), grad[i].data(), hess[i].data(), nSplinesPerBlock); + } + + inline void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v, GradVector_t& dpsi_v, ValueVector_t& d2psi_v) + { + evaluate_vgh(P, iat); + + for (int i = 0; i < nBlocks; ++i) + { // in real simulation, phase needs to be applied. Here just fake computation const int first = i*nBlocks; for (int j = first; j < std::min((i+1)*nSplinesPerBlock, OrbitalSetSize); j++) diff --git a/src/QMCWaveFunctions/einspline_spo_ref.hpp b/src/QMCWaveFunctions/einspline_spo_ref.hpp index bd696dca4..dab85838a 100644 --- a/src/QMCWaveFunctions/einspline_spo_ref.hpp +++ b/src/QMCWaveFunctions/einspline_spo_ref.hpp @@ -156,30 +156,30 @@ struct einspline_spo_ref : public SPOSet } /** evaluate psi */ - inline void evaluate_v(const PosType& p) + inline void evaluate_v(const ParticleSet& P, int iat) { ScopedTimer local_timer(timer); - auto u = Lattice.toUnit_floor(p); + auto u = Lattice.toUnit_floor(P.activeR(iat)); for (int i = 0; i < nBlocks; ++i) MultiBsplineEvalRef::evaluate_v(einsplines[i], u[0], u[1], u[2], psi[i].data(), nSplinesPerBlock); } /** evaluate psi, grad and lap */ - inline void evaluate_vgl(const PosType& p) + inline void evaluate_vgl(const ParticleSet& P, int iat) { - auto u = Lattice.toUnit_floor(p); + auto u = Lattice.toUnit_floor(P.activeR(iat)); for (int i = 0; i < nBlocks; ++i) MultiBsplineEvalRef::evaluate_vgl(einsplines[i], u[0], u[1], u[2], psi[i].data(), grad[i].data(), hess[i].data(), nSplinesPerBlock); } /** evaluate psi, grad and hess */ - inline void evaluate_vgh(const PosType& p) + inline void evaluate_vgh(const ParticleSet& P, int iat) { ScopedTimer local_timer(timer); - auto u = Lattice.toUnit_floor(p); + auto u = Lattice.toUnit_floor(P.activeR(iat)); for (int i = 0; i < nBlocks; ++i) MultiBsplineEvalRef::evaluate_vgh(einsplines[i], u[0], u[1], u[2], psi[i].data(), grad[i].data(), hess[i].data(), nSplinesPerBlock); diff --git a/src/QMCWaveFunctions/tests/test_dirac_det.cpp b/src/QMCWaveFunctions/tests/test_dirac_det.cpp index 3bca2ec99..714f6aefe 100644 --- a/src/QMCWaveFunctions/tests/test_dirac_det.cpp +++ b/src/QMCWaveFunctions/tests/test_dirac_det.cpp @@ -66,9 +66,9 @@ class FakeSPO : public SPOSet GradMatrix_t& dlogdet, ValueMatrix_t& d2logdet); - void evaluate_v(const PosType& p) {}; - void evaluate_vgl(const PosType& p) {}; - void evaluate_vgh(const PosType& p) {}; + void evaluate_v(const ParticleSet& P, int iat) {}; + void evaluate_vgl(const ParticleSet& P, int iat) {}; + void evaluate_vgh(const ParticleSet& P, int iat) {}; }; FakeSPO::FakeSPO() From f0ef52b17bfb47c01af60401e19ba5f5f0067248 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Tue, 4 Jun 2019 23:08:55 -0500 Subject: [PATCH 11/29] Add DiracDeterminantRef and add check_wfc --- src/Drivers/CMakeLists.txt | 4 +- src/Drivers/check_wfc.cpp | 36 ++- src/QMCWaveFunctions/CMakeLists.txt | 4 +- src/QMCWaveFunctions/DiracDeterminantRef.cpp | 286 +++++++++++++++++++ src/QMCWaveFunctions/DiracDeterminantRef.h | 150 ++++++++++ src/QMCWaveFunctions/WaveFunctionComponent.h | 4 + src/QMCWaveFunctions/einspline_spo_ref.hpp | 34 +++ 7 files changed, 511 insertions(+), 7 deletions(-) create mode 100644 src/QMCWaveFunctions/DiracDeterminantRef.cpp create mode 100644 src/QMCWaveFunctions/DiracDeterminantRef.h diff --git a/src/Drivers/CMakeLists.txt b/src/Drivers/CMakeLists.txt index 988011725..c6ad9ec63 100644 --- a/src/Drivers/CMakeLists.txt +++ b/src/Drivers/CMakeLists.txt @@ -1,14 +1,14 @@ if(QMC_BUILD_LEVEL GREATER 4) # add apps XYZ.cpp, e.g., qmc_particles.cpp #SET(ESTEST einspline_smp einspline_spo qmc_particles moveonsphere twobody ptclset) -SET(ESTEST check_wfc check_spo check_determinant) +SET(ESTEST check_spo check_determinant) FOREACH(p ${ESTEST}) ADD_EXECUTABLE( ${p} ${p}.cpp) TARGET_LINK_LIBRARIES(${p} qmcbase qmcutil ${QMC_UTIL_LIBS} ${MPI_LIBRARY}) ENDFOREACH(p ${ESTEST}) -SET(DRIVERS miniqmc miniqmc_sync_move) +SET(DRIVERS check_wfc miniqmc miniqmc_sync_move) FOREACH(p ${DRIVERS}) ADD_EXECUTABLE( ${p} ${p}.cpp) diff --git a/src/Drivers/check_wfc.cpp b/src/Drivers/check_wfc.cpp index d59014059..da2637d6e 100644 --- a/src/Drivers/check_wfc.cpp +++ b/src/Drivers/check_wfc.cpp @@ -36,6 +36,9 @@ #include #include #include +#include +#include +#include #include #include @@ -50,7 +53,7 @@ void print_help() cout << " [-r rmax] [-s seed]" << '\n'; cout << "options:" << '\n'; cout << " -f specify wavefunction component to check" << '\n'; - cout << " one of: J1, J2, J3. default: J2" << '\n'; + cout << " one of: J1, J2, J3, Det. default: J2" << '\n'; cout << " -g set the 3D tiling. default: 1 1 1" << '\n'; cout << " -h print help and exit" << '\n'; cout << " -r set the Rmax. default: 1.7" << '\n'; @@ -128,7 +131,7 @@ int main(int argc, char** argv) else outputManager.setVerbosity(Verbosity::LOW); - if (wfc_name != "J1" && wfc_name != "J2" && wfc_name != "J3" && wfc_name != "JeeI") + if (wfc_name != "J1" && wfc_name != "J2" && wfc_name != "J3" && wfc_name != "JeeI" && wfc_name != "Det") { cerr << "Uknown wave funciton component: " << wfc_name << endl << endl; print_help(); @@ -154,6 +157,14 @@ int main(int argc, char** argv) PrimeNumberSet myPrimes; + SPOSet* spo_main = nullptr; + { + ParticleSet els; + RandomGenerator random_th(myPrimes[0]); + build_els(els, ions, random_th); + if (wfc_name == "Det") spo_main = build_SPOSet(false, 40, 40, 40, els.getTotalNum(), 1, lattice_b); + } + // clang-format off #pragma omp parallel reduction(+:evaluateLog_v_err,evaluateLog_g_err,evaluateLog_l_err,evalGrad_g_err) \ reduction(+:ratioGrad_r_err,ratioGrad_g_err,evaluateGL_g_err,evaluateGL_l_err,ratio_err) @@ -187,6 +198,7 @@ int main(int argc, char** argv) WaveFunctionComponentPtr wfc = nullptr; WaveFunctionComponentPtr wfc_ref = nullptr; + if (wfc_name == "J2") { TwoBodyJastrow>* J = new TwoBodyJastrow>(els); @@ -224,6 +236,17 @@ int main(int argc, char** argv) wfc_ref = dynamic_cast(J_ref); cout << "Built JeeI_ref" << endl; } + else if (wfc_name == "Det") + { + SPOSet* spo = build_SPOSet_view(false, spo_main, 1, 0); + auto* Det = new DiracDeterminant<>(spo, 0, 31); + wfc = dynamic_cast(Det); + cout << "Built Det" << endl; + SPOSet* spo_ref = build_SPOSet_view(false, spo_main, 1, 0); + auto* Det_ref = new miniqmcreference::DiracDeterminantRef<>(spo_ref, 0, 31); + wfc_ref = dynamic_cast(Det_ref); + cout << "Built Det_ref" << endl; + } constexpr RealType czero(0); @@ -310,6 +333,10 @@ int main(int argc, char** argv) els_ref.rejectMove(iel); } } + + wfc->completeUpdates(); + wfc_ref->completeUpdates(); + cout << "Accepted " << naccepted << "/" << nels << endl; cout << "evalGrad::G Error = " << g_eval / nels << endl; cout << "ratioGrad::G Error = " << g_ratio / nels << endl; @@ -382,8 +409,9 @@ int main(int argc, char** argv) } } // end of omp parallel - int np = omp_get_max_threads(); - constexpr RealType small = std::numeric_limits::epsilon() * 1e4; + int np = omp_get_max_threads(); + const RealType small = std::numeric_limits::epsilon() * ( wfc_name == "Det" ? 1e8 : 1e4 ); + std::cout << "Tolerance " << small << std::endl; bool fail = false; cout << std::endl; if (evaluateLog_v_err / np > small) diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt index fd93a57e2..b58242a41 100644 --- a/src/QMCWaveFunctions/CMakeLists.txt +++ b/src/QMCWaveFunctions/CMakeLists.txt @@ -9,6 +9,8 @@ #// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory #////////////////////////////////////////////////////////////////////////////////////// -ADD_LIBRARY(qmcwfs ../QMCWaveFunctions/WaveFunction.cpp ../QMCWaveFunctions/SPOSet_builder.cpp ../QMCWaveFunctions/DiracDeterminant.cpp) +ADD_LIBRARY(qmcwfs + ../QMCWaveFunctions/WaveFunction.cpp ../QMCWaveFunctions/SPOSet_builder.cpp + ../QMCWaveFunctions/DiracDeterminant.cpp ../QMCWaveFunctions/DiracDeterminantRef.cpp) SUBDIRS(tests) diff --git a/src/QMCWaveFunctions/DiracDeterminantRef.cpp b/src/QMCWaveFunctions/DiracDeterminantRef.cpp new file mode 100644 index 000000000..dd4678f9d --- /dev/null +++ b/src/QMCWaveFunctions/DiracDeterminantRef.cpp @@ -0,0 +1,286 @@ +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Bryan Clark, bclark@Princeton.edu, Princeton University +// Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign +// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + + +#include "QMCWaveFunctions/DiracDeterminantRef.h" +#include "Numerics/OhmmsBlas.h" +#include "QMCWaveFunctions/DeterminantHelper.h" + +namespace miniqmcreference +{ +using namespace qmcplusplus; + +/** constructor + *@param spos the single-particle orbital set + *@param first index of the first particle + */ +template +DiracDeterminantRef::DiracDeterminantRef(SPOSet* const spos, int first, int delay) + : invRow_id(-1), + Phi(spos), + FirstIndex(first), + LastIndex(first + spos->size()), + ndelay(delay), + NumPtcls(spos->size()), + NumOrbitals(spos->size()) +{ + UpdateTimer = TimerManager.createTimer("DiracDeterminantRef::update", timer_level_fine); + RatioTimer = TimerManager.createTimer("DiracDeterminantRef::ratio", timer_level_fine); + InverseTimer = TimerManager.createTimer("DiracDeterminantRef::inverse", timer_level_fine); + BufferTimer = TimerManager.createTimer("DiracDeterminantRef::buffer", timer_level_fine); + SPOVTimer = TimerManager.createTimer("DiracDeterminantRef::spoval", timer_level_fine); + SPOVGLTimer = TimerManager.createTimer("DiracDeterminantRef::spovgl", timer_level_fine); + resize(spos->size(), spos->size()); +} + +template +void DiracDeterminantRef::invertPsiM(const ValueMatrix_t& logdetT, ValueMatrix_t& invMat) +{ + InverseTimer->start(); + updateEng.invert_transpose(logdetT, invMat, LogValue, PhaseValue); + InverseTimer->stop(); +} + + +///reset the size: with the number of particles and number of orbtials +template +void DiracDeterminantRef::resize(int nel, int morb) +{ + int norb = morb; + if (norb <= 0) + norb = nel; // for morb == -1 (default) + updateEng.resize(norb, ndelay); + psiM.resize(nel, norb); + dpsiM.resize(nel, norb); + d2psiM.resize(nel, norb); + psiV.resize(norb); + invRow.resize(norb); + psiM_temp.resize(nel, norb); + LastIndex = FirstIndex + nel; + NumPtcls = nel; + NumOrbitals = norb; + + dpsiV.resize(NumOrbitals); + d2psiV.resize(NumOrbitals); +} + +template +typename DiracDeterminantRef::GradType DiracDeterminantRef::evalGrad(ParticleSet& P, int iat) +{ + const int WorkingIndex = iat - FirstIndex; + RatioTimer->start(); + invRow_id = WorkingIndex; + updateEng.getInvRow(psiM, WorkingIndex, invRow); + GradType g = simd::dot(invRow.data(), dpsiM[WorkingIndex], invRow.size()); + RatioTimer->stop(); + return g; +} + +template +typename DiracDeterminantRef::ValueType DiracDeterminantRef::ratioGrad(ParticleSet& P, + int iat, + GradType& grad_iat) +{ + SPOVGLTimer->start(); + Phi->evaluate(P, iat, psiV, dpsiV, d2psiV); + SPOVGLTimer->stop(); + RatioTimer->start(); + const int WorkingIndex = iat - FirstIndex; + UpdateMode = ORB_PBYP_PARTIAL; + GradType rv; + + // This is an optimization. + // check invRow_id against WorkingIndex to see if getInvRow() has been called already + // Some code paths call evalGrad before calling ratioGrad. + if (invRow_id != WorkingIndex) + { + invRow_id = WorkingIndex; + updateEng.getInvRow(psiM, WorkingIndex, invRow); + } + curRatio = simd::dot(invRow.data(), psiV.data(), invRow.size()); + grad_iat += ((RealType)1.0 / curRatio) * simd::dot(invRow.data(), dpsiV.data(), invRow.size()); + RatioTimer->stop(); + return curRatio; +} + +/** move was accepted, update the real container +*/ +template +void DiracDeterminantRef::acceptMove(ParticleSet& P, int iat) +{ + const int WorkingIndex = iat - FirstIndex; + PhaseValue += evaluatePhase(curRatio); + LogValue += std::log(std::abs(curRatio)); + UpdateTimer->start(); + updateEng.acceptRow(psiM, WorkingIndex, psiV); + // invRow becomes invalid after accepting a move + invRow_id = -1; + if (UpdateMode == ORB_PBYP_PARTIAL) + { + simd::copy(dpsiM[WorkingIndex], dpsiV.data(), NumOrbitals); + simd::copy(d2psiM[WorkingIndex], d2psiV.data(), NumOrbitals); + } + UpdateTimer->stop(); + curRatio = 1.0; +} + +template +void DiracDeterminantRef::completeUpdates() +{ + UpdateTimer->start(); + // invRow becomes invalid after updating the inverse matrix + invRow_id = -1; + updateEng.updateInvMat(psiM); + UpdateTimer->stop(); +} + +template +void DiracDeterminantRef::evaluateGL(ParticleSet& P, + ParticleSet::ParticleGradient_t& G, + ParticleSet::ParticleLaplacian_t& L, + bool fromscratch) +{ + if (UpdateMode == ORB_PBYP_RATIO) + { //need to compute dpsiM and d2psiM. Do not touch psiM! + SPOVGLTimer->start(); + Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_temp, dpsiM, d2psiM); + SPOVGLTimer->stop(); + } + + if (NumPtcls == 1) + { + ValueType y = psiM(0, 0); + GradType rv = y * dpsiM(0, 0); + G[FirstIndex] += rv; + L[FirstIndex] += y * d2psiM(0, 0) - dot(rv, rv); + } + else + { + for (size_t i = 0, iat = FirstIndex; i < NumPtcls; ++i, ++iat) + { + mValueType dot_temp = simd::dot(psiM[i], d2psiM[i], NumOrbitals); + mGradType rv = simd::dot(psiM[i], dpsiM[i], NumOrbitals); + G[iat] += rv; + L[iat] += dot_temp - dot(rv, rv); + } + } +} + +/** return the ratio only for the iat-th partcle move + * @param P current configuration + * @param iat the particle thas is being moved + */ +template +typename DiracDeterminantRef::ValueType DiracDeterminantRef::ratio(ParticleSet& P, int iat) +{ + UpdateMode = ORB_PBYP_RATIO; + const int WorkingIndex = iat - FirstIndex; + SPOVTimer->start(); + Phi->evaluate(P, iat, psiV); + SPOVTimer->stop(); + RatioTimer->start(); + // This is an optimization. + // check invRow_id against WorkingIndex to see if getInvRow() has been called + // This is intended to save redundant compuation in TM1 and TM3 + if (invRow_id != WorkingIndex) + { + invRow_id = WorkingIndex; + updateEng.getInvRow(psiM, WorkingIndex, invRow); + } + curRatio = simd::dot(invRow.data(), psiV.data(), invRow.size()); + RatioTimer->stop(); + return curRatio; +} + +/* Ye: to be recovered +template +void DiracDeterminantRef::evaluateRatios(VirtualParticleSet& VP, std::vector& ratios) +{ + SPOVTimer->start(); + const int WorkingIndex = VP.refPtcl - FirstIndex; + invRow_id = WorkingIndex; + updateEng.getInvRow(psiM, WorkingIndex, invRow); + Phi->evaluateDetRatios(VP, psiV, invRow, ratios); + SPOVTimer->stop(); +} +*/ + +/** Calculate the log value of the Dirac determinant for particles + *@param P input configuration containing N particles + *@param G a vector containing N gradients + *@param L a vector containing N laplacians + *@return the value of the determinant + * + *\f$ (first,first+nel). \f$ Add the gradient and laplacian + *contribution of the determinant to G(radient) and L(aplacian) + *for local energy calculations. + */ +template +typename DiracDeterminantRef::RealType DiracDeterminantRef::evaluateLog(ParticleSet& P, + ParticleSet::ParticleGradient_t& G, + ParticleSet::ParticleLaplacian_t& L) +{ + recompute(P); + + if (NumPtcls == 1) + { + ValueType y = psiM(0, 0); + GradType rv = y * dpsiM(0, 0); + G[FirstIndex] += rv; + L[FirstIndex] += y * d2psiM(0, 0) - dot(rv, rv); + } + else + { + for (int i = 0, iat = FirstIndex; i < NumPtcls; i++, iat++) + { + mGradType rv = simd::dot(psiM[i], dpsiM[i], NumOrbitals); + mValueType lap = simd::dot(psiM[i], d2psiM[i], NumOrbitals); + G[iat] += rv; + L[iat] += lap - dot(rv, rv); + } + } + return LogValue; +} + +template +void DiracDeterminantRef::recompute(ParticleSet& P) +{ + SPOVGLTimer->start(); + Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_temp, dpsiM, d2psiM); + SPOVGLTimer->stop(); + if (NumPtcls == 1) + { + //CurrentDet=psiM(0,0); + ValueType det = psiM_temp(0, 0); + psiM(0, 0) = RealType(1) / det; + LogValue = evaluateLogAndPhase(det, PhaseValue); + } + else + { + invertPsiM(psiM_temp, psiM); + } +} + +typedef QMCTraits::ValueType ValueType; +typedef QMCTraits::QTFull::ValueType mValueType; + +template class DiracDeterminantRef<>; +#if defined(ENABLE_CUDA) +template class DiracDeterminantRef>; +#endif + +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/DiracDeterminantRef.h b/src/QMCWaveFunctions/DiracDeterminantRef.h new file mode 100644 index 000000000..d699e1ae9 --- /dev/null +++ b/src/QMCWaveFunctions/DiracDeterminantRef.h @@ -0,0 +1,150 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Bryan Clark, bclark@Princeton.edu, Princeton University +// Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign +// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + + +/**@file DiracDeterminantRef.h + * @brief Declaration of DiracDeterminantRef with a S(ingle)P(article)O(rbital)Set + */ +#ifndef QMCPLUSPLUS_DIRACDETERMINANT_REF_H +#define QMCPLUSPLUS_DIRACDETERMINANT_REF_H + +#include "QMCWaveFunctions/WaveFunctionComponent.h" +#include "QMCWaveFunctions/SPOSet.h" +#include "Utilities/NewTimer.h" +#include "QMCWaveFunctions/DelayedUpdate.h" +#if defined(ENABLE_CUDA) +#include "QMCWaveFunctions/DelayedUpdateCUDA.h" +#endif + +namespace miniqmcreference +{ +using namespace qmcplusplus; + +template> +class DiracDeterminantRef : public WaveFunctionComponent +{ +public: + using ValueVector_t = Vector; + using ValueMatrix_t = Matrix; + using GradVector_t = Vector; + using GradMatrix_t = Matrix; + + using mValueType = QMCTraits::QTFull::ValueType; + using ValueMatrix_hp_t = Matrix; + using mGradType = TinyVector; + + /** constructor + *@param spos the single-particle orbital set + *@param first index of the first particle + */ + DiracDeterminantRef(SPOSet* const spos, int first = 0, int delay = 1); + + // copy constructor and assign operator disabled + DiracDeterminantRef(const DiracDeterminantRef& s) = delete; + DiracDeterminantRef& operator=(const DiracDeterminantRef& s) = delete; + + ///invert psiM or its copies + void invertPsiM(const ValueMatrix_t& logdetT, ValueMatrix_t& invMat); + + void evaluateGL(ParticleSet& P, + ParticleSet::ParticleGradient_t& G, + ParticleSet::ParticleLaplacian_t& L, + bool fromscratch = false); + + /** return the ratio only for the iat-th partcle move + * @param P current configuration + * @param iat the particle thas is being moved + */ + ValueType ratio(ParticleSet& P, int iat); + + /** compute multiple ratios for a particle move + void evaluateRatios(VirtualParticleSet& VP, std::vector& ratios); + */ + + ValueType ratioGrad(ParticleSet& P, int iat, GradType& grad_iat); + GradType evalGrad(ParticleSet& P, int iat); + + /** move was accepted, update the real container + */ + void acceptMove(ParticleSet& P, int iat); + void completeUpdates(); + + ///evaluate log of a determinant for a particle set + RealType evaluateLog(ParticleSet& P, ParticleSet::ParticleGradient_t& G, ParticleSet::ParticleLaplacian_t& L); + + void recompute(ParticleSet& P); + + /// psiM(j,i) \f$= \psi_j({\bf r}_i)\f$ + ValueMatrix_t psiM_temp; + + /// inverse transpose of psiM(j,i) \f$= \psi_j({\bf r}_i)\f$ + ValueMatrix_t psiM; + + /// dpsiM(i,j) \f$= \nabla_i \psi_j({\bf r}_i)\f$ + GradMatrix_t dpsiM; + + /// d2psiM(i,j) \f$= \nabla_i^2 \psi_j({\bf r}_i)\f$ + ValueMatrix_t d2psiM; + + /// value of single-particle orbital for particle-by-particle update + ValueVector_t psiV; + GradVector_t dpsiV; + ValueVector_t d2psiV; + + /// delayed update engine + DU_TYPE updateEng; + + /// the row of up-to-date inverse matrix + ValueVector_t invRow; + + /** row id correspond to the up-to-date invRow. [0 norb), invRow is ready; -1, invRow is not valid. + * This id is set after calling getInvRow indicating invRow has been prepared for the invRow_id row + * ratioGrad checks if invRow_id is consistent. If not, invRow needs to be recomputed. + * acceptMove and completeUpdates mark invRow invalid by setting invRow_id to -1 + */ + int invRow_id; + + ValueType curRatio; + +private: + /// Timers + NewTimer* UpdateTimer; + NewTimer* RatioTimer; + NewTimer* InverseTimer; + NewTimer* BufferTimer; + NewTimer* SPOVTimer; + NewTimer* SPOVGLTimer; + /// a set of single-particle orbitals used to fill in the values of the matrix + SPOSet* const Phi; + ///index of the first particle with respect to the particle set + int FirstIndex; + ///index of the last particle with respect to the particle set + int LastIndex; + ///number of single-particle orbitals which belong to this Dirac determinant + int NumOrbitals; + ///number of particles which belong to this Dirac determinant + int NumPtcls; + /// delayed update rank + int ndelay; + + ///reset the size: with the number of particles and number of orbtials + void resize(int nel, int morb); +}; + + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/WaveFunctionComponent.h b/src/QMCWaveFunctions/WaveFunctionComponent.h index b1bd0390a..5f442836c 100644 --- a/src/QMCWaveFunctions/WaveFunctionComponent.h +++ b/src/QMCWaveFunctions/WaveFunctionComponent.h @@ -169,6 +169,10 @@ struct WaveFunctionComponent : public QMCTraits ParticleSet::ParticleLaplacian_t& L, bool fromscratch = false) = 0; + /** complete all the delayed updates, must be called after each substep or step during pbyp move + */ + virtual void completeUpdates(){}; + /// operates on multiple walkers virtual void multi_evaluateLog(const std::vector& WFC_list, const std::vector& P_list, diff --git a/src/QMCWaveFunctions/einspline_spo_ref.hpp b/src/QMCWaveFunctions/einspline_spo_ref.hpp index dab85838a..cee60b8b7 100644 --- a/src/QMCWaveFunctions/einspline_spo_ref.hpp +++ b/src/QMCWaveFunctions/einspline_spo_ref.hpp @@ -86,6 +86,7 @@ struct einspline_spo_ref : public SPOSet */ einspline_spo_ref(const einspline_spo_ref& in, int team_size, int member_id) : Owner(false), Lattice(in.Lattice) { + OrbitalSetSize = in.OrbitalSetSize; nSplines = in.nSplines; nSplinesPerBlock = in.nSplinesPerBlock; nBlocks = (in.nBlocks + team_size - 1) / team_size; @@ -124,6 +125,9 @@ struct einspline_spo_ref : public SPOSet // fix for general num_splines void set(int nx, int ny, int nz, int num_splines, int nblocks, bool init_random = true) { + // setting OrbitalSetSize to num_splines made artificial only in miniQMC + OrbitalSetSize = num_splines; + nSplines = num_splines; nBlocks = nblocks; nSplinesPerBlock = num_splines / nblocks; @@ -165,6 +169,18 @@ struct einspline_spo_ref : public SPOSet MultiBsplineEvalRef::evaluate_v(einsplines[i], u[0], u[1], u[2], psi[i].data(), nSplinesPerBlock); } + inline void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v) + { + evaluate_v(P, iat); + + for (int i = 0; i < nBlocks; ++i) + { + // in real simulation, phase needs to be applied. Here just fake computation + const int first = i*nBlocks; + std::copy_n(psi[i].data(), std::min((i+1)*nSplinesPerBlock, OrbitalSetSize) - first, psi_v.data()+first); + } + } + /** evaluate psi, grad and lap */ inline void evaluate_vgl(const ParticleSet& P, int iat) { @@ -185,6 +201,24 @@ struct einspline_spo_ref : public SPOSet nSplinesPerBlock); } + inline void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v, GradVector_t& dpsi_v, ValueVector_t& d2psi_v) + { + evaluate_vgh(P, iat); + + for (int i = 0; i < nBlocks; ++i) + { + // in real simulation, phase needs to be applied. Here just fake computation + const int first = i*nBlocks; + for (int j = first; j < std::min((i+1)*nSplinesPerBlock, OrbitalSetSize); j++) + { + psi_v[j] = psi[i][j-first]; + dpsi_v[j] = grad[i][j-first]; + d2psi_v[j] = hess[i].data(0)[j-first] + hess[i].data(1)[j-first] + hess[i].data(2)[j-first] + + hess[i].data(3)[j-first] + hess[i].data(4)[j-first] + hess[i].data(5)[j-first]; + } + } + } + void print(std::ostream& os) { os << "SPO nBlocks=" << nBlocks << " firstBlock=" << firstBlock << " lastBlock=" << lastBlock From c4f316a9c1d3db0171609793272bf5456b04340f Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Tue, 4 Jun 2019 23:18:01 -0500 Subject: [PATCH 12/29] Remove old Determinant --- src/Drivers/CMakeLists.txt | 30 +-- src/Drivers/check_determinant.cpp | 221 ------------------- src/QMCWaveFunctions/Determinant.h | 305 -------------------------- src/QMCWaveFunctions/DeterminantRef.h | 305 -------------------------- src/QMCWaveFunctions/WaveFunction.cpp | 8 +- 5 files changed, 15 insertions(+), 854 deletions(-) delete mode 100644 src/Drivers/check_determinant.cpp delete mode 100644 src/QMCWaveFunctions/Determinant.h delete mode 100644 src/QMCWaveFunctions/DeterminantRef.h diff --git a/src/Drivers/CMakeLists.txt b/src/Drivers/CMakeLists.txt index c6ad9ec63..9ab2bb5c8 100644 --- a/src/Drivers/CMakeLists.txt +++ b/src/Drivers/CMakeLists.txt @@ -1,25 +1,17 @@ -if(QMC_BUILD_LEVEL GREATER 4) -# add apps XYZ.cpp, e.g., qmc_particles.cpp -#SET(ESTEST einspline_smp einspline_spo qmc_particles moveonsphere twobody ptclset) -SET(ESTEST check_spo check_determinant) +#////////////////////////////////////////////////////////////////////////////////////// +#// This file is distributed under the University of Illinois/NCSA Open Source License. +#// See LICENSE file in top directory for details. +#// +#// Copyright (c) 2019 QMCPACK developers. +#// +#// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +#// +#// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +#////////////////////////////////////////////////////////////////////////////////////// -FOREACH(p ${ESTEST}) - ADD_EXECUTABLE( ${p} ${p}.cpp) - TARGET_LINK_LIBRARIES(${p} qmcbase qmcutil ${QMC_UTIL_LIBS} ${MPI_LIBRARY}) -ENDFOREACH(p ${ESTEST}) - -SET(DRIVERS check_wfc miniqmc miniqmc_sync_move) +SET(DRIVERS check_spo check_wfc miniqmc miniqmc_sync_move) FOREACH(p ${DRIVERS}) ADD_EXECUTABLE( ${p} ${p}.cpp) TARGET_LINK_LIBRARIES(${p} qmcwfs qmcbase qmcutil ${QMC_UTIL_LIBS} ${MPI_LIBRARY}) ENDFOREACH(p ${ESTEST}) - -endif() - -#SET(boost_test exchange_walker) -#FOREACH(p ${boost_test}) -# ADD_EXECUTABLE( ${p} ${p}.cpp) -# TARGET_LINK_LIBRARIES(${p} qmcbase qmcutil ${QMC_UTIL_LIBS} boost ${MPI_LIBRARY}) -#ENDFOREACH(p ${boost_test}) - diff --git a/src/Drivers/check_determinant.cpp b/src/Drivers/check_determinant.cpp deleted file mode 100644 index 3ec239632..000000000 --- a/src/Drivers/check_determinant.cpp +++ /dev/null @@ -1,221 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source -// License. See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: M. Graham Lopez, Oak Ridge National Lab -// -// File created by: Jeongnim Kim, Intel -//////////////////////////////////////////////////////////////////////////////// -// -*- C++ -*- -// clang-format off -/** @file check_determinant.cpp - - - Compares against a reference implementation for correctness. - - */ - -// clang-format on - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace std; -using namespace qmcplusplus; - - -void print_help() -{ - // clang-format off - cout << "usage:" << '\n'; - cout << " check_determinant [-hvV] [-g \"n0 n1 n2\"] [-n steps]" << '\n'; - cout << " [-N substeps] [-s seed]" << '\n'; - cout << "options:" << '\n'; - cout << " -g set the 3D tiling. default: 1 1 1" << '\n'; - cout << " -h print help and exit" << '\n'; - cout << " -n number of MC steps default: 5" << '\n'; - cout << " -N number of MC substeps default: 1" << '\n'; - cout << " -s set the random seed. default: 11" << '\n'; - cout << " -v verbose output" << '\n'; - cout << " -V print version information and exit" << '\n'; - // clang-format on - - exit(1); // print help and exit -} -int main(int argc, char** argv) -{ - // clang-format off - typedef QMCTraits::RealType RealType; - typedef ParticleSet::ParticlePos_t ParticlePos_t; - typedef ParticleSet::PosType PosType; - // clang-format on - - int na = 1; - int nb = 1; - int nc = 1; - int nsteps = 5; - int iseed = 11; - int nsubsteps = 1; - int np = omp_get_max_threads(); - - PrimeNumberSet myPrimes; - - bool verbose = false; - - int opt; - while (optind < argc) - { - if ((opt = getopt(argc, argv, "hvVg:n:N:r:s:")) != -1) - { - switch (opt) - { - case 'g': // tiling1 tiling2 tiling3 - sscanf(optarg, "%d %d %d", &na, &nb, &nc); - break; - case 'h': - print_help(); - break; - case 'n': - nsteps = atoi(optarg); - break; - case 'N': - nsubsteps = atoi(optarg); - break; - case 's': - iseed = atoi(optarg); - break; - case 'v': - verbose = true; - break; - case 'V': - print_version(true); - return 1; - break; - default: - print_help(); - } - } - else // disallow non-option arguments - { - cerr << "Non-option arguments not allowed" << endl; - print_help(); - } - } - - Tensor tmat(na, 0, 0, 0, nb, 0, 0, 0, nc); - - // setup ions - ParticleSet ions; - Tensor lattice_b; - build_ions(ions, tmat, lattice_b); - - print_version(verbose); - - if (verbose) - outputManager.setVerbosity(Verbosity::HIGH); - else - outputManager.setVerbosity(Verbosity::LOW); - - double accumulated_error = 0.0; - - #pragma omp parallel reduction(+ : accumulated_error) - { - int ip = omp_get_thread_num(); - - // create generator within the thread - RandomGenerator random_th(myPrimes[ip]); - - ParticleSet els; - build_els(els, ions, random_th); - els.update(); - - const int nions = ions.getTotalNum(); - const int nels = els.getTotalNum(); - const int nels3 = 3 * nels; - - miniqmcreference::DiracDeterminantRef determinant_ref(nels, random_th); - determinant_ref.checkMatrix(); - DiracDeterminant determinant(nels, random_th); - determinant.checkMatrix(); - - ParticlePos_t delta(nels); - - RealType accept = 0.5; - - aligned_vector ur(nels); - random_th.generate_uniform(ur.data(), nels); - - els.update(); - - int my_accepted = 0; - for (int mc = 0; mc < nsteps; ++mc) - { - determinant_ref.recompute(); - determinant.recompute(); - for (int l = 0; l < nsubsteps; ++l) // drift-and-diffusion - { - random_th.generate_normal(&delta[0][0], nels3); - for (int iel = 0; iel < nels; ++iel) - { - // Operate on electron with index iel - els.setActive(iel); - - // Construct trial move - els.makeMove(iel, delta[iel]); - - // Compute gradient at the trial position - - determinant_ref.ratio(els, iel); - determinant.ratio(els, iel); - - // Accept/reject the trial move - if (ur[iel] < accept) // MC - { - // Update position, and update temporary storage - els.acceptMove(iel); - determinant_ref.acceptMove(els, iel); - determinant.acceptMove(els, iel); - my_accepted++; - } - else - { - els.rejectMove(iel); - } - } // iel - } // substeps - - els.donePbyP(); - } - - // accumulate error - for (int i = 0; i < determinant_ref.size(); i++) - { - accumulated_error += std::fabs(determinant_ref(i) - determinant(i)); - } - } // end of omp parallel - - constexpr double small_err = std::numeric_limits::epsilon() * 6e8; - - cout << "total accumulated error of " << accumulated_error << " for " << np << " procs" << '\n'; - - if (accumulated_error / np > small_err) - { - cout << "Checking failed with accumulated error: " << accumulated_error / np << " > " - << small_err << '\n'; - return 1; - } - else - cout << "All checks passed for determinant" << '\n'; - - return 0; -} diff --git a/src/QMCWaveFunctions/Determinant.h b/src/QMCWaveFunctions/Determinant.h deleted file mode 100644 index ce82e75c0..000000000 --- a/src/QMCWaveFunctions/Determinant.h +++ /dev/null @@ -1,305 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source -// License. See LICENSE file in top directory for details. -// -// Copyright (c) 2017 QMCPACK developers. -// -// File developed by: M. Graham Lopez -// -// File created by: M. Graham Lopez -//////////////////////////////////////////////////////////////////////////////// -// -*- C++ -*- - -/** - * @file Determinant.h - * @brief Determinant piece of the wave function - */ - -#ifndef QMCPLUSPLUS_DETERMINANT_H -#define QMCPLUSPLUS_DETERMINANT_H - -#include "Numerics/OhmmsPETE/OhmmsMatrix.h" -#include "Numerics/DeterminantOperators.h" -#include "QMCWaveFunctions/WaveFunctionComponent.h" -#include "Utilities/Constants.h" - -namespace qmcplusplus -{ -/**@{Determinant utilities */ -/** Inversion of a double matrix after LU factorization*/ -inline void - getri(int n, double* restrict a, int lda, int* restrict piv, double* restrict work, int& lwork) -{ - int status; - dgetri(n, a, lda, piv, work, lwork, status); -} - -/** Inversion of a float matrix after LU factorization*/ -inline void - getri(int n, float* restrict a, int lda, int* restrict piv, float* restrict work, int& lwork) -{ - int status; - sgetri(n, a, lda, piv, work, lwork, status); -} - -/** Inversion of a std::complex matrix after LU factorization*/ -inline void getri(int n, - std::complex* restrict a, - int lda, - int* restrict piv, - std::complex* restrict work, - int& lwork) -{ - int status; - zgetri(n, a, lda, piv, work, lwork, status); -} - -/** Inversion of a complex matrix after LU factorization*/ -inline void getri(int n, - std::complex* restrict a, - int lda, - int* restrict piv, - std::complex* restrict work, - int& lwork) -{ - int status; - cgetri(n, a, lda, piv, work, lwork, status); -} - - -/** query the size of workspace for Xgetri after LU decompisiton */ -template -inline int getGetriWorkspace(T* restrict x, int n, int lda, int* restrict pivot) -{ - T work; - int lwork = -1; - getri(n, x, lda, pivot, &work, lwork); - lwork = static_cast(work); - return lwork; -} - -/** transpose in to out - * - * Assume: in[n][lda] and out[n][lda] - */ -template -inline void transpose(const TIN* restrict in, TOUT* restrict out, int n, int lda) -{ - for (int i = 0; i < n; ++i) - for (int j = 0; j < n; ++j) - out[i * lda + j] = in[i + j * lda]; -} - -/// used only for debugging or walker move -template -inline T - InvertWithLog(T* restrict x, int n, int lda, T* restrict work, int lwork, int* restrict pivot, T& phase) -{ - T logdet(0.0); - LUFactorization(n, n, x, lda, pivot); - int sign_det = 1; - for (int i = 0; i < n; i++) - { - sign_det *= (pivot[i] == i + 1) ? 1 : -1; - sign_det *= (x[i * lda + i] > 0) ? 1 : -1; - logdet += std::log(std::abs(x[i * lda + i])); - } - getri(n, x, lda, pivot, work, lwork); - phase = (sign_det > 0) ? 0.0 : M_PI; - return logdet; -} - -/// inner product -template -inline T3 inner_product_n(const T1* restrict a, const T2* restrict b, int n, T3 res) -{ - for (int i = 0; i < n; ++i) - res += a[i] * b[i]; - return res; -} - -/// recompute inverse, do not evaluate log|det| -template -inline void - InvertOnly(T* restrict x, int n, int lda, T* restrict work, int* restrict pivot, int lwork) -{ - LUFactorization(n, n, x, lda, pivot); - getri(n, x, lda, pivot, work, lwork); -} - -/** update Row as implemented in the full code */ -/** [UpdateRow] */ -template -inline void - updateRow(T* restrict pinv, const T* restrict tv, int m, int lda, int rowchanged, RT c_ratio_in) -{ - constexpr T cone(1); - constexpr T czero(0); - T temp[m], rcopy[m]; - T c_ratio = cone / c_ratio_in; - BLAS::gemv('T', m, m, c_ratio, pinv, m, tv, 1, czero, temp, 1); - temp[rowchanged] = cone - c_ratio; - std::copy_n(pinv + m * rowchanged, m, rcopy); - BLAS::ger(m, m, -cone, rcopy, 1, temp, 1, pinv, m); -} -/** [UpdateRow] */ -/**@}*/ - -// FIXME do we want to keep this in the miniapp? -template -void checkIdentity(const MT1& a, const MT2& b, const std::string& tag) -{ - constexpr double czero(0.0); - constexpr double cone(1.0); - const int nrows = a.rows(); - const int ncols = a.cols(); - double error = czero; - for (int i = 0; i < nrows; ++i) - { - for (int j = 0; j < nrows; ++j) - { - double e = inner_product_n(a[i], b[j], ncols, czero); - error += (i == j) ? std::abs(e - cone) : std::abs(e); - } - } - #pragma omp master - std::cout << tag << " difference from identity (average per element) = " << error / nrows / nrows - << std::endl; -} - -// FIXME do we want to keep this in the miniapp? -template -void checkDiff(const MT1& a, const MT2& b, const std::string& tag) -{ - const int nrows = a.rows(); - const int ncols = a.cols(); - constexpr double czero(0.0); - double error = czero; - for (int i = 0; i < nrows; ++i) - for (int j = 0; j < ncols; ++j) - error += std::abs(static_cast(a(i, j) - b(i, j))); - - #pragma omp master - std::cout << tag << " difference between matrices (average per element) = " << error / nrows / nrows - << std::endl; -} - -struct DiracDeterminant : public WaveFunctionComponent -{ - DiracDeterminant(int nels, const RandomGenerator& RNG, int First = 0) - : FirstIndex(First), myRandom(RNG) - { - psiMinv.resize(nels, nels); - psiV.resize(nels); - psiM.resize(nels, nels); - - pivot.resize(nels); - psiMsave.resize(nels, nels); - - // now we "void initialize(RandomGenerator RNG)" - - nels = psiM.rows(); - // get lwork and resize workspace - LWork = getGetriWorkspace(psiM.data(), nels, nels, pivot.data()); - work.resize(LWork); - - constexpr double shift(0.5); - myRandom.generate_uniform(psiMsave.data(), nels * nels); - psiMsave -= shift; - - double phase; - transpose(psiMsave.data(), psiM.data(), nels, nels); - LogValue = InvertWithLog(psiM.data(), nels, nels, work.data(), LWork, pivot.data(), phase); - std::copy_n(psiM.data(), nels * nels, psiMinv.data()); - } - - void checkMatrix() - { - if (omp_get_num_threads() == 1) - { - checkIdentity(psiMsave, psiM, "Psi_0 * psiM(double)"); - checkIdentity(psiMsave, psiMinv, "Psi_0 * psiMinv(T)"); - checkDiff(psiM, psiMinv, "psiM(double)-psiMinv(T)"); - } - } - - RealType evaluateLog(ParticleSet& P, - ParticleSet::ParticleGradient_t& G, - ParticleSet::ParticleLaplacian_t& L) - { - recompute(); - // FIXME do we want remainder of evaluateLog? - return 0.0; - } - - GradType evalGrad(ParticleSet& P, int iat) { return GradType(); } - - ValueType ratioGrad(ParticleSet& P, int iat, GradType& grad) { return ratio(P, iat); } - - void evaluateGL(ParticleSet& P, - ParticleSet::ParticleGradient_t& G, - ParticleSet::ParticleLaplacian_t& L, - bool fromscratch = false) - {} - - /// recompute the inverse - inline void recompute() - { - const int nels = psiV.size(); - transpose(psiMsave.data(), psiM.data(), nels, nels); - InvertOnly(psiM.data(), nels, nels, work.data(), pivot.data(), LWork); - std::copy_n(psiM.data(), nels * nels, psiMinv.data()); - } - - /** return determinant ratio for the row replacement - * @param iel the row (active particle) index - */ - inline ValueType ratio(ParticleSet& P, int iel) - { - const int nels = psiV.size(); - constexpr double shift(0.5); - constexpr double czero(0); - for (int j = 0; j < nels; ++j) - psiV[j] = myRandom() - shift; - curRatio = inner_product_n(psiV.data(), psiMinv[iel - FirstIndex], nels, czero); - return curRatio; - } - - /** accept the row and update the inverse */ - inline void acceptMove(ParticleSet& P, int iel) - { - const int nels = psiV.size(); - updateRow(psiMinv.data(), psiV.data(), nels, nels, iel - FirstIndex, curRatio); - std::copy_n(psiV.data(), nels, psiMsave[iel - FirstIndex]); - } - - /** accessor functions for checking */ - inline double operator()(int i) const { return psiMinv(i); } - inline int size() const { return psiMinv.size(); } - -private: - /// log|det| - double LogValue; - /// current ratio - double curRatio; - /// workspace size - int LWork; - /// initial particle index - const int FirstIndex; - /// inverse matrix to be update - Matrix psiMinv; - /// a SPO set for the row update - aligned_vector psiV; - /// internal storage to perform inversion correctly - Matrix psiM; // matrix to be inverted - /// random number generator for testing - RandomGenerator myRandom; - - // temporary workspace for inversion - aligned_vector pivot; - aligned_vector work; - Matrix psiMsave; -}; -} // namespace qmcplusplus - -#endif diff --git a/src/QMCWaveFunctions/DeterminantRef.h b/src/QMCWaveFunctions/DeterminantRef.h deleted file mode 100644 index 646914fbf..000000000 --- a/src/QMCWaveFunctions/DeterminantRef.h +++ /dev/null @@ -1,305 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source -// License. See LICENSE file in top directory for details. -// -// Copyright (c) 2017 QMCPACK developers. -// -// File developed by: M. Graham Lopez -// -// File created by: M. Graham Lopez -//////////////////////////////////////////////////////////////////////////////// -// -*- C++ -*- - -/** - * @file DeterminantRef.h - * @brief Determinant piece of the wave function - * - * This is the reference implementation - DO NOT MODIFY - */ - -#ifndef QMCPLUSPLUS_DETERMINANTREF_H -#define QMCPLUSPLUS_DETERMINANTREF_H - -#include "Numerics/OhmmsPETE/OhmmsMatrix.h" -#include "Numerics/DeterminantOperators.h" -#include "Particle/ParticleSet.h" -#include "QMCWaveFunctions/WaveFunctionComponent.h" -#include "Utilities/Constants.h" - -namespace miniqmcreference -{ -/**@{Determinant utilities */ -/** Inversion of a double matrix after LU factorization*/ -inline void - getri(int n, double* restrict a, int lda, int* restrict piv, double* restrict work, int& lwork) -{ - int status; - dgetri(n, a, lda, piv, work, lwork, status); -} - -/** Inversion of a float matrix after LU factorization*/ -inline void - getri(int n, float* restrict a, int lda, int* restrict piv, float* restrict work, int& lwork) -{ - int status; - sgetri(n, a, lda, piv, work, lwork, status); -} - -/** Inversion of a std::complex matrix after LU factorization*/ -inline void getri(int n, - std::complex* restrict a, - int lda, - int* restrict piv, - std::complex* restrict work, - int& lwork) -{ - int status; - zgetri(n, a, lda, piv, work, lwork, status); -} - -/** Inversion of a complex matrix after LU factorization*/ -inline void getri(int n, - std::complex* restrict a, - int lda, - int* restrict piv, - std::complex* restrict work, - int& lwork) -{ - int status; - cgetri(n, a, lda, piv, work, lwork, status); -} - -/** query the size of workspace for Xgetri after LU decompisiton */ -template -inline int getGetriWorkspace(T* restrict x, int n, int lda, int* restrict pivot) -{ - T work; - int lwork = -1; - getri(n, x, lda, pivot, &work, lwork); - lwork = static_cast(work); - return lwork; -} - -/** transpose in to out - * - * Assume: in[n][lda] and out[n][lda] - */ -template -inline void transpose(const TIN* restrict in, TOUT* restrict out, int n, int lda) -{ - for (int i = 0; i < n; ++i) - for (int j = 0; j < n; ++j) - out[i * lda + j] = in[i + j * lda]; -} - -/// used only for debugging or walker move -template -inline T - InvertWithLog(T* restrict x, int n, int lda, T* restrict work, int lwork, int* restrict pivot, T& phase) -{ - T logdet(0.0); - qmcplusplus::LUFactorization(n, n, x, lda, pivot); - int sign_det = 1; - for (int i = 0; i < n; i++) - { - sign_det *= (pivot[i] == i + 1) ? 1 : -1; - sign_det *= (x[i * lda + i] > 0) ? 1 : -1; - logdet += std::log(std::abs(x[i * lda + i])); - } - getri(n, x, lda, pivot, work, lwork); - phase = (sign_det > 0) ? 0.0 : M_PI; - return logdet; -} - -/// inner product -template -inline T3 inner_product_n(const T1* restrict a, const T2* restrict b, int n, T3 res) -{ - for (int i = 0; i < n; ++i) - res += a[i] * b[i]; - return res; -} - -/// recompute inverse, do not evaluate log|det| -template -inline void - InvertOnly(T* restrict x, int n, int lda, T* restrict work, int* restrict pivot, int lwork) -{ - qmcplusplus::LUFactorization(n, n, x, lda, pivot); - getri(n, x, lda, pivot, work, lwork); -} - -/** update Row as implemented in the full code */ -template -inline void - updateRow(T* restrict pinv, const T* restrict tv, int m, int lda, int rowchanged, RT c_ratio_in) -{ - constexpr T cone(1); - constexpr T czero(0); - T temp[m], rcopy[m]; - T c_ratio = cone / c_ratio_in; - BLAS::gemv('T', m, m, c_ratio, pinv, m, tv, 1, czero, temp, 1); - temp[rowchanged] = cone - c_ratio; - std::copy_n(pinv + m * rowchanged, m, rcopy); - BLAS::ger(m, m, -cone, rcopy, 1, temp, 1, pinv, m); -} -/**@}*/ - -// FIXME do we want to keep this in the miniapp? -template -void checkIdentity(const MT1& a, const MT2& b, const std::string& tag) -{ - constexpr double czero(0.0); - constexpr double cone(1.0); - const int nrows = a.rows(); - const int ncols = a.cols(); - double error = czero; - for (int i = 0; i < nrows; ++i) - { - for (int j = 0; j < nrows; ++j) - { - double e = inner_product_n(a[i], b[j], ncols, czero); - error += (i == j) ? std::abs(e - cone) : std::abs(e); - } - } - #pragma omp master - std::cout << tag << " difference from identity (average per element) = " << error / nrows / nrows - << std::endl; -} - -// FIXME do we want to keep this in the miniapp? -template -void checkDiff(const MT1& a, const MT2& b, const std::string& tag) -{ - const int nrows = a.rows(); - const int ncols = a.cols(); - constexpr double czero(0.0); - double error = czero; - for (int i = 0; i < nrows; ++i) - for (int j = 0; j < ncols; ++j) - error += std::abs(static_cast(a(i, j) - b(i, j))); - - #pragma omp master - std::cout << tag << " difference between matrices (average per element) = " << error / nrows / nrows - << std::endl; -} - -struct DiracDeterminantRef : public qmcplusplus::WaveFunctionComponent -{ - using ParticleSet = qmcplusplus::ParticleSet; - - DiracDeterminantRef(int nels, const qmcplusplus::RandomGenerator& RNG, int First = 0) - : FirstIndex(First), myRandom(RNG) - { - psiMinv.resize(nels, nels); - psiV.resize(nels); - psiM.resize(nels, nels); - - pivot.resize(nels); - psiMsave.resize(nels, nels); - - // now we "void initialize(RandomGenerator RNG)" - - nels = psiM.rows(); - // get lwork and resize workspace - LWork = getGetriWorkspace(psiM.data(), nels, nels, pivot.data()); - work.resize(LWork); - - constexpr double shift(0.5); - myRandom.generate_uniform(psiMsave.data(), nels * nels); - psiMsave -= shift; - - double phase; - transpose(psiMsave.data(), psiM.data(), nels, nels); - LogValue = InvertWithLog(psiM.data(), nels, nels, work.data(), LWork, pivot.data(), phase); - std::copy_n(psiM.data(), nels * nels, psiMinv.data()); - } - - void checkMatrix() - { - if (omp_get_num_threads() == 1) - { - // qualified invocation to defeat ADL - miniqmcreference::checkIdentity(psiMsave, psiM, "Psi_0 * psiM(double)"); - miniqmcreference::checkIdentity(psiMsave, psiMinv, "Psi_0 * psiMinv(T)"); - miniqmcreference::checkDiff(psiM, psiMinv, "psiM(double)-psiMinv(T)"); - } - } - - RealType evaluateLog(ParticleSet& P, - ParticleSet::ParticleGradient_t& G, - ParticleSet::ParticleLaplacian_t& L) - { - recompute(); - // FIXME do we want remainder of evaluateLog? - return 0.0; - } - GradType evalGrad(ParticleSet& P, int iat) { return GradType(); } - ValueType ratioGrad(ParticleSet& P, int iat, GradType& grad) { return ratio(P, iat); } - void evaluateGL(ParticleSet& P, - ParticleSet::ParticleGradient_t& G, - ParticleSet::ParticleLaplacian_t& L, - bool fromscratch = false) - {} - - /// recompute the inverse - inline void recompute() - { - const int nels = psiV.size(); - transpose(psiMsave.data(), psiM.data(), nels, nels); - InvertOnly(psiM.data(), nels, nels, work.data(), pivot.data(), LWork); - std::copy_n(psiM.data(), nels * nels, psiMinv.data()); - } - - /** return determinant ratio for the row replacement - * @param iel the row (active particle) index - */ - inline ValueType ratio(ParticleSet& P, int iel) - { - const int nels = psiV.size(); - constexpr double shift(0.5); - constexpr double czero(0); - for (int j = 0; j < nels; ++j) - psiV[j] = myRandom() - shift; - curRatio = inner_product_n(psiV.data(), psiMinv[iel - FirstIndex], nels, czero); - return curRatio; - } - - /** accept the row and update the inverse */ - inline void acceptMove(ParticleSet& P, int iel) - { - const int nels = psiV.size(); - updateRow(psiMinv.data(), psiV.data(), nels, nels, iel - FirstIndex, curRatio); - std::copy_n(psiV.data(), nels, psiMsave[iel - FirstIndex]); - } - - /** accessor functions for checking */ - inline double operator()(int i) const { return psiMinv(i); } - inline int size() const { return psiMinv.size(); } - -private: - /// log|det| - double LogValue; - /// current ratio - double curRatio; - /// workspace size - int LWork; - /// initial particle index - const int FirstIndex; - /// inverse matrix to be update - qmcplusplus::Matrix psiMinv; - /// a SPO set for the row update - qmcplusplus::aligned_vector psiV; - /// internal storage to perform inversion correctly - qmcplusplus::Matrix psiM; // matrix to be inverted - /// random number generator for testing - qmcplusplus::RandomGenerator myRandom; - - // temporary workspace for inversion - qmcplusplus::aligned_vector pivot; - qmcplusplus::aligned_vector work; - qmcplusplus::Matrix psiMsave; -}; -} // namespace miniqmcreference - -#endif diff --git a/src/QMCWaveFunctions/WaveFunction.cpp b/src/QMCWaveFunctions/WaveFunction.cpp index 026c9b3cc..65393d5d9 100644 --- a/src/QMCWaveFunctions/WaveFunction.cpp +++ b/src/QMCWaveFunctions/WaveFunction.cpp @@ -16,8 +16,8 @@ */ #include +#include #include -#include #include #include #include @@ -68,7 +68,7 @@ void build_WaveFunction(bool useRef, using J1OrbType = miniqmcreference::OneBodyJastrowRef>; using J2OrbType = miniqmcreference::TwoBodyJastrowRef>; using J3OrbType = miniqmcreference::ThreeBodyJastrowRef; - using DetType = miniqmcreference::DiracDeterminantRef; + using DetType = miniqmcreference::DiracDeterminantRef<>; ions.RSoA = ions.R; els.RSoA = els.R; @@ -79,8 +79,8 @@ void build_WaveFunction(bool useRef, // determinant component WF.nelup = nelup; - WF.Det_up = new DetType(nelup, RNG, 0); - WF.Det_dn = new DetType(els.getTotalNum() - nelup, RNG, nelup); + WF.Det_up = new DetType(WF.spo, 0, delay_rank); + WF.Det_dn = new DetType(WF.spo, nelup, delay_rank); // J1 component J1OrbType* J1 = new J1OrbType(ions, els); From d30ef2e0f6d624ba23af333f1a19db62afb8804d Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Wed, 5 Jun 2019 10:15:52 -0500 Subject: [PATCH 13/29] Remove redundant calculation --- src/QMCWaveFunctions/DiracDeterminant.cpp | 12 ++++++------ src/QMCWaveFunctions/DiracDeterminantRef.cpp | 12 ++++++------ src/QMCWaveFunctions/WaveFunction.cpp | 12 ------------ 3 files changed, 12 insertions(+), 24 deletions(-) diff --git a/src/QMCWaveFunctions/DiracDeterminant.cpp b/src/QMCWaveFunctions/DiracDeterminant.cpp index 84424f7b9..f8d4474ea 100644 --- a/src/QMCWaveFunctions/DiracDeterminant.cpp +++ b/src/QMCWaveFunctions/DiracDeterminant.cpp @@ -36,12 +36,12 @@ DiracDeterminant::DiracDeterminant(SPOSet* const spos, int first, int d NumPtcls(spos->size()), NumOrbitals(spos->size()) { - UpdateTimer = TimerManager.createTimer("DiracDeterminant::update", timer_level_fine); - RatioTimer = TimerManager.createTimer("DiracDeterminant::ratio", timer_level_fine); - InverseTimer = TimerManager.createTimer("DiracDeterminant::inverse", timer_level_fine); - BufferTimer = TimerManager.createTimer("DiracDeterminant::buffer", timer_level_fine); - SPOVTimer = TimerManager.createTimer("DiracDeterminant::spoval", timer_level_fine); - SPOVGLTimer = TimerManager.createTimer("DiracDeterminant::spovgl", timer_level_fine); + UpdateTimer = TimerManager.createTimer("Determinant::update", timer_level_fine); + RatioTimer = TimerManager.createTimer("Determinant::ratio", timer_level_fine); + InverseTimer = TimerManager.createTimer("Determinant::inverse", timer_level_fine); + BufferTimer = TimerManager.createTimer("Determinant::buffer", timer_level_fine); + SPOVTimer = TimerManager.createTimer("Determinant::spoval", timer_level_fine); + SPOVGLTimer = TimerManager.createTimer("Determinant::spovgl", timer_level_fine); resize(spos->size(), spos->size()); } diff --git a/src/QMCWaveFunctions/DiracDeterminantRef.cpp b/src/QMCWaveFunctions/DiracDeterminantRef.cpp index dd4678f9d..416cc66fb 100644 --- a/src/QMCWaveFunctions/DiracDeterminantRef.cpp +++ b/src/QMCWaveFunctions/DiracDeterminantRef.cpp @@ -38,12 +38,12 @@ DiracDeterminantRef::DiracDeterminantRef(SPOSet* const spos, int first, NumPtcls(spos->size()), NumOrbitals(spos->size()) { - UpdateTimer = TimerManager.createTimer("DiracDeterminantRef::update", timer_level_fine); - RatioTimer = TimerManager.createTimer("DiracDeterminantRef::ratio", timer_level_fine); - InverseTimer = TimerManager.createTimer("DiracDeterminantRef::inverse", timer_level_fine); - BufferTimer = TimerManager.createTimer("DiracDeterminantRef::buffer", timer_level_fine); - SPOVTimer = TimerManager.createTimer("DiracDeterminantRef::spoval", timer_level_fine); - SPOVGLTimer = TimerManager.createTimer("DiracDeterminantRef::spovgl", timer_level_fine); + UpdateTimer = TimerManager.createTimer("DeterminantRef::update", timer_level_fine); + RatioTimer = TimerManager.createTimer("DeterminantRef::ratio", timer_level_fine); + InverseTimer = TimerManager.createTimer("DeterminantRef::inverse", timer_level_fine); + BufferTimer = TimerManager.createTimer("DeterminantRef::buffer", timer_level_fine); + SPOVTimer = TimerManager.createTimer("DeterminantRef::spoval", timer_level_fine); + SPOVGLTimer = TimerManager.createTimer("DeterminantRef::spovgl", timer_level_fine); resize(spos->size(), spos->size()); } diff --git a/src/QMCWaveFunctions/WaveFunction.cpp b/src/QMCWaveFunctions/WaveFunction.cpp index 65393d5d9..bd4f40956 100644 --- a/src/QMCWaveFunctions/WaveFunction.cpp +++ b/src/QMCWaveFunctions/WaveFunction.cpp @@ -193,9 +193,7 @@ void WaveFunction::evaluateLog(ParticleSet& P) WaveFunction::posT WaveFunction::evalGrad(ParticleSet& P, int iat) { - timers[Timer_Det]->start(); posT grad_iat = (iat < nelup ? Det_up->evalGrad(P, iat) : Det_dn->evalGrad(P, iat)); - timers[Timer_Det]->stop(); for (size_t i = 0; i < Jastrows.size(); i++) { @@ -208,12 +206,8 @@ WaveFunction::posT WaveFunction::evalGrad(ParticleSet& P, int iat) WaveFunction::valT WaveFunction::ratioGrad(ParticleSet& P, int iat, posT& grad) { - spo->evaluate_vgh(P, iat); - - timers[Timer_Det]->start(); grad = valT(0); valT ratio = (iat < nelup ? Det_up->ratioGrad(P, iat, grad) : Det_dn->ratioGrad(P, iat, grad)); - timers[Timer_Det]->stop(); for (size_t i = 0; i < Jastrows.size(); i++) { @@ -226,11 +220,7 @@ WaveFunction::valT WaveFunction::ratioGrad(ParticleSet& P, int iat, posT& grad) WaveFunction::valT WaveFunction::ratio(ParticleSet& P, int iat) { - spo->evaluate_v(P, iat); - - timers[Timer_Det]->start(); valT ratio = (iat < nelup ? Det_up->ratio(P, iat) : Det_dn->ratio(P, iat)); - timers[Timer_Det]->stop(); for (size_t i = 0; i < Jastrows.size(); i++) { @@ -243,12 +233,10 @@ WaveFunction::valT WaveFunction::ratio(ParticleSet& P, int iat) void WaveFunction::acceptMove(ParticleSet& P, int iat) { - timers[Timer_Det]->start(); if (iat < nelup) Det_up->acceptMove(P, iat); else Det_dn->acceptMove(P, iat); - timers[Timer_Det]->stop(); for (size_t i = 0; i < Jastrows.size(); i++) { From 2b3d2c28ccce3087e7e7451b02c204fc687898b0 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Wed, 5 Jun 2019 10:25:51 -0500 Subject: [PATCH 14/29] Add completeUpdates to completeUpdates. --- src/Drivers/miniqmc.cpp | 1 + src/QMCWaveFunctions/WaveFunction.cpp | 6 ++++++ src/QMCWaveFunctions/WaveFunction.h | 1 + 3 files changed, 8 insertions(+) diff --git a/src/Drivers/miniqmc.cpp b/src/Drivers/miniqmc.cpp index 2371dbe70..939809b44 100644 --- a/src/Drivers/miniqmc.cpp +++ b/src/Drivers/miniqmc.cpp @@ -459,6 +459,7 @@ int main(int argc, char** argv) } // iel } // substeps + wavefunction.completeUpdates(); els.donePbyP(); // evaluate Kinetic Energy diff --git a/src/QMCWaveFunctions/WaveFunction.cpp b/src/QMCWaveFunctions/WaveFunction.cpp index bd4f40956..0d44ccb36 100644 --- a/src/QMCWaveFunctions/WaveFunction.cpp +++ b/src/QMCWaveFunctions/WaveFunction.cpp @@ -246,6 +246,12 @@ void WaveFunction::acceptMove(ParticleSet& P, int iat) } } +void WaveFunction::completeUpdates() +{ + Det_up->completeUpdates(); + Det_dn->completeUpdates(); +} + void WaveFunction::restore(int iat) {} void WaveFunction::evaluateGL(ParticleSet& P) diff --git a/src/QMCWaveFunctions/WaveFunction.h b/src/QMCWaveFunctions/WaveFunction.h index 1f820058c..c9f226add 100644 --- a/src/QMCWaveFunctions/WaveFunction.h +++ b/src/QMCWaveFunctions/WaveFunction.h @@ -66,6 +66,7 @@ class WaveFunction valT ratio(ParticleSet& P, int iat); void acceptMove(ParticleSet& P, int iat); void restore(int iat); + void completeUpdates(); void evaluateGL(ParticleSet& P); /// operates on multiple walkers From 0dd50dce3f8772b944c5566e017402f55e9b1bf4 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Wed, 5 Jun 2019 10:37:25 -0500 Subject: [PATCH 15/29] Add timer. --- src/QMCWaveFunctions/WaveFunction.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/QMCWaveFunctions/WaveFunction.cpp b/src/QMCWaveFunctions/WaveFunction.cpp index 0d44ccb36..2cc7ac49b 100644 --- a/src/QMCWaveFunctions/WaveFunction.cpp +++ b/src/QMCWaveFunctions/WaveFunction.cpp @@ -186,7 +186,11 @@ void WaveFunction::evaluateLog(ParticleSet& P) LogValue = Det_up->evaluateLog(P, P.G, P.L); LogValue += Det_dn->evaluateLog(P, P.G, P.L); for (size_t i = 0; i < Jastrows.size(); i++) + { + jastrow_timers[i]->start(); LogValue += Jastrows[i]->evaluateLog(P, P.G, P.L); + jastrow_timers[i]->stop(); + } FirstTime = false; } } From f6860209572809f3272775f2ee8d85bbdd422fcf Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Wed, 5 Jun 2019 15:46:28 -0500 Subject: [PATCH 16/29] Fix batching interface in SPOSet --- src/QMCWaveFunctions/DiracDeterminant.cpp | 40 ++++++++++++++++++++++- src/QMCWaveFunctions/DiracDeterminant.h | 10 ++++++ src/QMCWaveFunctions/SPOSet.h | 18 +++++----- src/QMCWaveFunctions/WaveFunction.cpp | 23 +++---------- src/QMCWaveFunctions/WaveFunction.h | 4 +-- 5 files changed, 63 insertions(+), 32 deletions(-) diff --git a/src/QMCWaveFunctions/DiracDeterminant.cpp b/src/QMCWaveFunctions/DiracDeterminant.cpp index f8d4474ea..5ec4975ec 100644 --- a/src/QMCWaveFunctions/DiracDeterminant.cpp +++ b/src/QMCWaveFunctions/DiracDeterminant.cpp @@ -93,12 +93,20 @@ typename DiracDeterminant::ValueType DiracDeterminant::ratioGr int iat, GradType& grad_iat) { + UpdateMode = ORB_PBYP_PARTIAL; SPOVGLTimer->start(); Phi->evaluate(P, iat, psiV, dpsiV, d2psiV); SPOVGLTimer->stop(); + return ratioGrad_compute(iat, grad_iat); +} + +template +typename DiracDeterminant::ValueType DiracDeterminant::ratioGrad_compute(int iat, + GradType& grad_iat) +{ RatioTimer->start(); const int WorkingIndex = iat - FirstIndex; - UpdateMode = ORB_PBYP_PARTIAL; + ValueType ratio; GradType rv; // This is an optimization. @@ -273,6 +281,36 @@ void DiracDeterminant::recompute(ParticleSet& P) } } +template +void DiracDeterminant::multi_ratioGrad(const std::vector& WFC_list, + const std::vector& P_list, + int iat, + std::vector& ratios, + std::vector& grad_new) +{ + SPOVGLTimer->start(); + std::vector phi_list; phi_list.reserve(WFC_list.size()); + std::vector psi_v_list; psi_v_list.reserve(WFC_list.size()); + std::vector dpsi_v_list; dpsi_v_list.reserve(WFC_list.size()); + std::vector d2psi_v_list; d2psi_v_list.reserve(WFC_list.size()); + + for(auto wfc : WFC_list) + { + auto det = static_cast*>(wfc); + phi_list.push_back(det->Phi); + psi_v_list.push_back(&(det->psiV)); + dpsi_v_list.push_back(&(det->dpsiV)); + d2psi_v_list.push_back(&(det->d2psiV)); + } + + Phi->multi_evaluate(phi_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); + SPOVGLTimer->stop(); + + //#pragma omp parallel for + for (int iw = 0; iw < P_list.size(); iw++) + ratios[iw] = static_cast*>(WFC_list[iw])->ratioGrad_compute(iat, grad_new[iw]); +} + typedef QMCTraits::ValueType ValueType; typedef QMCTraits::QTFull::ValueType mValueType; diff --git a/src/QMCWaveFunctions/DiracDeterminant.h b/src/QMCWaveFunctions/DiracDeterminant.h index 70333f9cc..16111185f 100644 --- a/src/QMCWaveFunctions/DiracDeterminant.h +++ b/src/QMCWaveFunctions/DiracDeterminant.h @@ -74,6 +74,9 @@ class DiracDeterminant : public WaveFunctionComponent */ ValueType ratioGrad(ParticleSet& P, int iat, GradType& grad_iat); + //helper function, called by ratioGrad and multi_ratioGrad + ValueType ratioGrad_compute(int iat, GradType& grad_iat); + GradType evalGrad(ParticleSet& P, int iat); /** move was accepted, update the real container @@ -86,6 +89,12 @@ class DiracDeterminant : public WaveFunctionComponent void recompute(ParticleSet& P); + void multi_ratioGrad(const std::vector& WFC_list, + const std::vector& P_list, + int iat, + std::vector& ratios, + std::vector& grad_new); + /// psiM(j,i) \f$= \psi_j({\bf r}_i)\f$ ValueMatrix_t psiM_temp; @@ -119,6 +128,7 @@ class DiracDeterminant : public WaveFunctionComponent ValueType curRatio; private: + /// Timers NewTimer* UpdateTimer; NewTimer* RatioTimer; diff --git a/src/QMCWaveFunctions/SPOSet.h b/src/QMCWaveFunctions/SPOSet.h index e6e1b2dc8..683250c65 100644 --- a/src/QMCWaveFunctions/SPOSet.h +++ b/src/QMCWaveFunctions/SPOSet.h @@ -99,26 +99,24 @@ class SPOSet : public QMCTraits } /// operates on multiple walkers - virtual void multi_evaluate_v(const std::vector& spo_list, const std::vector& P_list, int iat) + virtual void multi_evaluate(const std::vector& spo_list, const std::vector& P_list, int iat, + std::vector& psi_v_list) { #pragma omp parallel for for (int iw = 0; iw < spo_list.size(); iw++) - spo_list[iw]->evaluate_v(*P_list[iw], iat); + spo_list[iw]->evaluate(*P_list[iw], iat, *psi_v_list[iw]); } - virtual void multi_evaluate_vgl(const std::vector& spo_list, const std::vector& P_list, int iat) + virtual void multi_evaluate(const std::vector& spo_list, const std::vector& P_list, int iat, + std::vector& psi_v_list, + std::vector& dpsi_v_list, + std::vector& d2psi_v_list) { #pragma omp parallel for for (int iw = 0; iw < spo_list.size(); iw++) - spo_list[iw]->evaluate_vgl(*P_list[iw], iat); + spo_list[iw]->evaluate(*P_list[iw], iat, *psi_v_list[iw], *dpsi_v_list[iw], *d2psi_v_list[iw]); } - virtual void multi_evaluate_vgh(const std::vector& spo_list, const std::vector& P_list, int iat) - { -#pragma omp parallel for - for (int iw = 0; iw < spo_list.size(); iw++) - spo_list[iw]->evaluate_vgh(*P_list[iw], iat); - } }; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/WaveFunction.cpp b/src/QMCWaveFunctions/WaveFunction.cpp index 2cc7ac49b..57ce46cee 100644 --- a/src/QMCWaveFunctions/WaveFunction.cpp +++ b/src/QMCWaveFunctions/WaveFunction.cpp @@ -59,7 +59,7 @@ void build_WaveFunction(bool useRef, } // create a spo view - WF.spo = build_SPOSet_view(useRef, spo_main, 1, 0); + auto spo = build_SPOSet_view(useRef, spo_main, 1, 0); const int nelup = els.getTotalNum() / 2; @@ -79,8 +79,8 @@ void build_WaveFunction(bool useRef, // determinant component WF.nelup = nelup; - WF.Det_up = new DetType(WF.spo, 0, delay_rank); - WF.Det_dn = new DetType(WF.spo, nelup, delay_rank); + WF.Det_up = new DetType(spo, 0, delay_rank); + WF.Det_dn = new DetType(spo, nelup, delay_rank); // J1 component J1OrbType* J1 = new J1OrbType(ions, els); @@ -116,8 +116,8 @@ void build_WaveFunction(bool useRef, // determinant component WF.nelup = nelup; - WF.Det_up = new DetType(WF.spo, 0, delay_rank); - WF.Det_dn = new DetType(WF.spo, nelup, delay_rank); + WF.Det_up = new DetType(spo, 0, delay_rank); + WF.Det_dn = new DetType(spo, nelup, delay_rank); // J1 component J1OrbType* J1 = new J1OrbType(ions, els); @@ -148,7 +148,6 @@ WaveFunction::WaveFunction() Is_built(false), nelup(0), ei_TableID(1), - spo(nullptr), Det_up(nullptr), Det_dn(nullptr), LogValue(0.0) @@ -158,7 +157,6 @@ WaveFunction::~WaveFunction() { if (Is_built) { - delete spo; delete Det_up; delete Det_dn; for (size_t i = 0; i < Jastrows.size(); i++) @@ -371,9 +369,6 @@ void WaveFunction::flex_ratioGrad(const std::vector& WF_list, { if (P_list.size() > 1) { - auto spo_list(extract_spo_list(WF_list)); - spo->multi_evaluate_vgh(spo_list, P_list, iat); - timers[Timer_Det]->start(); std::vector ratios_det(P_list.size()); for (int iw = 0; iw < P_list.size(); iw++) @@ -475,14 +470,6 @@ void WaveFunction::flex_evaluateGL(const std::vector& WF_list, WF_list[0]->evaluateGL(*P_list[0]); } -const std::vector WaveFunction::extract_spo_list(const std::vector& WF_list) const -{ - std::vector spo_list; - for (auto it = WF_list.begin(); it != WF_list.end(); it++) - spo_list.push_back((*it)->spo); - return spo_list; -} - const std::vector WaveFunction::extract_up_list(const std::vector& WF_list) const { std::vector up_list; diff --git a/src/QMCWaveFunctions/WaveFunction.h b/src/QMCWaveFunctions/WaveFunction.h index c9f226add..cfd16148f 100644 --- a/src/QMCWaveFunctions/WaveFunction.h +++ b/src/QMCWaveFunctions/WaveFunction.h @@ -41,8 +41,7 @@ class WaveFunction using posT = TinyVector; private: - /// single particle orbitals - SPOSet* spo; + /// Slater determinants WaveFunctionComponent* Det_up; WaveFunctionComponent* Det_dn; /// Jastrow factors @@ -103,7 +102,6 @@ class WaveFunction const RandomGenerator& RNG, int delay_rank, bool enableJ3); - const std::vector extract_spo_list(const std::vector& WF_list) const; const std::vector extract_up_list(const std::vector& WF_list) const; const std::vector From 4639a3ea4ff361f03a04e4cb6fd26f3fa544f16c Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Wed, 5 Jun 2019 16:22:41 -0500 Subject: [PATCH 17/29] Fixing timers --- src/Drivers/miniqmc.cpp | 2 ++ src/Drivers/miniqmc_sync_move.cpp | 5 +++++ src/QMCWaveFunctions/DiracDeterminant.cpp | 2 +- src/QMCWaveFunctions/WaveFunction.cpp | 15 +++------------ 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/Drivers/miniqmc.cpp b/src/Drivers/miniqmc.cpp index 939809b44..6e8da2474 100644 --- a/src/Drivers/miniqmc.cpp +++ b/src/Drivers/miniqmc.cpp @@ -459,7 +459,9 @@ int main(int argc, char** argv) } // iel } // substeps + Timers[Timer_Update]->start(); wavefunction.completeUpdates(); + Timers[Timer_Update]->stop(); els.donePbyP(); // evaluate Kinetic Energy diff --git a/src/Drivers/miniqmc_sync_move.cpp b/src/Drivers/miniqmc_sync_move.cpp index db027a73d..d13fd7d0b 100644 --- a/src/Drivers/miniqmc_sync_move.cpp +++ b/src/Drivers/miniqmc_sync_move.cpp @@ -504,6 +504,11 @@ int main(int argc, char** argv) } // iel } // substeps + Timers[Timer_Update]->start(); + for (int iw = 0; iw < nw_this_batch; iw++) + WF_list[iw]->completeUpdates(); + Timers[Timer_Update]->stop(); + for (int iw = 0; iw < nw_this_batch; iw++) Sub_list[iw]->els.donePbyP(); diff --git a/src/QMCWaveFunctions/DiracDeterminant.cpp b/src/QMCWaveFunctions/DiracDeterminant.cpp index 5ec4975ec..8fcba0324 100644 --- a/src/QMCWaveFunctions/DiracDeterminant.cpp +++ b/src/QMCWaveFunctions/DiracDeterminant.cpp @@ -93,7 +93,6 @@ typename DiracDeterminant::ValueType DiracDeterminant::ratioGr int iat, GradType& grad_iat) { - UpdateMode = ORB_PBYP_PARTIAL; SPOVGLTimer->start(); Phi->evaluate(P, iat, psiV, dpsiV, d2psiV); SPOVGLTimer->stop(); @@ -104,6 +103,7 @@ template typename DiracDeterminant::ValueType DiracDeterminant::ratioGrad_compute(int iat, GradType& grad_iat) { + UpdateMode = ORB_PBYP_PARTIAL; RatioTimer->start(); const int WorkingIndex = iat - FirstIndex; ValueType ratio; diff --git a/src/QMCWaveFunctions/WaveFunction.cpp b/src/QMCWaveFunctions/WaveFunction.cpp index 57ce46cee..f1272a57c 100644 --- a/src/QMCWaveFunctions/WaveFunction.cpp +++ b/src/QMCWaveFunctions/WaveFunction.cpp @@ -32,12 +32,11 @@ namespace qmcplusplus { enum WaveFunctionTimers { - Timer_Det, Timer_GL, }; TimerNameLevelList_t WaveFunctionTimerNames = - {{Timer_Det, "Determinant", timer_level_fine}, {Timer_GL, "Kinetic Energy", timer_level_coarse}}; + {{Timer_GL, "Kinetic Energy", timer_level_coarse}}; void build_WaveFunction(bool useRef, @@ -263,11 +262,9 @@ void WaveFunction::evaluateGL(ParticleSet& P) constexpr valT czero(0); P.G = czero; P.L = czero; - timers[Timer_Det]->start(); Det_up->evaluateGL(P, P.G, P.L); Det_dn->evaluateGL(P, P.G, P.L); LogValue = Det_up->LogValue + Det_dn->LogValue; - timers[Timer_Det]->stop(); for (size_t i = 0; i < Jastrows.size(); i++) { @@ -296,7 +293,6 @@ void WaveFunction::flex_evaluateLog(const std::vector& WF_list, *L_list[iw] = czero; } // det up/dn - timers[Timer_Det]->start(); std::vector up_list(extract_up_list(WF_list)); Det_up->multi_evaluateLog(up_list, P_list, G_list, L_list, LogValues); for (int iw = 0; iw < P_list.size(); iw++) @@ -305,7 +301,6 @@ void WaveFunction::flex_evaluateLog(const std::vector& WF_list, Det_dn->multi_evaluateLog(dn_list, P_list, G_list, L_list, LogValues); for (int iw = 0; iw < P_list.size(); iw++) WF_list[iw]->LogValue += LogValues[iw]; - timers[Timer_Det]->stop(); // Jastrow factors for (size_t i = 0; i < Jastrows.size(); i++) { @@ -330,7 +325,6 @@ void WaveFunction::flex_evalGrad(const std::vector& WF_list, { if (P_list.size() > 1) { - timers[Timer_Det]->start(); std::vector grad_now_det(P_list.size()); if (iat < nelup) { @@ -344,7 +338,6 @@ void WaveFunction::flex_evalGrad(const std::vector& WF_list, } for (int iw = 0; iw < P_list.size(); iw++) grad_now[iw] = grad_now_det[iw]; - timers[Timer_Det]->stop(); for (size_t i = 0; i < Jastrows.size(); i++) { @@ -369,7 +362,6 @@ void WaveFunction::flex_ratioGrad(const std::vector& WF_list, { if (P_list.size() > 1) { - timers[Timer_Det]->start(); std::vector ratios_det(P_list.size()); for (int iw = 0; iw < P_list.size(); iw++) grad_new[iw] = valT(0); @@ -385,7 +377,6 @@ void WaveFunction::flex_ratioGrad(const std::vector& WF_list, } for (int iw = 0; iw < P_list.size(); iw++) ratios[iw] = ratios_det[iw]; - timers[Timer_Det]->stop(); for (size_t i = 0; i < Jastrows.size(); i++) { @@ -409,7 +400,6 @@ void WaveFunction::flex_acceptrestoreMove(const std::vector& WF_l { if (P_list.size() > 1) { - timers[Timer_Det]->start(); if (iat < nelup) { std::vector up_list(extract_up_list(WF_list)); @@ -420,7 +410,6 @@ void WaveFunction::flex_acceptrestoreMove(const std::vector& WF_l std::vector dn_list(extract_dn_list(WF_list)); Det_dn->multi_acceptrestoreMove(dn_list, P_list, isAccepted, iat); } - timers[Timer_Det]->stop(); for (size_t i = 0; i < Jastrows.size(); i++) { @@ -439,6 +428,8 @@ void WaveFunction::flex_evaluateGL(const std::vector& WF_list, { if (P_list.size() > 1) { + ScopedTimer local_timer(timers[Timer_GL]); + constexpr valT czero(0); const std::vector G_list(extract_G_list(P_list)); const std::vector L_list(extract_L_list(P_list)); From 87dbf1afe1bb9698156a9d9d8b0db4fea8888970 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Wed, 5 Jun 2019 20:49:53 -0500 Subject: [PATCH 18/29] Clean up interface --- src/QMCWaveFunctions/SPOSet.h | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/src/QMCWaveFunctions/SPOSet.h b/src/QMCWaveFunctions/SPOSet.h index 683250c65..c15cc30d7 100644 --- a/src/QMCWaveFunctions/SPOSet.h +++ b/src/QMCWaveFunctions/SPOSet.h @@ -46,20 +46,12 @@ class SPOSet : public QMCTraits virtual ~SPOSet() {} /// operates on a single walker - /// evaluating SPOs - virtual void evaluate_v(const ParticleSet& P, int iat) = 0; - virtual void evaluate_vgl(const ParticleSet& P, int iat) = 0; - virtual void evaluate_vgh(const ParticleSet& P, int iat) = 0; - /** evaluate the values of this single-particle orbital set * @param P current ParticleSet * @param iat active particle * @param psi values of the SPO */ - virtual void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v) - { - evaluate_v(P, iat); - } + virtual void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v) = 0; /** evaluate the values, gradients and laplacians of this single-particle orbital set * @param P current ParticleSet @@ -68,10 +60,7 @@ class SPOSet : public QMCTraits * @param dpsi gradients of the SPO * @param d2psi laplacians of the SPO */ - virtual void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v, GradVector_t& dpsi_v, ValueVector_t& d2psi_v) - { - evaluate_vgh(P, iat); - } + virtual void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v, GradVector_t& dpsi_v, ValueVector_t& d2psi_v) = 0; /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles * @param P current ParticleSet From 05341260d4e9f0f61bb46db672b292c338a4725d Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Wed, 5 Jun 2019 22:20:03 -0500 Subject: [PATCH 19/29] Add a few more multi_ function in DiracDeterminant --- src/QMCWaveFunctions/DiracDeterminant.cpp | 23 +++++++++++++++++++ src/QMCWaveFunctions/DiracDeterminant.h | 27 ++++++++++++++++------- 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/src/QMCWaveFunctions/DiracDeterminant.cpp b/src/QMCWaveFunctions/DiracDeterminant.cpp index 8fcba0324..8cdd368eb 100644 --- a/src/QMCWaveFunctions/DiracDeterminant.cpp +++ b/src/QMCWaveFunctions/DiracDeterminant.cpp @@ -281,6 +281,17 @@ void DiracDeterminant::recompute(ParticleSet& P) } } +template +void DiracDeterminant::multi_evaluateLog(const std::vector& WFC_list, + const std::vector& P_list, + const std::vector& G_list, + const std::vector& L_list, + ParticleSet::ParticleValue_t& values) +{ + for (int iw = 0; iw < P_list.size(); iw++) + values[iw] = WFC_list[iw]->evaluateLog(*P_list[iw], *G_list[iw], *L_list[iw]); +}; + template void DiracDeterminant::multi_ratioGrad(const std::vector& WFC_list, const std::vector& P_list, @@ -311,6 +322,18 @@ void DiracDeterminant::multi_ratioGrad(const std::vector*>(WFC_list[iw])->ratioGrad_compute(iat, grad_new[iw]); } +template +void DiracDeterminant::multi_acceptrestoreMove(const std::vector& WFC_list, + const std::vector& P_list, + const std::vector& isAccepted, + int iat) +{ + for (int iw = 0; iw < P_list.size(); iw++) + if (isAccepted[iw]) + WFC_list[iw]->acceptMove(*P_list[iw], iat); +}; + + typedef QMCTraits::ValueType ValueType; typedef QMCTraits::QTFull::ValueType mValueType; diff --git a/src/QMCWaveFunctions/DiracDeterminant.h b/src/QMCWaveFunctions/DiracDeterminant.h index 16111185f..6ebb742fc 100644 --- a/src/QMCWaveFunctions/DiracDeterminant.h +++ b/src/QMCWaveFunctions/DiracDeterminant.h @@ -61,39 +61,50 @@ class DiracDeterminant : public WaveFunctionComponent void evaluateGL(ParticleSet& P, ParticleSet::ParticleGradient_t& G, ParticleSet::ParticleLaplacian_t& L, - bool fromscratch = false); + bool fromscratch = false) override; /** return the ratio only for the iat-th partcle move * @param P current configuration * @param iat the particle thas is being moved */ - ValueType ratio(ParticleSet& P, int iat); + ValueType ratio(ParticleSet& P, int iat) override; /** compute multiple ratios for a particle move void evaluateRatios(VirtualParticleSet& VP, std::vector& ratios); */ - ValueType ratioGrad(ParticleSet& P, int iat, GradType& grad_iat); + ValueType ratioGrad(ParticleSet& P, int iat, GradType& grad_iat) override; //helper function, called by ratioGrad and multi_ratioGrad ValueType ratioGrad_compute(int iat, GradType& grad_iat); - GradType evalGrad(ParticleSet& P, int iat); + GradType evalGrad(ParticleSet& P, int iat) override; /** move was accepted, update the real container */ - void acceptMove(ParticleSet& P, int iat); - void completeUpdates(); + void acceptMove(ParticleSet& P, int iat) override; + void completeUpdates() override; ///evaluate log of a determinant for a particle set - RealType evaluateLog(ParticleSet& P, ParticleSet::ParticleGradient_t& G, ParticleSet::ParticleLaplacian_t& L); + RealType evaluateLog(ParticleSet& P, ParticleSet::ParticleGradient_t& G, ParticleSet::ParticleLaplacian_t& L) override; void recompute(ParticleSet& P); + void multi_evaluateLog(const std::vector& WFC_list, + const std::vector& P_list, + const std::vector& G_list, + const std::vector& L_list, + ParticleSet::ParticleValue_t& values) override; + void multi_ratioGrad(const std::vector& WFC_list, const std::vector& P_list, int iat, std::vector& ratios, - std::vector& grad_new); + std::vector& grad_new) override; + + void multi_acceptrestoreMove(const std::vector& WFC_list, + const std::vector& P_list, + const std::vector& isAccepted, + int iat) override; /// psiM(j,i) \f$= \psi_j({\bf r}_i)\f$ ValueMatrix_t psiM_temp; From ecc8035570e89d66eeb3092e8f557aac646b7138 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Thu, 6 Jun 2019 01:27:55 -0500 Subject: [PATCH 20/29] Use only xx of hess, mimic laplacian. --- src/QMCWaveFunctions/einspline_spo.hpp | 3 +-- src/QMCWaveFunctions/einspline_spo_ref.hpp | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/QMCWaveFunctions/einspline_spo.hpp b/src/QMCWaveFunctions/einspline_spo.hpp index 553e9b20f..398d3ce4a 100644 --- a/src/QMCWaveFunctions/einspline_spo.hpp +++ b/src/QMCWaveFunctions/einspline_spo.hpp @@ -212,8 +212,7 @@ struct einspline_spo : public SPOSet { psi_v[j] = psi[i][j-first]; dpsi_v[j] = grad[i][j-first]; - d2psi_v[j] = hess[i].data(0)[j-first] + hess[i].data(1)[j-first] + hess[i].data(2)[j-first] + - hess[i].data(3)[j-first] + hess[i].data(4)[j-first] + hess[i].data(5)[j-first]; + d2psi_v[j] = hess[i].data(0)[j-first]; } } } diff --git a/src/QMCWaveFunctions/einspline_spo_ref.hpp b/src/QMCWaveFunctions/einspline_spo_ref.hpp index cee60b8b7..31930271e 100644 --- a/src/QMCWaveFunctions/einspline_spo_ref.hpp +++ b/src/QMCWaveFunctions/einspline_spo_ref.hpp @@ -213,8 +213,7 @@ struct einspline_spo_ref : public SPOSet { psi_v[j] = psi[i][j-first]; dpsi_v[j] = grad[i][j-first]; - d2psi_v[j] = hess[i].data(0)[j-first] + hess[i].data(1)[j-first] + hess[i].data(2)[j-first] + - hess[i].data(3)[j-first] + hess[i].data(4)[j-first] + hess[i].data(5)[j-first]; + d2psi_v[j] = hess[i].data(0)[j-first]; } } } From 4d869f6835a9d8693f1028efb3a8e49d5bc3166d Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Thu, 6 Jun 2019 09:51:05 -0500 Subject: [PATCH 21/29] Replace CI check. --- testing/miniqmc_openshift_rhea.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/miniqmc_openshift_rhea.sh b/testing/miniqmc_openshift_rhea.sh index 88c5a7e76..506c876a5 100755 --- a/testing/miniqmc_openshift_rhea.sh +++ b/testing/miniqmc_openshift_rhea.sh @@ -70,7 +70,7 @@ echo checking Determinant update full precision echo ---------------------------------------------------- echo -./bin/check_determinant +./bin/check_wfc -f Det echo "" echo "" @@ -120,7 +120,7 @@ echo checking Determinant update mixed precision echo ---------------------------------------------------- echo -./bin/check_determinant +./bin/check_wfc -f Det EOF From 862c4e5cbfaf4a9aeaa7ac78d54cd61f5d14cac3 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Fri, 7 Jun 2019 17:08:20 -0400 Subject: [PATCH 22/29] A bit update. --- src/Drivers/miniqmc.cpp | 4 +--- src/Drivers/miniqmc_sync_move.cpp | 6 +----- src/QMCWaveFunctions/WaveFunction.cpp | 20 +++++++++++++++++++- src/QMCWaveFunctions/WaveFunction.h | 2 ++ src/QMCWaveFunctions/WaveFunctionComponent.h | 7 +++++++ 5 files changed, 30 insertions(+), 9 deletions(-) diff --git a/src/Drivers/miniqmc.cpp b/src/Drivers/miniqmc.cpp index 6e8da2474..856637de8 100644 --- a/src/Drivers/miniqmc.cpp +++ b/src/Drivers/miniqmc.cpp @@ -457,11 +457,9 @@ int main(int argc, char** argv) wavefunction.restore(iel); } } // iel + wavefunction.completeUpdates(); } // substeps - Timers[Timer_Update]->start(); - wavefunction.completeUpdates(); - Timers[Timer_Update]->stop(); els.donePbyP(); // evaluate Kinetic Energy diff --git a/src/Drivers/miniqmc_sync_move.cpp b/src/Drivers/miniqmc_sync_move.cpp index d13fd7d0b..1d3851c61 100644 --- a/src/Drivers/miniqmc_sync_move.cpp +++ b/src/Drivers/miniqmc_sync_move.cpp @@ -502,13 +502,9 @@ int main(int argc, char** argv) Sub_list[iw]->els.rejectMove(iel); } } // iel + anon_mover.wavefunction.flex_completeUpdates(WF_list); } // substeps - Timers[Timer_Update]->start(); - for (int iw = 0; iw < nw_this_batch; iw++) - WF_list[iw]->completeUpdates(); - Timers[Timer_Update]->stop(); - for (int iw = 0; iw < nw_this_batch; iw++) Sub_list[iw]->els.donePbyP(); diff --git a/src/QMCWaveFunctions/WaveFunction.cpp b/src/QMCWaveFunctions/WaveFunction.cpp index f1272a57c..c23f6cc20 100644 --- a/src/QMCWaveFunctions/WaveFunction.cpp +++ b/src/QMCWaveFunctions/WaveFunction.cpp @@ -33,10 +33,12 @@ namespace qmcplusplus enum WaveFunctionTimers { Timer_GL, + Timer_CompleteUpdates, }; TimerNameLevelList_t WaveFunctionTimerNames = - {{Timer_GL, "Kinetic Energy", timer_level_coarse}}; + {{Timer_GL, "Kinetic Energy", timer_level_coarse}, + {Timer_CompleteUpdates, "Complete Updates", timer_level_coarse}}; void build_WaveFunction(bool useRef, @@ -249,6 +251,7 @@ void WaveFunction::acceptMove(ParticleSet& P, int iat) void WaveFunction::completeUpdates() { + ScopedTimer local_timer(timers[Timer_CompleteUpdates]); Det_up->completeUpdates(); Det_dn->completeUpdates(); } @@ -461,6 +464,21 @@ void WaveFunction::flex_evaluateGL(const std::vector& WF_list, WF_list[0]->evaluateGL(*P_list[0]); } +void WaveFunction::flex_completeUpdates(const std::vector& WF_list) const +{ + if (WF_list.size() > 1) + { + ScopedTimer local_timer(timers[Timer_CompleteUpdates]); + + std::vector up_list(extract_up_list(WF_list)); + Det_up->multi_completeUpdates(up_list); + std::vector dn_list(extract_dn_list(WF_list)); + Det_dn->multi_completeUpdates(dn_list); + } + else if(WF_list.size()==1) + WF_list[0]->completeUpdates(); +} + const std::vector WaveFunction::extract_up_list(const std::vector& WF_list) const { std::vector up_list; diff --git a/src/QMCWaveFunctions/WaveFunction.h b/src/QMCWaveFunctions/WaveFunction.h index cfd16148f..3b26d4b63 100644 --- a/src/QMCWaveFunctions/WaveFunction.h +++ b/src/QMCWaveFunctions/WaveFunction.h @@ -88,6 +88,8 @@ class WaveFunction void flex_evaluateGL(const std::vector& WF_list, const std::vector& P_list) const; + void flex_completeUpdates(const std::vector& WF_list) const; + // others int get_ei_TableID() const { return ei_TableID; } valT getLogValue() const { return LogValue; } diff --git a/src/QMCWaveFunctions/WaveFunctionComponent.h b/src/QMCWaveFunctions/WaveFunctionComponent.h index 5f442836c..e996a89c7 100644 --- a/src/QMCWaveFunctions/WaveFunctionComponent.h +++ b/src/QMCWaveFunctions/WaveFunctionComponent.h @@ -236,6 +236,13 @@ struct WaveFunctionComponent : public QMCTraits for (int iw = 0; iw < P_list.size(); iw++) WFC_list[iw]->evaluateGL(*P_list[iw], *G_list[iw], *L_list[iw], fromscratch); }; + + virtual void multi_completeUpdates(const std::vector& WFC_list) + { + #pragma omp parallel for + for (int iw = 0; iw < WFC_list.size(); iw++) + WFC_list[iw]->completeUpdates(); + } }; } // namespace qmcplusplus #endif From 44d2525e29e7629ae634643f8e5e16a176752721 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Fri, 7 Jun 2019 17:08:27 -0400 Subject: [PATCH 23/29] Change check_wfc G and L checks to relative errors. Absolute error checking is quite unreliable on deteriminants. MP build is not checked with deteriminant. --- src/Drivers/check_wfc.cpp | 32 +++++++++++++++---------------- testing/miniqmc_openshift_rhea.sh | 5 +++-- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/Drivers/check_wfc.cpp b/src/Drivers/check_wfc.cpp index da2637d6e..266d43b2f 100644 --- a/src/Drivers/check_wfc.cpp +++ b/src/Drivers/check_wfc.cpp @@ -267,26 +267,26 @@ int main(int argc, char** argv) cout << "Check values ref " << wfc_ref->LogValue << " " << els_ref.G[12] << " " << els_ref.L[12] << endl << endl; - cout << "evaluateLog::V Error = " << (wfc->LogValue - wfc_ref->LogValue) / nels << endl; - evaluateLog_v_err += std::fabs((wfc->LogValue - wfc_ref->LogValue) / nels); + cout << "evaluateLog::V relative Error = " << (wfc->LogValue - wfc_ref->LogValue) / (wfc_ref->LogValue * nels) << endl; + evaluateLog_v_err += std::fabs((wfc->LogValue - wfc_ref->LogValue) / (wfc_ref->LogValue * nels)); { double g_err = 0.0; for (int iel = 0; iel < nels; ++iel) { PosType dr = (els.G[iel] - els_ref.G[iel]); - RealType d = sqrt(dot(dr, dr)); + RealType d = sqrt(dot(dr, dr)) / dot(els_ref.G[iel],els_ref.G[iel]); g_err += d; } - cout << "evaluateLog::G Error = " << g_err / nels << endl; + cout << "evaluateLog::G relative Error = " << g_err / nels << endl; evaluateLog_g_err += std::fabs(g_err / nels); } { double l_err = 0.0; for (int iel = 0; iel < nels; ++iel) { - l_err += abs(els.L[iel] - els_ref.L[iel]); + l_err += abs((els.L[iel] - els_ref.L[iel])/els_ref.L[iel]); } - cout << "evaluateLog::L Error = " << l_err / nels << endl; + cout << "evaluateLog::L relative Error = " << l_err / nels << endl; evaluateLog_l_err += std::fabs(l_err / nels); } @@ -304,7 +304,7 @@ int main(int argc, char** argv) els_ref.setActive(iel); PosType grad_ref = wfc_ref->evalGrad(els_ref, iel) - grad_soa; - g_eval += sqrt(dot(grad_ref, grad_ref)); + g_eval += sqrt(dot(grad_ref, grad_ref)/dot(grad_soa,grad_soa)); els.makeMove(iel, delta[iel]); els_ref.makeMove(iel, delta[iel]); @@ -315,7 +315,7 @@ int main(int argc, char** argv) RealType r_ref = wfc_ref->ratioGrad(els_ref, iel, grad_ref); grad_ref -= grad_soa; - g_ratio += sqrt(dot(grad_ref, grad_ref)); + g_ratio += sqrt(dot(grad_ref, grad_ref)/dot(grad_soa, grad_soa)); r_ratio += abs(r_soa / r_ref - 1); if (ur[iel] < r_ref) @@ -338,8 +338,8 @@ int main(int argc, char** argv) wfc_ref->completeUpdates(); cout << "Accepted " << naccepted << "/" << nels << endl; - cout << "evalGrad::G Error = " << g_eval / nels << endl; - cout << "ratioGrad::G Error = " << g_ratio / nels << endl; + cout << "evalGrad::G relative Error = " << g_eval / nels << endl; + cout << "ratioGrad::G relative Error = " << g_ratio / nels << endl; cout << "ratioGrad::Ratio Error = " << r_ratio / nels << endl; evalGrad_g_err += std::fabs(g_eval / nels); ratioGrad_g_err += std::fabs(g_ratio / nels); @@ -362,19 +362,19 @@ int main(int argc, char** argv) for (int iel = 0; iel < nels; ++iel) { PosType dr = (els.G[iel] - els_ref.G[iel]); - RealType d = sqrt(dot(dr, dr)); + RealType d = sqrt(dot(dr, dr)/dot(els_ref.G[iel],els_ref.G[iel])); g_err += d; } - cout << "evaluteGL::G Error = " << g_err / nels << endl; + cout << "evaluteGL::G relative Error = " << g_err / nels << endl; evaluateGL_g_err += std::fabs(g_err / nels); } { double l_err = 0.0; for (int iel = 0; iel < nels; ++iel) { - l_err += abs(els.L[iel] - els_ref.L[iel]); + l_err += abs((els.L[iel] - els_ref.L[iel])/els_ref.L[iel]); } - cout << "evaluteGL::L Error = " << l_err / nels << endl; + cout << "evaluteGL::L relative Error = " << l_err / nels << endl; evaluateGL_l_err += std::fabs(l_err / nels); } @@ -410,8 +410,8 @@ int main(int argc, char** argv) } // end of omp parallel int np = omp_get_max_threads(); - const RealType small = std::numeric_limits::epsilon() * ( wfc_name == "Det" ? 1e8 : 1e4 ); - std::cout << "Tolerance " << small << std::endl; + const RealType small = std::numeric_limits::epsilon() * ( wfc_name == "Det" ? 1e6 : 1e4 ); + std::cout << "Passing Tolerance " << small << std::endl; bool fail = false; cout << std::endl; if (evaluateLog_v_err / np > small) diff --git a/testing/miniqmc_openshift_rhea.sh b/testing/miniqmc_openshift_rhea.sh index d6ceebc8b..474252bd5 100755 --- a/testing/miniqmc_openshift_rhea.sh +++ b/testing/miniqmc_openshift_rhea.sh @@ -120,7 +120,8 @@ echo checking Determinant update mixed precision echo ---------------------------------------------------- echo -./bin/check_wfc -f Det +# YL: commented out mixed precision check. Too unstable +#./bin/check_wfc -f Det EOF @@ -129,4 +130,4 @@ EOF cp $BUILD_DIR/$BUILD_TAG.o* ../ # get status from all checks -[ $(grep -e 'All checks passed for J[123]' -e 'All checks passed for spo' -e 'All checks passed for determinant' ../$BUILD_TAG.o* | wc -l) -eq 10 ] +[ $(grep -e 'All checks passed for J[123]' -e 'All checks passed for spo' -e 'All checks passed for determinant' ../$BUILD_TAG.o* | wc -l) -eq 9 ] From f21e626f9ff5115f5353ebd8326cff74ec821cbd Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Fri, 7 Jun 2019 23:18:07 -0500 Subject: [PATCH 24/29] Use relative error check on Det only --- src/Drivers/check_wfc.cpp | 55 +++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/src/Drivers/check_wfc.cpp b/src/Drivers/check_wfc.cpp index 266d43b2f..a147e61e7 100644 --- a/src/Drivers/check_wfc.cpp +++ b/src/Drivers/check_wfc.cpp @@ -65,6 +65,24 @@ void print_help() exit(1); // print help and exit } +template +T check_grads(TinyVector& grad, TinyVector& grad_ref, bool use_relative_error) +{ + if(use_relative_error) + return sqrt(dot(grad - grad_ref, grad - grad_ref) / dot(grad, grad_ref)); + else + return sqrt(dot(grad - grad_ref, grad - grad_ref)); +} + +template +T check_val(T val, T val_ref, bool use_relative_error) +{ + if(use_relative_error) + return abs((val-val_ref)/val_ref); + else + return abs(val-val_ref); +} + int main(int argc, char** argv) { // clang-format off @@ -199,6 +217,8 @@ int main(int argc, char** argv) WaveFunctionComponentPtr wfc = nullptr; WaveFunctionComponentPtr wfc_ref = nullptr; + bool use_relative_error(false); + if (wfc_name == "J2") { TwoBodyJastrow>* J = new TwoBodyJastrow>(els); @@ -246,6 +266,7 @@ int main(int argc, char** argv) auto* Det_ref = new miniqmcreference::DiracDeterminantRef<>(spo_ref, 0, 31); wfc_ref = dynamic_cast(Det_ref); cout << "Built Det_ref" << endl; + use_relative_error = true; } constexpr RealType czero(0); @@ -267,26 +288,25 @@ int main(int argc, char** argv) cout << "Check values ref " << wfc_ref->LogValue << " " << els_ref.G[12] << " " << els_ref.L[12] << endl << endl; - cout << "evaluateLog::V relative Error = " << (wfc->LogValue - wfc_ref->LogValue) / (wfc_ref->LogValue * nels) << endl; - evaluateLog_v_err += std::fabs((wfc->LogValue - wfc_ref->LogValue) / (wfc_ref->LogValue * nels)); + cout << "evaluateLog::V Error = " << (wfc->LogValue - wfc_ref->LogValue) / nels << endl; + evaluateLog_v_err += std::fabs((wfc->LogValue - wfc_ref->LogValue) / nels); { double g_err = 0.0; for (int iel = 0; iel < nels; ++iel) { - PosType dr = (els.G[iel] - els_ref.G[iel]); - RealType d = sqrt(dot(dr, dr)) / dot(els_ref.G[iel],els_ref.G[iel]); + RealType d = check_grads(els.G[iel], els_ref.G[iel], use_relative_error); g_err += d; } - cout << "evaluateLog::G relative Error = " << g_err / nels << endl; + cout << "evaluateLog::G Error = " << g_err / nels << endl; evaluateLog_g_err += std::fabs(g_err / nels); } { double l_err = 0.0; for (int iel = 0; iel < nels; ++iel) { - l_err += abs((els.L[iel] - els_ref.L[iel])/els_ref.L[iel]); + l_err += check_val(els.L[iel], els_ref.L[iel], use_relative_error); } - cout << "evaluateLog::L relative Error = " << l_err / nels << endl; + cout << "evaluateLog::L Error = " << l_err / nels << endl; evaluateLog_l_err += std::fabs(l_err / nels); } @@ -303,8 +323,8 @@ int main(int argc, char** argv) PosType grad_soa = wfc->evalGrad(els, iel); els_ref.setActive(iel); - PosType grad_ref = wfc_ref->evalGrad(els_ref, iel) - grad_soa; - g_eval += sqrt(dot(grad_ref, grad_ref)/dot(grad_soa,grad_soa)); + PosType grad_ref = wfc_ref->evalGrad(els_ref, iel); + g_eval += check_grads(grad_soa, grad_ref, use_relative_error); els.makeMove(iel, delta[iel]); els_ref.makeMove(iel, delta[iel]); @@ -314,8 +334,7 @@ int main(int argc, char** argv) grad_ref = 0; RealType r_ref = wfc_ref->ratioGrad(els_ref, iel, grad_ref); - grad_ref -= grad_soa; - g_ratio += sqrt(dot(grad_ref, grad_ref)/dot(grad_soa, grad_soa)); + g_ratio += check_grads(grad_soa, grad_ref, use_relative_error); r_ratio += abs(r_soa / r_ref - 1); if (ur[iel] < r_ref) @@ -338,8 +357,8 @@ int main(int argc, char** argv) wfc_ref->completeUpdates(); cout << "Accepted " << naccepted << "/" << nels << endl; - cout << "evalGrad::G relative Error = " << g_eval / nels << endl; - cout << "ratioGrad::G relative Error = " << g_ratio / nels << endl; + cout << "evalGrad::G Error = " << g_eval / nels << endl; + cout << "ratioGrad::G Error = " << g_ratio / nels << endl; cout << "ratioGrad::Ratio Error = " << r_ratio / nels << endl; evalGrad_g_err += std::fabs(g_eval / nels); ratioGrad_g_err += std::fabs(g_ratio / nels); @@ -361,20 +380,18 @@ int main(int argc, char** argv) double g_err = 0.0; for (int iel = 0; iel < nels; ++iel) { - PosType dr = (els.G[iel] - els_ref.G[iel]); - RealType d = sqrt(dot(dr, dr)/dot(els_ref.G[iel],els_ref.G[iel])); - g_err += d; + g_err += check_grads(els.G[iel], els_ref.G[iel], use_relative_error); } - cout << "evaluteGL::G relative Error = " << g_err / nels << endl; + cout << "evaluteGL::G Error = " << g_err / nels << endl; evaluateGL_g_err += std::fabs(g_err / nels); } { double l_err = 0.0; for (int iel = 0; iel < nels; ++iel) { - l_err += abs((els.L[iel] - els_ref.L[iel])/els_ref.L[iel]); + l_err += check_val(els.L[iel], els_ref.L[iel], use_relative_error); } - cout << "evaluteGL::L relative Error = " << l_err / nels << endl; + cout << "evaluteGL::L Error = " << l_err / nels << endl; evaluateGL_l_err += std::fabs(l_err / nels); } From ce4e53cf17bbe8849b132d297b70bb556197d098 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Mon, 10 Jun 2019 11:42:40 -0500 Subject: [PATCH 25/29] Correct script. --- testing/miniqmc_openshift_rhea.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/miniqmc_openshift_rhea.sh b/testing/miniqmc_openshift_rhea.sh index 474252bd5..f54e0e09b 100755 --- a/testing/miniqmc_openshift_rhea.sh +++ b/testing/miniqmc_openshift_rhea.sh @@ -130,4 +130,4 @@ EOF cp $BUILD_DIR/$BUILD_TAG.o* ../ # get status from all checks -[ $(grep -e 'All checks passed for J[123]' -e 'All checks passed for spo' -e 'All checks passed for determinant' ../$BUILD_TAG.o* | wc -l) -eq 9 ] +[ $(grep -e 'All checks passed for J[123]' -e 'All checks passed for spo' -e 'All checks passed for Det' ../$BUILD_TAG.o* | wc -l) -eq 9 ] From b90bab65b9cb100776916f16a3870c2f1f76d96b Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Mon, 10 Jun 2019 12:26:33 -0500 Subject: [PATCH 26/29] Change DU rank option to -k. --- src/Drivers/miniqmc.cpp | 8 ++++---- src/Drivers/miniqmc_sync_move.cpp | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Drivers/miniqmc.cpp b/src/Drivers/miniqmc.cpp index 856637de8..e6498ad42 100644 --- a/src/Drivers/miniqmc.cpp +++ b/src/Drivers/miniqmc.cpp @@ -153,7 +153,7 @@ void print_help() app_summary() << " miniqmc [-bhjvV] [-g \"n0 n1 n2\"] [-m meshfactor]" << '\n'; app_summary() << " [-n steps] [-N substeps] [-x rmax]" << '\n'; app_summary() << " [-r AcceptanceRatio] [-s seed] [-w walkers]" << '\n'; - app_summary() << " [-a tile_size] [-t timer_level] [-u delay_rank]" << '\n'; + app_summary() << " [-a tile_size] [-t timer_level] [-k delay_rank]" << '\n'; app_summary() << "options:" << '\n'; app_summary() << " -a size of each spline tile default: num of orbs" << '\n'; app_summary() << " -b use reference implementations default: off" << '\n'; @@ -166,7 +166,7 @@ void print_help() app_summary() << " -r set the acceptance ratio. default: 0.5" << '\n'; app_summary() << " -s set the random seed. default: 11" << '\n'; app_summary() << " -t timer level: coarse or fine default: fine" << '\n'; - app_summary() << " -u matrix delayed update rank default: 32" << '\n'; + app_summary() << " -k matrix delayed update rank default: 32" << '\n'; app_summary() << " -v verbose output" << '\n'; app_summary() << " -V print version information and exit" << '\n'; app_summary() << " -w number of walker(movers) default: num of threads"<< '\n'; @@ -217,7 +217,7 @@ int main(int argc, char** argv) int opt; while (optind < argc) { - if ((opt = getopt(argc, argv, "bhjvVa:c:g:m:n:N:r:s:t:u:w:x:")) != -1) + if ((opt = getopt(argc, argv, "bhjvVa:c:g:m:n:N:r:s:t:k:w:x:")) != -1) { switch (opt) { @@ -263,7 +263,7 @@ int main(int argc, char** argv) case 't': timer_level_name = std::string(optarg); break; - case 'u': + case 'k': delay_rank = atoi(optarg); break; case 'v': diff --git a/src/Drivers/miniqmc_sync_move.cpp b/src/Drivers/miniqmc_sync_move.cpp index 1d3851c61..2b04b0925 100644 --- a/src/Drivers/miniqmc_sync_move.cpp +++ b/src/Drivers/miniqmc_sync_move.cpp @@ -154,7 +154,7 @@ void print_help() app_summary() << " [-n steps] [-N substeps] [-x rmax]" << '\n'; app_summary() << " [-r AcceptanceRatio] [-s seed] [-w walkers]" << '\n'; app_summary() << " [-a tile_size] [-t timer_level] [-B nw_b]" << '\n'; - app_summary() << " [-u delay_rank]" << '\n'; + app_summary() << " [-k delay_rank]" << '\n'; app_summary() << "options:" << '\n'; app_summary() << " -a size of each spline tile default: num of orbs" << '\n'; app_summary() << " -b use reference implementations default: off" << '\n'; @@ -169,7 +169,7 @@ void print_help() app_summary() << " -r set the acceptance ratio. default: 0.5" << '\n'; app_summary() << " -s set the random seed. default: 11" << '\n'; app_summary() << " -t timer level: coarse or fine default: fine" << '\n'; - app_summary() << " -u matrix delayed update rank default: 32" << '\n'; + app_summary() << " -k matrix delayed update rank default: 32" << '\n'; app_summary() << " -v verbose output" << '\n'; app_summary() << " -V print version information and exit" << '\n'; app_summary() << " -w number of walker(movers) default: num of threads"<< '\n'; @@ -225,7 +225,7 @@ int main(int argc, char** argv) int opt; while (optind < argc) { - if ((opt = getopt(argc, argv, "bhjPvVa:B:c:g:m:n:N:r:s:t:u:w:x:")) != -1) + if ((opt = getopt(argc, argv, "bhjPvVa:B:c:g:m:n:N:r:s:t:k:w:x:")) != -1) { switch (opt) { @@ -277,7 +277,7 @@ int main(int argc, char** argv) case 't': timer_level_name = std::string(optarg); break; - case 'u': + case 'k': delay_rank = atoi(optarg); break; case 'v': From 43a3da8b34323e8ddd16d3a3db7cb0063c56bdd5 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Mon, 10 Jun 2019 13:35:39 -0500 Subject: [PATCH 27/29] Adjust omp runtime calls. --- src/QMCWaveFunctions/DelayedUpdate.h | 2 +- src/QMCWaveFunctions/DiracMatrix.h | 2 +- src/Utilities/Configuration.h | 11 ++++------- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/QMCWaveFunctions/DelayedUpdate.h b/src/QMCWaveFunctions/DelayedUpdate.h index 736afffe1..dc333f6ea 100644 --- a/src/QMCWaveFunctions/DelayedUpdate.h +++ b/src/QMCWaveFunctions/DelayedUpdate.h @@ -174,7 +174,7 @@ class DelayedUpdate else { const int lda_Binv = Binv.cols(); - int num_threads_nested = getNumThreadsNested(); + int num_threads_nested = getNextLevelNumThreads(); // always use serial when norb is small or only one second level thread bool use_serial(norb <= 256 || num_threads_nested == 1); if (use_serial || BlasThreadingEnv::NestedThreadingSupported()) diff --git a/src/QMCWaveFunctions/DiracMatrix.h b/src/QMCWaveFunctions/DiracMatrix.h index 04f16c21e..222f2c72f 100644 --- a/src/QMCWaveFunctions/DiracMatrix.h +++ b/src/QMCWaveFunctions/DiracMatrix.h @@ -162,7 +162,7 @@ class DiracMatrix */ inline void invert_transpose(const Matrix& amat, Matrix& invMat, real_type& LogDet, real_type& Phase) { - BlasThreadingEnv knob(getNumThreadsNested()); + BlasThreadingEnv knob(getNextLevelNumThreads()); const int n = invMat.rows(); const int lda = invMat.cols(); T_FP* invMat_ptr(nullptr); diff --git a/src/Utilities/Configuration.h b/src/Utilities/Configuration.h index b849c0eb9..dba9b3e06 100644 --- a/src/Utilities/Configuration.h +++ b/src/Utilities/Configuration.h @@ -50,17 +50,14 @@ inline omp_int_t omp_get_ancestor_thread_num(int level) { return 0; } inline bool omp_get_nested() { return false; } #endif -/// get the number of threads at the next nested level -inline int getNumThreadsNested() +/// get the number of threads at the next parallel level +inline int getNextLevelNumThreads() { int num_threads = 1; - if (omp_get_nested()) - { #pragma omp parallel - { + { #pragma omp master - num_threads = omp_get_num_threads(); - } + num_threads = omp_get_num_threads(); } return num_threads; } From 7a1ae9dec5bedb41b3b61560e30ec652ae092dfb Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Wed, 12 Jun 2019 18:43:19 -0500 Subject: [PATCH 28/29] Move LAPACK wrapper to OhmmsBlas.h --- src/Numerics/OhmmsBlas.h | 56 +++++++++------------ src/QMCWaveFunctions/DiracMatrix.h | 79 ++---------------------------- 2 files changed, 28 insertions(+), 107 deletions(-) diff --git a/src/Numerics/OhmmsBlas.h b/src/Numerics/OhmmsBlas.h index 2d621a6d0..1dfb11c6d 100644 --- a/src/Numerics/OhmmsBlas.h +++ b/src/Numerics/OhmmsBlas.h @@ -372,58 +372,48 @@ struct BLAS } }; -struct LAPACK +namespace LAPACK { + inline void getrf(int n, int m, float* a, int lda, int* piv, int& status) + { + sgetrf(n, m, a, lda, piv, status); + } + + inline void getrf(int n, int m, std::complex* a, int lda, int* piv, int& status) + { + cgetrf(n, m, a, lda, piv, status); + } - inline static void gesvd(char *jobu, char *jobvt, int *m, int *n, float *a, - int *lda, float *s, float *u, int *ldu, float *vt, - int *ldvt, float *work, int *lwork, int *info) + inline void getrf(int n, int m, double* a, int lda, int* piv, int& status) { - sgesvd(jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, info); + dgetrf(n, m, a, lda, piv, status); } - inline static void gesvd(char *jobu, char *jobvt, int *m, int *n, double *a, - int *lda, double *s, double *u, int *ldu, double *vt, - int *ldvt, double *work, int *lwork, int *info) + inline void getrf(int n, int m, std::complex* a, int lda, int* piv, int& status) { - dgesvd(jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, info); + zgetrf(n, m, a, lda, piv, status); } - inline static void geev(char *jobvl, char *jobvr, int *n, double *a, int *lda, - double *alphar, double *alphai, double *vl, int *ldvl, - double *vr, int *ldvr, double *work, int *lwork, - int *info) + inline void getri(int n, float* a, int lda, int* piv, float* work, int& lwork, int& status) { - dgeev(jobvl, jobvr, n, a, lda, alphar, alphai, vl, ldvl, vr, ldvr, work, - lwork, info); + sgetri(n, a, lda, piv, work, lwork, status); } - inline static void geev(char *jobvl, char *jobvr, int *n, float *a, int *lda, - float *alphar, float *alphai, float *vl, int *ldvl, - float *vr, int *ldvr, float *work, int *lwork, - int *info) + inline void getri(int n, std::complex* a, int lda, int* piv, std::complex* work, int& lwork, int& status) { - sgeev(jobvl, jobvr, n, a, lda, alphar, alphai, vl, ldvl, vr, ldvr, work, - lwork, info); + cgetri(n, a, lda, piv, work, lwork, status); } - inline static void ggev(char *jobvl, char *jobvr, int *n, double *a, int *lda, - double *b, int *ldb, double *alphar, double *alphai, - double *beta, double *vl, int *ldvl, double *vr, - int *ldvr, double *work, int *lwork, int *info) + inline void getri(int n, double* a, int lda, int* piv, double* work, int& lwork, int& status) { - dggev(jobvl, jobvr, n, a, lda, b, ldb, alphar, alphai, beta, vl, ldvl, vr, - ldvr, work, lwork, info); + dgetri(n, a, lda, piv, work, lwork, status); } - inline static void ggev(char *jobvl, char *jobvr, int *n, float *a, int *lda, - float *b, int *ldb, float *alphar, float *alphai, - float *beta, float *vl, int *ldvl, float *vr, - int *ldvr, float *work, int *lwork, int *info) + inline void getri(int n, std::complex* a, int lda, int* piv, std::complex* work, int& lwork, int& status) { - sggev(jobvl, jobvr, n, a, lda, b, ldb, alphar, alphai, beta, vl, ldvl, vr, - ldvr, work, lwork, info); + zgetri(n, a, lda, piv, work, lwork, status); } + }; // clang-format on #endif // OHMMS_BLAS_H diff --git a/src/QMCWaveFunctions/DiracMatrix.h b/src/QMCWaveFunctions/DiracMatrix.h index 222f2c72f..3cb460a9c 100644 --- a/src/QMCWaveFunctions/DiracMatrix.h +++ b/src/QMCWaveFunctions/DiracMatrix.h @@ -22,77 +22,6 @@ namespace qmcplusplus { -inline void Xgetrf(int n, int m, float* restrict a, int lda, int* restrict piv) -{ - int status; - sgetrf(n, m, a, lda, piv, status); -} - -inline void Xgetri(int n, float* restrict a, int lda, int* restrict piv, float* restrict work, int& lwork) -{ - int status; - sgetri(n, a, lda, piv, work, lwork, status); -} - -inline void Xgetrf(int n, int m, std::complex* restrict a, int lda, int* restrict piv) -{ - int status; - cgetrf(n, m, a, lda, piv, status); -} - -/** inversion of a float matrix after lu factorization*/ -inline void Xgetri(int n, - std::complex* restrict a, - int lda, - int* restrict piv, - std::complex* restrict work, - int& lwork) -{ - int status; - cgetri(n, a, lda, piv, work, lwork, status); -} - -inline void Xgetrf(int n, int m, double* restrict a, int lda, int* restrict piv) -{ - int status; - dgetrf(n, m, a, lda, piv, status); -} - -inline void Xgetri(int n, double* restrict a, int lda, int* restrict piv, double* restrict work, int& lwork) -{ - int status; - dgetri(n, a, lda, piv, work, lwork, status); -} - -inline void Xgetrf(int n, int m, std::complex* restrict a, int lda, int* restrict piv) -{ - int status; - zgetrf(n, m, a, lda, piv, status); -} - -/** inversion of a std::complex matrix after lu factorization*/ -inline void Xgetri(int n, - std::complex* restrict a, - int lda, - int* restrict piv, - std::complex* restrict work, - int& lwork) -{ - int status; - zgetri(n, a, lda, piv, work, lwork, status); -} - - -template -inline void TansposeSquare(const TIN* restrict in, TOUT* restrict out, size_t n, size_t lda) -{ -#pragma omp simd - for (size_t i = 0; i < n; ++i) - for (size_t j = 0; j < n; ++j) - out[i * lda + j] = in[i + j * lda]; -} - - template inline T computeLogDet(const T* restrict diag, int n, const int* restrict pivot, T& phase) { @@ -147,7 +76,8 @@ class DiracMatrix Lwork = -1; T_FP tmp; real_type_fp lw; - Xgetri(lda, invMat_ptr, lda, m_pivot.data(), &tmp, Lwork); + int status; + LAPACK::getri(lda, invMat_ptr, lda, m_pivot.data(), &tmp, Lwork, status); convert(tmp, lw); Lwork = static_cast(lw); m_work.resize(Lwork); @@ -176,13 +106,14 @@ class DiracMatrix #endif if (Lwork < lda) reset(invMat_ptr, lda); - Xgetrf(n, n, invMat_ptr, lda, m_pivot.data()); + int status; + LAPACK::getrf(n, n, invMat_ptr, lda, m_pivot.data(), status); for (int i = 0; i < n; i++) LU_diag[i] = invMat_ptr[i * lda + i]; real_type_fp Phase_tmp; LogDet = computeLogDet(LU_diag.data(), n, m_pivot.data(), Phase_tmp); Phase = Phase_tmp; - Xgetri(n, invMat_ptr, lda, m_pivot.data(), m_work.data(), Lwork); + LAPACK::getri(n, invMat_ptr, lda, m_pivot.data(), m_work.data(), Lwork, status); #if defined(MIXED_PRECISION) invMat = psiM_fp; #endif From 9afab3471aa0bed0aa5fa3278ee782fcda8b6ab8 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Wed, 12 Jun 2019 18:46:49 -0500 Subject: [PATCH 29/29] Minor clean up --- src/Utilities/scalar_traits.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Utilities/scalar_traits.h b/src/Utilities/scalar_traits.h index 4954f5fa3..b9d4a929b 100644 --- a/src/Utilities/scalar_traits.h +++ b/src/Utilities/scalar_traits.h @@ -31,7 +31,6 @@ struct scalar_traits }; typedef T real_type; typedef T value_type; - static inline T* get_address(T* a) { return a; } }; template @@ -43,7 +42,6 @@ struct scalar_traits> }; typedef T real_type; typedef std::complex value_type; - static inline T* get_address(std::complex* a) { return reinterpret_cast(a); } }; } // namespace qmcplusplus