From ca0baf48d605e88194a5fef2a781612072d32f32 Mon Sep 17 00:00:00 2001
From: Philip Fackler <facklerpw@ornl.gov>
Date: Fri, 1 Sep 2023 11:19:01 -0400
Subject: [PATCH] Further template propagation to fix offload build

---
 src/Numerics/OneDimGridFactory.cpp            |    9 +-
 src/Numerics/OneDimGridFactory.h              |    6 +-
 src/Numerics/SoaCartesianTensor.h             |    2 +-
 src/Numerics/SoaSphericalTensor.h             |    2 +
 src/Particle/CMakeLists.txt                   |    9 +
 src/Particle/DistanceTableT.h                 |  529 ++++
 src/Particle/DynamicCoordinatesT.cpp          |   43 +
 src/Particle/DynamicCoordinatesT.h            |  154 +
 src/Particle/Lattice/LRBreakupParameters.h    |    2 +-
 src/Particle/LongRange/KContainerT.cpp        |  272 ++
 src/Particle/LongRange/KContainerT.h          |  115 +
 src/Particle/LongRange/StructFactT.cpp        |  249 ++
 src/Particle/LongRange/StructFactT.h          |  159 +
 src/Particle/MCCoordsT.cpp                    |   69 +
 src/Particle/MCCoordsT.hpp                    |   82 +
 src/Particle/ParticleSetT.cpp                 | 1200 +++++++
 src/Particle/ParticleSetT.h                   |  980 ++++++
 src/Particle/ParticleSetTraits.h              |   85 +
 src/Particle/RealSpacePositionsT.h            |   96 +
 src/Particle/RealSpacePositionsTOMPTarget.h   |  328 ++
 src/Particle/SimulationCellT.cpp              |   74 +
 src/Particle/SimulationCellT.h                |   71 +
 src/Particle/SoaDistanceTableAAT.h            |  237 ++
 src/Particle/SoaDistanceTableAATOMPTarget.h   |  624 ++++
 src/Particle/SoaDistanceTableABT.h            |  170 +
 src/Particle/SoaDistanceTableABTOMPTarget.h   |  513 +++
 src/Particle/VirtualParticleSetT.cpp          |  272 ++
 src/Particle/VirtualParticleSetT.h            |  175 ++
 src/Particle/createDistanceTableT.cpp         |  240 ++
 src/Particle/createDistanceTableT.h           |   89 +
 .../createDistanceTableTOMPTarget.cpp         |  248 ++
 src/QMCWaveFunctions/BasisSetBaseT.h          |  222 ++
 .../BsplineFactory/BsplineSetT.h              |  412 +--
 .../BsplineFactory/SplineC2CT.cpp             |   10 +-
 .../BsplineFactory/SplineC2CT.h               |   10 +-
 .../BsplineFactory/SplineR2RT.cpp             |  901 +++---
 .../BsplineFactory/SplineR2RT.h               |  425 +--
 src/QMCWaveFunctions/CMakeLists.txt           |   24 +-
 src/QMCWaveFunctions/CompositeSPOSetT.cpp     |   10 +-
 src/QMCWaveFunctions/CompositeSPOSetT.h       |   10 +-
 .../ElectronGas/FreeOrbitalBuilderT.cpp       |    6 +-
 .../ElectronGas/FreeOrbitalBuilderT.h         |    4 +-
 .../ElectronGas/FreeOrbitalT.cpp              | 1182 ++++---
 .../ElectronGas/FreeOrbitalT.h                |  131 +-
 .../HarmonicOscillator/SHOSetBuilderT.cpp     |  333 +-
 .../HarmonicOscillator/SHOSetBuilderT.h       |   67 +-
 .../HarmonicOscillator/SHOSetT.cpp            |  874 +++---
 .../HarmonicOscillator/SHOSetT.h              |  283 +-
 src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp |  923 ++++++
 src/QMCWaveFunctions/LCAO/AOBasisBuilderT.h   |   75 +
 .../LCAO/CuspCorrectionConstructionT.cpp      |   22 +-
 .../LCAO/CuspCorrectionConstructionT.h        |   17 +-
 .../LCAO/LCAOSpinorBuilderT.cpp               |  343 +-
 .../LCAO/LCAOSpinorBuilderT.h                 |   82 +-
 .../LCAO/LCAOrbitalBuilderT.cpp               |  303 +-
 .../LCAO/LCAOrbitalBuilderT.h                 |   14 +-
 src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp  | 1764 ++++++-----
 src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h    |   44 +-
 .../LCAO/LCAOrbitalSetWithCorrectionT.cpp     |   82 +-
 .../LCAO/LCAOrbitalSetWithCorrectionT.h       |   74 +-
 .../LCAO/MultiFunctorAdapter.h                |   56 +
 .../LCAO/RadialOrbitalSetBuilder.h            |    2 +-
 .../LCAO/SoaAtomicBasisSetT.h                 |  775 +++++
 .../LCAO/SoaCuspCorrectionT.cpp               |  259 +-
 .../LCAO/SoaCuspCorrectionT.h                 |  186 +-
 .../LCAO/SoaLocalizedBasisSetT.cpp            |  469 +++
 .../LCAO/SoaLocalizedBasisSetT.h              |  190 ++
 src/QMCWaveFunctions/OptimizableObjectT.h     |  151 +
 src/QMCWaveFunctions/PlaneWave/PWBasisT.h     |  534 ++--
 .../PlaneWave/PWOrbitalSetT.cpp               |  209 +-
 .../PlaneWave/PWOrbitalSetT.h                 |  224 +-
 src/QMCWaveFunctions/RotatedSPOsT.cpp         | 2767 +++++++++--------
 src/QMCWaveFunctions/RotatedSPOsT.h           |  781 ++---
 .../SPOSetBuilderFactoryT.cpp                 |   50 +-
 src/QMCWaveFunctions/SPOSetBuilderFactoryT.h  |    7 +-
 src/QMCWaveFunctions/SPOSetScannerT.h         |  431 +--
 src/QMCWaveFunctions/SPOSetT.cpp              |  634 ++--
 src/QMCWaveFunctions/SPOSetT.h                | 1156 +++----
 src/QMCWaveFunctions/SpinorSetT.cpp           |  954 +++---
 src/QMCWaveFunctions/SpinorSetT.h             |  443 +--
 src/QMCWaveFunctions/VariableSetT.cpp         |  346 +++
 src/QMCWaveFunctions/VariableSetT.h           |  336 ++
 .../tests/ConstantSPOSetT.cpp                 |  155 +-
 src/QMCWaveFunctions/tests/ConstantSPOSetT.h  |  155 +-
 src/QMCWaveFunctions/tests/FakeSPOT.cpp       |  221 +-
 src/QMCWaveFunctions/tests/FakeSPOT.h         |   65 +-
 .../tests/test_ConstantSPOSetT.cpp            |    4 +-
 src/type_traits/complex_help.hpp              |    3 +
 88 files changed, 19254 insertions(+), 8060 deletions(-)
 create mode 100644 src/Particle/DistanceTableT.h
 create mode 100644 src/Particle/DynamicCoordinatesT.cpp
 create mode 100644 src/Particle/DynamicCoordinatesT.h
 create mode 100644 src/Particle/LongRange/KContainerT.cpp
 create mode 100644 src/Particle/LongRange/KContainerT.h
 create mode 100644 src/Particle/LongRange/StructFactT.cpp
 create mode 100644 src/Particle/LongRange/StructFactT.h
 create mode 100644 src/Particle/MCCoordsT.cpp
 create mode 100644 src/Particle/MCCoordsT.hpp
 create mode 100644 src/Particle/ParticleSetT.cpp
 create mode 100644 src/Particle/ParticleSetT.h
 create mode 100644 src/Particle/ParticleSetTraits.h
 create mode 100644 src/Particle/RealSpacePositionsT.h
 create mode 100644 src/Particle/RealSpacePositionsTOMPTarget.h
 create mode 100644 src/Particle/SimulationCellT.cpp
 create mode 100644 src/Particle/SimulationCellT.h
 create mode 100644 src/Particle/SoaDistanceTableAAT.h
 create mode 100644 src/Particle/SoaDistanceTableAATOMPTarget.h
 create mode 100644 src/Particle/SoaDistanceTableABT.h
 create mode 100644 src/Particle/SoaDistanceTableABTOMPTarget.h
 create mode 100644 src/Particle/VirtualParticleSetT.cpp
 create mode 100644 src/Particle/VirtualParticleSetT.h
 create mode 100644 src/Particle/createDistanceTableT.cpp
 create mode 100644 src/Particle/createDistanceTableT.h
 create mode 100644 src/Particle/createDistanceTableTOMPTarget.cpp
 create mode 100644 src/QMCWaveFunctions/BasisSetBaseT.h
 create mode 100644 src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp
 create mode 100644 src/QMCWaveFunctions/LCAO/AOBasisBuilderT.h
 create mode 100644 src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h
 create mode 100644 src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp
 create mode 100644 src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h
 create mode 100644 src/QMCWaveFunctions/OptimizableObjectT.h
 create mode 100644 src/QMCWaveFunctions/VariableSetT.cpp
 create mode 100644 src/QMCWaveFunctions/VariableSetT.h

diff --git a/src/Numerics/OneDimGridFactory.cpp b/src/Numerics/OneDimGridFactory.cpp
index 16a17ec9b3..bc90a9f505 100644
--- a/src/Numerics/OneDimGridFactory.cpp
+++ b/src/Numerics/OneDimGridFactory.cpp
@@ -13,19 +13,21 @@
 
 
 #include "OneDimGridFactory.h"
+#include "Configuration.h"
 #include "OhmmsData/AttributeSet.h"
 #include "Message/UniformCommunicateError.h"
 
 namespace qmcplusplus
 {
-std::unique_ptr<OneDimGridFactory::GridType> OneDimGridFactory::createGrid(xmlNodePtr cur)
+template <typename T>
+std::unique_ptr<typename OneDimGridFactory<T>::GridType> OneDimGridFactory<T>::createGrid(xmlNodePtr cur)
 {
   std::unique_ptr<GridType> agrid;
   RealType ri     = 1e-5;
   RealType rf     = 100.0;
   RealType ascale = -1.0e0;
   RealType astep  = 1.25e-2;
-  IndexType npts  = 1001;
+  QMCTraits::IndexType npts  = 1001;
   std::string gridType("log");
   std::string gridID("invalid");
   OhmmsAttributeSet radAttrib;
@@ -74,4 +76,7 @@ std::unique_ptr<OneDimGridFactory::GridType> OneDimGridFactory::createGrid(xmlNo
   }
   return agrid;
 }
+
+template struct OneDimGridFactory<double>;
+template struct OneDimGridFactory<float>;
 } // namespace qmcplusplus
diff --git a/src/Numerics/OneDimGridFactory.h b/src/Numerics/OneDimGridFactory.h
index 6365db25aa..d27b1fb904 100644
--- a/src/Numerics/OneDimGridFactory.h
+++ b/src/Numerics/OneDimGridFactory.h
@@ -14,15 +14,17 @@
 
 #ifndef QMCPLUSPLUS_ONEDIMGRIDFACTORY_H
 #define QMCPLUSPLUS_ONEDIMGRIDFACTORY_H
-#include "Configuration.h"
 #include "Numerics/OneDimGridFunctor.h"
+#include "Numerics/LibxmlNumericIO.h"
 
 namespace qmcplusplus
 {
 /** Factory class using Singleton pattern
  */
-struct OneDimGridFactory : public QMCTraits
+template <typename T>
+struct OneDimGridFactory
 {
+  using RealType = T;
   ///typedef of the one-dimensional grid
   using GridType = OneDimGridBase<RealType>;
 
diff --git a/src/Numerics/SoaCartesianTensor.h b/src/Numerics/SoaCartesianTensor.h
index 21fa7f52bf..540ab826b0 100644
--- a/src/Numerics/SoaCartesianTensor.h
+++ b/src/Numerics/SoaCartesianTensor.h
@@ -37,7 +37,7 @@ namespace qmcplusplus
 template<class T>
 struct SoaCartesianTensor
 {
-  using value_type = T;
+  using ValueType = T;
   using ggg_type   = TinyVector<Tensor<T, 3>, 3>;
 
   ///maximum angular momentum
diff --git a/src/Numerics/SoaSphericalTensor.h b/src/Numerics/SoaSphericalTensor.h
index 56c638b42e..c5e4f3e1ae 100644
--- a/src/Numerics/SoaSphericalTensor.h
+++ b/src/Numerics/SoaSphericalTensor.h
@@ -37,6 +37,8 @@ namespace qmcplusplus
 template<typename T>
 struct SoaSphericalTensor
 {
+  using ValueType = T;
+
   ///maximum angular momentum for the center
   int Lmax;
   /// Normalization factors
diff --git a/src/Particle/CMakeLists.txt b/src/Particle/CMakeLists.txt
index 42f036d057..b6517626c1 100644
--- a/src/Particle/CMakeLists.txt
+++ b/src/Particle/CMakeLists.txt
@@ -15,22 +15,30 @@
 set(PARTICLE
     InitMolecularSystem.cpp
     SimulationCell.cpp
+    SimulationCellT.cpp
     ParticleSetPool.cpp
     ParticleSet.cpp
+    ParticleSetT.cpp
     PSdispatcher.cpp
     VirtualParticleSet.cpp
+    VirtualParticleSetT.cpp
     ParticleSet.BC.cpp
     DynamicCoordinatesBuilder.cpp
+    DynamicCoordinatesT.cpp
     MCCoords.cpp
+    MCCoordsT.cpp
     MCWalkerConfiguration.cpp
     WalkerConfigurations.cpp
     SpeciesSet.cpp
     SampleStack.cpp
     createDistanceTableAA.cpp
     createDistanceTableAB.cpp
+    createDistanceTableT.cpp
     HDFWalkerInputManager.cpp
     LongRange/KContainer.cpp
+    LongRange/KContainerT.cpp
     LongRange/StructFact.cpp
+    LongRange/StructFactT.cpp
     LongRange/LPQHIBasis.cpp
     LongRange/LPQHISRCoulombBasis.cpp
     LongRange/EwaldHandlerQuasi2D.cpp
@@ -51,6 +59,7 @@ target_include_directories(qmcparticle PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
 target_link_libraries(qmcparticle PRIVATE platform_cpu_LA)
 target_link_libraries(qmcparticle PUBLIC qmcnumerics qmcutil platform_runtime)
 set(PARTICLE_OMPTARGET_SRCS
+    createDistanceTableTOMPTarget.cpp
     createDistanceTableAAOMPTarget.cpp
     createDistanceTableABOMPTarget.cpp)
 
diff --git a/src/Particle/DistanceTableT.h b/src/Particle/DistanceTableT.h
new file mode 100644
index 0000000000..5eaba1bd44
--- /dev/null
+++ b/src/Particle/DistanceTableT.h
@@ -0,0 +1,529 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
+// Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
+//                    Illinois at Urbana-Champaign Jaron T. Krogel,
+//                    krogeljt@ornl.gov, Oak Ridge National Laboratory Mark A.
+//                    Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_DISTANCETABLEDATAIMPLT_H
+#define QMCPLUSPLUS_DISTANCETABLEDATAIMPLT_H
+
+#include <limits>
+
+#include "CPU/SIMD/aligned_allocator.hpp"
+#include "DTModes.h"
+#include "OhmmsPETE/OhmmsMatrix.h"
+#include "OhmmsPETE/OhmmsVector.h"
+#include "OhmmsSoA/VectorSoaContainer.h"
+#include "Particle/ParticleSetT.h"
+#include "Particle/ParticleSetTraits.h"
+
+namespace qmcplusplus
+{
+class ResourceCollection;
+
+/** @ingroup nnlist
+ * @brief Abstract class to manage operations on pair data between two
+ * ParticleSets.
+ *
+ * Each DistanceTable object is defined by Source and Target of ParticleSet
+ * types. This base class doesn't contain storage. It is intended for
+ * update/compute invoked by ParticleSet. Derived AA/AB classes handle the
+ * actual storage and data access.
+ */
+template <typename T>
+class DistanceTableT
+{
+public:
+    static constexpr unsigned DIM = OHMMS_DIM;
+
+    using IndexType = typename ParticleSetTraits<T>::IndexType;
+    using RealType = typename ParticleSetTraits<T>::RealType;
+    using PosType = typename ParticleSetTraits<T>::PosType;
+    using DistRow = Vector<RealType, aligned_allocator<RealType>>;
+    using DisplRow = VectorSoaContainer<RealType, DIM>;
+
+protected:
+    // FIXME. once DT takes only DynamicCoordinates, change this type as well.
+    const ParticleSetT<T>& origin_;
+
+    const size_t num_sources_;
+    const size_t num_targets_;
+
+    /// name of the table
+    const std::string name_;
+
+    /// operation modes defined by DTModes
+    DTModes modes_;
+
+public:
+    /// constructor using source and target ParticleSet
+    DistanceTableT(const ParticleSetT<T>& source, const ParticleSetT<T>& target,
+        DTModes modes) :
+        origin_(source),
+        num_sources_(source.getTotalNum()),
+        num_targets_(target.getTotalNum()),
+        name_(source.getName() + "_" + target.getName()),
+        modes_(modes)
+    {
+    }
+
+    /// copy constructor. deleted
+    DistanceTableT(const DistanceTableT&) = delete;
+
+    /// virtual destructor
+    virtual ~DistanceTableT() = default;
+
+    /// get modes
+    inline DTModes
+    getModes() const
+    {
+        return modes_;
+    }
+
+    /// set modes
+    inline void
+    setModes(DTModes modes)
+    {
+        modes_ = modes;
+    }
+
+    /// return the name of table
+    inline const std::string&
+    getName() const
+    {
+        return name_;
+    }
+
+    /// returns a const reference to the origin (source) particle set
+    const ParticleSetT<T>&
+    get_origin() const
+    {
+        return origin_;
+    }
+
+    /// returns the number of centers, queried from the origin particle set
+    inline size_t
+    centers() const
+    {
+        return origin_.getTotalNum();
+    }
+
+    /// returns the number of target particles recorded at construction
+    inline size_t
+    targets() const
+    {
+        return num_targets_;
+    }
+
+    /// returns the number of source particles recorded at construction
+    inline size_t
+    sources() const
+    {
+        return num_sources_;
+    }
+
+    /** evaluate the full Distance Table
+     * @param P the target particle set
+     */
+    virtual void
+    evaluate(ParticleSetT<T>& P) = 0;
+    virtual void
+    mw_evaluate(const RefVectorWithLeader<DistanceTableT>& dt_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& p_list) const
+    {
+        for (int iw = 0; iw < dt_list.size(); iw++)
+            dt_list[iw].evaluate(p_list[iw]);
+    }
+
+    /** recompute multi walker internal data
+     * @param dt_list the distance table batch
+     * @param p_list the target particle set batch
+     * @param recompute if true, must recompute. Otherwise, implementation
+     * dependent.
+     */
+    virtual void
+    mw_recompute(const RefVectorWithLeader<DistanceTableT>& dt_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& p_list,
+        const std::vector<bool>& recompute) const
+    {
+        for (int iw = 0; iw < dt_list.size(); iw++)
+            if (recompute[iw])
+                dt_list[iw].evaluate(p_list[iw]);
+    }
+
+    /** evaluate the temporary pair relations when a move is proposed
+     * @param P the target particle set
+     * @param rnew proposed new position
+     * @param iat the particle to be moved
+     * @param prepare_old if true, prepare (temporary) old distances and
+     * displacements for using getOldDists and getOldDispls functions in
+     * acceptMove.
+     *
+     * Note: some distance table consumers (WaveFunctionComponent) have
+     * optimized code paths which require prepare_old = true for accepting a
+     * move. Drivers/Hamiltonians know whether moves will be accepted or not and
+     * manage this flag when calling ParticleSet::makeMoveXXX functions.
+     */
+    virtual void
+    move(const ParticleSetT<T>& P, const PosType& rnew, const IndexType iat,
+        bool prepare_old = true) = 0;
+
+    /** walker batched version of move. this function may be implemented
+     * asynchronously. Additional synchronization for collecting results should
+     * be handled by the caller. If DTModes::NEED_TEMP_DATA_ON_HOST, host data
+     * will be updated. If no consumer requests data on the host, the transfer
+     * is skipped.
+     */
+    virtual void
+    mw_move(const RefVectorWithLeader<DistanceTableT>& dt_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& p_list,
+        const std::vector<PosType>& rnew_list, const IndexType iat,
+        bool prepare_old = true) const
+    {
+        for (int iw = 0; iw < dt_list.size(); iw++)
+            dt_list[iw].move(p_list[iw], rnew_list[iw], iat, prepare_old);
+    }
+
+    /** update the distance table by the pair relations from the temporal
+     * position. Used when a move is accepted in regular mode
+     * @param jat the particle with an accepted move
+     */
+    virtual void
+    update(IndexType jat) = 0;
+
+    /** fill partially the distance table by the pair relations from the
+     * temporary or old particle position. Used in forward mode when a move is
+     * rejected. This base version calls update(jat) only if from_temp is true.
+     * @param jat the particle index, forwarded to update()
+     * @param from_temp if true, copy from temp. if false, copy from old
+     */
+    virtual void
+    updatePartial(IndexType jat, bool from_temp)
+    {
+        if (from_temp)
+            update(jat);
+    }
+
+    /** walker batched version of updatePartial.
+     * If not DTModes::NEED_TEMP_DATA_ON_HOST, host data is not up-to-date and
+     * host distance table will not be updated.
+     */
+    virtual void
+    mw_updatePartial(const RefVectorWithLeader<DistanceTableT>& dt_list,
+        IndexType jat, const std::vector<bool>& from_temp)
+    {
+        for (int iw = 0; iw < dt_list.size(); iw++)
+            dt_list[iw].updatePartial(jat, from_temp[iw]);
+    }
+
+    /** finalize distance table calculation after particle-by-particle moves
+     * if update() doesn't make the table up-to-date during p-by-p moves
+     * finalizePbyP takes action to bring the table up-to-date
+     */
+    virtual void
+    finalizePbyP(const ParticleSetT<T>& P)
+    {
+    }
+
+    /** walker batched version of finalizePbyP
+     * If not DTModes::NEED_TEMP_DATA_ON_HOST, host distance table data is not
+     * updated at all during p-by-p. Thus, a recompute is necessary to update the
+     * whole host distance table for consumers like the Coulomb potential.
+     */
+    virtual void
+    mw_finalizePbyP(const RefVectorWithLeader<DistanceTableT>& dt_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& p_list) const
+    {
+        for (int iw = 0; iw < dt_list.size(); iw++)
+            dt_list[iw].finalizePbyP(p_list[iw]);
+    }
+
+    /** find the first nearest neighbor
+     * @param iat source particle id
+     * @param r distance
+     * @param dr displacement
+     * @param newpos if true, use the data in temp_r_ and temp_dr_ for the
+     * proposed move. if false, use the data in distance_[iat] and
+     * displacements_[iat]
+     * @return the id of the nearest particle, -1 not found
+     */
+    virtual int
+    get_first_neighbor(
+        IndexType iat, RealType& r, PosType& dr, bool newpos) const = 0;
+
+    [[noreturn]] inline void
+    print(std::ostream& os)
+    {
+        throw std::runtime_error("DistanceTable::print is not supported");
+    }
+
+    /// initialize a shared resource and hand it to a collection
+    virtual void
+    createResource(ResourceCollection& collection) const
+    {
+    }
+
+    /// acquire a shared resource from a collection
+    virtual void
+    acquireResource(ResourceCollection& collection,
+        const RefVectorWithLeader<DistanceTableT>& dt_list) const
+    {
+    }
+
+    /// return a shared resource to a collection
+    virtual void
+    releaseResource(ResourceCollection& collection,
+        const RefVectorWithLeader<DistanceTableT>& dt_list) const
+    {
+    }
+};
+
+/** AA type of DistanceTable containing storage */
+template <typename T>
+class DistanceTableAAT : public DistanceTableT<T>
+{
+public:
+    using DistRow = typename DistanceTableT<T>::DistRow;
+    using DisplRow = typename DistanceTableT<T>::DisplRow;
+    using RealType = typename DistanceTableT<T>::RealType;
+
+protected:
+    /** distances_[num_targets_][num_sources_], [i][j] = |r_A2[j] - r_A1[i]|
+     *  Note: Derived classes decide if it is a memory view or the actual
+     * storage. Only the lower triangle (j<i) data can be accessed safely.
+     *            There is no bound check to protect j>=i terms as the nature of
+     * operator[]. When the storage of the table is allocated as a single memory
+     * segment, out-of-bound access is still within the segment and thus doesn't
+     * trigger an alarm by the address sanitizer.
+     */
+    std::vector<DistRow> distances_;
+
+    /** displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] -
+     * r_A1[i]. Note: Derived classes decide if it is a memory view or the actual
+     * storage. Only the lower triangle (j<i) is defined. See the note of
+     * distances_.
+     */
+    std::vector<DisplRow> displacements_;
+
+    /// temp_r
+    DistRow temp_r_;
+
+    /// temp_dr
+    DisplRow temp_dr_;
+
+    /// old distances
+    DistRow old_r_;
+
+    /// old displacements
+    DisplRow old_dr_;
+
+public:
+    /// constructor: the target ParticleSet is passed to the base as both source and target
+    DistanceTableAAT(const ParticleSetT<T>& target, DTModes modes) :
+        DistanceTableT<T>(target, target, modes)
+    {
+    }
+
+    /** return full table distances
+     */
+    const std::vector<DistRow>&
+    getDistances() const
+    {
+        return distances_;
+    }
+
+    /** return full table displacements
+     */
+    const std::vector<DisplRow>&
+    getDisplacements() const
+    {
+        return displacements_;
+    }
+
+    /** return a row of distances for a given target particle
+     */
+    const DistRow&
+    getDistRow(int iel) const
+    {
+        return distances_[iel];
+    }
+
+    /** return a row of displacements for a given target particle
+     */
+    const DisplRow&
+    getDisplRow(int iel) const
+    {
+        return displacements_[iel];
+    }
+
+    /** return the temporary distances when a move is proposed
+     */
+    const DistRow&
+    getTempDists() const
+    {
+        return temp_r_;
+    }
+
+    /** return the temporary displacements when a move is proposed
+     */
+    const DisplRow&
+    getTempDispls() const
+    {
+        return temp_dr_;
+    }
+
+    /** return old distances set up by move() for optimized distance table
+     * consumers
+     */
+    const DistRow&
+    getOldDists() const
+    {
+        return old_r_;
+    }
+
+    /** return old displacements set up by move() for optimized distance table
+     * consumers
+     */
+    const DisplRow&
+    getOldDispls() const
+    {
+        return old_dr_;
+    }
+
+    virtual size_t
+    get_num_particls_stored() const
+    {
+        return 0; // base-class default
+    }
+
+    /// return multi walker temporary pair distance table data pointer; this base version always throws
+    [[noreturn]] virtual const RealType*
+    getMultiWalkerTempDataPtr() const
+    {
+        throw std::runtime_error(
+            this->name_ + " multi walker data pointer for temp not supported");
+    }
+
+    virtual const RealType*
+    mw_evalDistsInRange(const RefVectorWithLeader<DistanceTableT<T>>& dt_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& p_list, size_t range_begin,
+        size_t range_end) const
+    {
+        return nullptr; // base-class default: no data pointer provided
+    }
+};
+
+/** AB type of DistanceTable containing storage */
+template <typename T>
+class DistanceTableABT : public DistanceTableT<T>
+{
+public:
+    using DistRow = typename DistanceTableT<T>::DistRow;
+    using DisplRow = typename DistanceTableT<T>::DisplRow;
+    using RealType = typename DistanceTableT<T>::RealType;
+
+protected:
+    /** distances_[num_targets_][num_sources_], [i][j] = |r_A2[j] - r_A1[i]|
+     *  Note: Derived classes decide if it is a memory view or the actual
+     * storage
+     */
+    std::vector<DistRow> distances_;
+
+    /** displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] -
+     * r_A1[i]. Note: Derived classes decide if it is a memory view or the actual
+     * storage
+     */
+    std::vector<DisplRow> displacements_;
+
+    /// temp_r
+    DistRow temp_r_;
+
+    /// temp_dr
+    DisplRow temp_dr_;
+
+public:
+    /// constructor using source and target ParticleSet
+    DistanceTableABT(const ParticleSetT<T>& source,
+        const ParticleSetT<T>& target, DTModes modes) :
+        DistanceTableT<T>(source, target, modes)
+    {
+    }
+
+    /** return full table distances
+     */
+    const std::vector<DistRow>&
+    getDistances() const
+    {
+        return distances_;
+    }
+
+    /** return full table displacements
+     */
+    const std::vector<DisplRow>&
+    getDisplacements() const
+    {
+        return displacements_;
+    }
+
+    /** return a row of distances for a given target particle
+     */
+    const DistRow&
+    getDistRow(int iel) const
+    {
+        return distances_[iel];
+    }
+
+    /** return a row of displacements for a given target particle
+     */
+    const DisplRow&
+    getDisplRow(int iel) const
+    {
+        return displacements_[iel];
+    }
+
+    /** return the temporary distances when a move is proposed
+     */
+    const DistRow&
+    getTempDists() const
+    {
+        return temp_r_;
+    }
+
+    /** return the temporary displacements when a move is proposed
+     */
+    const DisplRow&
+    getTempDispls() const
+    {
+        return temp_dr_;
+    }
+
+    /// return multi-walker full (all pairs) distance table data pointer; this base version always throws
+    [[noreturn]] virtual const RealType*
+    getMultiWalkerDataPtr() const
+    {
+        throw std::runtime_error(
+            this->name_ + " multi walker data pointer not supported");
+    }
+
+    /// return stride of per target pctl data. full table data = stride * num of
+    /// target particles. This base version always throws.
+    [[noreturn]] virtual size_t
+    getPerTargetPctlStrideSize() const
+    {
+        throw std::runtime_error(
+            this->name_ + " getPerTargetPctlStrideSize not supported");
+    }
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/Particle/DynamicCoordinatesT.cpp b/src/Particle/DynamicCoordinatesT.cpp
new file mode 100644
index 0000000000..b563d264c1
--- /dev/null
+++ b/src/Particle/DynamicCoordinatesT.cpp
@@ -0,0 +1,43 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2020 QMCPACK developers.
+//
+// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//
+// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "Particle/DynamicCoordinatesT.h"
+
+#include "Particle/RealSpacePositionsT.h"
+#include "Particle/RealSpacePositionsTOMPTarget.h"
+
+namespace qmcplusplus
+{
+/** Create the DynamicCoordinatesT<T> implementation selected by kind (DC_POS or DC_POS_OFFLOAD);
+ * any other kind yields an empty pointer. Explicitly instantiated below for real/complex float/double. */
+template <typename T>
+std::unique_ptr<DynamicCoordinatesT<T>>
+createDynamicCoordinatesT(const DynamicCoordinateKind kind)
+{
+    if (kind == DynamicCoordinateKind::DC_POS)
+        return std::make_unique<RealSpacePositionsT<T>>();
+    else if (kind == DynamicCoordinateKind::DC_POS_OFFLOAD)
+        return std::make_unique<RealSpacePositionsTOMPTarget<T>>();
+    // unrecognized kind: fall through and return an empty (null) pointer
+    return std::unique_ptr<RealSpacePositionsT<T>>();
+}
+
+template std::unique_ptr<DynamicCoordinatesT<double>>
+createDynamicCoordinatesT<double>(const DynamicCoordinateKind kind);
+template std::unique_ptr<DynamicCoordinatesT<float>>
+createDynamicCoordinatesT<float>(const DynamicCoordinateKind kind);
+template std::unique_ptr<DynamicCoordinatesT<std::complex<double>>>
+createDynamicCoordinatesT<std::complex<double>>(
+    const DynamicCoordinateKind kind);
+template std::unique_ptr<DynamicCoordinatesT<std::complex<float>>>
+createDynamicCoordinatesT<std::complex<float>>(
+    const DynamicCoordinateKind kind);
+} // namespace qmcplusplus
diff --git a/src/Particle/DynamicCoordinatesT.h b/src/Particle/DynamicCoordinatesT.h
new file mode 100644
index 0000000000..d7fc1994fa
--- /dev/null
+++ b/src/Particle/DynamicCoordinatesT.h
@@ -0,0 +1,154 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2020 QMCPACK developers.
+//
+// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//
+// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+/** @file DynamicCoordinatesT.h
+ */
+#ifndef QMCPLUSPLUS_DYNAMICCOORDINATEST_H
+#define QMCPLUSPLUS_DYNAMICCOORDINATEST_H
+
+#include <memory>
+
+#include "OhmmsSoA/VectorSoaContainer.h"
+#include "ParticleSetTraits.h"
+#include "type_traits/template_types.hpp"
+#include "DynamicCoordinates.h"
+
+namespace qmcplusplus
+{
+class ResourceCollection;
+
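+// DynamicCoordinateKind is not defined in this header; it is presumably
+// supplied by the DynamicCoordinates.h include above. Former local definition:
+// enum class DynamicCoordinateKind
+// {
+//     DC_POS, // SoA positions
+//     DC_POS_OFFLOAD, // SoA positions with OpenMP offload
+// };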
+
+/** Abstract interface to the quantum variables of all the particles; the
+ *  storage kind is fixed at construction and reported by getKind(). */
+template <typename T>
+class DynamicCoordinatesT
+{
+public:
+    using RealType = typename ParticleSetTraits<T>::RealType;
+    using PosType = typename ParticleSetTraits<T>::PosType;
+    using ParticlePos = typename LatticeParticleTraits<T>::ParticlePos;
+    using PosVectorSoa =
+        VectorSoaContainer<RealType, ParticleSetTraits<T>::DIM>;
+
+    DynamicCoordinatesT(const DynamicCoordinateKind kind_in) :
+        variable_kind_(kind_in)
+    {
+    }
+
+    DynamicCoordinatesT(const DynamicCoordinatesT&) = default;
+    DynamicCoordinatesT&
+    operator=(const DynamicCoordinatesT&) = delete; // variable_kind_ is const
+
+    DynamicCoordinateKind
+    getKind() const
+    {
+        return variable_kind_;
+    }
+
+    virtual ~DynamicCoordinatesT() = default;
+
+    virtual std::unique_ptr<DynamicCoordinatesT>
+    makeClone() = 0; // implemented by each concrete coordinates class
+
+    /** resize internal storages based on the number of particles
+     *  @param n the number of particles
+     */
+    virtual void
+    resize(size_t n) = 0;
+    /// return the number of particles
+    virtual size_t
+    size() const = 0;
+
+    /// overwrite the positions of all the particles.
+    virtual void
+    setAllParticlePos(const ParticlePos& R) = 0;
+    /// overwrite the position of one particle.
+    virtual void
+    setOneParticlePos(const PosType& pos, size_t iat) = 0;
+    /** copy the active positions of particles with a uniform id in all the
+     * walkers to a single internal buffer.
+     *  @param coords_list a batch of DynamicCoordinates
+     *  @param iat particle id, uniform across coords_list
+     *  @param new_positions proposed positions
+     */
+    virtual void
+    mw_copyActivePos(
+        const RefVectorWithLeader<DynamicCoordinatesT>& coords_list, size_t iat,
+        const std::vector<PosType>& new_positions) const
+    {
+        assert(this == &coords_list.getLeader()); // base version copies nothing
+    }
+
+    /** overwrite the positions of particles with a uniform id in all the
+     * walkers upon acceptance.
+     *  @param coords_list a batch of DynamicCoordinates
+     *  @param iat particle id, uniform across coords_list
+     *  @param new_positions proposed positions
+     *  @param isAccepted accept/reject info
+     */
+    virtual void
+    mw_acceptParticlePos(
+        const RefVectorWithLeader<DynamicCoordinatesT>& coords_list, size_t iat,
+        const std::vector<PosType>& new_positions,
+        const std::vector<bool>& isAccepted) const = 0;
+
+    /// all particle position accessor
+    virtual const PosVectorSoa&
+    getAllParticlePos() const = 0;
+    /// one particle position accessor
+    virtual PosType
+    getOneParticlePos(size_t iat) const = 0;
+
+    /// secure internal data consistency after p-by-p moves (no-op by default)
+    virtual void
+    donePbyP()
+    {
+    }
+
+    /// initialize a shared resource and hand it to a collection (no-op here)
+    virtual void
+    createResource(ResourceCollection& collection) const
+    {
+    }
+
+    /// acquire a shared resource from a collection (no-op by default)
+    virtual void
+    acquireResource(ResourceCollection& collection,
+        const RefVectorWithLeader<DynamicCoordinatesT>& coords_list) const
+    {
+    }
+
+    /// return a shared resource to a collection (no-op by default)
+    virtual void
+    releaseResource(ResourceCollection& collection,
+        const RefVectorWithLeader<DynamicCoordinatesT>& coords_list) const
+    {
+    }
+
+protected:
+    /// type of dynamic coordinates; set once by the constructor
+    const DynamicCoordinateKind variable_kind_;
+};
+
+/** create DynamicCoordinates based on kind; DC_POS is used when no kind is
+ *  passed. Only declared here: the definition lives in a source file. */
+template <typename T>
+std::unique_ptr<DynamicCoordinatesT<T>> createDynamicCoordinatesT(
+    const DynamicCoordinateKind kind = DynamicCoordinateKind::DC_POS);
+} // namespace qmcplusplus
+#endif
diff --git a/src/Particle/Lattice/LRBreakupParameters.h b/src/Particle/Lattice/LRBreakupParameters.h
index da44f6fc40..4096bf0e42 100644
--- a/src/Particle/Lattice/LRBreakupParameters.h
+++ b/src/Particle/Lattice/LRBreakupParameters.h
@@ -57,7 +57,7 @@ class LRBreakupParameters<T, 3>
       T beta2 = (dot(v1, v1) * dot(c, v2) - dot(v1, v2) * dot(c, v1)) /
           (dot(v1, v1) * dot(v2, v2) - dot(v1, v2) * dot(v1, v2));
       TinyVector<T, 3> p = beta1 * v1 + beta2 * v2;
-      T dist             = sqrt(dot(p - c, p - c));
+      T dist             = std::sqrt(dot(p - c, p - c));
       LR_rc              = std::min(LR_rc, dist);
     }
     //Set KC for structure-factor and LRbreakups.
diff --git a/src/Particle/LongRange/KContainerT.cpp b/src/Particle/LongRange/KContainerT.cpp
new file mode 100644
index 0000000000..eee850387d
--- /dev/null
+++ b/src/Particle/LongRange/KContainerT.cpp
@@ -0,0 +1,272 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
+// Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
+//                    Illinois at Urbana-Champaign Mark A. Berrill,
+//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "KContainerT.h"
+
+#include "LRCoulombSingleton.h"
+#include "Message/Communicate.h"
+#include "Utilities/qmc_common.h"
+
+#include <cstdint>
+#include <map>
+
+namespace qmcplusplus
+{
+template <typename T>
+void
+KContainerT<T>::updateKLists(const ParticleLayout& lattice, RealType kc,
+    unsigned ndim, const PosType& twist, bool useSphere)
+{
+    kcutoff = kc; // read by findApproxMMax and BuildKLists
+    if (kcutoff <= 0.0) {
+        APP_ABORT("  Illegal cutoff for KContainer");
+    }
+    findApproxMMax(lattice, ndim); // bounds on the integer translations
+    BuildKLists(lattice, twist, useSphere); // fills kpts, ksq, kshell, minusk
+    // kshell stores shell boundaries, i.e. one entry more than there are shells
+    app_log() << "  KContainer initialised with cutoff " << kcutoff
+              << std::endl;
+    app_log() << "   # of K-shell  = " << (kshell.size() - 1) << std::endl;
+    app_log() << "   # of K points = " << kpts.size() << std::endl;
+    app_log() << std::endl;
+}
+
+template <typename T>
+void
+KContainerT<T>::findApproxMMax(const ParticleLayout& lattice, unsigned ndim)
+{
+    // Estimate the size of the parallelepiped that encompasses a sphere of
+    // kcutoff. mmax is stored as integer translations of the reciprocal cell
+    // vectors. Does not require an orthorhombic cell.
+    /* Old method.
+    //2pi is not included in lattice.b
+    Matrix<RealType> mmat;
+    mmat.resize(3,3);
+    for(int j=0;j<3;j++)
+      for(int i=0;i<3;i++){
+        mmat[i][j] = 0.0;
+        for(int k=0;k<3;k++)
+    mmat[i][j] = mmat[i][j] + 4.0*M_PI*M_PI*lattice.b(k)[i]*lattice.b(j)[k];
+      }
+
+    TinyVector<RealType,3> x,temp;
+    RealType tempr;
+    for(int idim=0;idim<3;idim++){
+      int i = ((idim)%3);
+      int j = ((idim+1)%3);
+      int k = ((idim+2)%3);
+
+      x[i] = 1.0;
+      x[j] = (mmat[j][k]*mmat[k][i] - mmat[k][k]*mmat[i][j]);
+      x[j]/= (mmat[j][j]*mmat[k][k] - mmat[j][k]*mmat[j][k]);
+      x[k] = -(mmat[k][i] + mmat[j][k]*x[j])/mmat[k][k];
+
+      for(i=0;i<3;i++){
+        temp[i] = 0.0;
+    for(j=0;j<3;j++)
+      temp[i] += mmat[i][j]*x[j];
+      }
+
+      tempr = dot(x,temp);
+      mmax[idim] = static_cast<int>(sqrt(4.0*kcut2/tempr)) + 1;
+    }
+    */
+    // see rmm, Electronic Structure, p. 85 for details
+    for (int i = 0; i < DIM; i++)
+        mmax[i] = static_cast<int>(
+                      std::floor(std::sqrt(dot(lattice.a(i), lattice.a(i))) *
+                          kcutoff / (2 * M_PI))) +
+            1; // i.e. floor(|a_i| * kcutoff / 2pi) + 1
+
+    mmax[DIM] = mmax[0]; // entry DIM: largest of the per-direction bounds
+    for (int i = 1; i < DIM; ++i)
+        mmax[DIM] = std::max(mmax[i], mmax[DIM]);
+
+    // overwrite the non-periodic direction to be zero
+    if (LRCoulombSingleton::isQuasi2D()) {
+        app_log() << "  No kspace sum perpendicular to slab " << std::endl;
+        mmax[2] = 0;
+    }
+    if (ndim < 3) {
+        app_log() << "  No kspace sum along z " << std::endl;
+        mmax[2] = 0;
+    }
+    if (ndim < 2)
+        mmax[1] = 0; // ndim < 2: no translations along y either
+}
+
+/** Enumerate the k-points, order them into shells of equal |k|^2 and fill
+ *  kpts, kpts_cart, kpts_cart_soa_, ksq, kshell and minusk.
+ *  @param lattice supercell; its k_cart() maps reduced k to Cartesian k
+ *  @param twist shift added to the reduced k-vector (sphere mode only)
+ *  @param useSphere true: keep points with |k|^2 <= kcutoff^2 and skip the
+ *         zero translation; false: take every point of the mmax box
+ *  On return mmax holds the largest translations actually recorded.
+ */
+template <typename T>
+void
+KContainerT<T>::BuildKLists(
+    const ParticleLayout& lattice, const PosType& twist, bool useSphere)
+{
+    TinyVector<int, DIM + 1> TempActualMax; // NOTE(review): assumes zero-init
+    TinyVector<int, DIM> kvec;
+    TinyVector<RealType, DIM> kvec_cart;
+    RealType modk2;
+    std::vector<TinyVector<int, DIM>> kpts_tmp;
+    std::vector<PosType> kpts_cart_tmp;
+    std::vector<RealType> ksq_tmp;
+    // unsorted candidates are collected first; the final order is set below
+    if (useSphere) {
+        const RealType kcut2 = kcutoff * kcutoff;
+        // Loop over guesses for valid k-points.
+        for (int i = -mmax[0]; i <= mmax[0]; i++) {
+            kvec[0] = i;
+            for (int j = -mmax[1]; j <= mmax[1]; j++) {
+                kvec[1] = j;
+                for (int k = -mmax[2]; k <= mmax[2]; k++) {
+                    kvec[2] = k;
+                    // Do not include k=0 in evaluations.
+                    if (i == 0 && j == 0 && k == 0)
+                        continue;
+                    // Convert kvec to Cartesian
+                    kvec_cart = lattice.k_cart(kvec + twist);
+                    // Find modk
+                    modk2 = dot(kvec_cart, kvec_cart);
+                    if (modk2 > kcut2)
+                        continue; // Inside cutoff?
+                    // This k-point should be added to the list
+                    kpts_tmp.push_back(kvec);
+                    kpts_cart_tmp.push_back(kvec_cart);
+                    ksq_tmp.push_back(modk2);
+                    // Update record of the allowed maximum translation.
+                    for (int idim = 0; idim < 3; idim++)
+                        if (std::abs(kvec[idim]) > TempActualMax[idim])
+                            TempActualMax[idim] = std::abs(kvec[idim]);
+                }
+            }
+        }
+    }
+    else {
+        // Loop over all k-points in the parallelepiped and add them to
+        // kcontainer note layout is for interfacing with fft, so for each
+        // dimension, the positive indexes come first then the negative indexes
+        // backwards e.g.    0, 1, .... mmax, -mmax+1, -mmax+2, ... -1
+        const int idimsize = mmax[0] * 2;
+        const int jdimsize = mmax[1] * 2;
+        const int kdimsize = mmax[2] * 2;
+        for (int i = 0; i < idimsize; i++) {
+            kvec[0] = i;
+            if (kvec[0] > mmax[0])
+                kvec[0] -= idimsize;
+            for (int j = 0; j < jdimsize; j++) {
+                kvec[1] = j;
+                if (kvec[1] > mmax[1])
+                    kvec[1] -= jdimsize;
+                for (int k = 0; k < kdimsize; k++) {
+                    kvec[2] = k;
+                    if (kvec[2] > mmax[2])
+                        kvec[2] -= kdimsize;
+                    // get cartesian location and modk2
+                    kvec_cart = lattice.k_cart(kvec);
+                    modk2 = dot(kvec_cart, kvec_cart);
+                    // add k-point to lists
+                    kpts_tmp.push_back(kvec);
+                    kpts_cart_tmp.push_back(kvec_cart);
+                    ksq_tmp.push_back(modk2);
+                }
+            }
+        }
+        // set allowed maximum translation
+        TempActualMax[0] = mmax[0];
+        TempActualMax[1] = mmax[1];
+        TempActualMax[2] = mmax[2];
+    }
+
+    // Update a record of the number of k vectors
+    numk = kpts_tmp.size();
+    // Group the k-point indices into shells keyed by quantised |k|^2. The map
+    // owns the index lists by value, so nothing has to be released by hand
+    // and an exception thrown while the containers below are resized cannot
+    // leak them.
+    std::map<int64_t, std::vector<int>> kpts_sorted;
+    // create the map: use simple integer with resolution of 1e-7 in ksq
+    for (int ik = 0; ik < numk; ik++) {
+        // This is a workaround for ewald bug (Issue #2105).  Basically, 1e-7 is
+        // the resolution of |k|^2 for doubles, so we jack up the tolerance to
+        // match that.
+        const int64_t k_ind = static_cast<int64_t>(ksq_tmp[ik] * 10000000);
+        // operator[] creates the shell on first use; ik is appended in
+        // ascending order, so each shell keeps the enumeration order
+        kpts_sorted[k_ind].push_back(ik);
+    }
+    kpts.resize(numk);
+    kpts_cart.resize(numk);
+    kpts_cart_soa_.resize(numk);
+    ksq.resize(numk);
+    // kshell[s] is the offset of the first k-point of shell s; the last entry
+    // ends up equal to numk, hence one more element than there are shells
+    kshell.resize(kpts_sorted.size() + 1, 0);
+    int ok = 0, ish = 0;
+    // std::map iterates in increasing key order, i.e. by increasing |k|^2
+    for (const auto& shell : kpts_sorted) {
+        const std::vector<int>& members = shell.second;
+        for (const int ik : members) {
+            kpts[ok] = kpts_tmp[ik];
+            kpts_cart[ok] = kpts_cart_tmp[ik];
+            kpts_cart_soa_(ok) = kpts_cart_tmp[ik];
+            ksq[ok] = ksq_tmp[ik];
+            ++ok;
+        }
+        kshell[ish + 1] = kshell[ish] + members.size();
+        ++ish;
+    }
+    kpts_cart_soa_.updateTo();
+
+    // Finished searching k-points. Copy list of maximum translations.
+    mmax[DIM] = 0;
+    for (int idim = 0; idim < DIM; idim++) {
+        mmax[idim] = TempActualMax[idim];
+        mmax[DIM] = std::max(mmax[idim], mmax[DIM]);
+        // if(mmax[idim] > mmax[DIM]) mmax[DIM] = mmax[idim];
+    }
+    // Now fill the array that returns the index of -k when given the index of
+    // k.
+    minusk.resize(numk);
+
+    // Positional hash of a kpoint, base (1 + hashparam); assumed collision-free.
+    auto getHashOfVec = [](const auto& inpv, int hashparam) -> int64_t {
+        int64_t hash = 0; // this will cause integral promotion below
+        for (int i = 0; i < inpv.Size; ++i)
+            hash += inpv[i] + hash * hashparam;
+        return hash;
+    };
+
+    // Create a map from the hash value for each k vector to the index
+    std::map<int64_t, int> hashToIndex;
+    for (int ki = 0; ki < numk; ki++) {
+        hashToIndex[getHashOfVec(kpts[ki], numk)] = ki;
+    }
+    // Use the map to find the index of -k; operator[] yields 0 if -k is absent
+    for (int ki = 0; ki < numk; ki++) {
+        minusk[ki] = hashToIndex[getHashOfVec(-1 * kpts[ki], numk)];
+    }
+}
+
+template class KContainerT<double>;
+template class KContainerT<float>;
+template class KContainerT<std::complex<double>>;
+template class KContainerT<std::complex<float>>;
+} // namespace qmcplusplus
diff --git a/src/Particle/LongRange/KContainerT.h b/src/Particle/LongRange/KContainerT.h
new file mode 100644
index 0000000000..2f975569cc
--- /dev/null
+++ b/src/Particle/LongRange/KContainerT.h
@@ -0,0 +1,115 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
+// Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
+//                    Illinois at Urbana-Champaign Mark A. Berrill,
+//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_KCONTAINERT_H
+#define QMCPLUSPLUS_KCONTAINERT_H
+
+#include "OMPTarget/OffloadAlignedAllocators.hpp"
+#include "OhmmsSoA/VectorSoaContainer.h"
+#include "ParticleSetTraits.h"
+
+namespace qmcplusplus
+{
+/** Container for k-points
+ *
+ * It generates a set of k-points that are unit-translations of the
+ * reciprocal-space cell. K-points are generated within a spherical cutoff set
+ * by the supercell
+ */
+template <typename T>
+class KContainerT
+{
+public:
+    static constexpr auto DIM = ParticleSetTraits<T>::DIM;
+    using RealType = typename ParticleSetTraits<T>::RealType;
+    using PosType = typename ParticleSetTraits<T>::PosType;
+
+private:
+    /// The cutoff up to which k-vectors are generated (0 until updateKLists).
+    RealType kcutoff = 0;
+
+public:
+    // Typedef for the lattice-type
+    using ParticleLayout = typename LatticeParticleTraits<T>::ParticleLayout;
+
+    /// number of k-points; zero until updateKLists() has been called
+    int numk = 0;
+
+    /** maximum integer translations of reciprocal cell within kc.
+     *
+     * The last entry (index DIM) is the maximum over the DIM directions.
+     */
+    TinyVector<int, DIM + 1> mmax;
+
+    /** K-vector in reduced coordinates */
+    std::vector<TinyVector<int, DIM>> kpts;
+    /** K-vector in Cartesian coordinates
+     */
+    std::vector<PosType> kpts_cart;
+    /** square of kpts in Cartesian coordinates
+     */
+    std::vector<RealType> ksq;
+    /** Given a k index, return index to -k
+     */
+    std::vector<int> minusk;
+    /** kpts which belong to the ith-shell [kshell[i], kshell[i+1]) */
+    std::vector<int> kshell;
+
+    /** k points sorted by the |k|  excluding |k|=0
+     *
+     * The first for |k|
+     * The second for a map to the full index. The size of the second is the
+     * degeneracy.
+     */
+    // std::map<int,std::vector<int>*>  kpts_sorted;
+
+    /** update k-vectors
+     * @param lattice supercell
+     * @param kc cutoff radius in the K
+     * @param ndim below 3 the z translations are dropped, below 2 also y
+     * @param twist shifts the center of the grid of k-vectors
+     * @param useSphere if true, keep only k-points with |K| inside kc
+     */
+    void
+    updateKLists(const ParticleLayout& lattice, RealType kc, unsigned ndim,
+        const PosType& twist = PosType(), bool useSphere = true);
+
+    const auto&
+    get_kpts_cart_soa() const
+    {
+        return kpts_cart_soa_;
+    }
+
+private:
+    /** compute approximate parallelepiped that surrounds kc
+     * @param lattice supercell
+     */
+    void
+    findApproxMMax(const ParticleLayout& lattice, unsigned ndim);
+    /** construct the container for k-vectors */
+    void
+    BuildKLists(
+        const ParticleLayout& lattice, const PosType& twist, bool useSphere);
+
+    /** K-vector in Cartesian coordinates in SoA layout
+     */
+    VectorSoaContainer<RealType, DIM, OffloadAllocator<RealType>>
+        kpts_cart_soa_;
+};
+
+} // namespace qmcplusplus
+
+#endif
diff --git a/src/Particle/LongRange/StructFactT.cpp b/src/Particle/LongRange/StructFactT.cpp
new file mode 100644
index 0000000000..6f1dae8a9e
--- /dev/null
+++ b/src/Particle/LongRange/StructFactT.cpp
@@ -0,0 +1,249 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Bryan Clark, bclark@Princeton.edu, Princeton University
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of
+//                    Illinois at Urbana-Champaign Jeongnim Kim,
+//                    jeongnim.kim@gmail.com, University of Illinois at
+//                    Urbana-Champaign Mark A. Berrill, berrillma@ornl.gov, Oak
+//                    Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "StructFactT.h"
+
+#include "CPU/BLAS.hpp"
+#include "CPU/SIMD/vmath.hpp"
+#include "CPU/e2iphi.h"
+#include "CPU/math.hpp"
+#include "LRCoulombSingleton.h"
+#include "OMPTarget/OMPTargetMath.hpp"
+#include "RealSpacePositionsTOMPTarget.h"
+#include "Utilities/qmc_common.h"
+#include "ParticleSetT.h"
+
+namespace qmcplusplus
+{
+// Constructor - initializes k_lists_ from k_lists; lattice is not read here
+template <typename T>
+StructFactT<T>::StructFactT(
+    const ParticleLayout& lattice, const KContainer& k_lists) :
+    SuperCellEnum(SUPERCELL_BULK), // switched to SUPERCELL_SLAB below if quasi-2D
+    k_lists_(k_lists),
+    StorePerParticle(false),
+    update_all_timer_(
+        createGlobalTimer("StructFact::update_all_part", timer_level_fine))
+{
+    if (LRCoulombSingleton::isQuasi2D()) {
+        app_log() << "  Setting StructFact::SuperCellEnum=SUPERCELL_SLAB "
+                  << std::endl;
+        SuperCellEnum = SUPERCELL_SLAB;
+    }
+}
+
+// Destructor - defaulted, but defined out of line in this source file
+template <typename T>
+StructFactT<T>::~StructFactT() = default;
+
+template <typename T>
+void
+StructFactT<T>::resize(int nkpts, int num_species, int num_ptcls)
+{
+    rhok_r.resize(num_species, nkpts); // per-species sums, always kept
+    rhok_i.resize(num_species, nkpts);
+    if (!StorePerParticle)
+        return; // per-particle phases are not stored
+    eikr_r.resize(num_ptcls, nkpts);
+    eikr_i.resize(num_ptcls, nkpts);
+}
+
+template <typename T>
+void
+StructFactT<T>::updateAllPart(const ParticleSetT<T>& P)
+{
+    ScopedTimer local(update_all_timer_); // "StructFact::update_all_part"
+    computeRhok(P); // recompute rhok (and eikr when stored) from P
+}
+
+/** Batched structure-factor update for all walkers in sk_list / p_list.
+ *  Falls back to per-walker computeRhok() unless the leader's coordinates are
+ *  DC_POS_OFFLOAD and StorePerParticle is off; fills each rhok_r / rhok_i. */
+template <typename T>
+void
+StructFactT<T>::mw_updateAllPart(
+    const RefVectorWithLeader<StructFactT>& sk_list,
+    const RefVectorWithLeader<ParticleSetT<T>>& p_list,
+    SKMultiWalkerMemT<T>& mw_mem)
+{
+    auto& sk_leader = sk_list.getLeader();
+    auto& p_leader = p_list.getLeader();
+    ScopedTimer local(sk_leader.update_all_timer_);
+    if (p_leader.getCoordinates().getKind() !=
+            DynamicCoordinateKind::DC_POS_OFFLOAD ||
+        sk_leader.StorePerParticle)
+        for (int iw = 0; iw < sk_list.size(); iw++)
+            sk_list[iw].computeRhok(p_list[iw]);
+    else {
+        const size_t nw = p_list.size();
+        const size_t num_species = p_leader.groups();
+        const auto& kpts_cart = sk_leader.k_lists_.get_kpts_cart_soa();
+        const size_t nk = sk_leader.k_lists_.numk;
+        const size_t nk_padded = kpts_cart.capacity();
+
+        auto& coordinates_leader =
+            static_cast<const RealSpacePositionsTOMPTarget<T>&>(
+                p_leader.getCoordinates());
+        auto& mw_rsoa_dev_ptrs =
+            coordinates_leader.getMultiWalkerRSoADevicePtrs();
+        const size_t np_padded =
+            p_leader.getCoordinates().getAllParticlePos().capacity();
+
+        constexpr size_t cplx_stride = 2; // real row then imaginary row
+        mw_mem.nw_rhok.resize(nw * num_species * cplx_stride, nk_padded);
+
+        // process nk in blocks; ceil-div avoids an empty trailing block
+        constexpr size_t kblock_size = 512;
+        const size_t num_kblocks = (nk + kblock_size - 1) / kblock_size;
+
+        auto* mw_rsoa_ptr = mw_rsoa_dev_ptrs.data();
+        auto* kpts_cart_ptr = kpts_cart.data();
+        auto* mw_rhok_ptr = mw_mem.nw_rhok.data();
+        auto* group_offsets = p_leader.get_group_offsets().data();
+
+        PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \
+                map(always, from : mw_rhok_ptr[:mw_mem.nw_rhok.size()])")
+        for (int iw = 0; iw < nw; iw++)
+            for (int ib = 0; ib < num_kblocks; ib++) {
+                const size_t offset = ib * kblock_size;
+                const size_t this_block_size =
+                    omptarget::min(kblock_size, nk - offset);
+                const auto* rsoa_ptr = mw_rsoa_ptr[iw];
+                PRAGMA_OFFLOAD("omp parallel for")
+                for (int ik = 0; ik < this_block_size; ik++)
+                    for (int is = 0; is < num_species; is++) {
+                        RealType rhok_r(0), rhok_i(0);
+                        for (int ip = group_offsets[is];
+                             ip < group_offsets[is + 1]; ip++) {
+                            RealType s, c, phase(0);
+                            for (int idim = 0; idim < DIM; idim++)
+                                phase += kpts_cart_ptr[ik + offset +
+                                             nk_padded * idim] *
+                                    rsoa_ptr[ip + idim * np_padded];
+                            omptarget::sincos(phase, &s, &c);
+                            rhok_r += c;
+                            rhok_i += s;
+                        }
+                        mw_rhok_ptr[(iw * num_species + is) * cplx_stride *
+                                nk_padded +
+                            offset + ik] = rhok_r;
+                        mw_rhok_ptr[(iw * num_species + is) * cplx_stride *
+                                nk_padded +
+                            nk_padded + offset + ik] = rhok_i;
+                    }
+            }
+        // copy the real / imaginary rows into each walker's rhok_r / rhok_i
+        for (int iw = 0; iw < nw; iw++)
+            for (int is = 0; is < num_species; is++) {
+                std::copy_n(
+                    mw_mem.nw_rhok[(iw * num_species + is) * cplx_stride], nk,
+                    sk_list[iw].rhok_r[is]);
+                std::copy_n(
+                    mw_mem.nw_rhok[(iw * num_species + is) * cplx_stride + 1],
+                    nk, sk_list[iw].rhok_i[is]);
+            }
+    }
+}
+
+/** Evaluate rhok per species (and eikr per particle when StorePerParticle is
+ *  set) for particle set P; rhok_r / rhok_i are resized and zeroed first. */
+template <typename T>
+void
+StructFactT<T>::computeRhok(const ParticleSetT<T>& P)
+{
+    const size_t num_ptcls = P.getTotalNum();
+    const size_t num_species = P.groups();
+    const size_t nk = k_lists_.numk;
+    resize(nk, num_species, num_ptcls);
+
+    rhok_r = 0.0;
+    rhok_i = 0.0;
+    if (StorePerParticle) {
+        // save per particle and species value
+        for (int i = 0; i < num_ptcls; ++i) {
+            const auto& pos = P.R[i];
+            auto* restrict eikr_r_ptr = eikr_r[i];
+            auto* restrict eikr_i_ptr = eikr_i[i];
+            auto* restrict rhok_r_ptr = rhok_r[P.getGroupID(i)];
+            auto* restrict rhok_i_ptr = rhok_i[P.getGroupID(i)];
+#pragma omp simd
+            for (int ki = 0; ki < nk; ki++) {
+                qmcplusplus::sincos(dot(k_lists_.kpts_cart[ki], pos),
+                    &eikr_i_ptr[ki], &eikr_r_ptr[ki]);
+                rhok_r_ptr[ki] += eikr_r_ptr[ki];
+                rhok_i_ptr[ki] += eikr_i_ptr[ki];
+            }
+        }
+    }
+    else {
+        // save per species value
+        for (int i = 0; i < num_ptcls; ++i) {
+            const auto& pos = P.R[i];
+            auto* restrict rhok_r_ptr = rhok_r[P.getGroupID(i)];
+            auto* restrict rhok_i_ptr = rhok_i[P.getGroupID(i)];
+#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
+#pragma omp simd
+            for (int ki = 0; ki < nk; ki++) {
+                RealType s, c;
+                qmcplusplus::sincos(dot(k_lists_.kpts_cart[ki], pos), &s, &c);
+                rhok_r_ptr[ki] += c;
+                rhok_i_ptr[ki] += s;
+            }
+#else
+            // process nk in blocks; ceil-div avoids an empty trailing block
+            constexpr size_t kblock_size = 512;
+            const size_t num_kblocks = (nk + kblock_size - 1) / kblock_size;
+            RealType phiV[kblock_size], eikr_r_temp[kblock_size],
+                eikr_i_temp[kblock_size];
+
+            for (int ib = 0; ib < num_kblocks; ib++) {
+                const size_t offset = ib * kblock_size;
+                const size_t this_block_size =
+                    std::min(kblock_size, nk - offset);
+                for (int ki = 0; ki < this_block_size; ki++)
+                    phiV[ki] = dot(k_lists_.kpts_cart[ki + offset], pos);
+                eval_e2iphi(this_block_size, phiV, eikr_r_temp, eikr_i_temp);
+                for (int ki = 0; ki < this_block_size; ki++) {
+                    rhok_r_ptr[ki + offset] += eikr_r_temp[ki];
+                    rhok_i_ptr[ki + offset] += eikr_i_temp[ki];
+                }
+            }
+#endif
+        }
+    }
+}
+
+template <typename T>
+void
+StructFactT<T>::turnOnStorePerParticle(const ParticleSetT<T>& P)
+{
+    if (StorePerParticle)
+        return; // already enabled: nothing to recompute
+    StorePerParticle = true;
+    computeRhok(P); // fills the per-particle eikr tables as well
+}
+
+template class StructFactT<double>;
+template class StructFactT<float>;
+template class StructFactT<std::complex<double>>;
+template class StructFactT<std::complex<float>>;
+
+template struct SKMultiWalkerMemT<double>;
+template struct SKMultiWalkerMemT<float>;
+template struct SKMultiWalkerMemT<std::complex<double>>;
+template struct SKMultiWalkerMemT<std::complex<float>>;
+} // namespace qmcplusplus
diff --git a/src/Particle/LongRange/StructFactT.h b/src/Particle/LongRange/StructFactT.h
new file mode 100644
index 0000000000..218b3adf31
--- /dev/null
+++ b/src/Particle/LongRange/StructFactT.h
@@ -0,0 +1,159 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
+// Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
+//                    Illinois at Urbana-Champaign
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_STRUCTFACTT_H
+#define QMCPLUSPLUS_STRUCTFACTT_H
+
+#include "OhmmsPETE/OhmmsMatrix.h"
+#include "OhmmsPETE/OhmmsVector.h"
+#include "Particle/ParticleSetTraits.h"
+#include <NewTimer.h>
+#include <OMPTarget/OffloadAlignedAllocators.hpp>
+#include <Resource.h>
+#include <type_traits/template_types.hpp>
+
+namespace qmcplusplus
+{
+template <typename T>
+class ParticleSetT;
+class KContainer;
+template <typename T>
+struct SKMultiWalkerMemT;
+
+/** @ingroup longrange
+ * \brief Calculates the structure-factor for a particle set
+ *
+ * Structure factor per species:
+ *   Rhok[alpha][k] \f$\equiv \rho_{k}^{\alpha} = \sum_{i} e^{i{\bf k}\cdot{\bf r_i}}\f$
+ * Structure factor per particle: eikr[i][k]
+ */
+template <typename T>
+class StructFactT
+{
+public:
+    // Typedef for the lattice-type
+    using ParticleLayout = typename LatticeParticleTraits<T>::ParticleLayout;
+    using RealType = typename ParticleSetTraits<T>::RealType;
+
+    static constexpr auto DIM = ParticleSetTraits<T>::DIM;
+
+    /** enumeration for the methods to handle mixed bconds
+     *
+     * Allow overwriting lattice::SuperCellEnum to use D-dim k-point sets with
+     * mixed BC
+     */
+    int SuperCellEnum;
+    /// 2-D containers for the phase: real (_r) and imaginary (_i) parts
+    Matrix<RealType> rhok_r, rhok_i;
+    Matrix<RealType> eikr_r, eikr_i;
+    /** Constructor
+     * @param lattice long range box
+     * @param k_lists k-vector list; presumably what k_lists_ refers to
+     *
+     * At least in the batched version the structure factor is _NOT_ valid
+     * after construction.
+     */
+    StructFactT(const ParticleLayout& lattice, const KContainer& k_lists);
+    /// destructor
+    ~StructFactT();
+
+    /**  Update Rhok if all particles moved
+     */
+    void
+    updateAllPart(const ParticleSetT<T>& P);
+
+    /** Update RhoK for all particles of multiple walkers.
+     *
+     *  In batched context until this is called StructFact is invalid and will
+     * cause a crash if any Hamiltonian using StructFact indirectly through
+     * ParticleSet is evaluated.
+     */
+    static void
+    mw_updateAllPart(const RefVectorWithLeader<StructFactT>& sk_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& p_list,
+        SKMultiWalkerMemT<T>& mw_mem);
+
+    /** @brief switch on the storage per particle
+     * if StorePerParticle was false, this function allocates memory and
+     * precomputes data; if StorePerParticle was true, this function is a no-op
+     */
+    void
+    turnOnStorePerParticle(const ParticleSetT<T>& P);
+
+    /// accessor of StorePerParticle
+    bool
+    isStorePerParticle() const
+    {
+        return StorePerParticle;
+    }
+
+    /// accessor of k_lists_
+    const KContainer&
+    getKLists() const
+    {
+        return k_lists_;
+    }
+
+private:
+    /// Compute all rhok elements from the start
+    void
+    computeRhok(const ParticleSetT<T>& P);
+    /** resize the internal data
+     * @param nkpts presumably the number of k-points (going by the name)
+     * @param num_species number of species
+     * @param num_ptcls number of particles
+     */
+    void
+    resize(int nkpts, int num_species, int num_ptcls);
+
+    /// K-Vector List.
+    const KContainer& k_lists_;
+    /** Whether intermediate data is stored per particle. default false
+     * storing data per particle needs a significant amount of memory but some
+     * calculations may request it. storing data per particle species is more
+     * cost-effective
+     */
+    bool StorePerParticle;
+    /// timer for updateAllPart
+    NewTimer& update_all_timer_;
+};
+
+/// multi walker shared memory buffer
+template <typename T>
+struct SKMultiWalkerMemT : public Resource
+{
+    using RealType = typename StructFactT<T>::RealType;
+
+    /// presumably rhok data for all walkers (per the name) - TODO confirm layout
+    Matrix<RealType, OffloadPinnedAllocator<RealType>> nw_rhok;
+
+    SKMultiWalkerMemT() : Resource("SKMultiWalkerMem")
+    {
+    }
+
+    SKMultiWalkerMemT(const SKMultiWalkerMemT&) : SKMultiWalkerMemT()
+    { // delegates to the default constructor, so nw_rhok is not copied
+    }
+
+    std::unique_ptr<Resource>
+    makeClone() const override
+    { // goes through the copy constructor above
+        return std::make_unique<SKMultiWalkerMemT>(*this);
+    }
+};
+
+} // namespace qmcplusplus
+
+#endif
diff --git a/src/Particle/MCCoordsT.cpp b/src/Particle/MCCoordsT.cpp
new file mode 100644
index 0000000000..fd63c84a6c
--- /dev/null
+++ b/src/Particle/MCCoordsT.cpp
@@ -0,0 +1,69 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2022 QMCPACK developers.
+//
+// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "MCCoordsT.hpp"
+
+namespace qmcplusplus
+{
+template <typename T>
+void
+MCCoordsT<T, CoordsType::POS>::getSubset(const std::size_t offset,
+    const std::size_t size, MCCoordsT<T, CoordsType::POS>& out) const
+{
+    std::copy_n(positions.begin() + offset, size, out.positions.begin());
+}
+
+template <typename T>
+MCCoordsT<T, CoordsType::POS>&
+MCCoordsT<T, CoordsType::POS>::operator+=(
+    const MCCoordsT<T, CoordsType::POS>& rhs)
+{
+    assert(positions.size() == rhs.positions.size());
+    // element-wise: positions[i] becomes positions[i] + rhs.positions[i]
+    for (std::size_t ip = 0; ip < positions.size(); ++ip)
+        positions[ip] = positions[ip] + rhs.positions[ip];
+    return *this;
+}
+
+template <typename T>
+void
+MCCoordsT<T, CoordsType::POS_SPIN>::getSubset(const std::size_t offset,
+    const std::size_t size, MCCoordsT<T, CoordsType::POS_SPIN>& out) const
+{
+    std::copy_n(positions.begin() + offset, size, out.positions.begin());
+    std::copy_n(spins.begin() + offset, size, out.spins.begin());
+}
+
+template <typename T>
+MCCoordsT<T, CoordsType::POS_SPIN>&
+MCCoordsT<T, CoordsType::POS_SPIN>::operator+=(
+    const MCCoordsT<T, CoordsType::POS_SPIN>& rhs)
+{
+    assert(positions.size() == rhs.positions.size());
+    // positions: element-wise sum written back into this object
+    for (std::size_t ip = 0; ip < positions.size(); ++ip)
+        positions[ip] = positions[ip] + rhs.positions[ip];
+    // spins: same accumulation; the loop length is this object's spin count
+    const std::size_t num_spins = spins.size();
+    for (std::size_t is = 0; is < num_spins; ++is)
+        spins[is] = spins[is] + rhs.spins[is];
+    return *this;
+}
+
+template struct MCCoordsT<double, CoordsType::POS>;
+template struct MCCoordsT<double, CoordsType::POS_SPIN>;
+template struct MCCoordsT<float, CoordsType::POS>;
+template struct MCCoordsT<float, CoordsType::POS_SPIN>;
+template struct MCCoordsT<std::complex<double>, CoordsType::POS>;
+template struct MCCoordsT<std::complex<double>, CoordsType::POS_SPIN>;
+template struct MCCoordsT<std::complex<float>, CoordsType::POS>;
+template struct MCCoordsT<std::complex<float>, CoordsType::POS_SPIN>;
+} // namespace qmcplusplus
diff --git a/src/Particle/MCCoordsT.hpp b/src/Particle/MCCoordsT.hpp
new file mode 100644
index 0000000000..50b419178f
--- /dev/null
+++ b/src/Particle/MCCoordsT.hpp
@@ -0,0 +1,82 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2022 QMCPACK developers.
+//
+// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
+//                    Cody A. Melton, cmelton@sandia.gov, Sandia National
+//                    Laboratories
+//
+// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_MCCOORDST_HPP
+#define QMCPLUSPLUS_MCCOORDST_HPP
+
+#include "MCCoords.hpp"
+#include "ParticleSetTraits.h"
+#include "type_traits/complex_help.hpp"
+
+#include <algorithm>
+#include <vector>
+
+namespace qmcplusplus
+{
+// enum class CoordsType
+// {
+//   POS,
+//   POS_SPIN
+// };
+
+template <typename T, CoordsType MCT>
+struct MCCoordsT;
+
+template <typename T>
+struct MCCoordsT<T, CoordsType::POS>
+{
+    using PosType = typename ParticleSetTraits<T>::PosType;
+
+    MCCoordsT(const std::size_t size) : positions(size)
+    {
+    }
+
+    MCCoordsT&
+    operator+=(const MCCoordsT& rhs);
+
+    /** copy `size` positions starting at `offset` to the front of `out`
+     * @param[out] out destination; not resized here, needs >= size entries
+     */
+    void
+    getSubset(const std::size_t offset, const std::size_t size,
+        MCCoordsT<T, CoordsType::POS>& out) const;
+
+    std::vector<PosType> positions;
+};
+
+template <typename T>
+struct MCCoordsT<T, CoordsType::POS_SPIN>
+{
+    using PosType = typename ParticleSetTraits<T>::PosType;
+    using FullPrecRealType = typename ParticleSetTraits<T>::FullPrecRealType;
+
+    MCCoordsT(const std::size_t size) : positions(size), spins(size)
+    {
+    }
+
+    MCCoordsT&
+    operator+=(const MCCoordsT& rhs);
+
+    /** copy `size` positions and spins from `offset` to the front of `out`
+     * @param[out] out destination; not resized here, needs >= size entries
+     */
+    void
+    getSubset(const std::size_t offset, const std::size_t size,
+        MCCoordsT<T, CoordsType::POS_SPIN>& out) const;
+
+    std::vector<PosType> positions;
+    std::vector<FullPrecRealType> spins;
+};
+} // namespace qmcplusplus
+
+#endif
diff --git a/src/Particle/ParticleSetT.cpp b/src/Particle/ParticleSetT.cpp
new file mode 100644
index 0000000000..5b78bed54e
--- /dev/null
+++ b/src/Particle/ParticleSetT.cpp
@@ -0,0 +1,1200 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2020 QMCPACK developers.
+//
+// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at
+// Urbana-Champaign
+//                    Luke Shulenburger, lshulen@sandia.gov, Sandia National
+//                    Laboratories Jeremy McMinnis, jmcminis@gmail.com,
+//                    University of Illinois at Urbana-Champaign Jeongnim Kim,
+//                    jeongnim.kim@gmail.com, University of Illinois at
+//                    Urbana-Champaign Jaron T. Krogel, krogeljt@ornl.gov, Oak
+//                    Ridge National Laboratory Ye Luo, yeluo@anl.gov, Argonne
+//                    National Laboratory Mark A. Berrill, berrillma@ornl.gov,
+//                    Oak Ridge National Laboratory Mark Dewing,
+//                    markdewing@gmail.com, University of Illinois at
+//                    Urbana-Champaign
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "ParticleSetT.h"
+
+#include "Particle/DistanceTableT.h"
+#include "Particle/DynamicCoordinatesBuilder.h"
+#include "Particle/LongRange/StructFactT.h"
+#include "Particle/createDistanceTableT.h"
+#include "ParticleBase/RandomSeqGeneratorGlobal.h"
+#include "ResourceCollection.h"
+#include "Utilities/IteratorUtility.h"
+#include "Utilities/RandomGenerator.h"
+
+#include <iomanip>
+#include <numeric>
+
+namespace qmcplusplus
+{
+using WP = WalkerProperties::Indexes;
+
+enum PSetTimers
+{
+    PS_newpos,
+    PS_donePbyP,
+    PS_accept,
+    PS_loadWalker,
+    PS_update,
+    PS_dt_move,
+    PS_mw_copy
+};
+
+/// Timer labels "ParticleSet:<obj_name>::<phase>", one per PSetTimers entry.
+static TimerNameList_t<PSetTimers> generatePSetTimerNames(const std::string& obj_name)
+{
+    return {{PS_newpos, "ParticleSet:" + obj_name + "::computeNewPosDT"},
+        {PS_donePbyP, "ParticleSet:" + obj_name + "::donePbyP"},
+        {PS_accept, "ParticleSet:" + obj_name + "::acceptMove"},
+        {PS_loadWalker, "ParticleSet:" + obj_name + "::loadWalker"},
+        {PS_update, "ParticleSet:" + obj_name + "::update"},
+        {PS_dt_move, "ParticleSet:" + obj_name + "::dt_move"},
+        {PS_mw_copy, "ParticleSet:" + obj_name + "::mw_copy"}};
+}
+
+template <typename T>
+ParticleSetT<T>::ParticleSetT(const SimulationCellT<T>& simulation_cell,
+    const DynamicCoordinateKind kind) :
+    quantum_domain(classical),
+    Properties(0, 0, 1, WP::MAXPROPERTIES),
+    simulation_cell_(simulation_cell),
+    same_mass_(true),
+    is_spinor_(false),
+    active_ptcl_(-1),
+    active_spin_val_(0.0),
+    myTimers(getGlobalTimerManager(), generatePSetTimerNames(myName),
+        timer_level_medium),
+    myTwist(0.0),
+    ParentName("0"),
+    TotalNum(0),
+    group_offsets_(std::make_shared<Vector<int, OMPallocator<int>>>()),
+    coordinates_(createDynamicCoordinatesT<T>(kind))
+{
+    initPropertyList();
+}
+
+template <typename T>
+ParticleSetT<T>::ParticleSetT(const ParticleSetT& p) :
+    Properties(p.Properties),
+    simulation_cell_(p.simulation_cell_),
+    same_mass_(true),
+    is_spinor_(false),
+    active_ptcl_(-1),
+    active_spin_val_(0.0),
+    my_species_(p.getSpeciesSet()),
+    myTimers(getGlobalTimerManager(), generatePSetTimerNames(myName),
+        timer_level_medium),
+    myTwist(0.0),
+    ParentName(p.parentName()),
+    group_offsets_(p.group_offsets_),
+    coordinates_(p.coordinates_->makeClone())
+{
+    setQuantumDomain(p.quantum_domain);
+
+    resize(p.getTotalNum());
+    R.InUnit = p.R.InUnit;
+    R = p.R;
+    spins = p.spins;
+    GroupID = p.GroupID;
+    is_spinor_ = p.is_spinor_;
+
+    // need explicit copy:
+    Mass = p.Mass;
+    Z = p.Z;
+    // std::ostringstream o;
+    // o<<p.getName()<<ObjectTag;
+    // this->setName(o.str());
+    // app_log() << "  Copying a particle set " << p.getName() << " to " <<
+    // this->getName() << " groups=" << groups() << std::endl;
+    myName = p.getName();
+    PropertyList.Names = p.PropertyList.Names;
+    PropertyList.Values = p.PropertyList.Values;
+    PropertyHistory = p.PropertyHistory;
+    Collectables = p.Collectables;
+    // construct the distance tables with the same order
+    for (int i = 0; i < p.DistTables.size(); ++i)
+        addTable(p.DistTables[i]->get_origin(), p.DistTables[i]->getModes());
+
+    if (p.structure_factor_)
+        structure_factor_ =
+            std::make_unique<StructFactT<T>>(*p.structure_factor_);
+    myTwist = p.myTwist;
+
+    G = p.G;
+    L = p.L;
+}
+
+template <typename T>
+ParticleSetT<T>::~ParticleSetT() = default;
+
+template <typename T>
+void
+ParticleSetT<T>::create(const std::vector<int>& agroup)
+{
+    auto& group_offsets(*group_offsets_);
+    group_offsets.resize(agroup.size() + 1);
+    group_offsets[0] = 0;
+    for (int is = 0; is < agroup.size(); is++)
+        group_offsets[is + 1] = group_offsets[is] + agroup[is];
+    group_offsets.updateTo();
+    const size_t nsum = group_offsets[agroup.size()];
+    resize(nsum);
+    TotalNum = nsum;
+    int loc = 0;
+    for (int i = 0; i < agroup.size(); i++)
+        for (int j = 0; j < agroup[i]; j++, loc++)
+            GroupID[loc] = i;
+}
+
+template <typename T>
+void
+ParticleSetT<T>::setQuantumDomain(quantum_domains qdomain)
+{
+    // reject an invalid domain up front; quantum_domain is left unchanged
+    if (!quantumDomainValid(qdomain))
+        throw std::runtime_error("ParticleSet::setQuantumDomain\n  input "
+                                 "quantum domain is not valid for particles");
+    quantum_domain = qdomain;
+}
+
+/// Sync Z, Mass, same_mass_ and the "membersize" attribute with my_species_.
+template <typename T>
+void
+ParticleSetT<T>::resetGroups()
+{
+    const int nspecies = my_species_.getTotalNum();
+    // Usually an empty ParticleSet indicates an error in the input file,
+    // but in some cases it is useful.  Allow an empty ParticleSet if it
+    // has the special name "empty".
+    if (nspecies == 0 && getName() != "empty")
+        throw std::runtime_error(
+            "ParticleSet::resetGroups() Failed. No species exisits");
+    int natt = my_species_.numAttributes();
+    int qind = my_species_.addAttribute("charge");
+    if (natt == qind) {
+        app_log() << " Missing charge attribute of the SpeciesSet " << myName
+                  << " particleset" << std::endl;
+        app_log() << " Assume neutral particles Z=0.0 " << std::endl;
+        for (int ig = 0; ig < nspecies; ig++)
+            my_species_(qind, ig) = 0.0;
+    }
+    for (int iat = 0; iat < Z.size(); iat++)
+        Z[iat] = my_species_(qind, GroupID[iat]);
+    natt = my_species_.numAttributes();
+    int massind = my_species_.addAttribute("mass");
+    if (massind == natt)
+        for (int ig = 0; ig < nspecies; ig++)
+            my_species_(massind, ig) = 1.0;
+    same_mass_ = true;
+    // The "empty" set admitted above has no species 0 to read; use the default.
+    const double m0 = nspecies > 0 ? my_species_(massind, 0) : 1.0;
+    for (int ig = 1; ig < nspecies; ig++)
+        same_mass_ &= (my_species_(massind, ig) == m0);
+    if (same_mass_)
+        app_log() << "  All the species have the same mass " << m0 << std::endl;
+    else
+        app_log() << "  Distinctive masses for each species " << std::endl;
+    for (int iat = 0; iat < Mass.size(); iat++)
+        Mass[iat] = my_species_(massind, GroupID[iat]);
+
+    int membersize = my_species_.addAttribute("membersize");
+    for (int ig = 0; ig < nspecies; ++ig)
+        my_species_(membersize, ig) = groupsize(ig);
+
+    for (int iat = 0; iat < GroupID.size(); iat++)
+        assert(GroupID[iat] < nspecies);
+}
+
+template <typename T>
+void
+ParticleSetT<T>::randomizeFromSource(ParticleSetT& src)
+{
+    SpeciesSet& srcSpSet(src.getSpeciesSet());
+    SpeciesSet& spSet(getSpeciesSet());
+    int srcChargeIndx = srcSpSet.addAttribute("charge");
+    int srcMemberIndx = srcSpSet.addAttribute("membersize");
+    int ChargeIndex = spSet.addAttribute("charge");
+    int MemberIndx = spSet.addAttribute("membersize");
+    int Nsrc = src.getTotalNum();
+    int Nptcl = getTotalNum();
+    int NumSpecies = spSet.TotalNum;
+    int NumSrcSpecies = srcSpSet.TotalNum;
+    // Store information about charges and number of each species
+    std::vector<int> Zat, Zspec, NofSpecies, NofSrcSpecies, CurElec;
+    Zat.resize(Nsrc);
+    Zspec.resize(NumSrcSpecies);
+    NofSpecies.resize(NumSpecies);
+    CurElec.resize(NumSpecies);
+    NofSrcSpecies.resize(NumSrcSpecies);
+    for (int spec = 0; spec < NumSrcSpecies; spec++) {
+        Zspec[spec] = (int)round(srcSpSet(srcChargeIndx, spec));
+        NofSrcSpecies[spec] = (int)round(srcSpSet(srcMemberIndx, spec));
+    }
+    for (int spec = 0; spec < NumSpecies; spec++) {
+        NofSpecies[spec] = (int)round(spSet(MemberIndx, spec));
+        CurElec[spec] = first(spec);
+    }
+    int totQ = 0;
+    for (int iat = 0; iat < Nsrc; iat++)
+        totQ += Zat[iat] = Zspec[src.GroupID[iat]];
+    app_log() << "  Total ion charge    = " << totQ << std::endl;
+    totQ -= Nptcl;
+    app_log() << "  Total system charge = " << totQ << std::endl;
+    // Now, loop over ions, attaching electrons to them to neutralize
+    // charge
+    int spToken = 0;
+    // This is decremented when we run out of electrons in each species
+    int spLeft = NumSpecies;
+    std::vector<PosType> gaussRand(Nptcl);
+    makeGaussRandom(gaussRand);
+    for (int iat = 0; iat < Nsrc; iat++) {
+        // Loop over electrons to add, selecting round-robin from the
+        // electron species
+        int z = Zat[iat];
+        while (z > 0 && spLeft) {
+            int sp = spToken++ % NumSpecies;
+            if (NofSpecies[sp]) {
+                NofSpecies[sp]--;
+                z--;
+                int elec = CurElec[sp]++;
+                app_log() << "  Assigning " << (sp ? "down" : "up  ")
+                          << " electron " << elec << " to ion " << iat
+                          << " with charge " << z << std::endl;
+                double radius = 0.5 * std::sqrt((double)Zat[iat]);
+                R[elec] = src.R[iat] + radius * gaussRand[elec];
+            }
+            else
+                spLeft--;
+        }
+    }
+    // Assign remaining electrons
+    int ion = 0;
+    for (int sp = 0; sp < NumSpecies; sp++) {
+        for (int ie = 0; ie < NofSpecies[sp]; ie++) {
+            int iat = ion++ % Nsrc;
+            double radius = std::sqrt((double)Zat[iat]);
+            int elec = CurElec[sp]++;
+            R[elec] = src.R[iat] + radius * gaussRand[elec];
+        }
+    }
+}
+
+template <typename T>
+void
+ParticleSetT<T>::print(std::ostream& os, const size_t maxParticlesToPrint) const
+{
+    os << "  ParticleSet '" << getName() << "' contains " << TotalNum
+       << " particles : ";
+    if (auto& group_offsets(*group_offsets_); group_offsets.size() > 0)
+        for (int i = 0; i < group_offsets.size() - 1; i++)
+            os << " " << my_species_.speciesName[i] << "("
+               << group_offsets[i + 1] - group_offsets[i] << ")";
+    os << std::endl << std::endl;
+
+    const size_t numToPrint = maxParticlesToPrint == 0 ?
+        TotalNum :
+        std::min(TotalNum, maxParticlesToPrint);
+
+    for (int i = 0; i < numToPrint; i++) {
+        os << "    " << my_species_.speciesName[GroupID[i]] << R[i]
+           << std::endl;
+    }
+    if (numToPrint < TotalNum) {
+        os << "    (... and " << (TotalNum - numToPrint)
+           << " more particle positions ...)" << std::endl;
+    }
+    os << std::endl;
+
+    for (const std::string& description : distTableDescriptions)
+        os << description;
+    os << std::endl;
+}
+
+template <typename T>
+bool
+ParticleSetT<T>::get(std::ostream& is) const
+{
+    return true;
+}
+
+template <typename T>
+bool
+ParticleSetT<T>::put(std::istream& is)
+{
+    return true;
+}
+
+template <typename T>
+void
+ParticleSetT<T>::reset()
+{
+    app_log() << "<<<< going to set properties >>>> " << std::endl;
+}
+
+/// read the particleset
+template <typename T>
+bool
+ParticleSetT<T>::put(xmlNodePtr cur)
+{
+    return true;
+}
+
+template <typename T>
+int
+ParticleSetT<T>::addTable(const ParticleSetT& psrc, DTModes modes)
+{
+    if (myName == "none" || psrc.getName() == "none")
+        throw std::runtime_error("ParticleSet::addTable needs proper names for "
+                                 "both source and target particle sets.");
+
+    // Tables are keyed by the source set's name; at most one per source.
+    int table_id;
+    const auto found = myDistTableMap.find(psrc.getName());
+    if (found != myDistTableMap.end()) {
+        table_id = found->second;
+        app_debug() << "  ... ParticleSet::addTable Reuse Table #" << table_id
+                    << " " << DistTables[table_id]->getName() << std::endl;
+    }
+    else {
+        std::ostringstream description;
+        table_id = DistTables.size();
+        if (myName == psrc.getName()) // same name: table built from this set only
+            DistTables.push_back(createDistanceTableT(*this, description));
+        else
+            DistTables.push_back(
+                createDistanceTableT(psrc, *this, description));
+        distTableDescriptions.push_back(description.str());
+        myDistTableMap[psrc.getName()] = table_id;
+        app_debug() << "  ... ParticleSet::addTable Create Table #" << table_id
+                    << " " << DistTables[table_id]->getName() << std::endl;
+    }
+
+    // OR the requested modes into whatever the table already has.
+    DistTables[table_id]->setModes(DistTables[table_id]->getModes() | modes);
+    app_log().flush();
+    return table_id;
+}
+
+template <typename T>
+const DistanceTableAAT<T>&
+ParticleSetT<T>::getDistTableAA(int table_ID) const
+{
+    return dynamic_cast<DistanceTableAAT<T>&>(*DistTables[table_ID]);
+}
+
+template <typename T>
+const DistanceTableABT<T>&
+ParticleSetT<T>::getDistTableAB(int table_ID) const
+{
+    return dynamic_cast<DistanceTableABT<T>&>(*DistTables[table_ID]);
+}
+
+template <typename T>
+void
+ParticleSetT<T>::update(bool skipSK)
+{
+    ScopedTimer update_scope(myTimers[PS_update]);
+    // hand R to the coordinate storage, then re-evaluate every distance table
+    coordinates_->setAllParticlePos(R);
+    for (auto& table : DistTables)
+        table->evaluate(*this);
+    if (structure_factor_ && !skipSK)
+        structure_factor_->updateAllPart(*this);
+
+    active_ptcl_ = -1;
+}
+
+template <typename T>
+void
+ParticleSetT<T>::mw_update(
+    const RefVectorWithLeader<ParticleSetT>& p_list, bool skipSK)
+{
+    auto& p_leader = p_list.getLeader();
+    ScopedTimer update_scope(p_leader.myTimers[PS_update]);
+
+    for (ParticleSetT& pset : p_list)
+        pset.coordinates_->setAllParticlePos(pset.R);
+
+    auto& dts = p_leader.DistTables;
+    for (int i = 0; i < dts.size(); ++i) {
+        const auto dt_list(extractDTRefList(p_list, i));
+        dts[i]->mw_evaluate(dt_list, p_list);
+    }
+
+    if (!skipSK && p_leader.structure_factor_)
+        for (int iw = 0; iw < p_list.size(); iw++)
+            p_list[iw].structure_factor_->updateAllPart(p_list[iw]);
+}
+
+template <typename T>
+void
+ParticleSetT<T>::makeMove(
+    Index_t iat, const SingleParticlePos& displ, bool maybe_accept)
+{
+    active_ptcl_ = iat;
+    active_pos_ = R[iat] + displ;
+    active_spin_val_ = spins[iat];
+    computeNewPosDistTables(iat, active_pos_, maybe_accept);
+}
+
+template <typename T>
+void
+ParticleSetT<T>::makeMoveWithSpin(
+    Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl)
+{
+    makeMove(iat, displ);
+    active_spin_val_ += sdispl;
+}
+
+template <typename T>
+template <CoordsType CT>
+void
+ParticleSetT<T>::mw_makeMove(const RefVectorWithLeader<ParticleSetT>& p_list,
+    Index_t iat, const MCCoordsT<T, CT>& displs)
+{
+    mw_makeMove(p_list, iat, displs.positions);
+    if constexpr (CT == CoordsType::POS_SPIN)
+        mw_makeSpinMove(p_list, iat, displs.spins);
+}
+
+template <typename T>
+void
+ParticleSetT<T>::mw_makeMove(const RefVectorWithLeader<ParticleSetT>& p_list,
+    Index_t iat, const std::vector<SingleParticlePos>& displs)
+{
+    std::vector<SingleParticlePos> new_positions;
+    new_positions.reserve(displs.size());
+
+    for (int iw = 0; iw < p_list.size(); iw++) {
+        p_list[iw].active_ptcl_ = iat;
+        p_list[iw].active_pos_ = p_list[iw].R[iat] + displs[iw];
+        new_positions.push_back(p_list[iw].active_pos_);
+    }
+
+    mw_computeNewPosDistTables(p_list, iat, new_positions);
+}
+
+template <typename T>
+void
+ParticleSetT<T>::mw_makeSpinMove(
+    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+    const std::vector<Scalar_t>& sdispls)
+{
+    for (int iw = 0; iw < p_list.size(); iw++)
+        p_list[iw].active_spin_val_ = p_list[iw].spins[iat] + sdispls[iw];
+}
+
+template <typename T>
+bool
+ParticleSetT<T>::makeMoveAndCheck(Index_t iat, const SingleParticlePos& displ)
+{
+    active_ptcl_ = iat;
+    active_pos_ = R[iat] + displ;
+    active_spin_val_ = spins[iat];
+    auto& cell = simulation_cell_.getLattice();
+    // Only an explicitly defined lattice can reject a move: the displacement
+    // must be in bounds and, if it is, the new reduced position must be valid.
+    bool is_valid = true;
+    if (cell.explicitly_defined) {
+        is_valid = !cell.outOfBound(cell.toUnit(displ));
+        if (is_valid) {
+            SingleParticlePos reduced_pos = cell.toUnit(active_pos_);
+            is_valid = cell.isValid(reduced_pos);
+        }
+    }
+    computeNewPosDistTables(iat, active_pos_, true);
+    return is_valid;
+}
+
+template <typename T>
+bool
+ParticleSetT<T>::makeMoveAndCheckWithSpin(
+    Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl)
+{
+    bool is_valid = makeMoveAndCheck(iat, displ);
+    active_spin_val_ += sdispl;
+    return is_valid;
+}
+
+template <typename T>
+void
+ParticleSetT<T>::computeNewPosDistTables(
+    Index_t iat, const SingleParticlePos& newpos, bool maybe_accept)
+{
+    ScopedTimer compute_newpos_scope(myTimers[PS_newpos]);
+    // forward the proposed position of particle iat to every distance table
+    for (auto& table : DistTables)
+        table->move(*this, newpos, iat, maybe_accept);
+}
+
+template <typename T>
+void
+ParticleSetT<T>::mw_computeNewPosDistTables(
+    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+    const std::vector<SingleParticlePos>& new_positions, bool maybe_accept)
+{
+    ParticleSetT& p_leader = p_list.getLeader();
+    ScopedTimer compute_newpos_scope(p_leader.myTimers[PS_newpos]);
+
+    {
+        ScopedTimer copy_scope(p_leader.myTimers[PS_mw_copy]);
+        const auto coords_list(extractCoordsRefList(p_list));
+        p_leader.coordinates_->mw_copyActivePos(
+            coords_list, iat, new_positions);
+    }
+
+    {
+        ScopedTimer dt_scope(p_leader.myTimers[PS_dt_move]);
+        const int dist_tables_size = p_leader.DistTables.size();
+        for (int i = 0; i < dist_tables_size; ++i) {
+            const auto dt_list(extractDTRefList(p_list, i));
+            p_leader.DistTables[i]->mw_move(
+                dt_list, p_list, new_positions, iat, maybe_accept);
+        }
+
+        // DistTables mw_move calls are asynchronous. Wait for them before
+        // return.
+        PRAGMA_OFFLOAD("omp taskwait")
+    }
+}
+
+template <typename T>
+bool
+ParticleSetT<T>::makeMoveAllParticles(
+    const Walker_t& awalker, const ParticlePos& deltaR, RealType dt)
+{
+    active_ptcl_ = -1;
+    auto& Lattice = simulation_cell_.getLattice();
+    if (Lattice.explicitly_defined) {
+        for (int iat = 0; iat < deltaR.size(); ++iat) {
+            SingleParticlePos displ(dt * deltaR[iat]);
+            if (Lattice.outOfBound(Lattice.toUnit(displ)))
+                return false;
+            SingleParticlePos newpos(awalker.R[iat] + displ);
+            if (!Lattice.isValid(Lattice.toUnit(newpos)))
+                return false;
+            R[iat] = newpos;
+        }
+    }
+    else {
+        for (int iat = 0; iat < deltaR.size(); ++iat)
+            R[iat] = awalker.R[iat] + dt * deltaR[iat];
+    }
+    coordinates_->setAllParticlePos(R);
+    for (int i = 0; i < DistTables.size(); i++)
+        DistTables[i]->evaluate(*this);
+    if (structure_factor_)
+        structure_factor_->updateAllPart(*this);
+    // every move is valid
+    return true;
+}
+
+template <typename T>
+bool
+ParticleSetT<T>::makeMoveAllParticles(const Walker_t& awalker,
+    const ParticlePos& deltaR, const std::vector<RealType>& dt)
+{
+    active_ptcl_ = -1;
+    auto& Lattice = simulation_cell_.getLattice();
+    if (Lattice.explicitly_defined) {
+        for (int iat = 0; iat < deltaR.size(); ++iat) {
+            SingleParticlePos displ(dt[iat] * deltaR[iat]);
+            if (Lattice.outOfBound(Lattice.toUnit(displ)))
+                return false;
+            SingleParticlePos newpos(awalker.R[iat] + displ);
+            if (!Lattice.isValid(Lattice.toUnit(newpos)))
+                return false;
+            R[iat] = newpos;
+        }
+    }
+    else {
+        for (int iat = 0; iat < deltaR.size(); ++iat)
+            R[iat] = awalker.R[iat] + dt[iat] * deltaR[iat];
+    }
+    coordinates_->setAllParticlePos(R);
+    for (int i = 0; i < DistTables.size(); i++)
+        DistTables[i]->evaluate(*this);
+    if (structure_factor_)
+        structure_factor_->updateAllPart(*this);
+    // every move is valid
+    return true;
+}
+
+/** propose a move of every particle by dt*deltaR + drift
+ * @param awalker walker providing the starting positions
+ * @param drift per-particle drift vector, added unscaled
+ * @param deltaR per-particle random displacement, scaled by dt
+ * @param dt timestep shared by all particles
+ * @return false as soon as one particle fails a lattice check (R may then be
+ * partially overwritten); true after coordinates and tables are refreshed
+ */
+template <typename T>
+bool
+ParticleSetT<T>::makeMoveAllParticlesWithDrift(const Walker_t& awalker,
+    const ParticlePos& drift, const ParticlePos& deltaR, RealType dt)
+{
+    active_ptcl_ = -1; // an all-particle move leaves no particle active
+    auto& cell = simulation_cell_.getLattice();
+    if (!cell.explicitly_defined) { // no lattice: no boundary tests
+        for (int ip = 0; ip < deltaR.size(); ++ip)
+            R[ip] = awalker.R[ip] + dt * deltaR[ip] + drift[ip];
+    }
+    else {
+        for (int ip = 0; ip < deltaR.size(); ++ip) {
+            SingleParticlePos step(dt * deltaR[ip] + drift[ip]);
+            if (cell.outOfBound(cell.toUnit(step)))
+                return false;
+            SingleParticlePos target(awalker.R[ip] + step);
+            if (!cell.isValid(cell.toUnit(target)))
+                return false;
+            R[ip] = target;
+        }
+    }
+    coordinates_->setAllParticlePos(R);
+    for (size_t it = 0; it < DistTables.size(); ++it)
+        DistTables[it]->evaluate(*this);
+    if (structure_factor_)
+        structure_factor_->updateAllPart(*this);
+    // every particle passed the lattice checks (or none were required)
+    return true;
+}
+
+template <typename T>
+bool
+ParticleSetT<T>::makeMoveAllParticlesWithDrift(const Walker_t& awalker,
+    const ParticlePos& drift, const ParticlePos& deltaR,
+    const std::vector<RealType>& dt)
+{
+    active_ptcl_ = -1; // an all-particle move leaves no particle active
+    auto& cell = simulation_cell_.getLattice();
+    if (!cell.explicitly_defined) { // no lattice: no boundary tests
+        for (int ip = 0; ip < deltaR.size(); ++ip)
+            R[ip] = awalker.R[ip] + dt[ip] * deltaR[ip] + drift[ip];
+    }
+    else {
+        for (int ip = 0; ip < deltaR.size(); ++ip) {
+            SingleParticlePos step(dt[ip] * deltaR[ip] + drift[ip]);
+            if (cell.outOfBound(cell.toUnit(step)))
+                return false;
+            SingleParticlePos target(awalker.R[ip] + step);
+            if (!cell.isValid(cell.toUnit(target)))
+                return false;
+            R[ip] = target;
+        }
+    }
+    coordinates_->setAllParticlePos(R);
+
+    for (size_t it = 0; it < DistTables.size(); ++it)
+        DistTables[it]->evaluate(*this);
+    if (structure_factor_)
+        structure_factor_->updateAllPart(*this);
+    // every particle passed the lattice checks (or none were required)
+    return true;
+}
+
+/** commit the proposed move of particle iat
+ *
+ * Writes active_pos_/active_spin_val_ into the coordinates, R and spins, and
+ * lets every distance table update its entries for iat.
+ */
+template <typename T>
+void
+ParticleSetT<T>::acceptMove(Index_t iat)
+{
+#ifndef NDEBUG
+    if (active_ptcl_ != iat)
+        throw std::runtime_error(
+            "Bug detected by acceptMove! Request electron is not active!");
+#endif
+    ScopedTimer accept_timer(myTimers[PS_accept]);
+    // push the new position to the coordinates, then update distance tables
+    coordinates_->setOneParticlePos(active_pos_, iat);
+    for (size_t it = 0; it < DistTables.size(); ++it)
+        DistTables[it]->update(iat);
+
+    R[iat] = active_pos_;
+    spins[iat] = active_spin_val_;
+    active_ptcl_ = -1;
+}
+
+template <typename T>
+void
+ParticleSetT<T>::acceptMoveForwardMode(Index_t iat)
+{
+    assert(active_ptcl_ == iat); // only the proposed particle can be accepted
+    ScopedTimer accept_timer(myTimers[PS_accept]);
+    // store the accepted position, then let each table do its partial update
+    coordinates_->setOneParticlePos(active_pos_, iat);
+    for (size_t it = 0; it < DistTables.size(); ++it)
+        DistTables[it]->updatePartial(iat, true);
+
+    R[iat] = active_pos_;
+    spins[iat] = active_spin_val_;
+    active_ptcl_ = -1;
+}
+
+template <typename T>
+void
+ParticleSetT<T>::accept_rejectMove(
+    Index_t iat, bool accepted, bool forward_mode)
+{
+    // four-way dispatch on (accepted, forward_mode)
+    if (accepted && forward_mode)
+        acceptMoveForwardMode(iat);
+    else if (accepted)
+        acceptMove(iat);
+    else if (forward_mode)
+        rejectMoveForwardMode(iat);
+    else
+        rejectMove(iat);
+}
+
+template <typename T>
+void
+ParticleSetT<T>::rejectMove(Index_t iat)
+{
+#ifndef NDEBUG
+    if (active_ptcl_ != iat)
+        throw std::runtime_error(
+            "Bug detected by rejectMove! Request electron is not active!");
+#endif
+    active_ptcl_ = -1; // drop the proposal; R and spins are not touched here
+}
+
+template <typename T>
+void
+ParticleSetT<T>::rejectMoveForwardMode(Index_t iat)
+{
+    assert(active_ptcl_ == iat);
+    // tables still get a partial update, flagged as a rejected move
+    for (size_t it = 0; it < DistTables.size(); ++it)
+        DistTables[it]->updatePartial(iat, false);
+    active_ptcl_ = -1;
+}
+
+template <typename T>
+template <CoordsType CT>
+void
+ParticleSetT<T>::mw_accept_rejectMoveT(
+    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+    const std::vector<bool>& isAccepted, bool forward_mode)
+{
+    if constexpr (CT == CoordsType::POS_SPIN) // spins only for POS_SPIN moves
+        mw_accept_rejectSpinMove(p_list, iat, isAccepted);
+    mw_accept_rejectMove(p_list, iat, isAccepted, forward_mode); // runs last
+}
+
+template <typename T>
+void
+ParticleSetT<T>::mw_accept_rejectMove(
+    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+    const std::vector<bool>& isAccepted, bool forward_mode)
+{
+    // Looping over the single-walker acceptMove/rejectMove is not safe here:
+    // coordinate and distance table updates need careful joint handling, so
+    // the non-forward mode is disabled.
+    if (!forward_mode)
+        throw std::runtime_error(
+            "BUG calling mw_accept_rejectMove in non-forward mode");
+
+    ParticleSetT& p_leader = p_list.getLeader();
+    ScopedTimer accept_timer(p_leader.myTimers[PS_accept]);
+
+    // hand every walker's proposed position to the batched coordinate update
+    const auto coords_list = extractCoordsRefList(p_list);
+    std::vector<SingleParticlePos> proposed;
+    proposed.reserve(p_list.size());
+    for (const ParticleSetT& pset : p_list)
+        proposed.push_back(pset.active_pos_);
+    p_leader.coordinates_->mw_acceptParticlePos(
+        coords_list, iat, proposed, isAccepted);
+
+    // batched partial update, one distance table index at a time
+    auto& leader_tables = p_leader.DistTables;
+    for (int it = 0; it < leader_tables.size(); ++it) {
+        const auto table_list = extractDTRefList(p_list, it);
+        leader_tables[it]->mw_updatePartial(table_list, iat, isAccepted);
+    }
+
+    // mirror accepted positions into R and deactivate the particle
+    for (int iw = 0; iw < p_list.size(); iw++) {
+        ParticleSetT& pset = p_list[iw];
+        assert(iat == pset.active_ptcl_);
+        if (isAccepted[iw])
+            pset.R[iat] = pset.active_pos_;
+        pset.active_ptcl_ = -1;
+        assert(pset.R[iat] == pset.coordinates_->getAllParticlePos()[iat]);
+    }
+}
+
+template <typename T>
+void
+ParticleSetT<T>::mw_accept_rejectSpinMove(
+    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+    const std::vector<bool>& isAccepted)
+{
+    for (int iw = 0; iw < p_list.size(); iw++) { // active_ptcl_ is not reset
+        assert(iat == p_list[iw].active_ptcl_);
+        if (isAccepted[iw]) // rejected walkers keep their current spin
+            p_list[iw].spins[iat] = p_list[iw].active_spin_val_;
+    }
+}
+
+template <typename T>
+void
+ParticleSetT<T>::donePbyP(bool skipSK)
+{
+    ScopedTimer done_timer(myTimers[PS_donePbyP]);
+    coordinates_->donePbyP();
+    if (structure_factor_ && !skipSK) // SK refresh can be skipped on request
+        structure_factor_->updateAllPart(*this);
+    for (size_t it = 0; it < DistTables.size(); ++it)
+        DistTables[it]->finalizePbyP(*this);
+    active_ptcl_ = -1;
+}
+
+template <typename T>
+void
+ParticleSetT<T>::mw_donePbyP(
+    const RefVectorWithLeader<ParticleSetT>& p_list, bool skipSK)
+{
+    ParticleSetT& leader = p_list.getLeader();
+    ScopedTimer done_timer(leader.myTimers[PS_donePbyP]);
+
+    for (ParticleSetT& walker_pset : p_list) {
+        walker_pset.coordinates_->donePbyP();
+        walker_pset.active_ptcl_ = -1;
+    }
+
+    if (leader.structure_factor_ && !skipSK) { // batched SK update
+        auto sk_refs = extractSKRefList(p_list);
+        StructFactT<T>::mw_updateAllPart(
+            sk_refs, p_list, leader.mw_structure_factor_data_handle_);
+    }
+
+    auto& leader_tables = leader.DistTables;
+    for (int it = 0; it < leader_tables.size(); ++it) {
+        const auto table_list = extractDTRefList(p_list, it);
+        leader_tables[it]->mw_finalizePbyP(table_list, p_list);
+    }
+}
+
+template <typename T>
+void
+ParticleSetT<T>::makeVirtualMoves(const SingleParticlePos& newpos)
+{
+    active_pos_ = newpos;
+    active_ptcl_ = -1; // no single particle is marked active for this move
+    for (size_t it = 0; it < DistTables.size(); ++it)
+        DistTables[it]->move(*this, newpos, active_ptcl_, false);
+}
+
+template <typename T>
+void
+ParticleSetT<T>::loadWalker(Walker_t& awalker, bool pbyp)
+{
+    ScopedTimer load_timer(myTimers[PS_loadWalker]);
+    spins = awalker.spins;
+    R = awalker.R;
+    coordinates_->setAllParticlePos(R);
+#ifndef SOA_MEMORY_OPTIMIZED
+    G = awalker.G;
+    L = awalker.L;
+#endif
+    if (pbyp) {
+        // only tables flagged NEED_FULL_TABLE_ANYTIME are evaluated here
+        for (size_t k = 0; k < DistTables.size(); ++k)
+            if (DistTables[k]->getModes() & DTModes::NEED_FULL_TABLE_ANYTIME)
+                DistTables[k]->evaluate(*this);
+    }
+
+    active_ptcl_ = -1;
+}
+
+template <typename T>
+void
+ParticleSetT<T>::mw_loadWalker(const RefVectorWithLeader<ParticleSetT>& p_list,
+    const RefVector<Walker_t>& walkers, const std::vector<bool>& recompute,
+    bool pbyp)
+{
+    auto& leader = p_list.getLeader();
+    ScopedTimer load_timer(leader.myTimers[PS_loadWalker]);
+    for (int iw = 0; iw < p_list.size(); ++iw) {
+        if (!recompute[iw])
+            continue; // not flagged: this set keeps its configuration
+        ParticleSetT& pset = p_list[iw];
+        Walker_t& awalker = walkers[iw];
+        pset.R = awalker.R;
+        pset.spins = awalker.spins;
+        pset.coordinates_->setAllParticlePos(pset.R);
+    }
+
+    if (pbyp) {
+        auto& leader_tables = leader.DistTables;
+        for (int it = 0; it < leader_tables.size(); ++it) {
+            const auto table_list = extractDTRefList(p_list, it);
+            leader_tables[it]->mw_recompute(table_list, p_list, recompute);
+        }
+    }
+}
+
+template <typename T>
+void
+ParticleSetT<T>::saveWalker(Walker_t& awalker)
+{
+    awalker.spins = spins;
+    awalker.R = R;
+#ifndef SOA_MEMORY_OPTIMIZED
+    awalker.L = L; // G and L are copied only without SOA_MEMORY_OPTIMIZED
+    awalker.G = G;
+#endif
+}
+
+template <typename T>
+void
+ParticleSetT<T>::mw_saveWalker(const RefVectorWithLeader<ParticleSetT>& psets,
+    const RefVector<Walker_t>& walkers)
+{
+    for (int iw = 0; iw < psets.size(); ++iw) // psets[iw] pairs with walkers[iw]
+        psets[iw].saveWalker(walkers[iw]);
+}
+
+template <typename T>
+void
+ParticleSetT<T>::initPropertyList()
+{
+    PropertyList.clear();
+    // Default properties must be added in the order of the enumeration.
+    static const char* const default_names[] = {
+        "LogPsi", "SignPsi", "UmbrellaWeight", "R2Accepted", "R2Proposed",
+        "DriftScale", "AltEnergy", "LocalEnergy", "LocalPotential"
+    };
+    for (const char* name : default_names)
+        PropertyList.add(name);
+
+    // The size is deliberately not checked against WP::NUMPROPERTIES: the
+    // list stops matching as soon as other objects add their own properties.
+    // The check that used to live here, kept for reference:
+    // if (PropertyList.size() != WP::NUMPROPERTIES)
+    // {
+    //   app_error() << "The number of default properties for walkers  is "
+    //                  "not consistent." << std::endl;
+    //   app_error() << "NUMPROPERTIES " << WP::NUMPROPERTIES
+    //               << " size of PropertyList " << PropertyList.size()
+    //               << std::endl;
+    //   throw std::runtime_error("ParticleSet::initPropertyList");
+    // }
+}
+
+template <typename T>
+int
+ParticleSetT<T>::addPropertyHistory(int leng)
+{
+    const int history_id = PropertyHistory.size(); // index of the new entry
+    PropertyHistory.emplace_back(leng, 0.0);
+    PHindex.push_back(0);
+    return history_id;
+}
+
+//      void ParticleSet::resetPropertyHistory( )
+//     {
+//       for(int i=0;i<PropertyHistory.size();i++)
+//       {
+//         PHindex[i]=0;
+//  for(int k=0;k<PropertyHistory[i].size();k++)
+//  {
+//    PropertyHistory[i][k]=0.0;
+//  }
+//       }
+//     }
+
+//      void ParticleSet::addPropertyHistoryPoint(int index, RealType data)
+//     {
+//       PropertyHistory[index][PHindex[index]]=(data);
+//       PHindex[index]++;
+//       if (PHindex[index]==PropertyHistory[index].size()) PHindex[index]=0;
+// //       PropertyHistory[index].pop_back();
+//     }
+
+//      void ParticleSet::rejectedMove()
+//     {
+//       for(int dindex=0;dindex<PropertyHistory.size();dindex++){
+//         int lastIndex=PHindex[dindex]-1;
+//         if (lastIndex<0) lastIndex+=PropertyHistory[dindex].size();
+//         PropertyHistory[dindex][PHindex[dindex]]=PropertyHistory[dindex][lastIndex];
+//         PHindex[dindex]++;
+//         if (PHindex[dindex]==PropertyHistory[dindex].size())
+//         PHindex[dindex]=0;
+// //       PropertyHistory[dindex].push_front(PropertyHistory[dindex].front());
+// //       PropertyHistory[dindex].pop_back();
+//       }
+//     }
+
+template <typename T>
+void
+ParticleSetT<T>::createResource(ResourceCollection& collection) const
+{
+    coordinates_->createResource(collection);
+    for (const auto& table : DistTables)
+        table->createResource(collection);
+    if (structure_factor_) // SK multi-walker memory only when SK exists
+        collection.addResource(std::make_unique<SKMultiWalkerMemT<T>>());
+}
+
+template <typename T>
+void
+ParticleSetT<T>::acquireResource(ResourceCollection& collection,
+    const RefVectorWithLeader<ParticleSetT>& p_list)
+{
+    auto& leader = p_list.getLeader();
+    leader.coordinates_->acquireResource(
+        collection, extractCoordsRefList(p_list));
+    auto& leader_tables = leader.DistTables;
+    for (int it = 0; it < leader_tables.size(); it++)
+        leader_tables[it]->acquireResource(
+            collection, extractDTRefList(p_list, it));
+    if (leader.structure_factor_)
+        leader.mw_structure_factor_data_handle_ =
+            collection.lendResource<SKMultiWalkerMemT<T>>();
+}
+
+template <typename T>
+void
+ParticleSetT<T>::releaseResource(ResourceCollection& collection,
+    const RefVectorWithLeader<ParticleSetT>& p_list)
+{
+    auto& leader = p_list.getLeader();
+    leader.coordinates_->releaseResource(
+        collection, extractCoordsRefList(p_list));
+    auto& leader_tables = leader.DistTables;
+    for (int it = 0; it < leader_tables.size(); it++)
+        leader_tables[it]->releaseResource(
+            collection, extractDTRefList(p_list, it));
+    if (leader.structure_factor_)
+        collection.takebackResource(
+            leader.mw_structure_factor_data_handle_);
+}
+
+template <typename T>
+RefVectorWithLeader<DistanceTableT<T>>
+ParticleSetT<T>::extractDTRefList(
+    const RefVectorWithLeader<ParticleSetT>& p_list, int id)
+{
+    auto& leader = p_list.getLeader();
+    RefVectorWithLeader<DistanceTableT<T>> tables(*leader.DistTables[id]);
+    tables.reserve(p_list.size()); // one reference per particle set
+    for (ParticleSetT& pset : p_list)
+        tables.push_back(*pset.DistTables[id]);
+    return tables;
+}
+
+template <typename T>
+RefVectorWithLeader<DynamicCoordinatesT<T>>
+ParticleSetT<T>::extractCoordsRefList(
+    const RefVectorWithLeader<ParticleSetT>& p_list)
+{
+    auto& leader = p_list.getLeader();
+    RefVectorWithLeader<DynamicCoordinatesT<T>> coords(*leader.coordinates_);
+    coords.reserve(p_list.size()); // one reference per particle set
+    for (ParticleSetT& pset : p_list)
+        coords.push_back(*pset.coordinates_);
+    return coords;
+}
+
+template <typename T>
+RefVectorWithLeader<StructFactT<T>>
+ParticleSetT<T>::extractSKRefList(
+    const RefVectorWithLeader<ParticleSetT>& p_list)
+{
+    auto& leader = p_list.getLeader();
+    RefVectorWithLeader<StructFactT<T>> sk_refs(*leader.structure_factor_);
+    sk_refs.reserve(p_list.size()); // one reference per particle set
+    for (ParticleSetT& pset : p_list)
+        sk_refs.push_back(*pset.structure_factor_);
+    return sk_refs;
+}
+
+// explicit instantiations of the class for each supported value type
+template class ParticleSetT<double>;
+template class ParticleSetT<float>;
+template class ParticleSetT<std::complex<double>>;
+template class ParticleSetT<std::complex<float>>;
+// member templates are instantiated separately, once per CoordsType
+template void
+ParticleSetT<double>::mw_makeMove<CoordsType::POS>(
+    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+    const MCCoordsT<double, CoordsType::POS>& displs);
+template void
+ParticleSetT<double>::mw_makeMove<CoordsType::POS_SPIN>(
+    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+    const MCCoordsT<double, CoordsType::POS_SPIN>& displs);
+template void
+ParticleSetT<double>::mw_accept_rejectMoveT<CoordsType::POS>(
+    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+    const std::vector<bool>& isAccepted, bool forward_mode);
+template void
+ParticleSetT<double>::mw_accept_rejectMoveT<CoordsType::POS_SPIN>(
+    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+    const std::vector<bool>& isAccepted, bool forward_mode);
+
+template void
+ParticleSetT<float>::mw_makeMove<CoordsType::POS>(
+    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+    const MCCoordsT<float, CoordsType::POS>& displs);
+template void
+ParticleSetT<float>::mw_makeMove<CoordsType::POS_SPIN>(
+    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+    const MCCoordsT<float, CoordsType::POS_SPIN>& displs);
+template void
+ParticleSetT<float>::mw_accept_rejectMoveT<CoordsType::POS>(
+    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+    const std::vector<bool>& isAccepted, bool forward_mode);
+template void
+ParticleSetT<float>::mw_accept_rejectMoveT<CoordsType::POS_SPIN>(
+    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+    const std::vector<bool>& isAccepted, bool forward_mode);
+
+template void
+ParticleSetT<std::complex<double>>::mw_makeMove<CoordsType::POS>(
+    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+    const MCCoordsT<std::complex<double>, CoordsType::POS>& displs);
+template void
+ParticleSetT<std::complex<double>>::mw_makeMove<CoordsType::POS_SPIN>(
+    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+    const MCCoordsT<std::complex<double>, CoordsType::POS_SPIN>& displs);
+template void
+ParticleSetT<std::complex<double>>::mw_accept_rejectMoveT<CoordsType::POS>(
+    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+    const std::vector<bool>& isAccepted, bool forward_mode);
+template void
+ParticleSetT<std::complex<double>>::mw_accept_rejectMoveT<CoordsType::POS_SPIN>(
+    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+    const std::vector<bool>& isAccepted, bool forward_mode);
+
+template void
+ParticleSetT<std::complex<float>>::mw_makeMove<CoordsType::POS>(
+    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+    const MCCoordsT<std::complex<float>, CoordsType::POS>& displs);
+template void
+ParticleSetT<std::complex<float>>::mw_makeMove<CoordsType::POS_SPIN>(
+    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+    const MCCoordsT<std::complex<float>, CoordsType::POS_SPIN>& displs);
+template void
+ParticleSetT<std::complex<float>>::mw_accept_rejectMoveT<CoordsType::POS>(
+    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+    const std::vector<bool>& isAccepted, bool forward_mode);
+template void
+ParticleSetT<std::complex<float>>::mw_accept_rejectMoveT<CoordsType::POS_SPIN>(
+    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+    const std::vector<bool>& isAccepted, bool forward_mode);
+} // namespace qmcplusplus
diff --git a/src/Particle/ParticleSetT.h b/src/Particle/ParticleSetT.h
new file mode 100644
index 0000000000..138b352616
--- /dev/null
+++ b/src/Particle/ParticleSetT.h
@@ -0,0 +1,980 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2020 QMCPACK developers.
+//
+// File developed by: D. Das, University of Illinois at Urbana-Champaign
+//                    Bryan Clark, bclark@Princeton.edu, Princeton University
+//                    Ken Esler, kpesler@gmail.com, University of Illinois at
+//                    Urbana-Champaign Jeremy McMinnis, jmcminis@gmail.com,
+//                    University of Illinois at Urbana-Champaign Jeongnim Kim,
+//                    jeongnim.kim@gmail.com, University of Illinois at
+//                    Urbana-Champaign Jaron T. Krogel, krogeljt@ornl.gov, Oak
+//                    Ridge National Laboratory Mark A. Berrill,
+//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_PARTICLESETT_H
+#define QMCPLUSPLUS_PARTICLESETT_H
+
+#include <memory>
+
+#include "DTModes.h"
+#include "DynamicCoordinatesT.h"
+#include "MCCoordsT.hpp"
+#include "OhmmsPETE/OhmmsArray.h"
+#include "OhmmsSoA/VectorSoaContainer.h"
+#include "Particle/ParticleSetTraits.h"
+#include "ParticleTags.h"
+#include "Pools/PooledData.h"
+#include "ResourceHandle.h"
+#include "SimulationCellT.h"
+#include "SpeciesSet.h"
+#include "Utilities/TimerManager.h"
+#include "Walker.h"
+#include "type_traits/template_types.hpp"
+
+namespace qmcplusplus
+{
+/// forward declarations
+template <typename T>
+class DistanceTableT;
+template <typename T>
+class DistanceTableAAT;
+template <typename T>
+class DistanceTableABT;
+class ResourceCollection;
+template <typename T>
+class StructFactT;
+template <typename T>
+struct SKMultiWalkerMemT;
+
+/** Specialized particle class for atomistic simulations
+ *
+ * Derived from OhmmsElementBase; its types come from ParticleSetTraits<T> and
+ * LatticeParticleTraits<T>. The ParticleLayout class represents a supercell
+ * with/without periodic boundary conditions. The ParticleLayout class also
+ * takes care of spatial decompositions for efficient evaluations for the
+ * interactions with a finite cutoff.
+ */
+template <typename T>
+class ParticleSetT : public OhmmsElementBase
+{
+public:
+    using RealType = typename ParticleSetTraits<T>::RealType;
+    using FullPrecRealType = typename ParticleSetTraits<T>::FullPrecRealType;
+    using ComplexType = typename ParticleSetTraits<T>::ComplexType;
+    using PosType = typename ParticleSetTraits<T>::PosType;
+
+    using PropertySetType = typename ParticleSetTraits<T>::PropertySetType;
+
+    using Index_t = typename LatticeParticleTraits<T>::Index_t;
+    using Scalar_t = typename LatticeParticleTraits<T>::Scalar_t;
+    using ParticleLayout = typename LatticeParticleTraits<T>::ParticleLayout;
+    using SingleParticlePos =
+        typename LatticeParticleTraits<T>::SingleParticlePos;
+    using ParticleIndex = typename LatticeParticleTraits<T>::ParticleIndex;
+    using ParticlePos = typename LatticeParticleTraits<T>::ParticlePos;
+    using ParticleScalar = typename LatticeParticleTraits<T>::ParticleScalar;
+    using ParticleGradient =
+        typename LatticeParticleTraits<T>::ParticleGradient;
+    using ParticleLaplacian =
+        typename LatticeParticleTraits<T>::ParticleLaplacian;
+
+    /// walker type
+    using Walker_t = Walker<ParticleSetTraits<T>, LatticeParticleTraits<T>>;
+    /// container type to store the property
+    using PropertyContainer_t = typename Walker_t::PropertyContainer_t;
+    /// buffer type for a serialized buffer
+    using Buffer_t = PooledData<RealType>;
+
+    enum quantum_domains
+    {
+        no_quantum_domain = 0,
+        classical,
+        quantum
+    };
+
+    /// quantum_domain of the particles, default = classical
+    quantum_domains quantum_domain;
+
+    //@{ public data members
+    /// Species ID
+    ParticleIndex GroupID;
+    /// Position
+    ParticlePos R;
+    /// internal spin variables for dynamical spin calculations
+    ParticleScalar spins;
+    /// gradients of the particles
+    ParticleGradient G;
+    /// laplacians of the particles
+    ParticleLaplacian L;
+    /// mass of each particle
+    ParticleScalar Mass;
+    /// charge of each particle
+    ParticleScalar Z;
+
+    /// the index of the active bead for particle-by-particle moves
+    Index_t activeBead;
+    /// the direction reptile traveling
+    Index_t direction;
+
+    /// Particle density in G-space for MPC interaction
+    std::vector<TinyVector<int, OHMMS_DIM>> DensityReducedGvecs;
+    std::vector<ComplexType> Density_G;
+    Array<RealType, OHMMS_DIM> Density_r;
+
+    /// DFT potential
+    std::vector<TinyVector<int, OHMMS_DIM>> VHXCReducedGvecs;
+    std::vector<ComplexType> VHXC_G[2];
+    Array<RealType, OHMMS_DIM> VHXC_r[2];
+
+    /** name-value map of Walker Properties
+     *
+     * PropertyMap is used to keep the name-value mapping of
+     * Walker_t::Properties.  PropertyList::Values are not
+     * necessarily updated during the simulations.
+     */
+    PropertySetType PropertyList;
+
+    /** properties of the current walker
+     *
+     * The internal order is identical to PropertyList, which holds
+     * the matching names.
+     */
+    PropertyContainer_t Properties;
+
+    /** observables in addition to those registered in Properties/PropertyList
+     *
+     * Such observables as density, gofr, sk are not stored per walker but
+     * collected during QMC iterations.
+     */
+    Buffer_t Collectables;
+
+    /// Property history vector
+    std::vector<std::vector<FullPrecRealType>> PropertyHistory;
+    std::vector<int> PHindex;
+    ///@}
+
+    /// current MC step
+    int current_step;
+
+    /// default constructor
+    ParticleSetT(const SimulationCellT<T>& simulation_cell,
+        const DynamicCoordinateKind kind = DynamicCoordinateKind::DC_POS);
+
+    /// copy constructor
+    ParticleSetT(const ParticleSetT& p);
+
+    /// default destructor
+    ~ParticleSetT() override;
+
+    /** create grouped particles
+     * @param agroup number of particles per group
+     */
+    void
+    create(const std::vector<int>& agroup);
+
+    /** print particle coordinates to a std::ostream
+     * @param os output stream
+     * @param maxParticlesToPrint maximal number of particles to print. Pass 0
+     * to print all.
+     */
+    void
+    print(std::ostream& os, const size_t maxParticlesToPrint = 0) const;
+
+    /// dummy. For satisfying OhmmsElementBase.
+    bool
+    get(std::ostream& os) const override;
+    /// dummy. For satisfying OhmmsElementBase.
+    bool
+    put(std::istream&) override;
+    /// dummy. For satisfying OhmmsElementBase.
+    void
+    reset() override;
+
+    /// initialize ParticleSet from xmlNode
+    bool
+    put(xmlNodePtr cur) override;
+
+    /// specify quantum_domain of particles
+    void
+    setQuantumDomain(quantum_domains qdomain);
+
+    void
+    set_quantum()
+    {
+        quantum_domain = quantum;
+    }
+
+    inline bool
+    is_classical() const
+    {
+        return quantum_domain == classical;
+    }
+
+    inline bool
+    is_quantum() const
+    {
+        return quantum_domain == quantum;
+    }
+
+    /// check whether quantum domain is valid for particles
+    inline bool
+    quantumDomainValid(quantum_domains qdomain) const
+    {
+        return qdomain != no_quantum_domain;
+    }
+
+    /// check whether quantum domain is valid for particles
+    inline bool
+    quantumDomainValid() const
+    {
+        return quantumDomainValid(quantum_domain);
+    }
+
+    /** add a distance table
+     * @param psrc source particle set
+     * @param modes bitmask DistanceTable::DTModes
+     *
+     * if this->myName == psrc.getName(), AA type. Otherwise, AB type.
+     */
+    int
+    addTable(const ParticleSetT& psrc, DTModes modes = DTModes::ALL_OFF);
+
+    /// get a distance table by table_ID
+    inline auto&
+    getDistTable(int table_ID) const
+    {
+        return *DistTables[table_ID];
+    }
+    /// get a distance table by table_ID and dynamic_cast to DistanceTableAA
+    const DistanceTableAAT<T>&
+    getDistTableAA(int table_ID) const;
+    /// get a distance table by table_ID and dynamic_cast to DistanceTableAB
+    const DistanceTableABT<T>&
+    getDistTableAB(int table_ID) const;
+
+    /** reset all the collectable quantities during a MC iteration
+     */
+    inline void
+    resetCollectables()
+    {
+        std::fill(Collectables.begin(), Collectables.end(), 0.0);
+    }
+
+    /** update the internal data
+     * @param skipSK if true, skip the SK update
+     */
+    void
+    update(bool skipSK = false);
+
+    /// batched version of update
+    static void
+    mw_update(
+        const RefVectorWithLeader<ParticleSetT>& p_list, bool skipSK = false);
+
+    /** create Structure Factor with PBCs
+     */
+    void
+    createSK();
+
+    bool
+    hasSK() const
+    {
+        return bool(structure_factor_);
+    }
+
+    /** return Structure Factor
+     */
+    const StructFactT<T>&
+    getSK() const
+    {
+        assert(structure_factor_);
+        return *structure_factor_;
+    };
+
+    /** Turn on per particle storage in Structure Factor
+     */
+    void
+    turnOnPerParticleSK();
+
+    /** Get state (on/off) of per particle storage in Structure Factor
+     */
+    bool
+    getPerParticleSKState() const;
+
+    /// return the SpeciesSet of this particle set
+    inline SpeciesSet&
+    getSpeciesSet()
+    {
+        return my_species_;
+    }
+    /// return the const SpeciesSet of this particle set
+    inline const SpeciesSet&
+    getSpeciesSet() const
+    {
+        return my_species_;
+    }
+
+    /// return parent's name
+    inline const std::string&
+    parentName() const
+    {
+        return ParentName;
+    }
+    inline void
+    setName(const std::string& aname)
+    {
+        myName = aname;
+        if (ParentName == "0") {
+            ParentName = aname;
+        }
+    }
+
+    inline const DynamicCoordinatesT<T>&
+    getCoordinates() const
+    {
+        return *coordinates_;
+    }
+
+    void
+    resetGroups();
+
+    const auto&
+    getSimulationCell() const
+    {
+        return simulation_cell_;
+    }
+    const auto&
+    getLattice() const
+    {
+        return simulation_cell_.getLattice();
+    }
+    auto&
+    getPrimitiveLattice() const
+    {
+        return const_cast<ParticleLayout&>(simulation_cell_.getPrimLattice());
+    }
+    const auto&
+    getLRBox() const
+    {
+        return simulation_cell_.getLRBox();
+    }
+
+    inline bool
+    isSameMass() const
+    {
+        return same_mass_;
+    }
+    inline bool
+    isSpinor() const
+    {
+        return is_spinor_;
+    }
+    inline void
+    setSpinor(bool is_spinor)
+    {
+        is_spinor_ = is_spinor;
+    }
+
+    /// return active particle id
+    inline Index_t
+    getActivePtcl() const
+    {
+        return active_ptcl_;
+    }
+    inline const PosType&
+    getActivePos() const
+    {
+        return active_pos_;
+    }
+    inline Scalar_t
+    getActiveSpinVal() const
+    {
+        return active_spin_val_;
+    }
+
+    /// return the active position if the particle is active, or the current
+    /// position if not
+    inline const PosType&
+    activeR(int iat) const
+    {
+        // When active_ptcl_ == iat, a move has been proposed.
+        return (active_ptcl_ == iat) ? active_pos_ : R[iat];
+    }
+
+    /// return the active spin value if the particle is active or return the
+    /// current spin value if not
+    inline const Scalar_t&
+    activeSpin(int iat) const
+    {
+        // When active_ptcl_ == iat, a move has been proposed.
+        return (active_ptcl_ == iat) ? active_spin_val_ : spins[iat];
+    }
+
+    /** move the iat-th particle to active_pos_
+     * @param iat the index of the particle to be moved
+     * @param displ the displacement of the iat-th particle position
+     * @param maybe_accept if false, the caller guarantees that the proposed
+     * move will not be accepted.
+     *
+     * Update active_ptcl_ index and active_pos_ position (R[iat]+displ) for a
+     * proposed move. Evaluate the related distance table data
+     * DistanceTable::Temp. If maybe_accept = false, certain operations for
+     * accepting moves will be skipped for optimal performance.
+     */
+    void
+    makeMove(
+        Index_t iat, const SingleParticlePos& displ, bool maybe_accept = true);
+    /// makeMove, but now includes an update to the spin variable
+    void
+    makeMoveWithSpin(
+        Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl);
+
+    /// batched version of makeMove
+    template <CoordsType CT>
+    static void
+    mw_makeMove(const RefVectorWithLeader<ParticleSetT<T>>& p_list, Index_t iat,
+        const MCCoordsT<T, CT>& displs);
+
+    static void
+    mw_makeMove(const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+        const std::vector<SingleParticlePos>& displs);
+
+    /// batched version makeMove for spin variable only
+    static void
+    mw_makeSpinMove(const RefVectorWithLeader<ParticleSetT>& p_list,
+        Index_t iat, const std::vector<Scalar_t>& sdispls);
+
+    /** move the iat-th particle to active_pos_
+     * @param iat the index of the particle to be moved
+     * @param displ random displacement of the iat-th particle
+     * @return true, if the move is valid
+     *
+     * Update active_ptcl_ index and active_pos_ position (R[iat]+displ) for a
+     * proposed move. Evaluate the related distance table data
+     * DistanceTable::Temp.
+     *
+     * When a Lattice is defined, a move is valid only if it passes two checks:
+     *  - outOfBound(displ): invalid move if displ is larger than half
+     *    (currently) of the box in any direction;
+     *  - isValid(Lattice.toUnit(active_pos_)): invalid move if active_pos_
+     *    goes out of the Lattice in any direction marked with open BC.
+     * Note: active_pos_ and distance tables are always evaluated either way.
+     */
+    bool
+    makeMoveAndCheck(Index_t iat, const SingleParticlePos& displ);
+    /// makeMoveAndCheck, but now includes an update to the spin variable
+    bool
+    makeMoveAndCheckWithSpin(
+        Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl);
+
+    /** Handles virtual moves for all the particles to a single newpos.
+     *
+     * The state active_ptcl_ remains -1 and rejectMove is not needed.
+     * acceptMove can not be used.
+     * See QMCHamiltonians::MomentumEstimator as an example
+     */
+    void
+    makeVirtualMoves(const SingleParticlePos& newpos);
+
+    /** move all the particles of a walker
+     * @param awalker the walker to operate
+     * @param deltaR proposed displacement
+     * @param dt  factor of deltaR
+     * @return true if all the moves are legal.
+     *
+     * If big displacements or illegal positions are detected, return false.
+     * If all good, R = awalker.R + dt* deltaR
+     */
+    bool
+    makeMoveAllParticles(
+        const Walker_t& awalker, const ParticlePos& deltaR, RealType dt);
+
+    bool
+    makeMoveAllParticles(const Walker_t& awalker, const ParticlePos& deltaR,
+        const std::vector<RealType>& dt);
+
+    /** move all the particles including the drift
+     *
+     * Otherwise, everything is the same as makeMove for a walker
+     */
+    bool
+    makeMoveAllParticlesWithDrift(const Walker_t& awalker,
+        const ParticlePos& drift, const ParticlePos& deltaR, RealType dt);
+
+    bool
+    makeMoveAllParticlesWithDrift(const Walker_t& awalker,
+        const ParticlePos& drift, const ParticlePos& deltaR,
+        const std::vector<RealType>& dt);
+
+    /** accept or reject a proposed move
+     *  Two operation modes:
+     *  Using and updating distance tables via `ParticleSet` operates in one of
+     * two modes: regular mode and forward mode.
+     *
+     *  Regular mode
+     *  The regular mode can only be used when the distance tables for particle
+     * pairs are fully up-to-date. This is the case after calling
+     * `ParticleSet::update()` in a unit test or after p-by-p moves in a QMC
+     * driver. In this mode, the distance tables remain up-to-date after calling
+     * `ParticleSet::acceptMove` and calling `ParticleSet::rejectMove` is not
+     * mandatory.
+     *
+     *  Forward mode
+     *  The forward mode assumes that distance table is not fully up-to-date
+     * until every particle is accepted or rejected to move once in order. This
+     * is the mode used in the p-by-p part of drivers. In this mode, calling
+     * `ParticleSet::accept_rejectMove` is required to handle accept/reject
+     * rather than calling `ParticleSet::acceptMove` and
+     * `ParticleSet::rejectMove` individually. `accept_rejectMove(iel)` ensures
+     * that the (jel < iel) part of the distance tables is fully up-to-date
+     * whether the move is accepted or rejected. For this reason, the rejecting
+     * operation inside `ParticleSet::accept_rejectMove` involves writing the
+     * distances with respect to the old particle position.
+     */
+    void
+    accept_rejectMove(Index_t iat, bool accepted, bool forward_mode = true);
+
+    /** accept the move and update the particle attribute by the proposed move
+     *in regular mode
+     *@param iat the index of the particle whose position and other attributes
+     *to be updated
+     */
+    void
+    acceptMove(Index_t iat);
+
+    /** reject a proposed move in regular mode
+     * @param iat the electron whose proposed move gets rejected.
+     */
+    void
+    rejectMove(Index_t iat);
+
+    /// batched version of acceptMove and rejectMove fused, templated on
+    /// CoordsType
+    template <CoordsType CT>
+    static void
+    mw_accept_rejectMoveT(const RefVectorWithLeader<ParticleSetT>& p_list,
+        Index_t iat, const std::vector<bool>& isAccepted,
+        bool forward_mode = true);
+
+    /// batched version of acceptMove and rejectMove fused
+    static void
+    mw_accept_rejectMove(const RefVectorWithLeader<ParticleSetT>& p_list,
+        Index_t iat, const std::vector<bool>& isAccepted,
+        bool forward_mode = true);
+
+    /** batched version of acceptMove and rejectMove fused, but only for spins
+     *
+     * note: should be called BEFORE mw_accept_rejectMove, which resets
+     * active_ptcl_ to -1. Calling them in the wrong order would make the
+     * assertion that we have the right particle index fail.
+     */
+    static void
+    mw_accept_rejectSpinMove(const RefVectorWithLeader<ParticleSetT>& p_list,
+        Index_t iat, const std::vector<bool>& isAccepted);
+
+    void
+    initPropertyList();
+    inline int
+    addProperty(const std::string& pname) // adds pname to PropertyList
+    {
+        return PropertyList.add(pname.c_str());
+    }
+
+    int
+    addPropertyHistory(int leng);
+    //        void rejectedMove();
+    //        void resetPropertyHistory( );
+    //        void addPropertyHistoryPoint(int index, RealType data);
+
+    void
+    convert(const ParticlePos& pin, ParticlePos& pout);
+    void
+    convert2Unit(const ParticlePos& pin, ParticlePos& pout);
+    void
+    convert2Cart(const ParticlePos& pin, ParticlePos& pout);
+    void
+    convert2Unit(ParticlePos& pout);
+    void
+    convert2Cart(ParticlePos& pout);
+    void
+    convert2UnitInBox(const ParticlePos& pint, ParticlePos& pout);
+    void
+    convert2CartInBox(const ParticlePos& pint, ParticlePos& pout);
+
+    void
+    applyBC(const ParticlePos& pin, ParticlePos& pout);
+    void
+    applyBC(ParticlePos& pos);
+    void
+    applyBC(const ParticlePos& pin, ParticlePos& pout, int first, int last);
+    void
+    applyMinimumImage(ParticlePos& pinout);
+
+    /** load a Walker_t to the current ParticleSet
+     * @param awalker the reference to the walker to be loaded
+     * @param pbyp true if it is used by PbyP update
+     *
+     * PbyP requires the distance tables and Sk with awalker.R
+     */
+    void
+    loadWalker(Walker_t& awalker, bool pbyp);
+    /** batched version of loadWalker */
+    static void
+    mw_loadWalker(const RefVectorWithLeader<ParticleSetT>& p_list,
+        const RefVector<Walker_t>& walkers, const std::vector<bool>& recompute,
+        bool pbyp);
+
+    /** save this to awalker
+     *
+     *  just the R, G, and L
+     *  More duplicate data that makes code difficult to reason about should be
+     * removed.
+     */
+    void
+    saveWalker(Walker_t& awalker);
+
+    /** batched version of saveWalker
+     *
+     *  just the R, G, and L
+     */
+    static void
+    mw_saveWalker(const RefVectorWithLeader<ParticleSetT>& psets,
+        const RefVector<Walker_t>& walkers);
+
+    /** update structure factor and unmark active_ptcl_
+     * @param skipSK if true, skip the SK (structure factor) update
+     *
+     * The Coulomb interaction evaluation needs the structure factor.
+     * For this reason, call donePbyP after the loop of single
+     * electron moves before evaluating the Hamiltonian. Unmarking
+     * active_ptcl_ is more of a safety measure and is probably not needed.
+     */
+    void
+    donePbyP(bool skipSK = false);
+    /// batched version of donePbyP
+    static void
+    mw_donePbyP(
+        const RefVectorWithLeader<ParticleSetT>& p_list, bool skipSK = false);
+
+    /// return the address of the values of Hamiltonian terms (mutable)
+    inline FullPrecRealType* restrict getPropertyBase()
+    {
+        return Properties.data();
+    }
+
+    /// return the address of the values of Hamiltonian terms (read-only)
+    inline const FullPrecRealType* restrict getPropertyBase() const
+    {
+        return Properties.data();
+    }
+
+    /// return the address of the i-th properties, Properties[i] (mutable)
+    inline FullPrecRealType* restrict getPropertyBase(int i)
+    {
+        return Properties[i];
+    }
+
+    /// return the address of the i-th properties, Properties[i] (read-only)
+    inline const FullPrecRealType* restrict getPropertyBase(int i) const
+    {
+        return Properties[i];
+    }
+
+    inline void
+    setTwist(const SingleParticlePos& t) // store t as myTwist
+    {
+        myTwist = t;
+    }
+    inline const SingleParticlePos&
+    getTwist() const // the value last stored by setTwist (myTwist)
+    {
+        return myTwist;
+    }
+
+    /** Initialize particles around another ParticleSet
+     * Used to initialize an electron ParticleSet by an ion ParticleSet
+     */
+    void
+    randomizeFromSource(ParticleSetT& src);
+
+    /** get species name of particle i
+     *  (speciesName entry selected by the particle's group id, GroupID[i]) */
+    inline const std::string&
+    species_from_index(int i)
+    {
+        return my_species_.speciesName[GroupID[i]];
+    }
+
+    inline size_t
+    getTotalNum() const // total number of particles
+    {
+        return TotalNum;
+    }
+
+    inline void
+    clear()
+    {
+        TotalNum = 0;
+        // empty all per-particle attribute arrays
+        R.clear();
+        spins.clear();
+        GroupID.clear();
+        G.clear();
+        L.clear();
+        Mass.clear();
+        Z.clear();
+        // and shrink the internal coordinates to zero particles
+        coordinates_->resize(0);
+    }
+
+    /// return the number of groups (one less than the group_offsets_ entries)
+    inline int
+    groups() const
+    {
+        return group_offsets_->size() - 1;
+    }
+
+    /// return the first particle index of group igroup
+    inline int
+    first(int igroup) const
+    {
+        return (*group_offsets_)[igroup];
+    }
+
+    /// return one past the last particle index of group igroup (exclusive end)
+    inline int
+    last(int igroup) const
+    {
+        return (*group_offsets_)[igroup + 1];
+    }
+
+    /// group id of particle iat; iat must lie in [0, TotalNum)
+    inline int
+    getGroupID(int iat) const
+    {
+        assert(0 <= iat && static_cast<size_t>(iat) < TotalNum);
+        return GroupID[iat];
+    }
+
+    /// number of particles in group igroup
+    inline int
+    groupsize(int igroup) const
+    {
+        return last(igroup) - first(igroup);
+    }
+
+    /// tag each particle attribute and add it to AttribList for IO
+    template <typename ATList>
+    inline void
+    createAttributeList(ATList& AttribList)
+    {
+        R.setTypeName(ParticleTags::postype_tag);
+        R.setObjName(ParticleTags::position_tag);
+        spins.setTypeName(ParticleTags::scalartype_tag);
+        spins.setObjName(ParticleTags::spins_tag);
+        GroupID.setTypeName(ParticleTags::indextype_tag);
+        GroupID.setObjName(ParticleTags::ionid_tag);
+        // add basic attributes
+        AttribList.add(R);
+        AttribList.add(spins);
+        AttribList.add(GroupID);
+        // gradient (G) and laplacian (L) attributes
+        G.setTypeName(ParticleTags::gradtype_tag);
+        L.setTypeName(ParticleTags::laptype_tag);
+
+        G.setObjName("grad");
+        L.setObjName("lap");
+
+        AttribList.add(G);
+        AttribList.add(L);
+
+        // more particle attributes
+        Mass.setTypeName(ParticleTags::scalartype_tag);
+        Mass.setObjName("mass");
+        AttribList.add(Mass);
+
+        Z.setTypeName(ParticleTags::scalartype_tag);
+        Z.setObjName("charge");
+        AttribList.add(Z);
+    }
+
+    inline void
+    setMapStorageToInput(const std::vector<int>& mapping) // stores a copy
+    {
+        map_storage_to_input_ = mapping;
+    }
+    inline const std::vector<int>&
+    get_map_storage_to_input() const // storage index -> input index
+    {
+        return map_storage_to_input_;
+    }
+
+    inline int
+    getNumDistTables() const // number of entries in DistTables
+    {
+        return DistTables.size();
+    }
+
+    inline auto&
+    get_group_offsets() const // the offset array behind first()/last()
+    {
+        return *group_offsets_;
+    }
+
+    /// initialize a shared resource and hand it to a collection
+    void
+    createResource(ResourceCollection& collection) const;
+    /** acquire external resource and associate it with the list of ParticleSet
+     * Note: use RAII ResourceCollectionTeamLock whenever possible
+     */
+    static void
+    acquireResource(ResourceCollection& collection,
+        const RefVectorWithLeader<ParticleSetT>& p_list);
+    /** release external resource
+     * Note: use RAII ResourceCollectionTeamLock whenever possible
+     */
+    static void
+    releaseResource(ResourceCollection& collection,
+        const RefVectorWithLeader<ParticleSetT>& p_list);
+
+    static RefVectorWithLeader<DistanceTableT<T>>
+    extractDTRefList(const RefVectorWithLeader<ParticleSetT>& p_list, int id);
+    static RefVectorWithLeader<DynamicCoordinatesT<T>>
+    extractCoordsRefList(const RefVectorWithLeader<ParticleSetT>& p_list);
+    static RefVectorWithLeader<StructFactT<T>>
+    extractSKRefList(const RefVectorWithLeader<ParticleSetT>& p_list);
+
+protected:
+    /// reference to global simulation cell
+    const SimulationCellT<T>& simulation_cell_;
+
+    /// true if the particles have the same mass
+    bool same_mass_;
+    /// true if this is a dynamic spin calculation
+    bool is_spinor_;
+    /** the index of the active particle during particle-by-particle moves
+     *
+     * when a single particle move is proposed, the particle id is assigned to
+     * active_ptcl_ No matter the move is accepted or rejected, active_ptcl_ is
+     * marked back to -1. This state flag is used for picking coordinates and
+     * distances for SPO evaluation.
+     */
+    Index_t active_ptcl_;
+    /// the proposed position of active_ptcl_ during particle-by-particle moves
+    SingleParticlePos active_pos_;
+    /// the proposed spin of active_ptcl_ during particle-by-particle moves
+    Scalar_t active_spin_val_;
+
+    /** Map storage index to the input index.
+     * If not empty, particles were reordered by groups when being loaded from
+     * XML input. When other input data are affected by reordering, its builder
+     * should query this mapping. map_storage_to_input_[5] = 2 means the index
+     * 5 (6th) particle in this ParticleSet was read from the index 2 (3rd)
+     * particle in the XML input.
+     */
+    std::vector<int> map_storage_to_input_;
+
+    /// SpeciesSet of particles
+    SpeciesSet my_species_;
+
+    /// Structure factor
+    std::unique_ptr<StructFactT<T>> structure_factor_;
+
+    /// multi walker structure factor data
+    ResourceHandle<SKMultiWalkerMemT<T>> mw_structure_factor_data_handle_;
+
+    /** map to handle distance tables
+     *
+     * myDistTableMap[source-particle-tag]= locator in the distance table
+     * myDistTableMap[ObjectTag] === 0
+     */
+    std::map<std::string, int> myDistTableMap;
+
+    /// distance tables that need to be updated by moving this ParticleSet
+    std::vector<std::unique_ptr<DistanceTableT<T>>> DistTables;
+
+    /// Descriptions from distance table creation.  Same order as DistTables.
+    std::vector<std::string> distTableDescriptions;
+
+    TimerList_t myTimers;
+
+    SingleParticlePos myTwist;
+
+    std::string ParentName;
+
+    /// total number of particles
+    size_t TotalNum;
+
+    /// array to handle a group of distinct particles per species
+    std::shared_ptr<Vector<int, OMPallocator<int>>> group_offsets_;
+
+    /// internal representation of R. It can be an SoA copy of R
+    std::unique_ptr<DynamicCoordinatesT<T>> coordinates_;
+
+    /** compute temporal DistTables and SK for a new particle position
+     *
+     * @param iat the particle that is moved on a sphere
+     * @param newpos a new particle position
+     * @param maybe_accept if false, the caller guarantees that the proposed
+     * move will not be accepted.
+     */
+    void
+    computeNewPosDistTables(
+        Index_t iat, const SingleParticlePos& newpos, bool maybe_accept = true);
+
+    /** compute temporal DistTables and SK for a new particle position for each
+     * walker in a batch
+     *
+     * @param p_list the list of wrapped ParticleSet references in a walker
+     * batch
+     * @param iat the particle that is moved on a sphere
+     * @param new_positions new particle positions
+     * @param maybe_accept if false, the caller guarantees that the proposed
+     * move will not be accepted.
+     */
+    static void
+    mw_computeNewPosDistTables(const RefVectorWithLeader<ParticleSetT>& p_list,
+        Index_t iat, const std::vector<SingleParticlePos>& new_positions,
+        bool maybe_accept = true);
+
+    /** actual implementation for accepting a proposed move in forward mode
+     *
+     * @param iat the index of the particle whose position and other attributes
+     * to be updated
+     */
+    void
+    acceptMoveForwardMode(Index_t iat);
+
+    /** reject a proposed move in forward mode
+     * @param iat the electron whose proposed move gets rejected.
+     */
+    void
+    rejectMoveForwardMode(Index_t iat);
+
+    /// resize internal storage to hold numPtcl particles
+    inline void
+    resize(size_t numPtcl)
+    {
+        TotalNum = numPtcl;
+        // keep every per-particle attribute array at numPtcl entries
+        R.resize(numPtcl);
+        spins.resize(numPtcl);
+        GroupID.resize(numPtcl);
+        G.resize(numPtcl);
+        L.resize(numPtcl);
+        Mass.resize(numPtcl);
+        Z.resize(numPtcl);
+        // the internal coordinates are resized to the same particle count
+        coordinates_->resize(numPtcl);
+    }
+};
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/Particle/ParticleSetTraits.h b/src/Particle/ParticleSetTraits.h
new file mode 100644
index 0000000000..3ea028b54f
--- /dev/null
+++ b/src/Particle/ParticleSetTraits.h
@@ -0,0 +1,85 @@
+#ifndef QMCPLUSPLUS_PARTICLESETTRAITS_H
+#define QMCPLUSPLUS_PARTICLESETTRAITS_H
+
+#include <config.h>
+
+#include "OhmmsData/RecordProperty.h"
+#include "OhmmsPETE/Tensor.h"
+#include "OhmmsPETE/TinyVector.h"
+#include "Particle/Lattice/CrystalLattice.h"
+#include "Particle/ParticleBase/ParticleAttrib.h"
+#include "type_traits/complex_help.hpp"
+
+namespace qmcplusplus
+{
+template <typename T>
+struct ParticleSetTraits // scalar and small-vector aliases for value type T
+{
+    enum
+    {
+        DIM = OHMMS_DIM
+    };
+    using RealType = RealAlias<T>;
+    using ComplexType = std::complex<RealType>;
+    using ValueType = T;
+    using IndexType = int;
+    using PosType = TinyVector<RealType, DIM>; // DIM real components
+    using GradType = TinyVector<ValueType, DIM>; // DIM components of type T
+    // using HessType = Tensor<ValueType, DIM>;
+    // using TensorType = Tensor<ValueType, DIM>;
+    // using GradHessType = TinyVector<Tensor<ValueType, DIM>, DIM>;
+    // using IndexVector = Vector<IndexType>;
+    // using ValueVector = Vector<ValueType>;
+    // using ValueMatrix = Matrix<ValueType>;
+    // using GradVector = Vector<GradType>;
+    // using GradMatrix = Matrix<GradType>;
+    // using HessVector = Vector<HessType>;
+    // using HessMatrix = Matrix<HessType>;
+    // using GradHessVector = Vector<GradHessType>;
+    // using GradHessMatrix = Matrix<GradHessType>;
+    // using VGLVector = VectorSoaContainer<ValueType, DIM + 2>;
+    // double-based types; FullPrecValueType is complex exactly when T is
+    using FullPrecRealType = double;
+    using FullPrecComplexType = std::complex<double>;
+    using FullPrecValueType = std::conditional_t<IsComplex_t<T>::value,
+        FullPrecComplexType, FullPrecRealType>;
+
+    using PropertySetType = RecordNamedProperty<FullPrecRealType>;
+};
+
+template <typename T>
+struct LatticeParticleTraits // lattice and per-particle container aliases
+{
+    enum
+    {
+        DIM = OHMMS_DIM
+    };
+    using RealType = typename ParticleSetTraits<T>::RealType;
+    // the lattice type and the index/position/tensor types it defines
+    using ParticleLayout = CrystalLattice<RealType, DIM>;
+    using SingleParticleIndex = typename ParticleLayout::SingleParticleIndex;
+    using SingleParticlePos = typename ParticleLayout::SingleParticlePos;
+    using ParticleTensorType = typename ParticleLayout::Tensor_t;
+    // full-precision types are taken unchanged from ParticleSetTraits<T>
+    using FullPrecRealType = typename ParticleSetTraits<T>::FullPrecRealType;
+    using FullPrecComplexType =
+        typename ParticleSetTraits<T>::FullPrecComplexType;
+    using FullPrecValueType = typename ParticleSetTraits<T>::FullPrecValueType;
+
+    using FullPrecGradType = TinyVector<FullPrecValueType, DIM>;
+
+    using Index_t = int;
+    using Scalar_t = FullPrecRealType;
+    using Complex_t = FullPrecComplexType;
+    // ParticleAttrib containers, one per kind of per-particle quantity
+    using ParticleIndex = ParticleAttrib<Index_t>;
+    using ParticleScalar = ParticleAttrib<Scalar_t>;
+    using ParticlePos = ParticleAttrib<SingleParticlePos>;
+    using ParticleTensor = ParticleAttrib<ParticleTensorType>;
+
+    using ParticleGradient = ParticleAttrib<FullPrecGradType>;
+    using ParticleLaplacian = ParticleAttrib<FullPrecValueType>;
+    using SingleParticleValue = FullPrecValueType;
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/Particle/RealSpacePositionsT.h b/src/Particle/RealSpacePositionsT.h
new file mode 100644
index 0000000000..7cd81723b6
--- /dev/null
+++ b/src/Particle/RealSpacePositionsT.h
@@ -0,0 +1,96 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2020 QMCPACK developers.
+//
+// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//
+// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+/** @file RealSpacePositionsT.h
+ */
+#ifndef QMCPLUSPLUS_REALSPACE_POSITIONST_H
+#define QMCPLUSPLUS_REALSPACE_POSITIONST_H
+
+#include "OhmmsSoA/VectorSoaContainer.h"
+#include "Particle/DynamicCoordinatesT.h"
+
+namespace qmcplusplus
+{
+/** Introduced to handle virtual moves and ratio computations, e.g. for
+ * non-local PP evaluations.
+ */
+template <typename T>
+class RealSpacePositionsT : public DynamicCoordinatesT<T>
+{
+public:
+    using ParticlePos = typename LatticeParticleTraits<T>::ParticlePos;
+    using RealType = typename DynamicCoordinatesT<T>::RealType;
+    using PosType = typename DynamicCoordinatesT<T>::PosType;
+    using PosVectorSoa = typename DynamicCoordinatesT<T>::PosVectorSoa;
+    /// construct an empty set tagged DynamicCoordinateKind::DC_POS
+    RealSpacePositionsT() :
+        DynamicCoordinatesT<T>(DynamicCoordinateKind::DC_POS)
+    {
+    }
+    /// clone this object through its copy constructor
+    std::unique_ptr<DynamicCoordinatesT<T>>
+    makeClone() override
+    {
+        return std::make_unique<RealSpacePositionsT>(*this);
+    }
+    /// resize the position storage to n entries
+    void
+    resize(size_t n) override
+    {
+        RSoA.resize(n);
+    }
+    size_t
+    size() const override // number of stored positions
+    {
+        return RSoA.size();
+    }
+    /// replace all positions with R (resizing to R.size() first)
+    void
+    setAllParticlePos(const ParticlePos& R) override
+    {
+        resize(R.size());
+        RSoA.copyIn(R);
+    }
+    void
+    setOneParticlePos(const PosType& pos, size_t iat) override
+    {
+        RSoA(iat) = pos; // overwrite the position of particle iat only
+    }
+    /// for each walker iw with isAccepted[iw], store new_positions[iw] at iat
+    void
+    mw_acceptParticlePos(
+        const RefVectorWithLeader<DynamicCoordinatesT<T>>& coords_list,
+        size_t iat, const std::vector<PosType>& new_positions,
+        const std::vector<bool>& isAccepted) const override
+    {
+        assert(this == &coords_list.getLeader());
+        for (size_t iw = 0; iw < isAccepted.size(); iw++)
+            if (isAccepted[iw])
+                coords_list[iw].setOneParticlePos(new_positions[iw], iat);
+    }
+    /// read access to all positions (SoA container) or to a single one
+    const PosVectorSoa&
+    getAllParticlePos() const override
+    {
+        return RSoA;
+    }
+    PosType
+    getOneParticlePos(size_t iat) const override
+    {
+        return RSoA[iat];
+    }
+
+private:
+    /// particle positions in SoA layout
+    PosVectorSoa RSoA;
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/Particle/RealSpacePositionsTOMPTarget.h b/src/Particle/RealSpacePositionsTOMPTarget.h
new file mode 100644
index 0000000000..57a81f6c85
--- /dev/null
+++ b/src/Particle/RealSpacePositionsTOMPTarget.h
@@ -0,0 +1,328 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2020 QMCPACK developers.
+//
+// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//
+// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+/** @file RealSpacePositionsTOMPTarget.h
+ */
+#ifndef QMCPLUSPLUS_REALSPACE_POSITIONST_OMPTARGET_H
+#define QMCPLUSPLUS_REALSPACE_POSITIONST_OMPTARGET_H
+
+#include "OMPTarget/OMPallocator.hpp"
+#include "OhmmsSoA/VectorSoaContainer.h"
+#include "Particle/DynamicCoordinatesT.h"
+#include "Platforms/PinnedAllocator.h"
+#include "ResourceCollection.h"
+
+namespace qmcplusplus
+{
+/** Introduced to handle virtual moves and ratio computations, e.g. for
+ * non-local PP evaluations.
+ */
+template <typename T>
+class RealSpacePositionsTOMPTarget : public DynamicCoordinatesT<T>
+{
+public:
+    using ParticlePos = typename LatticeParticleTraits<T>::ParticlePos;
+    using RealType = typename DynamicCoordinatesT<T>::RealType;
+    using PosType = typename DynamicCoordinatesT<T>::PosType;
+    using PosVectorSoa = typename DynamicCoordinatesT<T>::PosVectorSoa;
+    static constexpr auto DIM = ParticleSetTraits<T>::DIM;
+
+    RealSpacePositionsTOMPTarget() :
+        DynamicCoordinatesT<T>(DynamicCoordinateKind::DC_POS_OFFLOAD),
+        is_host_position_changed_(false), is_nw_new_pos_prepared(false)
+    {
+    } // both state flags start cleared; mw_acceptParticlePos reads the second
+    RealSpacePositionsTOMPTarget(const RealSpacePositionsTOMPTarget& in) :
+        DynamicCoordinatesT<T>(DynamicCoordinateKind::DC_POS_OFFLOAD),
+        RSoA(in.RSoA), is_nw_new_pos_prepared(false) // nothing staged yet
+    {
+        RSoA_hostview.attachReference(
+            RSoA.size(), RSoA.capacity(), RSoA.data());
+        updateH2D(); // uploads the copy and clears is_host_position_changed_
+    }
+
+    std::unique_ptr<DynamicCoordinatesT<T>>
+    makeClone() override // clone through the copy constructor
+    {
+        return std::make_unique<RealSpacePositionsTOMPTarget>(*this);
+    }
+
+    void
+    resize(size_t n) override
+    {
+        if (RSoA.size() == n) return; // size unchanged: keep the current view
+        RSoA.resize(n);
+        // re-attach the host view to RSoA's storage after the resize
+        RSoA_hostview.attachReference(
+            RSoA.size(), RSoA.capacity(), RSoA.data());
+    }
+
+    size_t
+    size() const override // number of positions, read from the host view
+    {
+        return RSoA_hostview.size();
+    }
+
+    void
+    setAllParticlePos(const ParticlePos& R) override
+    {
+        resize(R.size());
+        RSoA_hostview.copyIn(R); // fill the host-side storage
+        updateH2D(); // then transfer it to the device
+        is_nw_new_pos_prepared = false; // mw_new_pos no longer counts as set
+    }
+
+    void
+    setOneParticlePos(const PosType& pos, size_t iat) override
+    {
+        RSoA_hostview(iat) = pos; // host copy only
+        is_host_position_changed_ = true; // device is synced in donePbyP()
+        /* This was too slow due to overhead.
+        RealType x     = pos[0];
+        RealType y     = pos[1];
+        RealType z     = pos[2];
+        RealType* data = RSoA.data();
+        size_t offset  = RSoA.capacity();
+
+        PRAGMA_OFFLOAD("omp target map(to : x, y, z, iat)")
+        {
+          data[iat]              = x;
+          data[iat + offset]     = y;
+          data[iat + offset * 2] = z;
+        }
+        */
+    }
+
+    void
+    mw_copyActivePos(
+        const RefVectorWithLeader<DynamicCoordinatesT<T>>& coords_list,
+        size_t iat, const std::vector<PosType>& new_positions) const override
+    {
+        assert(this == &coords_list.getLeader());
+        auto& coords_leader =
+            coords_list
+                .template getCastedLeader<RealSpacePositionsTOMPTarget>();
+        // Stage one new position per walker in the leader's shared buffer.
+        const auto nw = coords_list.size();
+        auto& mw_new_pos =
+            coords_leader.mw_mem_handle_.getResource().mw_new_pos;
+        mw_new_pos.resize(nw);
+
+        for (int iw = 0; iw < nw; iw++)
+            mw_new_pos(iw) = new_positions[iw];
+        // Array section [:n]: send the whole SoA buffer, not just one element.
+        auto* mw_pos_ptr = mw_new_pos.data();
+        PRAGMA_OFFLOAD("omp target update to(\
+            mw_pos_ptr[:DIM * mw_new_pos.capacity()])")
+
+        coords_leader.is_nw_new_pos_prepared = true;
+    }
+
+    void
+    mw_acceptParticlePos(
+        const RefVectorWithLeader<DynamicCoordinatesT<T>>& coords_list,
+        size_t iat, const std::vector<PosType>& new_positions,
+        const std::vector<bool>& isAccepted) const override
+    {
+        assert(this == &coords_list.getLeader());
+        const size_t nw = coords_list.size();
+        auto& coords_leader =
+            coords_list
+                .template getCastedLeader<RealSpacePositionsTOMPTarget>();
+        MultiWalkerMem& mw_mem = coords_leader.mw_mem_handle_;
+        auto& mw_new_pos = mw_mem.mw_new_pos;
+        auto& mw_rsoa_ptrs = mw_mem.mw_rsoa_ptrs;
+        auto& mw_accept_indices = mw_mem.mw_accept_indices;
+        // stage the new positions here if mw_copyActivePos was not called
+        if (!is_nw_new_pos_prepared) {
+            mw_copyActivePos(coords_list, iat, new_positions);
+            app_warning() << "This message only appear in unit tests. Report a "
+                             "bug if seen in production code."
+                          << std::endl;
+        }
+
+        coords_leader.is_nw_new_pos_prepared = false;
+        // list accepted walker indices and update those walkers' host copies
+        mw_accept_indices.resize(nw);
+        auto* restrict id_array = mw_accept_indices.data();
+
+        size_t num_accepted = 0;
+        for (int iw = 0; iw < nw; iw++)
+            if (isAccepted[iw]) {
+                auto& coords = coords_list.template getCastedElement<
+                    RealSpacePositionsTOMPTarget>(iw);
+                id_array[num_accepted] = iw;
+                // save new coordinates on host copy
+                coords.RSoA_hostview(iat) = mw_new_pos[iw];
+                num_accepted++;
+            }
+
+        // early return to avoid OpenMP runtime mishandling of size 0 in
+        // transfer/compute.
+        if (num_accepted == 0)
+            return;
+
+        // offload to GPU
+        auto* restrict mw_pos_ptr = mw_new_pos.data();
+        auto* restrict mw_rosa_ptr = mw_rsoa_ptrs.data();
+        const size_t rsoa_stride = RSoA.capacity();
+        const size_t mw_pos_stride = mw_new_pos.capacity();
+        // copy each accepted walker's new position into slot iat of its RSoA
+        PRAGMA_OFFLOAD("omp target teams distribute parallel for \
+                    map(always, to : id_array[:num_accepted])")
+        for (int i = 0; i < num_accepted; i++) {
+            const int iw = id_array[i];
+            RealType* RSoA_dev_ptr = mw_rosa_ptr[iw];
+            for (int id = 0; id < QMCTraits::DIM; id++)
+                RSoA_dev_ptr[iat + rsoa_stride * id] =
+                    mw_pos_ptr[iw + mw_pos_stride * id];
+        }
+    }
+
+    const PosVectorSoa&
+    getAllParticlePos() const override // all positions, via the host view
+    {
+        return RSoA_hostview;
+    }
+    PosType
+    getOneParticlePos(size_t iat) const override // read from the host view
+    {
+        return RSoA_hostview[iat];
+    }
+
+    void
+    donePbyP() override
+    {
+        is_nw_new_pos_prepared = false;
+        if (is_host_position_changed_) { // host edits not yet on the device
+            updateH2D();
+            is_host_position_changed_ = false; // updateH2D() also clears this
+        }
+    }
+
+    const RealType*
+    getDevicePtr() const // device address of the RSoA storage
+    {
+        return RSoA.device_data();
+    }
+
+    const auto&
+    getFusedNewPosBuffer() const // mw_new_pos of the acquired resource
+    {
+        return mw_mem_handle_.getResource().mw_new_pos;
+    }
+
+    void
+    createResource(ResourceCollection& collection) const override
+    {
+        // Register a fresh MultiWalkerMem; the returned index is not needed.
+        collection.addResource(std::make_unique<MultiWalkerMem>());
+    }
+
+    void
+    acquireResource(ResourceCollection& collection,
+        const RefVectorWithLeader<DynamicCoordinatesT<T>>& coords_list)
+        const override
+    {
+        MultiWalkerMem& mw_mem =
+            coords_list.template getCastedLeader<RealSpacePositionsTOMPTarget>()
+                .mw_mem_handle_ = collection.lendResource<MultiWalkerMem>();
+        // gather every walker's RSoA device pointer into the shared list
+        auto& mw_rsoa_ptrs(mw_mem.mw_rsoa_ptrs);
+        const auto nw = coords_list.size();
+        mw_rsoa_ptrs.resize(nw);
+        for (int iw = 0; iw < nw; iw++) {
+            auto& coords =
+                coords_list
+                    .template getCastedElement<RealSpacePositionsTOMPTarget>(
+                        iw);
+            mw_rsoa_ptrs[iw] = coords.RSoA.device_data();
+        }
+        mw_rsoa_ptrs.updateTo(); // presumably host-to-device; TODO confirm
+    }
+
+    void
+    releaseResource(ResourceCollection& collection,
+        const RefVectorWithLeader<DynamicCoordinatesT<T>>& coords_list)
+        const override
+    { // hand the leader's MultiWalkerMem handle back to the collection
+        collection.takebackResource(
+            coords_list.template getCastedLeader<RealSpacePositionsTOMPTarget>()
+                .mw_mem_handle_);
+    }
+
+    const auto&
+    getMultiWalkerRSoADevicePtrs() const // list filled by acquireResource
+    {
+        return mw_mem_handle_.getResource().mw_rsoa_ptrs;
+    }
+
+private:
+    /// particle positions in SoA layout
+    VectorSoaContainer<RealType, QMCTraits::DIM,
+        OMPallocator<RealType, PinnedAlignedAllocator<RealType>>>
+        RSoA;
+
+    /// multi walker shared memory buffer
+    struct MultiWalkerMem : public Resource
+    {
+        /// one particle new/old positions in SoA layout
+        VectorSoaContainer<RealType, QMCTraits::DIM,
+            OMPallocator<RealType, PinnedAlignedAllocator<RealType>>>
+            mw_new_pos;
+
+        /// indices of the walkers whose move was accepted
+        Vector<int, OMPallocator<int, PinnedAlignedAllocator<int>>>
+            mw_accept_indices;
+
+        /// per-walker RSoA device pointers
+        Vector<RealType*,
+            OMPallocator<RealType*, PinnedAlignedAllocator<RealType*>>>
+            mw_rsoa_ptrs;
+        /// create empty buffers under the resource name "MultiWalkerMem"
+        MultiWalkerMem() : Resource("MultiWalkerMem")
+        {
+        }
+        /// copying yields fresh, empty buffers; contents are not copied
+        MultiWalkerMem(const MultiWalkerMem&) : MultiWalkerMem()
+        {
+        }
+
+        std::unique_ptr<Resource>
+        makeClone() const override
+        {
+            return std::make_unique<MultiWalkerMem>(*this);
+        }
+    };
+
+    ResourceHandle<MultiWalkerMem> mw_mem_handle_;
+
+    /// host view of RSoA
+    PosVectorSoa RSoA_hostview;
+
+    /// if true, host position has been changed while the device copy has not
+    /// been updated.
+    bool is_host_position_changed_;
+
+    /// if true, mw_new_pos has been updated with active positions.
+    bool is_nw_new_pos_prepared;
+
+    void
+    updateH2D() // copy the full host RSoA buffer to the device
+    {
+        RealType* data = RSoA.data();
+        PRAGMA_OFFLOAD(
+            "omp target update to(data[0:RSoA.capacity()*QMCTraits::DIM])")
+        is_host_position_changed_ = false; // nothing pending after the update
+    }
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/Particle/SimulationCellT.cpp b/src/Particle/SimulationCellT.cpp
new file mode 100644
index 0000000000..8ad7295bb6
--- /dev/null
+++ b/src/Particle/SimulationCellT.cpp
@@ -0,0 +1,74 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2021 QMCPACK developers.
+//
+// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//
+// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "SimulationCellT.h"
+#include "Platforms/Host/OutputManager.h"
+
+namespace qmcplusplus
+{
+template <typename T>
+SimulationCellT<T>::SimulationCellT() = default;
+
+template <typename T>
+SimulationCellT<T>::SimulationCellT(const Lattice& lattice) : lattice_(lattice)
+{
+    resetLRBox();
+}
+
+/// Rebuild the long-range box LRBox_ and the k-vector lists from lattice_,
+/// applying VacuumScale to row 2 (slab) or rows 1 and 2 (wire) of LRBox_.R.
+/// An open supercell is left untouched.
+template <typename T>
+void
+SimulationCellT<T>::resetLRBox()
+{
+    // Nothing to set up for an open supercell.
+    if (lattice_.SuperCellEnum == SUPERCELL_OPEN)
+        return;
+
+    lattice_.SetLRCutoffs(lattice_.Rv);
+    LRBox_ = lattice_;
+
+    // Rows of LRBox_.R scaled by VacuumScale: row 2 for a slab, rows 1 and 2
+    // for a wire, none otherwise (first_scaled_row == 3).
+    const bool has_vacuum_scale = lattice_.VacuumScale != 1.0;
+    int first_scaled_row = 3;
+    if (has_vacuum_scale && lattice_.SuperCellEnum == SUPERCELL_SLAB)
+        first_scaled_row = 2;
+    else if (has_vacuum_scale && lattice_.SuperCellEnum == SUPERCELL_WIRE)
+        first_scaled_row = 1;
+    for (int row = first_scaled_row; row < 3; ++row)
+        for (int col = 0; col < 3; ++col)
+            LRBox_.R(row, col) *= lattice_.VacuumScale;
+    const bool box_rescaled = first_scaled_row < 3;
+
+    LRBox_.reset();
+    LRBox_.SetLRCutoffs(LRBox_.Rv);
+    LRBox_.printCutoffs(app_log());
+
+    if (box_rescaled) {
+        const char* const rule = "--------------------------------------- ";
+        app_summary()
+            << "  Simulation box changed by vacuum supercell conditions"
+            << std::endl;
+        app_log() << rule << std::endl;
+        LRBox_.print(app_log());
+        app_log() << rule << std::endl;
+    }
+
+    k_lists_.updateKLists(LRBox_, LRBox_.LR_kc, LRBox_.ndim);
+}
+
+template class SimulationCellT<double>;
+template class SimulationCellT<float>;
+template class SimulationCellT<std::complex<double>>;
+template class SimulationCellT<std::complex<float>>;
+} // namespace qmcplusplus
diff --git a/src/Particle/SimulationCellT.h b/src/Particle/SimulationCellT.h
new file mode 100644
index 0000000000..ff8240325a
--- /dev/null
+++ b/src/Particle/SimulationCellT.h
@@ -0,0 +1,71 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2021 QMCPACK developers.
+//
+// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//
+// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_SIMULATIONCELLT_H
+#define QMCPLUSPLUS_SIMULATIONCELLT_H
+
+#include "LongRange/KContainerT.h"
+#include "ParticleSetTraits.h"
+
+namespace qmcplusplus
+{
+class ParticleSetPool;
+
+template <typename T>
+class SimulationCellT
+{
+public:
+    using Lattice = typename LatticeParticleTraits<T>::ParticleLayout;
+
+    SimulationCellT();
+    SimulationCellT(const Lattice& lattice);
+
+    const Lattice&
+    getLattice() const
+    {
+        return lattice_;
+    }
+    const Lattice&
+    getPrimLattice() const
+    {
+        return primative_lattice_;
+    }
+    const Lattice&
+    getLRBox() const
+    {
+        return LRBox_;
+    }
+
+    void
+    resetLRBox();
+
+    /// access k_lists_ read only
+    const KContainerT<T>&
+    getKLists() const
+    {
+        return k_lists_;
+    }
+
+private:
+    /// simulation cell lattice
+    Lattice lattice_;
+    /// Primitive cell lattice
+    Lattice primative_lattice_;
+    /// long-range box
+    Lattice LRBox_;
+
+    /// K-Vector List.
+    KContainerT<T> k_lists_;
+
+    friend class ParticleSetPool;
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/Particle/SoaDistanceTableAAT.h b/src/Particle/SoaDistanceTableAAT.h
new file mode 100644
index 0000000000..289bcc22a8
--- /dev/null
+++ b/src/Particle/SoaDistanceTableAAT.h
@@ -0,0 +1,237 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//                    Amrita Mathuriya, amrita.mathuriya@intel.com, Intel Corp.
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+// -*- C++ -*-
+#ifndef QMCPLUSPLUS_DTDIMPL_AAT_H
+#define QMCPLUSPLUS_DTDIMPL_AAT_H
+
+#include "CPU/SIMD/algorithm.hpp"
+#include "Lattice/ParticleBConds3DSoa.h"
+#include "Particle/DistanceTableT.h"
+
+namespace qmcplusplus
+{
+/**@ingroup nnlist
+ * @brief A class derived from DistanceTableAAT, specialized for the dense case
+ */
+template <typename T, unsigned D, int SC>
+struct SoaDistanceTableAAT :
+    public DTD_BConds<typename ParticleSetTraits<T>::RealType, D, SC>,
+    public DistanceTableAAT<T>
+{
+    using RealType = typename DistanceTableAAT<T>::RealType;
+    using PosType = typename DistanceTableAAT<T>::PosType;
+    using IndexType = typename DistanceTableAAT<T>::IndexType;
+
+    /// actual memory for dist and displacements_
+    aligned_vector<RealType> memory_pool_;
+
+    SoaDistanceTableAAT(ParticleSetT<T>& target) :
+        DTD_BConds<RealType, D, SC>(target.getLattice()),
+        DistanceTableAAT<T>(target, DTModes::ALL_OFF),
+        num_targets_padded_(getAlignedSize<RealType>(this->num_targets_)),
+#if !defined(NDEBUG)
+        old_prepared_elec_id_(-1),
+#endif
+        evaluate_timer_(createGlobalTimer(std::string("DTAA::evaluate_") +
+                target.getName() + "_" + target.getName(),
+            timer_level_fine)),
+        move_timer_(createGlobalTimer(std::string("DTAA::move_") +
+                target.getName() + "_" + target.getName(),
+            timer_level_fine)),
+        update_timer_(createGlobalTimer(std::string("DTAA::update_") +
+                target.getName() + "_" + target.getName(),
+            timer_level_fine))
+    {
+        resize();
+    }
+
+    SoaDistanceTableAAT() = delete;
+    SoaDistanceTableAAT(const SoaDistanceTableAAT&) = delete;
+    ~SoaDistanceTableAAT() override
+    {
+    }
+
+    size_t
+    compute_size(int N) const
+    {
+        const size_t num_padded = getAlignedSize<RealType>(N);
+        const size_t Alignment = getAlignment<RealType>();
+        return (num_padded * (2 * N - num_padded + 1) +
+                   (Alignment - 1) * num_padded) /
+            2;
+    }
+
+    void
+    resize()
+    {
+        // initialize memory containers and views
+        const size_t total_size = compute_size(this->num_targets_);
+        memory_pool_.resize(total_size * (1 + D));
+        this->distances_.resize(this->num_targets_);
+        this->displacements_.resize(this->num_targets_);
+        for (int i = 0; i < this->num_targets_; ++i) {
+            this->distances_[i].attachReference(
+                memory_pool_.data() + compute_size(i), i);
+            this->displacements_[i].attachReference(i, total_size,
+                memory_pool_.data() + total_size + compute_size(i));
+        }
+
+        this->old_r_.resize(this->num_targets_);
+        this->old_dr_.resize(this->num_targets_);
+        this->temp_r_.resize(this->num_targets_);
+        this->temp_dr_.resize(this->num_targets_);
+    }
+
+    /// Recompute rows 1..num_targets_-1 of distances_/displacements_ from P.
+    inline void
+    evaluate(ParticleSetT<T>& P) override
+    {
+        ScopedTimer local_timer(evaluate_timer_);
+        for (int iat = 1; iat < this->num_targets_; ++iat)
+            DTD_BConds<RealType, D, SC>::computeDistances(P.R[iat],
+                P.getCoordinates().getAllParticlePos(),
+                this->distances_[iat].data(), this->displacements_[iat], 0, iat,
+                iat);
+    }
+
+    /// evaluate the temporary pair relations
+    inline void
+    move(const ParticleSetT<T>& P, const PosType& rnew, const IndexType iat,
+        bool prepare_old) override
+    {
+        ScopedTimer local_timer(move_timer_);
+
+#if !defined(NDEBUG)
+        old_prepared_elec_id_ = prepare_old ? iat : -1;
+#endif
+        DTD_BConds<RealType, D, SC>::computeDistances(rnew,
+            P.getCoordinates().getAllParticlePos(), this->temp_r_.data(),
+            this->temp_dr_, 0, this->num_targets_, iat);
+        // set up old_r_ and old_dr_ for moves may get accepted.
+        if (prepare_old) {
+            // recompute from scratch
+            DTD_BConds<RealType, D, SC>::computeDistances(P.R[iat],
+                P.getCoordinates().getAllParticlePos(), this->old_r_.data(),
+                this->old_dr_, 0, this->num_targets_, iat);
+            this->old_r_[iat] =
+                std::numeric_limits<RealType>::max(); // assign a big number
+        }
+    }
+
+    int
+    get_first_neighbor(
+        IndexType iat, RealType& r, PosType& dr, bool newpos) const override
+    {
+        // ensure there are neighbors
+        assert(this->num_targets_ > 1);
+        RealType min_dist = std::numeric_limits<RealType>::max();
+        int index = -1;
+        if (newpos) {
+            for (int jat = 0; jat < this->num_targets_; ++jat)
+                if (this->temp_r_[jat] < min_dist && jat != iat) {
+                    min_dist = this->temp_r_[jat];
+                    index = jat;
+                }
+            assert(index >= 0);
+            dr = this->temp_dr_[index];
+        }
+        else {
+            for (int jat = 0; jat < iat; ++jat)
+                if (this->distances_[iat][jat] < min_dist) {
+                    min_dist = this->distances_[iat][jat];
+                    index = jat;
+                }
+            for (int jat = iat + 1; jat < this->num_targets_; ++jat)
+                if (this->distances_[jat][iat] < min_dist) {
+                    min_dist = this->distances_[jat][iat];
+                    index = jat;
+                }
+            assert(index != iat && index >= 0);
+            if (index < iat)
+                dr = this->displacements_[iat][index];
+            else
+                dr = this->displacements_[index][iat];
+        }
+        r = min_dist;
+        return index;
+    }
+
+    /** After accepting the iat-th particle, update the iat-th row of distances_
+     * and displacements_. Upper triangle is not needed in the later computation
+     * and thus not updated
+     */
+    inline void
+    update(IndexType iat) override
+    {
+        ScopedTimer local_timer(update_timer_);
+        // update [0, iat)
+        const int nupdate = iat;
+        // copy row
+        assert(nupdate <= this->temp_r_.size());
+        std::copy_n(
+            this->temp_r_.data(), nupdate, this->distances_[iat].data());
+        for (int idim = 0; idim < D; ++idim)
+            std::copy_n(this->temp_dr_.data(idim), nupdate,
+                this->displacements_[iat].data(idim));
+        // copy column
+        for (size_t i = iat + 1; i < this->num_targets_; ++i) {
+            this->distances_[i][iat] = this->temp_r_[i];
+            this->displacements_[i](iat) = -this->temp_dr_[i];
+        }
+    }
+
+    void
+    updatePartial(IndexType jat, bool from_temp) override
+    {
+        ScopedTimer local_timer(update_timer_);
+        // update [0, jat)
+        const int nupdate = jat;
+        if (from_temp) {
+            // copy row
+            assert(nupdate <= this->temp_r_.size());
+            std::copy_n(
+                this->temp_r_.data(), nupdate, this->distances_[jat].data());
+            for (int idim = 0; idim < D; ++idim)
+                std::copy_n(this->temp_dr_.data(idim), nupdate,
+                    this->displacements_[jat].data(idim));
+        }
+        else {
+            assert(old_prepared_elec_id_ == jat);
+            // copy row
+            assert(nupdate <= this->old_r_.size());
+            std::copy_n(
+                this->old_r_.data(), nupdate, this->distances_[jat].data());
+            for (int idim = 0; idim < D; ++idim)
+                std::copy_n(this->old_dr_.data(idim), nupdate,
+                    this->displacements_[jat].data(idim));
+        }
+    }
+
+private:
+    /// number of targets with padding
+    const size_t num_targets_padded_;
+#if !defined(NDEBUG)
+    /** set to particle id after move() with prepare_old = true. -1 means not
+     * prepared. It is intended only for safety checks, not for codepath
+     * selection.
+     */
+    int old_prepared_elec_id_;
+#endif
+    /// timer for evaluate()
+    NewTimer& evaluate_timer_;
+    /// timer for move()
+    NewTimer& move_timer_;
+    /// timer for update()
+    NewTimer& update_timer_;
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/Particle/SoaDistanceTableAATOMPTarget.h b/src/Particle/SoaDistanceTableAATOMPTarget.h
new file mode 100644
index 0000000000..e9a453fcd3
--- /dev/null
+++ b/src/Particle/SoaDistanceTableAATOMPTarget.h
@@ -0,0 +1,624 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2021 QMCPACK developers.
+//
+// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//                    Amrita Mathuriya, amrita.mathuriya@intel.com, Intel Corp.
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//
+// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+// -*- C++ -*-
+#ifndef QMCPLUSPLUS_DTDIMPL_AAT_OMPTARGET_H
+#define QMCPLUSPLUS_DTDIMPL_AAT_OMPTARGET_H
+
+#include "CPU/SIMD/algorithm.hpp"
+#include "DistanceTableT.h"
+#include "Lattice/ParticleBConds3DSoa.h"
+#include "OMPTarget/OMPTargetMath.hpp"
+#include "OMPTarget/OMPallocator.hpp"
+#include "Particle/RealSpacePositionsTOMPTarget.h"
+#include "Platforms/PinnedAllocator.h"
+#include "ResourceCollection.h"
+
+namespace qmcplusplus
+{
+/**@ingroup nnlist
+ * @brief A class derived from DistanceTableAAT, specialized for the dense case
+ */
+template <typename T, unsigned D, int SC>
+struct SoaDistanceTableAATOMPTarget :
+    public DTD_BConds<typename ParticleSetTraits<T>::RealType, D, SC>,
+    public DistanceTableAAT<T>
+{
+    using RealType = typename DistanceTableAAT<T>::RealType;
+    using PosType = typename DistanceTableAAT<T>::PosType;
+    using IndexType = typename DistanceTableAAT<T>::IndexType;
+    using DistRow = typename DistanceTableAAT<T>::DistRow;
+    using DisplRow = typename DistanceTableAAT<T>::DisplRow;
+
+    /// actual memory for dist and displacements_
+    aligned_vector<RealType> memory_pool_;
+
+    /// actual memory for temp_r_
+    DistRow temp_r_mem_;
+    /// actual memory for temp_dr_
+    DisplRow temp_dr_mem_;
+    /// actual memory for old_r_
+    DistRow old_r_mem_;
+    /// actual memory for old_dr_
+    DisplRow old_dr_mem_;
+
+    /// multi walker shared memory buffer
+    struct DTAAMultiWalkerMem : public Resource
+    {
+        /// dist displ for temporary and old pairs
+        Vector<RealType,
+            OMPallocator<RealType, PinnedAlignedAllocator<RealType>>>
+            mw_new_old_dist_displ;
+
+        /** distances from a range of indices to the source.
+         * for original particle index i (row) and source particle id j (col)
+         * j < i,  the element data is dist(r_i - r_j)
+         * j > i,  the element data is dist(r_(n - 1 - i) - r_(n - 1 - j))
+         */
+        Vector<RealType,
+            OMPallocator<RealType, PinnedAlignedAllocator<RealType>>>
+            mw_distances_subset;
+
+        DTAAMultiWalkerMem() : Resource("DTAAMultiWalkerMem")
+        {
+        }
+
+        DTAAMultiWalkerMem(const DTAAMultiWalkerMem&) : DTAAMultiWalkerMem()
+        {
+        }
+
+        std::unique_ptr<Resource>
+        makeClone() const override
+        {
+            return std::make_unique<DTAAMultiWalkerMem>(*this);
+        }
+    };
+
+    ResourceHandle<DTAAMultiWalkerMem> mw_mem_handle_;
+
+    /// Build the AA table of target; the padded row length is sized for RealType.
+    SoaDistanceTableAATOMPTarget(ParticleSetT<T>& target) :
+        DTD_BConds<RealType, D, SC>(target.getLattice()),
+        DistanceTableAAT<T>(target, DTModes::ALL_OFF),
+        num_targets_padded_(getAlignedSize<RealType>(this->num_targets_)),
+#if !defined(NDEBUG)
+        old_prepared_elec_id_(-1),
+#endif
+        offload_timer_(createGlobalTimer(
+            std::string("DTAAOMPTarget::offload_") + this->name_,
+            timer_level_fine)),
+        evaluate_timer_(createGlobalTimer(
+            std::string("DTAAOMPTarget::evaluate_") + this->name_,
+            timer_level_fine)),
+        move_timer_(
+            createGlobalTimer(std::string("DTAAOMPTarget::move_") + this->name_,
+                timer_level_fine)),
+        update_timer_(createGlobalTimer(
+            std::string("DTAAOMPTarget::update_") + this->name_,
+            timer_level_fine))
+    {
+        auto* coordinates_soa =
+            dynamic_cast<const RealSpacePositionsTOMPTarget<T>*>(
+                &target.getCoordinates());
+        if (!coordinates_soa)
+            throw std::runtime_error("Source particle set doesn't have OpenMP "
+                                     "offload. Contact developers!");
+        resize();
+        PRAGMA_OFFLOAD("omp target enter data map(to : this[:1])")
+    }
+
+    SoaDistanceTableAATOMPTarget() = delete;
+    SoaDistanceTableAATOMPTarget(const SoaDistanceTableAATOMPTarget&) = delete;
+    ~SoaDistanceTableAATOMPTarget(){
+        PRAGMA_OFFLOAD("omp target exit data map(delete : this[:1])")}
+
+    /// Offset of row N in memory_pool_; padding uses RealType, the pool's type.
+    size_t compute_size(int N) const
+    {
+        const size_t num_padded = getAlignedSize<RealType>(N);
+        const size_t Alignment = getAlignment<RealType>();
+        return (num_padded * (2 * N - num_padded + 1) +
+                   (Alignment - 1) * num_padded) / 2;
+    }
+
+    void
+    resize()
+    {
+        // initialize memory containers and views
+        const size_t total_size = compute_size(this->num_targets_);
+        memory_pool_.resize(total_size * (1 + D));
+        this->distances_.resize(this->num_targets_);
+        this->displacements_.resize(this->num_targets_);
+        for (int i = 0; i < this->num_targets_; ++i) {
+            this->distances_[i].attachReference(
+                memory_pool_.data() + compute_size(i), i);
+            this->displacements_[i].attachReference(i, total_size,
+                memory_pool_.data() + total_size + compute_size(i));
+        }
+
+        old_r_mem_.resize(this->num_targets_);
+        old_dr_mem_.resize(this->num_targets_);
+        temp_r_mem_.resize(this->num_targets_);
+        temp_dr_mem_.resize(this->num_targets_);
+    }
+
+    const RealType*
+    getMultiWalkerTempDataPtr() const override
+    {
+        return mw_mem_handle_.getResource().mw_new_old_dist_displ.data();
+    }
+
+    /// Add a fresh DTAAMultiWalkerMem to `collection`; its index is not kept.
+    void
+    createResource(ResourceCollection& collection) const override
+    {
+        collection.addResource(std::make_unique<DTAAMultiWalkerMem>());
+    }
+
+    void
+    acquireResource(ResourceCollection& collection,
+        const RefVectorWithLeader<DistanceTableT<T>>& dt_list) const override
+    {
+        assert(this == &dt_list.getLeader());
+        auto& dt_leader =
+            dt_list.template getCastedLeader<SoaDistanceTableAATOMPTarget>();
+        dt_leader.mw_mem_handle_ =
+            collection.lendResource<DTAAMultiWalkerMem>();
+        const size_t nw = dt_list.size();
+        const size_t stride_size = num_targets_padded_ * (D + 1);
+
+        for (int iw = 0; iw < nw; iw++) {
+            auto& dt =
+                dt_list.template getCastedElement<SoaDistanceTableAATOMPTarget>(
+                    iw);
+            dt.temp_r_.free();
+            dt.temp_dr_.free();
+            dt.old_r_.free();
+            dt.old_dr_.free();
+        }
+
+        auto& mw_new_old_dist_displ =
+            dt_leader.mw_mem_handle_.getResource().mw_new_old_dist_displ;
+        mw_new_old_dist_displ.resize(nw * 2 * stride_size);
+        for (int iw = 0; iw < nw; iw++) {
+            auto& dt =
+                dt_list.template getCastedElement<SoaDistanceTableAATOMPTarget>(
+                    iw);
+            dt.temp_r_.attachReference(
+                mw_new_old_dist_displ.data() + stride_size * iw,
+                num_targets_padded_);
+            dt.temp_dr_.attachReference(this->num_targets_, num_targets_padded_,
+                mw_new_old_dist_displ.data() + stride_size * iw +
+                    num_targets_padded_);
+            dt.old_r_.attachReference(
+                mw_new_old_dist_displ.data() + stride_size * (iw + nw),
+                num_targets_padded_);
+            dt.old_dr_.attachReference(this->num_targets_, num_targets_padded_,
+                mw_new_old_dist_displ.data() + stride_size * (iw + nw) +
+                    num_targets_padded_);
+        }
+    }
+
+    /// Hand the leader's multi-walker buffer back to `collection` and call
+    /// free() on the temp/old rows of every table in dt_list.
+    void
+    releaseResource(ResourceCollection& collection,
+        const RefVectorWithLeader<DistanceTableT<T>>& dt_list) const override
+    {
+        using Self = SoaDistanceTableAATOMPTarget;
+        auto& leader = dt_list.template getCastedLeader<Self>();
+        collection.takebackResource(leader.mw_mem_handle_);
+        const size_t num_walkers = dt_list.size();
+        for (size_t iw = 0; iw < num_walkers; ++iw) {
+            auto& walker_dt = dt_list.template getCastedElement<Self>(iw);
+            walker_dt.temp_r_.free();
+            walker_dt.temp_dr_.free();
+            walker_dt.old_r_.free();
+            walker_dt.old_dr_.free();
+        }
+    }
+
+    /// Recompute rows 1..num_targets_-1 of distances_/displacements_ from P.
+    inline void
+    evaluate(ParticleSetT<T>& P) override
+    {
+        ScopedTimer local_timer(evaluate_timer_);
+
+        for (int iat = 1; iat < this->num_targets_; ++iat)
+            DTD_BConds<RealType, D, SC>::computeDistances(P.R[iat],
+                P.getCoordinates().getAllParticlePos(),
+                this->distances_[iat].data(), this->displacements_[iat], 0, iat,
+                iat);
+    }
+
+    /** compute distances from particles in [range_begin, range_end) to all the
+     * particles. Although [range_begin, range_end) can be any range within [0,
+     * num_sources), it is only necessary to compute half of the table due to
+     * the symmetry of AA table. See note of the output data object
+     * mw_distances_subset. To keep resident memory minimal on the device,
+     * range_end - range_begin <= num_particls_stored is required.
+     */
+    const RealType*
+    mw_evalDistsInRange(const RefVectorWithLeader<DistanceTableT<T>>& dt_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& p_list, size_t range_begin,
+        size_t range_end) const override
+    {
+        auto& dt_leader =
+            dt_list.template getCastedLeader<SoaDistanceTableAATOMPTarget>();
+        const size_t subset_size = range_end - range_begin;
+        if (subset_size > dt_leader.num_particls_stored)
+            throw std::runtime_error("not enough internal buffer");
+
+        ScopedTimer local_timer(dt_leader.evaluate_timer_);
+
+        DTAAMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_;
+        auto& pset_leader = p_list.getLeader();
+
+        const size_t nw = dt_list.size();
+        const auto num_sources_local = dt_leader.num_targets_;
+        const auto num_padded = dt_leader.num_targets_padded_;
+        mw_mem.mw_distances_subset.resize(nw * subset_size * num_padded);
+
+        const int ChunkSizePerTeam = 512;
+        const size_t num_teams =
+            (num_sources_local + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
+
+        auto& coordinates_leader =
+            static_cast<const RealSpacePositionsTOMPTarget<T>&>(
+                pset_leader.getCoordinates());
+
+        auto* rsoa_dev_list_ptr =
+            coordinates_leader.getMultiWalkerRSoADevicePtrs().data();
+        auto* dist_ranged = mw_mem.mw_distances_subset.data();
+        {
+            ScopedTimer offload(dt_leader.offload_timer_);
+            PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \
+                           num_teams(nw * num_teams)")
+            for (int iw = 0; iw < nw; ++iw)
+                for (int team_id = 0; team_id < num_teams; team_id++) {
+                    auto* source_pos_ptr = rsoa_dev_list_ptr[iw];
+                    const size_t first = ChunkSizePerTeam * team_id;
+                    const size_t last = omptarget::min(
+                        first + ChunkSizePerTeam, num_sources_local);
+
+                    PRAGMA_OFFLOAD("omp parallel for")
+                    for (int iel = first; iel < last; iel++) {
+                        for (int irow = 0; irow < subset_size; irow++) {
+                            RealType* dist = dist_ranged +
+                                (irow + subset_size * iw) * num_padded;
+                            size_t id_target = irow + range_begin;
+
+                            RealType dx, dy, dz;
+                            if (id_target < iel) {
+                                dx = source_pos_ptr[id_target] -
+                                    source_pos_ptr[iel];
+                                dy = source_pos_ptr[id_target + num_padded] -
+                                    source_pos_ptr[iel + num_padded];
+                                dz =
+                                    source_pos_ptr[id_target + num_padded * 2] -
+                                    source_pos_ptr[iel + num_padded * 2];
+                            }
+                            else {
+                                const size_t id_target_reverse =
+                                    num_sources_local - 1 - id_target;
+                                const size_t iel_reverse =
+                                    num_sources_local - 1 - iel;
+                                dx = source_pos_ptr[id_target_reverse] -
+                                    source_pos_ptr[iel_reverse];
+                                dy = source_pos_ptr[id_target_reverse +
+                                         num_padded] -
+                                    source_pos_ptr[iel_reverse + num_padded];
+                                dz = source_pos_ptr[id_target_reverse +
+                                         num_padded * 2] -
+                                    source_pos_ptr[iel_reverse +
+                                        num_padded * 2];
+                            }
+
+                            dist[iel] =
+                                DTD_BConds<RealType, D, SC>::computeDist(
+                                    dx, dy, dz);
+                        }
+                    }
+                }
+        }
+        return mw_mem.mw_distances_subset.data();
+    }
+
+    /// evaluate the temporary pair relations
+    inline void
+    move(const ParticleSetT<T>& P, const PosType& rnew, const IndexType iat,
+        bool prepare_old) override
+    {
+        ScopedTimer local_timer(move_timer_);
+
+#if !defined(NDEBUG)
+        old_prepared_elec_id_ = prepare_old ? iat : -1;
+#endif
+        this->temp_r_.attachReference(temp_r_mem_.data(), temp_r_mem_.size());
+        this->temp_dr_.attachReference(
+            temp_dr_mem_.size(), temp_dr_mem_.capacity(), temp_dr_mem_.data());
+
+        assert((prepare_old && iat >= 0 && iat < this->num_targets_) ||
+            !prepare_old);
+        DTD_BConds<RealType, D, SC>::computeDistances(rnew,
+            P.getCoordinates().getAllParticlePos(), this->temp_r_.data(),
+            this->temp_dr_, 0, this->num_targets_, iat);
+        // set up old_r_ and old_dr_ for moves may get accepted.
+        if (prepare_old) {
+            this->old_r_.attachReference(old_r_mem_.data(), old_r_mem_.size());
+            this->old_dr_.attachReference(
+                old_dr_mem_.size(), old_dr_mem_.capacity(), old_dr_mem_.data());
+            // recompute from scratch
+            DTD_BConds<RealType, D, SC>::computeDistances(P.R[iat],
+                P.getCoordinates().getAllParticlePos(), this->old_r_.data(),
+                this->old_dr_, 0, this->num_targets_, iat);
+            this->old_r_[iat] =
+                std::numeric_limits<RealType>::max(); // assign a big number
+        }
+    }
+
+    /** evaluate the temporary pair relations when a move is proposed
+     * this implementation is asynchronous and the synchronization is managed at
+     * ParticleSet. Transferring results to host depends on
+     * DTModes::NEED_TEMP_DATA_ON_HOST. If the temporary pair distance are
+     * consumed on the device directly, the device to host data transfer can be
+     * skipped as an optimization.
+     */
+    void
+    mw_move(const RefVectorWithLeader<DistanceTableT<T>>& dt_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& p_list,
+        const std::vector<PosType>& rnew_list, const IndexType iat,
+        bool prepare_old = true) const override
+    {
+        assert(this == &dt_list.getLeader());
+        auto& dt_leader =
+            dt_list.template getCastedLeader<SoaDistanceTableAATOMPTarget>();
+        DTAAMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_;
+        auto& pset_leader = p_list.getLeader();
+
+        ScopedTimer local_timer(move_timer_);
+        const size_t nw = dt_list.size();
+        const size_t stride_size = num_targets_padded_ * (D + 1);
+
+        auto& mw_new_old_dist_displ = mw_mem.mw_new_old_dist_displ;
+
+        for (int iw = 0; iw < nw; iw++) {
+            auto& dt =
+                dt_list.template getCastedElement<SoaDistanceTableAATOMPTarget>(
+                    iw);
+#if !defined(NDEBUG)
+            dt.old_prepared_elec_id_ = prepare_old ? iat : -1;
+#endif
+            auto& coordinates_soa =
+                static_cast<const RealSpacePositionsTOMPTarget<T>&>(
+                    p_list[iw].getCoordinates());
+        }
+
+        const int ChunkSizePerTeam = 512;
+        const size_t num_teams =
+            (this->num_targets_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
+
+        auto& coordinates_leader =
+            static_cast<const RealSpacePositionsTOMPTarget<T>&>(
+                pset_leader.getCoordinates());
+
+        const auto num_sources_local = this->num_targets_;
+        const auto num_padded = num_targets_padded_;
+        auto* rsoa_dev_list_ptr =
+            coordinates_leader.getMultiWalkerRSoADevicePtrs().data();
+        auto* r_dr_ptr = mw_new_old_dist_displ.data();
+        auto* new_pos_ptr = coordinates_leader.getFusedNewPosBuffer().data();
+        const size_t new_pos_stride =
+            coordinates_leader.getFusedNewPosBuffer().capacity();
+
+        {
+            ScopedTimer offload(offload_timer_);
+            PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \
+                num_teams(nw * num_teams) nowait \
+                depend(out: r_dr_ptr[:mw_new_old_dist_displ.size()])")
+            for (int iw = 0; iw < nw; ++iw)
+                for (int team_id = 0; team_id < num_teams; team_id++) {
+                    auto* source_pos_ptr = rsoa_dev_list_ptr[iw];
+                    const size_t first = ChunkSizePerTeam * team_id;
+                    const size_t last = omptarget::min(
+                        first + ChunkSizePerTeam, num_sources_local);
+
+                    { // temp
+                        auto* r_iw_ptr = r_dr_ptr + iw * stride_size;
+                        auto* dr_iw_ptr =
+                            r_dr_ptr + iw * stride_size + num_padded;
+
+                        RealType pos[D];
+                        for (int idim = 0; idim < D; idim++)
+                            pos[idim] = new_pos_ptr[idim * new_pos_stride + iw];
+
+                        PRAGMA_OFFLOAD("omp parallel for")
+                        for (int iel = first; iel < last; iel++)
+                            DTD_BConds<RealType, D,
+                                SC>::computeDistancesOffload(pos,
+                                source_pos_ptr, num_padded, r_iw_ptr, dr_iw_ptr,
+                                num_padded, iel, iat);
+                    }
+
+                    if (prepare_old) { // old
+                        auto* r_iw_ptr = r_dr_ptr + (iw + nw) * stride_size;
+                        auto* dr_iw_ptr =
+                            r_dr_ptr + (iw + nw) * stride_size + num_padded;
+
+                        RealType pos[D];
+                        for (int idim = 0; idim < D; idim++)
+                            pos[idim] = source_pos_ptr[idim * num_padded + iat];
+
+                        PRAGMA_OFFLOAD("omp parallel for")
+                        for (int iel = first; iel < last; iel++)
+                            DTD_BConds<RealType, D,
+                                SC>::computeDistancesOffload(pos,
+                                source_pos_ptr, num_padded, r_iw_ptr, dr_iw_ptr,
+                                num_padded, iel, iat);
+                        r_iw_ptr[iat] =
+                            std::numeric_limits<RealType>::max(); // assign a
+                                                                  // big number
+                    }
+                }
+        }
+
+        if (this->modes_ & DTModes::NEED_TEMP_DATA_ON_HOST) {
+            PRAGMA_OFFLOAD("omp target update nowait \
+                depend(inout: r_dr_ptr[:mw_new_old_dist_displ.size()]) \
+                      from(r_dr_ptr[:mw_new_old_dist_displ.size()])")
+        }
+    }
+
+    int
+    get_first_neighbor(
+        IndexType iat, RealType& r, PosType& dr, bool newpos) const override
+    {
+        // a nearest neighbor only exists if there is a second particle
+        assert(this->num_targets_ > 1);
+        RealType nearest = std::numeric_limits<RealType>::max();
+        int winner = -1;
+        if (!newpos) {
+            // stored table: pair (iat, jat) lives in the row of the larger index
+            for (int jat = 0; jat < iat; ++jat)
+                if (this->distances_[iat][jat] < nearest) {
+                    nearest = this->distances_[iat][jat];
+                    winner = jat;
+                }
+            for (int jat = iat + 1; jat < this->num_targets_; ++jat)
+                if (this->distances_[jat][iat] < nearest) {
+                    nearest = this->distances_[jat][iat];
+                    winner = jat;
+                }
+            assert(winner != iat && winner >= 0);
+            dr = (winner < iat) ? this->displacements_[iat][winner]
+                                : this->displacements_[winner][iat];
+        }
+        else {
+            // proposed position: scan the temporary row, skipping iat itself
+            for (int jat = 0; jat < this->num_targets_; ++jat)
+                if (jat != iat && this->temp_r_[jat] < nearest) {
+                    nearest = this->temp_r_[jat];
+                    winner = jat;
+                }
+            assert(winner >= 0);
+            dr = this->temp_dr_[winner];
+        }
+        r = nearest;
+        return winner;
+    }
+
+    /** Commit the accepted move of the iat-th particle: the leading iat entries
+     * of temp_r_/temp_dr_ become row iat of distances_/displacements_, and
+     * column iat of every row i > iat is refreshed (displacement negated).
+     */
+    inline void
+    update(IndexType iat) override
+    {
+        ScopedTimer local_timer(update_timer_);
+        // row iat only stores its leading [0, iat) entries
+        const int row_len = iat;
+        assert(row_len <= this->temp_r_.size());
+        // column iat: entries [i][iat] of the rows below iat
+        for (size_t row = iat + 1; row < this->num_targets_; ++row) {
+            this->distances_[row][iat] = this->temp_r_[row];
+            this->displacements_[row](iat) = -this->temp_dr_[row];
+        }
+        // row iat: distances, then displacements one dimension at a time
+        std::copy_n(
+            this->temp_r_.data(), row_len, this->distances_[iat].data());
+        for (int idim = 0; idim < D; ++idim)
+            std::copy_n(this->temp_dr_.data(idim), row_len,
+                this->displacements_[iat].data(idim));
+    }
+
+    void
+    updatePartial(IndexType jat, bool from_temp) override
+    {
+        ScopedTimer local_timer(update_timer_);
+
+        // row jat only stores its leading [0, jat) entries
+        const int row_len = jat;
+        if (!from_temp) {
+            assert(old_prepared_elec_id_ == jat);
+            // copy row jat from old_r_/old_dr_
+            assert(row_len <= this->old_r_.size());
+            std::copy_n(
+                this->old_r_.data(), row_len, this->distances_[jat].data());
+            for (int idim = 0; idim < D; ++idim)
+                std::copy_n(this->old_dr_.data(idim), row_len,
+                    this->displacements_[jat].data(idim));
+        }
+        else {
+            // copy row jat from temp_r_/temp_dr_
+            assert(row_len <= this->temp_r_.size());
+            std::copy_n(
+                this->temp_r_.data(), row_len, this->distances_[jat].data());
+            for (int idim = 0; idim < D; ++idim)
+                std::copy_n(this->temp_dr_.data(idim), row_len,
+                    this->displacements_[jat].data(idim));
+        }
+    }
+
+    void
+    mw_updatePartial(const RefVectorWithLeader<DistanceTableT<T>>& dt_list,
+        IndexType jat, const std::vector<bool>& from_temp) override
+    {
+        // The per-walker host tables only need patching when mw_move keeps
+        // the temporary data on the host during p-by-p moves; without that
+        // mode there is nothing to update.
+        if (this->modes_ & DTModes::NEED_TEMP_DATA_ON_HOST) {
+            for (int iw = 0; iw < dt_list.size(); ++iw)
+                dt_list[iw].updatePartial(jat, from_temp[iw]);
+        }
+    }
+
+    void
+    mw_finalizePbyP(const RefVectorWithLeader<DistanceTableT<T>>& dt_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& p_list) const override
+    {
+        // tables kept current on the host by mw_move need no further work
+        if (this->modes_ & DTModes::NEED_TEMP_DATA_ON_HOST)
+            return;
+        // otherwise recompute the whole table, but only when it was requested
+        if (this->modes_ & DTModes::NEED_FULL_TABLE_ON_HOST_AFTER_DONEPBYP)
+            this->mw_evaluate(dt_list, p_list);
+    }
+
+    size_t
+    get_num_particls_stored() const override
+    {
+        return this->num_particls_stored; // compile-time constant member
+    }
+
+private:
+    /// num_targets_ with padding; also the per-dimension stride of the buffers
+    const size_t num_targets_padded_;
+#if !defined(NDEBUG)
+    /** set to particle id after move() with prepare_old = true. -1 means not
+     * prepared. It is intended only for safety checks, not for codepath
+     * selection. Only declared in debug builds (NDEBUG not defined).
+     */
+    int old_prepared_elec_id_;
+#endif
+    /// timer for offload portion
+    NewTimer& offload_timer_;
+    /// timer for evaluate()
+    NewTimer& evaluate_timer_;
+    /// timer for move()
+    NewTimer& move_timer_;
+    /// timer for update() and updatePartial()
+    NewTimer& update_timer_;
+    /// value returned by get_num_particls_stored(); fixed at 64
+    const size_t num_particls_stored = 64;
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/Particle/SoaDistanceTableABT.h b/src/Particle/SoaDistanceTableABT.h
new file mode 100644
index 0000000000..e2eb2709bf
--- /dev/null
+++ b/src/Particle/SoaDistanceTableABT.h
@@ -0,0 +1,170 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//                    Amrita Mathuriya, amrita.mathuriya@intel.com, Intel Corp.
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+// -*- C++ -*-
+#ifndef QMCPLUSPLUS_DTDIMPL_ABT_H
+#define QMCPLUSPLUS_DTDIMPL_ABT_H
+
+#include "Concurrency/OpenMP.h"
+#include "Lattice/ParticleBConds3DSoa.h"
+#include "Particle/DistanceTableT.h"
+#include "Utilities/FairDivide.h"
+
+namespace qmcplusplus
+{
+/**@ingroup nnlist
+ * @brief A class derived from DistanceTableABT, specialized for AB using a
+ * transposed form
+ */
+template <typename T, unsigned D, int SC>
+struct SoaDistanceTableABT :
+    public DTD_BConds<typename ParticleSetTraits<T>::RealType, D, SC>,
+    public DistanceTableABT<T>
+{
+    using RealType = typename DistanceTableABT<T>::RealType;
+    using PosType = typename DistanceTableABT<T>::PosType;
+    using IndexType = typename DistanceTableABT<T>::IndexType;
+
+    SoaDistanceTableABT(
+        const ParticleSetT<T>& source, ParticleSetT<T>& target) :
+        DTD_BConds<RealType, D, SC>(source.getLattice()),
+        DistanceTableABT<T>(source, target, DTModes::ALL_OFF),
+        evaluate_timer_(createGlobalTimer(std::string("DTAB::evaluate_") +
+                target.getName() + "_" + source.getName(),
+            timer_level_fine)),
+        move_timer_(createGlobalTimer(std::string("DTAB::move_") +
+                target.getName() + "_" + source.getName(),
+            timer_level_fine)),
+        update_timer_(createGlobalTimer(std::string("DTAB::update_") +
+                target.getName() + "_" + source.getName(),
+            timer_level_fine))
+    { // timers are named "DTAB::<phase>_<target name>_<source name>"
+        resize(); // allocate table rows and temporaries (no-op if empty)
+    }
+
+    void
+    resize()
+    {
+        // nothing to allocate when either particle set is empty
+        if (this->num_sources_ * this->num_targets_ == 0)
+            return;
+
+        // every target owns one padded stripe over all sources
+        const int padded_len = getAlignedSize<RealType>(this->num_sources_);
+        this->distances_.resize(this->num_targets_);
+        this->displacements_.resize(this->num_targets_);
+        for (int itgt = 0; itgt < this->num_targets_; ++itgt) {
+            this->distances_[itgt].resize(padded_len);
+            this->displacements_[itgt].resize(padded_len);
+        }
+
+        // update() copies out of the temporaries, so they need padding too:
+        // temp_r_ is padded explicitly, temp_dr_ is padded internally
+        this->temp_r_.resize(padded_len);
+        this->temp_dr_.resize(this->num_sources_);
+    }
+
+    SoaDistanceTableABT() = delete;
+    SoaDistanceTableABT(const SoaDistanceTableABT&) = delete;
+
+    /** recompute every target/source pair of the table from P.R */
+    inline void
+    evaluate(ParticleSetT<T>& P) override
+    {
+        ScopedTimer local_timer(evaluate_timer_);
+#pragma omp parallel
+        {
+            // FairDivideAligned hands this thread its (lo, hi) share of sources
+            int lo, hi;
+            FairDivideAligned(this->num_sources_, getAlignment<RealType>(),
+                omp_get_num_threads(), omp_get_thread_num(), lo, hi);
+            // be aware of the sign of Displacement
+            for (int itgt = 0; itgt < this->num_targets_; ++itgt)
+                DTD_BConds<RealType, D, SC>::computeDistances(P.R[itgt],
+                    this->origin_.getCoordinates().getAllParticlePos(),
+                    this->distances_[itgt].data(), this->displacements_[itgt],
+                    lo, hi);
+        }
+    }
+
+    /// fill temp_r_/temp_dr_ for the proposed position rnew of particle iat
+    inline void
+    move(const ParticleSetT<T>& P, const PosType& rnew, const IndexType iat,
+        bool prepare_old) override
+    {
+        ScopedTimer local_timer(move_timer_);
+        const auto& source_pos =
+            this->origin_.getCoordinates().getAllParticlePos();
+        DTD_BConds<RealType, D, SC>::computeDistances(rnew, source_pos,
+            this->temp_r_.data(), this->temp_dr_, 0, this->num_sources_);
+        // When the full table is not kept ready at all times, row iat is
+        // refreshed from the current position P.R[iat]; otherwise its values
+        // could be undefined after a rejected move.
+        if (prepare_old && !(this->modes_ & DTModes::NEED_FULL_TABLE_ANYTIME))
+            DTD_BConds<RealType, D, SC>::computeDistances(P.R[iat], source_pos,
+                this->distances_[iat].data(), this->displacements_[iat], 0,
+                this->num_sources_);
+    }
+
+    /// copy temp_r_/temp_dr_ into the stripe of the iat-th particle
+    inline void
+    update(IndexType iat) override
+    {
+        ScopedTimer local_timer(update_timer_);
+        for (int idim = 0; idim < D; ++idim)
+            std::copy_n(this->temp_dr_.data(idim), this->num_sources_,
+                this->displacements_[iat].data(idim));
+        std::copy_n(this->temp_r_.data(), this->num_sources_,
+            this->distances_[iat].data());
+    }
+
+    /** Find the source closest to target particle iat.
+     * @param newpos search temp_r_ (proposed position) instead of row iat
+     * @param r,dr set to distance and displacement of the closest source
+     * @return index of the closest source
+     */
+    int
+    get_first_neighbor(
+        IndexType iat, RealType& r, PosType& dr, bool newpos) const override
+    {
+        RealType nearest = std::numeric_limits<RealType>::max();
+        int winner = -1;
+        // one pass over the sources; newpos only selects which row is read
+        for (int jat = 0; jat < this->num_sources_; ++jat) {
+            const RealType dist =
+                newpos ? this->temp_r_[jat] : this->distances_[iat][jat];
+            if (dist < nearest) {
+                nearest = dist;
+                winner = jat;
+            }
+        }
+        // r and dr are left untouched when no source qualified
+        if (winner >= 0) {
+            r = nearest;
+            if (newpos)
+                dr = this->temp_dr_[winner];
+            else
+                dr = this->displacements_[iat][winner];
+        }
+        assert(winner >= 0 && winner < this->num_sources_);
+        return winner;
+    }
+
+private:
+    /// timer for evaluate(), named "DTAB::evaluate_<target>_<source>"
+    NewTimer& evaluate_timer_;
+    /// timer for move(), named "DTAB::move_<target>_<source>"
+    NewTimer& move_timer_;
+    /// timer for update(), named "DTAB::update_<target>_<source>"
+    NewTimer& update_timer_;
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/Particle/SoaDistanceTableABTOMPTarget.h b/src/Particle/SoaDistanceTableABTOMPTarget.h
new file mode 100644
index 0000000000..452100cb25
--- /dev/null
+++ b/src/Particle/SoaDistanceTableABTOMPTarget.h
@@ -0,0 +1,513 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2021 QMCPACK developers.
+//
+// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//                    Amrita Mathuriya, amrita.mathuriya@intel.com, Intel Corp.
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//
+// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+// -*- C++ -*-
+#ifndef QMCPLUSPLUS_DTDIMPL_ABT_OMPTARGET_H
+#define QMCPLUSPLUS_DTDIMPL_ABT_OMPTARGET_H
+
+#include "DistanceTableT.h"
+#include "Lattice/ParticleBConds3DSoa.h"
+#include "OMPTarget/OMPTargetMath.hpp"
+#include "OMPTarget/OMPallocator.hpp"
+#include "Particle/RealSpacePositionsTOMPTarget.h"
+#include "Platforms/PinnedAllocator.h"
+#include "ResourceCollection.h"
+
+namespace qmcplusplus
+{
+/**@ingroup nnlist
+ * @brief A class derived from DistanceTableABT, specialized for AB using a
+ * transposed form and OpenMP target offload
+ */
+template <typename T, unsigned D, int SC>
+class SoaDistanceTableABTOMPTarget :
+    public DTD_BConds<typename ParticleSetTraits<T>::RealType, D, SC>,
+    public DistanceTableABT<T>
+{
+private:
+    template <typename DT>
+    using OffloadPinnedVector =
+        Vector<DT, OMPallocator<DT, PinnedAlignedAllocator<DT>>>;
+
+    using RealType = typename DistanceTableABT<T>::RealType;
+    using PosType = typename DistanceTableABT<T>::PosType;
+    using IndexType = typename DistanceTableABT<T>::IndexType;
+
+    /// single-walker accelerator output buffer for r and dr (see resize())
+    OffloadPinnedVector<RealType> r_dr_memorypool_;
+    /// accelerator input array for a list of target particle positions,
+    /// num_targets_ x D, filled by evaluate()
+    OffloadPinnedVector<RealType> target_pos;
+
+    /// multi walker shared memory buffer
+    struct DTABMultiWalkerMem : public Resource
+    {
+        /// accelerator output array for multiple walkers,
+        /// [targets of all walkers][1+D][num_padded] (distance, displacements)
+        OffloadPinnedVector<RealType> mw_r_dr;
+        /// byte buffer packing the inputs of mw_evaluate() for one transfer
+        OffloadPinnedVector<char> offload_input;
+
+        DTABMultiWalkerMem() : Resource("DTABMultiWalkerMem")
+        {
+        }
+
+        DTABMultiWalkerMem(const DTABMultiWalkerMem&) : DTABMultiWalkerMem()
+        { // delegates to the default constructor: buffers are not copied
+        }
+
+        std::unique_ptr<Resource>
+        makeClone() const override
+        {
+            return std::make_unique<DTABMultiWalkerMem>(*this);
+        }
+    };
+
+    ResourceHandle<DTABMultiWalkerMem> mw_mem_handle_;
+
+    void
+    resize()
+    {
+        // nothing to do for an empty table or when the rows already exist
+        const bool empty_table = this->num_sources_ * this->num_targets_ == 0;
+        if (empty_table || this->distances_.size())
+            return;
+
+        // one stride per target: num_padded distances, then the displacements
+        const size_t num_padded = getAlignedSize<RealType>(this->num_sources_);
+        const size_t stride_size = getPerTargetPctlStrideSize();
+        r_dr_memorypool_.resize(stride_size * this->num_targets_);
+
+        this->distances_.resize(this->num_targets_);
+        this->displacements_.resize(this->num_targets_);
+        for (int itgt = 0; itgt < this->num_targets_; ++itgt) {
+            auto* row_begin = r_dr_memorypool_.data() + itgt * stride_size;
+            this->distances_[itgt].attachReference(
+                row_begin, this->num_sources_);
+            this->displacements_[itgt].attachReference(
+                this->num_sources_, num_padded, row_begin + num_padded);
+        }
+    }
+
+    /** Re-point the rows of every walker's table at one shared buffer owned
+     * by the leader's multi-walker resource. Each walker's own pool is freed;
+     * targets of all walkers are laid out back to back, one stride each.
+     */
+    static void
+    associateResource(const RefVectorWithLeader<DistanceTableT<T>>& dt_list)
+    {
+        using ThisDT = SoaDistanceTableABTOMPTarget;
+        auto& leader = dt_list.template getCastedLeader<ThisDT>();
+
+        // release the per-walker pools and count the targets of all walkers
+        size_t total_targets = 0;
+        for (size_t iw = 0; iw < dt_list.size(); ++iw) {
+            auto& dt = dt_list.template getCastedElement<ThisDT>(iw);
+            total_targets += dt.targets();
+            dt.r_dr_memorypool_.free();
+        }
+
+        // one stride per target: padded distances plus D displacement rows
+        const size_t num_sources = leader.num_sources_;
+        const size_t num_padded =
+            getAlignedSize<RealType>(leader.num_sources_);
+        const size_t stride_size = num_padded * (D + 1);
+        auto& mw_r_dr = leader.mw_mem_handle_.getResource().mw_r_dr;
+        mw_r_dr.resize(total_targets * stride_size);
+
+        // first_target is the running offset of walker iw in the shared buffer
+        size_t first_target = 0;
+        for (size_t iw = 0; iw < dt_list.size(); ++iw) {
+            auto& dt = dt_list.template getCastedElement<ThisDT>(iw);
+            assert(num_sources == dt.num_sources_);
+
+            dt.distances_.resize(dt.targets());
+            dt.displacements_.resize(dt.targets());
+
+            for (int i = 0; i < dt.targets(); ++i) {
+                auto* row_begin =
+                    mw_r_dr.data() + (i + first_target) * stride_size;
+                dt.distances_[i].attachReference(row_begin, num_sources);
+                dt.displacements_[i].attachReference(
+                    num_sources, num_padded, row_begin + num_padded);
+            }
+            first_target += dt.targets();
+        }
+    }
+
+public:
+    SoaDistanceTableABTOMPTarget(
+        const ParticleSetT<T>& source, ParticleSetT<T>& target) :
+        DTD_BConds<RealType, D, SC>(source.getLattice()),
+        DistanceTableABT<T>(source, target, DTModes::ALL_OFF),
+        offload_timer_(createGlobalTimer(
+            std::string("DTABOMPTarget::offload_") + this->name_,
+            timer_level_fine)),
+        evaluate_timer_(createGlobalTimer(
+            std::string("DTABOMPTarget::evaluate_") + this->name_,
+            timer_level_fine)),
+        move_timer_(
+            createGlobalTimer(std::string("DTABOMPTarget::move_") + this->name_,
+                timer_level_fine)),
+        update_timer_(createGlobalTimer(
+            std::string("DTABOMPTarget::update_") + this->name_,
+            timer_level_fine))
+
+    {
+        auto* coordinates_soa =
+            dynamic_cast<const RealSpacePositionsTOMPTarget<T>*>(
+                &source.getCoordinates());
+        if (!coordinates_soa) // source coordinates are not the offload type
+            throw std::runtime_error("Source particle set doesn't have OpenMP "
+                                     "offload. Contact developers!");
+        PRAGMA_OFFLOAD("omp target enter data map(to : this[:1])")
+
+        // The padding of temp_r_ and temp_dr_ is necessary for the memory copy
+        // in the update function. temp_r_ is padded explicitly while temp_dr_
+        // is padded internally. The table rows are allocated later, on demand.
+        const int num_padded = getAlignedSize<RealType>(this->num_sources_);
+        this->temp_r_.resize(num_padded);
+        this->temp_dr_.resize(this->num_sources_);
+    }
+
+    SoaDistanceTableABTOMPTarget() = delete;
+    SoaDistanceTableABTOMPTarget(const SoaDistanceTableABTOMPTarget&) = delete;
+
+    ~SoaDistanceTableABTOMPTarget()
+    { // removes the device mapping of this object created in the constructor
+        PRAGMA_OFFLOAD("omp target exit data map(delete : this[:1])")
+    }
+
+    /// add a new DTABMultiWalkerMem to the collection; the index is not needed
+    void
+    createResource(ResourceCollection& collection) const override
+    {
+        collection.addResource(std::make_unique<DTABMultiWalkerMem>());
+    }
+
+    void
+    acquireResource(ResourceCollection& collection,
+        const RefVectorWithLeader<DistanceTableT<T>>& dt_list) const override
+    {
+        // the leader borrows the shared buffer from the collection, then the
+        // rows of all walkers are attached to it
+        dt_list.template getCastedLeader<SoaDistanceTableABTOMPTarget>()
+            .mw_mem_handle_ = collection.lendResource<DTABMultiWalkerMem>();
+        associateResource(dt_list);
+    }
+
+    void
+    releaseResource(ResourceCollection& collection,
+        const RefVectorWithLeader<DistanceTableT<T>>& dt_list) const override
+    {
+        using ThisDT = SoaDistanceTableABTOMPTarget;
+        // hand the shared buffer back to the collection ...
+        auto& leader = dt_list.template getCastedLeader<ThisDT>();
+        collection.takebackResource(leader.mw_mem_handle_);
+        // ... and drop every walker's rows, which were attached to it
+        for (size_t iw = 0; iw < dt_list.size(); ++iw) {
+            auto& dt = dt_list.template getCastedElement<ThisDT>(iw);
+            dt.distances_.clear();
+            dt.displacements_.clear();
+        }
+    }
+
+    const RealType*
+    getMultiWalkerDataPtr() const override
+    {
+        return this->mw_mem_handle_.getResource().mw_r_dr.data();
+    }
+
+    size_t
+    getPerTargetPctlStrideSize() const override
+    {
+        return (D + 1) * getAlignedSize<RealType>(this->num_sources_);
+    }
+
+    /** evaluate the full table on the device and map the result to the host */
+    inline void
+    evaluate(ParticleSetT<T>& P) override
+    {
+        resize();
+        if (!this->distances_.size())
+            return; // empty table: nothing allocated, distances_[0] is invalid
+        ScopedTimer local_timer(evaluate_timer_);
+        // be aware of the sign of Displacement
+        const int num_targets_local = this->num_targets_;
+        const int num_sources_local = this->num_sources_;
+        const int num_padded = getAlignedSize<RealType>(this->num_sources_);
+
+        // pack the target positions as [iat][idim] for the device
+        target_pos.resize(this->num_targets_ * D);
+        for (size_t iat = 0; iat < this->num_targets_; iat++)
+            for (size_t idim = 0; idim < D; idim++)
+                target_pos[iat * D + idim] = P.R[iat][idim];
+
+        auto* target_pos_ptr = target_pos.data();
+        auto* source_pos_ptr =
+            this->origin_.getCoordinates().getAllParticlePos().data();
+        auto* r_dr_ptr = this->distances_[0].data();
+        assert(this->distances_[0].data() + num_padded ==
+            this->displacements_[0].data());
+
+        // To maximize thread usage, the loop over sources is chunked. Each
+        // chunk is sent to an OpenMP offload thread team.
+        const int ChunkSizePerTeam = 512;
+        const size_t num_teams =
+            (this->num_sources_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
+        const size_t stride_size = getPerTargetPctlStrideSize();
+        {
+            ScopedTimer offload(offload_timer_);
+            PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \
+                num_teams(this->num_targets_*num_teams) \
+                map(to: source_pos_ptr[:num_padded*D]) \
+                map(always, to: target_pos_ptr[:this->num_targets_*D]) \
+                map(always, from: r_dr_ptr[:this->num_targets_*stride_size])")
+            for (int iat = 0; iat < num_targets_local; ++iat)
+                for (int team_id = 0; team_id < num_teams; team_id++) {
+                    const int first = ChunkSizePerTeam * team_id;
+                    const int last = omptarget::min(
+                        first + ChunkSizePerTeam, num_sources_local);
+
+                    RealType pos[D];
+                    for (int idim = 0; idim < D; idim++)
+                        pos[idim] = target_pos_ptr[iat * D + idim];
+
+                    auto* r_iat_ptr = r_dr_ptr + iat * stride_size;
+                    auto* dr_iat_ptr = r_iat_ptr + num_padded;
+
+                    PRAGMA_OFFLOAD("omp parallel for")
+                    for (int iel = first; iel < last; iel++)
+                        DTD_BConds<RealType, D, SC>::computeDistancesOffload(
+                            pos, source_pos_ptr, num_padded, r_iat_ptr,
+                            dr_iat_ptr, num_padded, iel);
+                }
+        }
+    }
+
+    inline void
+    mw_evaluate(const RefVectorWithLeader<DistanceTableT<T>>& dt_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& p_list) const override
+    {
+        assert(this == &dt_list.getLeader());
+        auto& dt_leader =
+            dt_list.template getCastedLeader<SoaDistanceTableABTOMPTarget>();
+
+        ScopedTimer local_timer(evaluate_timer_);
+
+        const size_t nw = dt_list.size();
+        DTABMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_;
+        auto& mw_r_dr = mw_mem.mw_r_dr;
+        // count the target particles of all walkers
+        size_t count_targets = 0;
+        for (ParticleSetT<T>& p : p_list)
+            count_targets += p.getTotalNum();
+        const size_t total_targets = count_targets;
+
+        const int num_padded = getAlignedSize<RealType>(this->num_sources_);
+        // debug only: rows must alias mw_r_dr as set by associateResource()
+#ifndef NDEBUG
+        const int stride_size = getPerTargetPctlStrideSize();
+        count_targets = 0;
+        for (size_t iw = 0; iw < dt_list.size(); iw++) {
+            auto& dt =
+                dt_list.template getCastedElement<SoaDistanceTableABTOMPTarget>(
+                    iw);
+
+            for (int i = 0; i < dt.targets(); ++i) {
+                assert(dt.distances_[i].data() ==
+                    mw_r_dr.data() + (i + count_targets) * stride_size);
+                assert(dt.displacements_[i].data() ==
+                    mw_r_dr.data() + (i + count_targets) * stride_size +
+                        num_padded);
+            }
+            count_targets += dt.targets();
+        }
+#endif
+
+        // Pack [nw source pointers | total_targets*D positions | total_targets
+        // walker ids] into one byte buffer: ugly, but a single H2D transfer
+        const size_t realtype_size = sizeof(RealType);
+        const size_t int_size = sizeof(int);
+        const size_t ptr_size = sizeof(RealType*);
+        auto& offload_input = mw_mem.offload_input;
+        offload_input.resize(total_targets * D * realtype_size +
+            total_targets * int_size + nw * ptr_size);
+        auto source_ptrs = reinterpret_cast<RealType**>(offload_input.data());
+        auto target_positions =
+            reinterpret_cast<RealType*>(offload_input.data() + ptr_size * nw);
+        auto walker_id_ptr = reinterpret_cast<int*>(offload_input.data() +
+            ptr_size * nw + total_targets * D * realtype_size);
+
+        count_targets = 0;
+        for (size_t iw = 0; iw < nw; iw++) {
+            auto& dt =
+                dt_list.template getCastedElement<SoaDistanceTableABTOMPTarget>(
+                    iw);
+            ParticleSetT<T>& pset(p_list[iw]);
+
+            assert(dt.targets() == pset.getTotalNum());
+            assert(this->num_sources_ == dt.num_sources_);
+
+            auto& RSoA_OMPTarget =
+                static_cast<const RealSpacePositionsTOMPTarget<T>&>(
+                    dt.origin_.getCoordinates());
+            source_ptrs[iw] =
+                const_cast<RealType*>(RSoA_OMPTarget.getDevicePtr());
+
+            for (size_t iat = 0; iat < pset.getTotalNum();
+                 ++iat, ++count_targets) {
+                walker_id_ptr[count_targets] = iw;
+                for (size_t idim = 0; idim < D; idim++)
+                    target_positions[count_targets * D + idim] =
+                        pset.R[iat][idim];
+            }
+        }
+
+        // To maximize thread usage, the loop over sources is chunked. Each
+        // chunk is sent to an OpenMP offload thread team.
+        const int ChunkSizePerTeam = 512;
+        const size_t num_teams =
+            (this->num_sources_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
+
+        auto* r_dr_ptr = mw_r_dr.data();
+        auto* input_ptr = offload_input.data();
+        const int num_sources_local = this->num_sources_;
+
+        {
+            ScopedTimer offload(dt_leader.offload_timer_);
+            PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \
+                num_teams(total_targets*num_teams) \
+                map(always, to: input_ptr[:offload_input.size()]) \
+                depend(out:r_dr_ptr[:mw_r_dr.size()]) nowait")
+            for (int iat = 0; iat < total_targets; ++iat)
+                for (int team_id = 0; team_id < num_teams; team_id++) {
+                    auto* target_pos_ptr =
+                        reinterpret_cast<RealType*>(input_ptr + ptr_size * nw);
+                    const int walker_id = reinterpret_cast<int*>(input_ptr +
+                        ptr_size * nw + total_targets * D * realtype_size)[iat];
+                    auto* source_pos_ptr =
+                        reinterpret_cast<RealType**>(input_ptr)[walker_id];
+                    auto* r_iat_ptr = r_dr_ptr + iat * num_padded * (D + 1);
+                    auto* dr_iat_ptr =
+                        r_dr_ptr + iat * num_padded * (D + 1) + num_padded;
+
+                    const int first = ChunkSizePerTeam * team_id;
+                    const int last = omptarget::min(
+                        first + ChunkSizePerTeam, num_sources_local);
+
+                    RealType pos[D];
+                    for (int idim = 0; idim < D; idim++)
+                        pos[idim] = target_pos_ptr[iat * D + idim];
+
+                    PRAGMA_OFFLOAD("omp parallel for")
+                    for (int iel = first; iel < last; iel++)
+                        DTD_BConds<RealType, D, SC>::computeDistancesOffload(
+                            pos, source_pos_ptr, num_padded, r_iat_ptr,
+                            dr_iat_ptr, num_padded, iel);
+                }
+
+            if (!(this->modes_ &
+                    DTModes::MW_EVALUATE_RESULT_NO_TRANSFER_TO_HOST)) {
+                PRAGMA_OFFLOAD(
+                    "omp target update from(r_dr_ptr[:mw_r_dr.size()]) \
+                    depend(inout:r_dr_ptr[:mw_r_dr.size()]) nowait")
+            }
+            // wait for computing and (optional) transferring back to host.
+            // It can potentially be moved to ParticleSet to fuse multiple
+            // similar taskwait
+            PRAGMA_OFFLOAD("omp taskwait")
+        }
+    }
+
+    inline void
+    mw_recompute(const RefVectorWithLeader<DistanceTableT<T>>& dt_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& p_list,
+        const std::vector<bool>& recompute) const override
+    {
+        this->mw_evaluate(dt_list, p_list); // recompute flags are not read
+    }
+
+    /// fill temp_r_/temp_dr_ for the proposed position rnew of particle iat
+    inline void
+    move(const ParticleSetT<T>& P, const PosType& rnew, const IndexType iat,
+        bool prepare_old) override
+    {
+        ScopedTimer local_timer(move_timer_);
+        const auto& source_pos =
+            this->origin_.getCoordinates().getAllParticlePos();
+        DTD_BConds<RealType, D, SC>::computeDistances(rnew, source_pos,
+            this->temp_r_.data(), this->temp_dr_, 0, this->num_sources_);
+        // When the full table is not kept ready at all times, row iat is
+        // refreshed from the current position P.R[iat]; otherwise its values
+        // could be undefined after a rejected move.
+        if (prepare_old && !(this->modes_ & DTModes::NEED_FULL_TABLE_ANYTIME))
+            DTD_BConds<RealType, D, SC>::computeDistances(P.R[iat], source_pos,
+                this->distances_[iat].data(), this->displacements_[iat], 0,
+                this->num_sources_);
+    }
+
+    /// copy temp_r_/temp_dr_ into the stripe of the iat-th particle
+    inline void
+    update(IndexType iat) override
+    {
+        ScopedTimer local_timer(update_timer_);
+        for (int idim = 0; idim < D; ++idim)
+            std::copy_n(this->temp_dr_.data(idim), this->num_sources_,
+                this->displacements_[iat].data(idim));
+        std::copy_n(this->temp_r_.data(), this->num_sources_,
+            this->distances_[iat].data());
+    }
+
+    /** Find the source closest to target particle iat.
+     * @param newpos search temp_r_ (proposed position) instead of row iat
+     * @param r,dr set to distance and displacement of the closest source
+     * @return index of the closest source
+     */
+    int
+    get_first_neighbor(
+        IndexType iat, RealType& r, PosType& dr, bool newpos) const override
+    {
+        RealType nearest = std::numeric_limits<RealType>::max();
+        int winner = -1;
+        // one pass over the sources; newpos only selects which row is read
+        for (int jat = 0; jat < this->num_sources_; ++jat) {
+            const RealType dist =
+                newpos ? this->temp_r_[jat] : this->distances_[iat][jat];
+            if (dist < nearest) {
+                nearest = dist;
+                winner = jat;
+            }
+        }
+        // r and dr are left untouched when no source qualified
+        if (winner >= 0) {
+            r = nearest;
+            if (newpos)
+                dr = this->temp_dr_[winner];
+            else
+                dr = this->displacements_[iat][winner];
+        }
+        assert(winner >= 0 && winner < this->num_sources_);
+        return winner;
+    }
+
+private:
+    /// timer for offload portion, named "DTABOMPTarget::offload_" + name_
+    NewTimer& offload_timer_;
+    /// timer for evaluate(), named "DTABOMPTarget::evaluate_" + name_
+    NewTimer& evaluate_timer_;
+    /// timer for move(), named "DTABOMPTarget::move_" + name_
+    NewTimer& move_timer_;
+    /// timer for update(), named "DTABOMPTarget::update_" + name_
+    NewTimer& update_timer_;
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/Particle/VirtualParticleSetT.cpp b/src/Particle/VirtualParticleSetT.cpp
new file mode 100644
index 0000000000..1f896405fc
--- /dev/null
+++ b/src/Particle/VirtualParticleSetT.cpp
@@ -0,0 +1,272 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2021 QMCPACK developers.
+//
+// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
+//                    Illinois at Urbana-Champaign
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+/** @file VirtualParticleSetT.cpp
+ * A proxy class to the quantum ParticleSet
+ */
+
+#include "VirtualParticleSetT.h"
+
+#include "Particle/DistanceTableT.h"
+#include "Particle/createDistanceTableT.h"
+#include "QMCHamiltonians/NLPPJob.h"
+#include "ResourceCollection.h"
+
+namespace qmcplusplus
+{
+
+struct VPMultiWalkerMem : public Resource
+{
+    /// multi walker reference particle
+    Vector<int, OffloadPinnedAllocator<int>> mw_refPctls;
+    /// passes the name "VPMultiWalkerMem" to the Resource base
+    VPMultiWalkerMem() : Resource("VPMultiWalkerMem")
+    {
+    }
+    /// copy delegates to the default ctor; mw_refPctls is not copied
+    VPMultiWalkerMem(const VPMultiWalkerMem&) : VPMultiWalkerMem()
+    {
+    }
+    /// clone through the copy constructor above
+    std::unique_ptr<Resource>
+    makeClone() const override
+    {
+        return std::make_unique<VPMultiWalkerMem>(*this);
+    }
+};
+
+template <typename T>
+VirtualParticleSetT<T>::VirtualParticleSetT(
+    const ParticleSetT<T>& p, int nptcl, size_t dt_count_limit) :
+    ParticleSetT<T>(p.getSimulationCell())
+{
+    this->setName("virtual");
+
+    // size local storage for nptcl particles; spins only if flagged spinor
+    this->setSpinor(p.isSpinor());
+    this->TotalNum = nptcl;
+    this->R.resize(nptcl);
+    if (this->isSpinor())
+        this->spins.resize(nptcl);
+    this->coordinates_->resize(nptcl);
+    // add a table for the origin of each of p's distance tables in
+    // [0, dt_count_limit); dt_count_limit == 0 selects all of p's tables
+    assert(dt_count_limit <= p.getNumDistTables());
+    if (dt_count_limit == 0)
+        dt_count_limit = p.getNumDistTables();
+    for (int i = 0; i < dt_count_limit; ++i)
+        if (p.getDistTable(i).getModes() & DTModes::NEED_VP_FULL_TABLE_ON_HOST)
+            this->addTable(p.getDistTable(i).get_origin());
+        else
+            this->addTable(p.getDistTable(i).get_origin(),
+                DTModes::MW_EVALUATE_RESULT_NO_TRANSFER_TO_HOST);
+}
+
+template <typename T>
+VirtualParticleSetT<T>::~VirtualParticleSetT() = default;
+
+template <typename T>
+Vector<int, OffloadPinnedAllocator<int>>&
+VirtualParticleSetT<T>::getMultiWalkerRefPctls()
+{
+    return mw_mem_handle_.getResource().mw_refPctls;
+}
+
+template <typename T>
+const Vector<int, OffloadPinnedAllocator<int>>&
+VirtualParticleSetT<T>::getMultiWalkerRefPctls() const
+{
+    return mw_mem_handle_.getResource().mw_refPctls;
+}
+
+template <typename T>
+void
+VirtualParticleSetT<T>::createResource(ResourceCollection& collection) const
+{
+    collection.addResource(std::make_unique<VPMultiWalkerMem>());
+    ParticleSetT<T>::createResource(collection);
+}
+
+template <typename T>
+void
+VirtualParticleSetT<T>::acquireResource(ResourceCollection& collection,
+    const RefVectorWithLeader<VirtualParticleSetT>& vp_list)
+{
+    auto& vp_leader = vp_list.getLeader();
+    vp_leader.mw_mem_handle_ = collection.lendResource<VPMultiWalkerMem>();
+    // the ParticleSetT base then acquires its resources for the same list
+    auto p_list = RefVectorWithLeaderParticleSet(vp_list);
+    ParticleSetT<T>::acquireResource(collection, p_list);
+}
+
+template <typename T>
+void
+VirtualParticleSetT<T>::releaseResource(ResourceCollection& collection,
+    const RefVectorWithLeader<VirtualParticleSetT>& vp_list)
+{
+    collection.takebackResource(vp_list.getLeader().mw_mem_handle_);
+    auto p_list = RefVectorWithLeaderParticleSet(vp_list);
+    ParticleSetT<T>::releaseResource(collection, p_list);
+}
+
+/// move virtual particles to new positions and update distance tables
+template <typename T>
+void
+VirtualParticleSetT<T>::makeMoves(const ParticleSetT<T>& refp, int jel,
+    const std::vector<PosType>& deltaV, bool sphere, int iat)
+{
+    if (sphere && iat < 0)
+        throw std::runtime_error(
+            "VirtualParticleSet::makeMoves is invoked incorrectly, the flag "
+            "sphere=true requires iat specified!");
+    onSphere = sphere;
+    refPS = refp;
+    refPtcl = jel;
+    refSourcePtcl = iat;
+    assert(this->R.size() == deltaV.size());
+    for (size_t ivp = 0; ivp < this->R.size(); ivp++)
+        this->R[ivp] = refp.R[jel] + deltaV[ivp];
+    if (refp.isSpinor())
+        for (size_t ivp = 0; ivp < this->R.size(); ivp++)
+            this->spins[ivp] = refp.spins[jel]; // no spin deltas in this API
+    this->update();
+}
+
+/// move virtual particles and their spins, then update distance tables
+template <typename T>
+void
+VirtualParticleSetT<T>::makeMovesWithSpin(const ParticleSetT<T>& refp, int jel,
+    const std::vector<PosType>& deltaV, const std::vector<RealType>& deltaS,
+    bool sphere, int iat)
+{
+    assert(refp.isSpinor());
+    if (sphere && iat < 0)
+        throw std::runtime_error(
+            "VirtualParticleSet::makeMovesWithSpin is invoked incorrectly, the "
+            "flag sphere=true requires iat specified!");
+    onSphere = sphere;
+    refPS = refp;
+    refPtcl = jel;
+    refSourcePtcl = iat;
+    assert(this->R.size() == deltaV.size());
+    assert(this->spins.size() == deltaS.size());
+    for (size_t ivp = 0; ivp < this->R.size(); ivp++) {
+        this->R[ivp] = refp.R[jel] + deltaV[ivp];
+        this->spins[ivp] = refp.spins[jel] + deltaS[ivp];
+    }
+    this->update();
+}
+
+template <typename T>
+void
+VirtualParticleSetT<T>::mw_makeMoves(
+    const RefVectorWithLeader<VirtualParticleSetT>& vp_list,
+    const RefVectorWithLeader<ParticleSetT<T>>& refp_list,
+    const RefVector<const std::vector<PosType>>& deltaV_list,
+    const RefVector<const NLPPJob<RealType>>& joblist, bool sphere)
+{
+    auto& vp_leader = vp_list.getLeader();
+    vp_leader.onSphere = sphere;
+    vp_leader.refPS = refp_list.getLeader();
+    // one reference-particle entry per virtual particle over all walkers
+    const size_t nVPs = countVPs(vp_list);
+    auto& mw_refPctls = vp_leader.getMultiWalkerRefPctls();
+    mw_refPctls.resize(nVPs);
+    // base-class view of vp_list, filled in the loop and used by mw_update
+    RefVectorWithLeader<ParticleSetT<T>> p_list(vp_leader);
+    p_list.reserve(vp_list.size());
+    // ivp is the running virtual-particle index across all walkers
+    size_t ivp = 0;
+    for (int iw = 0; iw < vp_list.size(); iw++) {
+        VirtualParticleSetT& vp(vp_list[iw]);
+        const std::vector<PosType>& deltaV(deltaV_list[iw]);
+        const NLPPJob<RealType>& job(joblist[iw]);
+
+        vp.onSphere = sphere;
+        vp.refPS = refp_list[iw];
+        vp.refPtcl = job.electron_id;
+        vp.refSourcePtcl = job.ion_id;
+        assert(vp.R.size() == deltaV.size());
+        for (size_t k = 0; k < vp.R.size(); k++, ivp++) {
+            vp.R[k] = refp_list[iw].R[vp.refPtcl] + deltaV[k];
+            if (vp_leader.isSpinor())
+                vp.spins[k] =
+                    refp_list[iw]
+                        .spins[vp.refPtcl]; // no spin deltas in this API
+            mw_refPctls[ivp] = vp.refPtcl;
+        }
+        p_list.push_back(vp);
+    }
+    assert(ivp == nVPs);
+    // updateTo(): presumably syncs mw_refPctls to the device - TODO confirm
+    mw_refPctls.updateTo();
+    ParticleSetT<T>::mw_update(p_list);
+}
+
+template <typename T>
+void
+VirtualParticleSetT<T>::mw_makeMovesWithSpin(
+    const RefVectorWithLeader<VirtualParticleSetT>& vp_list,
+    const RefVectorWithLeader<ParticleSetT<T>>& refp_list,
+    const RefVector<const std::vector<PosType>>& deltaV_list,
+    const RefVector<const std::vector<RealType>>& deltaS_list,
+    const RefVector<const NLPPJob<RealType>>& joblist, bool sphere)
+{
+    auto& vp_leader = vp_list.getLeader();
+    if (!vp_leader.isSpinor())
+        throw std::runtime_error(
+            "VirtualParticleSet::mw_makeMovesWithSpin should not be called if "
+            "particle sets aren't spinor types");
+    vp_leader.onSphere = sphere;
+    vp_leader.refPS = refp_list.getLeader();
+    // one reference-particle entry per virtual particle over all walkers
+    const size_t nVPs = countVPs(vp_list);
+    auto& mw_refPctls = vp_leader.getMultiWalkerRefPctls();
+    mw_refPctls.resize(nVPs);
+    // base-class view of vp_list, filled in the loop and used by mw_update
+    RefVectorWithLeader<ParticleSetT<T>> p_list(vp_leader);
+    p_list.reserve(vp_list.size());
+    // ivp is the running virtual-particle index across all walkers
+    size_t ivp = 0;
+    for (int iw = 0; iw < vp_list.size(); iw++) {
+        VirtualParticleSetT& vp(vp_list[iw]);
+        const std::vector<PosType>& deltaV(deltaV_list[iw]);
+        const std::vector<RealType>& deltaS(deltaS_list[iw]);
+        const NLPPJob<RealType>& job(joblist[iw]);
+
+        vp.onSphere = sphere;
+        vp.refPS = refp_list[iw];
+        vp.refPtcl = job.electron_id;
+        vp.refSourcePtcl = job.ion_id;
+        assert(vp.R.size() == deltaV.size());
+        assert(vp.spins.size() == deltaS.size());
+        assert(vp.R.size() == vp.spins.size());
+        for (size_t k = 0; k < vp.R.size(); k++, ivp++) {
+            vp.R[k] = refp_list[iw].R[vp.refPtcl] + deltaV[k];
+            vp.spins[k] = refp_list[iw].spins[vp.refPtcl] + deltaS[k];
+            mw_refPctls[ivp] = vp.refPtcl;
+        }
+        p_list.push_back(vp);
+    }
+    assert(ivp == nVPs);
+    // updateTo(): presumably syncs mw_refPctls to the device - TODO confirm
+    mw_refPctls.updateTo();
+    ParticleSetT<T>::mw_update(p_list);
+}
+
+template class VirtualParticleSetT<double>;
+template class VirtualParticleSetT<float>;
+template class VirtualParticleSetT<std::complex<double>>;
+template class VirtualParticleSetT<std::complex<float>>;
+} // namespace qmcplusplus
diff --git a/src/Particle/VirtualParticleSetT.h b/src/Particle/VirtualParticleSetT.h
new file mode 100644
index 0000000000..83e4d5aa57
--- /dev/null
+++ b/src/Particle/VirtualParticleSetT.h
@@ -0,0 +1,175 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2021 QMCPACK developers.
+//
+// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
+//                    Illinois at Urbana-Champaign
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_VIRTUAL_PARTICLESETT_H
+#define QMCPLUSPLUS_VIRTUAL_PARTICLESETT_H
+
+#include "OMPTarget/OffloadAlignedAllocators.hpp"
+#include "Particle/ParticleSetT.h"
+#include <ResourceHandle.h>
+
+namespace qmcplusplus
+{
+// forward declaration.
+class NonLocalECPComponent;
+template <typename T>
+struct NLPPJob;
+struct VPMultiWalkerMem;
+
+/** A ParticleSet that handles virtual moves of a selected particle of a given
+ * physical ParticleSet. Virtual moves are defined as moves being proposed but
+ * will never be accepted. VirtualParticleSet is introduced to avoid changing
+ * any internal states of the physical ParticleSet. For this reason, the
+ * physical ParticleSet is always marked const. It is heavily used by non-local
+ * PP evaluations.
+ */
+template <typename T>
+class VirtualParticleSetT : public ParticleSetT<T>
+{
+public:
+    using RealType = typename ParticleSetT<T>::RealType;
+    using PosType = typename ParticleSetT<T>::PosType;
+
+private:
+    /// true, if virtual particles are on a sphere for NLPP
+    bool onSphere = false;
+    /// multi walker resource
+    ResourceHandle<VPMultiWalkerMem> mw_mem_handle_;
+
+    Vector<int, OffloadPinnedAllocator<int>>&
+    getMultiWalkerRefPctls();
+
+    /// ParticleSet this object refers to after makeMoves
+    std::optional<std::reference_wrapper<const ParticleSetT<T>>> refPS;
+
+public:
+    /// Reference particle, -1 until one of the makeMoves variants sets it
+    int refPtcl = -1;
+    /// Reference source particle, used when onSphere=true
+    int refSourcePtcl = -1;
+
+    /// ParticleSet this object refers to
+    const ParticleSetT<T>&
+    getRefPS() const
+    {
+        return refPS.value();
+    }
+
+    inline bool
+    isOnSphere() const
+    {
+        return onSphere;
+    }
+
+    const Vector<int, OffloadPinnedAllocator<int>>&
+    getMultiWalkerRefPctls() const;
+
+    /** constructor
+     * @param p ParticleSet whose virtual moves are handled by this object
+     * @param nptcl number of virtual particles
+     * @param dt_count_limit the distance tables [0, dt_count_limit) of the
+     * reference particle set are created; 0 (the default) means all tables
+     */
+    VirtualParticleSetT(
+        const ParticleSetT<T>& p, int nptcl, size_t dt_count_limit = 0);
+
+    ~VirtualParticleSetT();
+
+    /// initialize a shared resource and hand it to a collection
+    void
+    createResource(ResourceCollection& collection) const;
+    /** acquire external resource and associate it with the list of ParticleSet
+     * Note: use RAII ResourceCollectionTeamLock whenever possible
+     */
+    static void
+    acquireResource(ResourceCollection& collection,
+        const RefVectorWithLeader<VirtualParticleSetT>& vp_list);
+    /** release external resource
+     * Note: use RAII ResourceCollectionTeamLock whenever possible
+     */
+    static void
+    releaseResource(ResourceCollection& collection,
+        const RefVectorWithLeader<VirtualParticleSetT>& vp_list);
+
+    /** move virtual particles to new positions and update distance tables
+     * @param refp reference particle set
+     * @param jel reference particle that all the VP moves from
+     * @param deltaV Position delta for virtual moves.
+     * @param sphere set true if VP are on a sphere around the reference source
+     * particle
+     * @param iat reference source particle
+     */
+    void
+    makeMoves(const ParticleSetT<T>& refp, int jel,
+        const std::vector<PosType>& deltaV, bool sphere = false, int iat = -1);
+
+    /** move virtual particles to new positions and update distance tables
+     * @param refp reference particle set
+     * @param jel reference particle that all the VP moves from
+     * @param deltaV Position delta for virtual moves.
+     * @param deltaS Spin delta for virtual moves.
+     * @param sphere set true if VP are on a sphere around the reference source
+     * particle
+     * @param iat reference source particle
+     */
+    void
+    makeMovesWithSpin(const ParticleSetT<T>& refp, int jel,
+        const std::vector<PosType>& deltaV, const std::vector<RealType>& deltaS,
+        bool sphere = false, int iat = -1);
+    /// multi-walker makeMoves; per-walker particle ids come from joblist
+    static void
+    mw_makeMoves(const RefVectorWithLeader<VirtualParticleSetT>& vp_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& p_list,
+        const RefVector<const std::vector<PosType>>& deltaV_list,
+        const RefVector<const NLPPJob<RealType>>& joblist, bool sphere);
+    /// multi-walker makeMovesWithSpin; spin deltas come from deltaS_list
+    static void
+    mw_makeMovesWithSpin(
+        const RefVectorWithLeader<VirtualParticleSetT>& vp_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& p_list,
+        const RefVector<const std::vector<PosType>>& deltaV_list,
+        const RefVector<const std::vector<RealType>>& deltaS_list,
+        const RefVector<const NLPPJob<RealType>>& joblist, bool sphere);
+    /// view vp_list as a list of ParticleSetT references with the same leader
+    static RefVectorWithLeader<ParticleSetT<T>>
+    RefVectorWithLeaderParticleSet(
+        const RefVectorWithLeader<VirtualParticleSetT>& vp_list)
+    {
+        RefVectorWithLeader<ParticleSetT<T>> ref_list(vp_list.getLeader());
+        ref_list.reserve(vp_list.size()); // size of the input, not of ref_list
+        for (VirtualParticleSetT& vp : vp_list)
+            ref_list.push_back(vp);
+        return ref_list;
+    }
+    /// total number of virtual particles summed over the sets in vp_list
+    static size_t
+    countVPs(const RefVectorWithLeader<const VirtualParticleSetT>& vp_list)
+    {
+        size_t nVPs = 0;
+        for (const VirtualParticleSetT& vp : vp_list)
+            nVPs += vp.getTotalNum();
+        return nVPs;
+    }
+    /// same count for a list of non-const VirtualParticleSetT references
+    static size_t
+    countVPs(const RefVectorWithLeader<VirtualParticleSetT>& vp_list)
+    {
+        size_t nVPs = 0;
+        for (const VirtualParticleSetT& vp : vp_list)
+            nVPs += vp.getTotalNum();
+        return nVPs;
+    }
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/Particle/createDistanceTableT.cpp b/src/Particle/createDistanceTableT.cpp
new file mode 100644
index 0000000000..1905aef3d8
--- /dev/null
+++ b/src/Particle/createDistanceTableT.cpp
@@ -0,0 +1,240 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at
+// Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of
+//                    Illinois at Urbana-Champaign Jeongnim Kim,
+//                    jeongnim.kim@gmail.com, University of Illinois at
+//                    Urbana-Champaign Jaron T. Krogel, krogeljt@ornl.gov, Oak
+//                    Ridge National Laboratory Mark A. Berrill,
+//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "Particle/createDistanceTableT.h"
+
+#include "CPU/SIMD/algorithm.hpp"
+#include "Particle/DistanceTableT.h"
+#include "Particle/SoaDistanceTableAAT.h"
+#include "Particle/SoaDistanceTableAATOMPTarget.h"
+#include "Particle/SoaDistanceTableABT.h"
+#include "Particle/SoaDistanceTableABTOMPTarget.h"
+
+namespace qmcplusplus
+{
+/** Create a SymmetricDTD, e.g., el-el distance table
+ *\param s source/target particle set
+ *\param description stream receiving a summary of the selected table
+ *\return the created distance table */
+template <typename T>
+std::unique_ptr<DistanceTableT<T>>
+createDistanceTableAAT(ParticleSetT<T>& s, std::ostream& description)
+{
+    using RealType = typename ParticleSetT<T>::RealType;
+    enum
+    {
+        DIM = OHMMS_DIM
+    };
+    const int sc = s.getLattice().SuperCellEnum;
+    std::unique_ptr<DistanceTableT<T>> dt;
+    std::ostringstream o;
+    o << "  Distance table for similar particles (A-A):" << std::endl;
+    o << "    source/target: " << s.getName() << std::endl;
+    o << "    Using structure-of-arrays (SoA) data layout" << std::endl;
+    // choose the table specialization from the lattice's boundary conditions
+    if (sc == SUPERCELL_BULK) {
+        if (s.getLattice().DiagonalOnly) {
+            o << "    Distance computations use orthorhombic periodic cell in "
+                 "3D."
+              << std::endl;
+            dt = std::make_unique<
+                SoaDistanceTableAAT<T, DIM, PPPO + SOA_OFFSET>>(s);
+        }
+        else {
+            if (s.getLattice().WignerSeitzRadius >
+                s.getLattice().SimulationCellRadius) {
+                o << "    Distance computations use general periodic cell in "
+                     "3D with corner image checks."
+                  << std::endl;
+                dt = std::make_unique<
+                    SoaDistanceTableAAT<T, DIM, PPPG + SOA_OFFSET>>(s);
+            }
+            else {
+                o << "    Distance computations use general periodic cell in "
+                     "3D without corner image checks."
+                  << std::endl;
+                dt = std::make_unique<
+                    SoaDistanceTableAAT<T, DIM, PPPS + SOA_OFFSET>>(s);
+            }
+        }
+    }
+    else if (sc == SUPERCELL_SLAB) {
+        if (s.getLattice().DiagonalOnly) {
+            o << "    Distance computations use orthorhombic code for periodic "
+                 "cell in 2D."
+              << std::endl;
+            dt = std::make_unique<
+                SoaDistanceTableAAT<T, DIM, PPNO + SOA_OFFSET>>(s);
+        }
+        else {
+            if (s.getLattice().WignerSeitzRadius >
+                s.getLattice().SimulationCellRadius) {
+                o << "    Distance computations use general periodic cell in "
+                     "2D with corner image checks."
+                  << std::endl;
+                dt = std::make_unique<
+                    SoaDistanceTableAAT<T, DIM, PPNG + SOA_OFFSET>>(s);
+            }
+            else {
+                o << "    Distance computations use general periodic cell in "
+                     "2D without corner image checks."
+                  << std::endl;
+                dt = std::make_unique<
+                    SoaDistanceTableAAT<T, DIM, PPNS + SOA_OFFSET>>(s);
+            }
+        }
+    }
+    else if (sc == SUPERCELL_WIRE) {
+        o << "    Distance computations use periodic cell in one dimension."
+          << std::endl;
+        dt = std::make_unique<
+            SoaDistanceTableAAT<T, DIM, SUPERCELL_WIRE + SOA_OFFSET>>(s);
+    }
+    else // open boundary condition
+    {
+        o << "    Distance computations use open boundary conditions in 3D."
+          << std::endl;
+        dt = std::make_unique<
+            SoaDistanceTableAAT<T, DIM, SUPERCELL_OPEN + SOA_OFFSET>>(s);
+    }
+    // append the accumulated summary to the caller's stream
+    description << o.str() << std::endl;
+    return dt;
+}
+
+template std::unique_ptr<DistanceTableT<double>>
+createDistanceTableAAT<double>(
+    ParticleSetT<double>& t, std::ostream& description);
+template std::unique_ptr<DistanceTableT<float>>
+createDistanceTableAAT<float>(
+    ParticleSetT<float>& t, std::ostream& description);
+template std::unique_ptr<DistanceTableT<std::complex<double>>>
+createDistanceTableAAT<std::complex<double>>(
+    ParticleSetT<std::complex<double>>& t, std::ostream& description);
+template std::unique_ptr<DistanceTableT<std::complex<float>>>
+createDistanceTableAAT<std::complex<float>>(
+    ParticleSetT<std::complex<float>>& t, std::ostream& description);
+
+/** Create an AsymmetricDTD, e.g., el-ion distance table
+ *\param s source particle set
+ *\param t target particle set
+ *\return the created distance table; a summary is written to description */
+template <typename T>
+std::unique_ptr<DistanceTableT<T>>
+createDistanceTableABT(
+    const ParticleSetT<T>& s, ParticleSetT<T>& t, std::ostream& description)
+{
+    using RealType = typename ParticleSetT<T>::RealType;
+    enum
+    {
+        DIM = OHMMS_DIM
+    };
+    const int sc = t.getLattice().SuperCellEnum;
+    std::unique_ptr<DistanceTableT<T>> dt;
+    std::ostringstream o;
+    o << "  Distance table for dissimilar particles (A-B):" << std::endl;
+    o << "    source: " << s.getName() << "  target: " << t.getName()
+      << std::endl;
+    o << "    Using structure-of-arrays (SoA) data layout" << std::endl;
+    // NOTE(review): sc is read from t's lattice, the checks below from s's
+    if (sc == SUPERCELL_BULK) {
+        if (s.getLattice().DiagonalOnly) {
+            o << "    Distance computations use orthorhombic periodic cell in "
+                 "3D."
+              << std::endl;
+            dt = std::make_unique<
+                SoaDistanceTableABT<T, DIM, PPPO + SOA_OFFSET>>(s, t);
+        }
+        else {
+            if (s.getLattice().WignerSeitzRadius >
+                s.getLattice().SimulationCellRadius) {
+                o << "    Distance computations use general periodic cell in "
+                     "3D with corner image checks."
+                  << std::endl;
+                dt = std::make_unique<
+                    SoaDistanceTableABT<T, DIM, PPPG + SOA_OFFSET>>(s, t);
+            }
+            else {
+                o << "    Distance computations use general periodic cell in "
+                     "3D without corner image checks."
+                  << std::endl;
+                dt = std::make_unique<
+                    SoaDistanceTableABT<T, DIM, PPPS + SOA_OFFSET>>(s, t);
+            }
+        }
+    }
+    else if (sc == SUPERCELL_SLAB) {
+        if (s.getLattice().DiagonalOnly) {
+            o << "    Distance computations use orthorhombic code for periodic "
+                 "cell in 2D."
+              << std::endl;
+            dt = std::make_unique<
+                SoaDistanceTableABT<T, DIM, PPNO + SOA_OFFSET>>(s, t);
+        }
+        else {
+            if (s.getLattice().WignerSeitzRadius >
+                s.getLattice().SimulationCellRadius) {
+                o << "    Distance computations use general periodic cell in "
+                     "2D with corner image checks."
+                  << std::endl;
+                dt = std::make_unique<
+                    SoaDistanceTableABT<T, DIM, PPNG + SOA_OFFSET>>(s, t);
+            }
+            else {
+                o << "    Distance computations use general periodic cell in "
+                     "2D without corner image checks."
+                  << std::endl;
+                dt = std::make_unique<
+                    SoaDistanceTableABT<T, DIM, PPNS + SOA_OFFSET>>(s, t);
+            }
+        }
+    }
+    else if (sc == SUPERCELL_WIRE) {
+        o << "    Distance computations use periodic cell in one dimension."
+          << std::endl;
+        dt = std::make_unique<
+            SoaDistanceTableABT<T, DIM, SUPERCELL_WIRE + SOA_OFFSET>>(s, t);
+    }
+    else // open boundary condition
+    {
+        o << "    Distance computations use open boundary conditions in 3D."
+          << std::endl;
+        dt = std::make_unique<
+            SoaDistanceTableABT<T, DIM, SUPERCELL_OPEN + SOA_OFFSET>>(s, t);
+    }
+    // append the accumulated summary to the caller's stream
+    description << o.str() << std::endl;
+    return dt;
+}
+
+template std::unique_ptr<DistanceTableT<double>>
+createDistanceTableABT<double>(const ParticleSetT<double>& s,
+    ParticleSetT<double>& t, std::ostream& description);
+template std::unique_ptr<DistanceTableT<float>>
+createDistanceTableABT<float>(const ParticleSetT<float>& s,
+    ParticleSetT<float>& t, std::ostream& description);
+template std::unique_ptr<DistanceTableT<std::complex<double>>>
+createDistanceTableABT<std::complex<double>>(
+    const ParticleSetT<std::complex<double>>& s,
+    ParticleSetT<std::complex<double>>& t, std::ostream& description);
+template std::unique_ptr<DistanceTableT<std::complex<float>>>
+createDistanceTableABT<std::complex<float>>(
+    const ParticleSetT<std::complex<float>>& s,
+    ParticleSetT<std::complex<float>>& t, std::ostream& description);
+} // namespace qmcplusplus
diff --git a/src/Particle/createDistanceTableT.h b/src/Particle/createDistanceTableT.h
new file mode 100644
index 0000000000..64b81aae1e
--- /dev/null
+++ b/src/Particle/createDistanceTableT.h
@@ -0,0 +1,89 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
+// Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
+//                    Illinois at Urbana-Champaign
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_DISTANCETABLET_H
+#define QMCPLUSPLUS_DISTANCETABLET_H
+
+#include "Particle/ParticleSetT.h"
+
+namespace qmcplusplus
+{
+/** Class to manage multiple DistanceTable objects.
+ *
+ * \date  2008-09-19
+ * static data members are removed. DistanceTable::add functions
+ * are kept for compatibility only. New codes should use a member function
+ * of ParticleSet to add a distance table
+ * int ParticleSet::addTable(const ParticleSet& source)
+ *
+ * \deprecated There is only one instance of the data members of
+ * DistanceTable in an application and the data are shared by many objects.
+ * Note that static data members and functions are used
+ * (based on singleton and factory patterns).
+ *\todo DistanceTable should work as a factory, as well, to instantiate
+ *DistanceTable subject to different boundary conditions.
+ * Lattice/CrystalLattice.h and Lattice/CrystalLattice.cpp can be owned by
+ *DistanceTable to generically control the crystalline structure.
+ */
+
+/// free function to create a distance table of s-s
+template <typename T>
+std::unique_ptr<DistanceTableT<T>>
+createDistanceTableAAT(ParticleSetT<T>& s, std::ostream& description);
+
+template <typename T>
+std::unique_ptr<DistanceTableT<T>>
+createDistanceTableAATOMPTarget(ParticleSetT<T>& s, std::ostream& description);
+
+template <typename T>
+inline std::unique_ptr<DistanceTableT<T>>
+createDistanceTableT(ParticleSetT<T>& s, std::ostream& description)
+{
+    // The per-particle cost of evaluating a distance table during a P-by-P
+    // move is set by the number of source particles, so the source particle
+    // set decides which implementation is built.
+    const bool offload = s.getCoordinates().getKind() ==
+        DynamicCoordinateKind::DC_POS_OFFLOAD;
+    return offload ? createDistanceTableAATOMPTarget(s, description)
+                   : createDistanceTableAAT(s, description);
+}
+
+/// free function to create a distance table of s-t
+template <typename T>
+std::unique_ptr<DistanceTableT<T>>
+createDistanceTableABT(
+    const ParticleSetT<T>& s, ParticleSetT<T>& t, std::ostream& description);
+
+template <typename T>
+std::unique_ptr<DistanceTableT<T>>
+createDistanceTableABTOMPTarget(
+    const ParticleSetT<T>& s, ParticleSetT<T>& t, std::ostream& description);
+
+template <typename T>
+inline std::unique_ptr<DistanceTableT<T>>
+createDistanceTableT(
+    const ParticleSetT<T>& s, ParticleSetT<T>& t, std::ostream& description)
+{
+    // The per-particle cost of evaluating a distance table during a P-by-P
+    // move is set by the number of source particles, so the source particle
+    // set s decides which implementation is built.
+    const bool offload = s.getCoordinates().getKind() ==
+        DynamicCoordinateKind::DC_POS_OFFLOAD;
+    return offload ? createDistanceTableABTOMPTarget(s, t, description)
+                   : createDistanceTableABT(s, t, description);
+}
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/Particle/createDistanceTableTOMPTarget.cpp b/src/Particle/createDistanceTableTOMPTarget.cpp
new file mode 100644
index 0000000000..afb4653184
--- /dev/null
+++ b/src/Particle/createDistanceTableTOMPTarget.cpp
@@ -0,0 +1,248 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at
+// Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of
+//                    Illinois at Urbana-Champaign Jeongnim Kim,
+//                    jeongnim.kim@gmail.com, University of Illinois at
+//                    Urbana-Champaign Jaron T. Krogel, krogeljt@ornl.gov, Oak
+//                    Ridge National Laboratory Mark A. Berrill,
+//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "Particle/createDistanceTableT.h"
+
+#include "CPU/SIMD/algorithm.hpp"
+#include "Particle/DistanceTableT.h"
+#include "Particle/SoaDistanceTableAATOMPTarget.h"
+#include "Particle/SoaDistanceTableABTOMPTarget.h"
+
+namespace qmcplusplus
+{
+/** Create a symmetric (A-A) OpenMP offload distance table, e.g., el-el
+ *\param s source/target particle set
+ *\return owning pointer to the newly created distance table
+ */
+template <typename T>
+std::unique_ptr<DistanceTableT<T>>
+createDistanceTableAATOMPTarget(ParticleSetT<T>& s, std::ostream& description)
+{
+    using RealType = typename ParticleSetT<T>::RealType;
+    enum
+    {
+        DIM = OHMMS_DIM
+    };
+    const int sc = s.getLattice().SuperCellEnum;
+    std::unique_ptr<DistanceTableT<T>> dt;
+    std::ostringstream o;
+    o << "  Distance table for similar particles (A-A):" << std::endl;
+    o << "    source/target: " << s.getName() << std::endl;
+    o << "    Using structure-of-arrays (SoA) data layout and OpenMP offload"
+      << std::endl;
+
+    if (sc == SUPERCELL_BULK) {
+        if (s.getLattice().DiagonalOnly) {
+            o << "    Distance computations use orthorhombic periodic cell in "
+                 "3D."
+              << std::endl;
+            dt = std::make_unique<
+                SoaDistanceTableAATOMPTarget<T, DIM, PPPO + SOA_OFFSET>>(s);
+        }
+        else {
+            if (s.getLattice().WignerSeitzRadius >
+                s.getLattice().SimulationCellRadius) {
+                o << "    Distance computations use general periodic cell in "
+                     "3D with corner image checks."
+                  << std::endl;
+                dt = std::make_unique<
+                    SoaDistanceTableAATOMPTarget<T, DIM, PPPG + SOA_OFFSET>>(s);
+            }
+            else {
+                o << "    Distance computations use general periodic cell in "
+                     "3D without corner image checks."
+                  << std::endl;
+                dt = std::make_unique<
+                    SoaDistanceTableAATOMPTarget<T, DIM, PPPS + SOA_OFFSET>>(s);
+            }
+        }
+    }
+    else if (sc == SUPERCELL_SLAB) {
+        if (s.getLattice().DiagonalOnly) {
+            o << "    Distance computations use orthorhombic code for periodic "
+                 "cell in 2D."
+              << std::endl;
+            dt = std::make_unique<
+                SoaDistanceTableAATOMPTarget<T, DIM, PPNO + SOA_OFFSET>>(s);
+        }
+        else {
+            if (s.getLattice().WignerSeitzRadius >
+                s.getLattice().SimulationCellRadius) {
+                o << "    Distance computations use general periodic cell in "
+                     "2D with corner image checks."
+                  << std::endl;
+                dt = std::make_unique<
+                    SoaDistanceTableAATOMPTarget<T, DIM, PPNG + SOA_OFFSET>>(s);
+            }
+            else {
+                o << "    Distance computations use general periodic cell in "
+                     "2D without corner image checks."
+                  << std::endl;
+                dt = std::make_unique<
+                    SoaDistanceTableAATOMPTarget<T, DIM, PPNS + SOA_OFFSET>>(s);
+            }
+        }
+    }
+    else if (sc == SUPERCELL_WIRE) {
+        o << "    Distance computations use periodic cell in one dimension."
+          << std::endl;
+        dt = std::make_unique<
+            SoaDistanceTableAATOMPTarget<T, DIM, SUPERCELL_WIRE + SOA_OFFSET>>(
+            s);
+    }
+    else // open boundary condition
+    {
+        o << "    Distance computations use open boundary conditions in 3D."
+          << std::endl;
+        dt = std::make_unique<
+            SoaDistanceTableAATOMPTarget<T, DIM, SUPERCELL_OPEN + SOA_OFFSET>>(
+            s);
+    }
+
+    description << o.str() << std::endl;
+    return dt;
+}
+
+template std::unique_ptr<DistanceTableT<double>>
+createDistanceTableAATOMPTarget<double>(
+    ParticleSetT<double>& t, std::ostream& description);
+template std::unique_ptr<DistanceTableT<float>>
+createDistanceTableAATOMPTarget<float>(
+    ParticleSetT<float>& t, std::ostream& description);
+template std::unique_ptr<DistanceTableT<std::complex<double>>>
+createDistanceTableAATOMPTarget<std::complex<double>>(
+    ParticleSetT<std::complex<double>>& t, std::ostream& description);
+template std::unique_ptr<DistanceTableT<std::complex<float>>>
+createDistanceTableAATOMPTarget<std::complex<float>>(
+    ParticleSetT<std::complex<float>>& t, std::ostream& description);
+
+/** Create an asymmetric (A-B) OpenMP offload distance table, e.g., ion-el
+ *\param s source particle set (\p t is the target particle set)
+ *\return owning pointer to the newly created distance table
+ */
+template <typename T>
+std::unique_ptr<DistanceTableT<T>>
+createDistanceTableABTOMPTarget(
+    const ParticleSetT<T>& s, ParticleSetT<T>& t, std::ostream& description)
+{
+    using RealType = typename ParticleSetT<T>::RealType;
+    enum
+    {
+        DIM = OHMMS_DIM
+    };
+    const int sc = t.getLattice().SuperCellEnum;
+    std::unique_ptr<DistanceTableT<T>> dt;
+    std::ostringstream o;
+    o << "  Distance table for dissimilar particles (A-B):" << std::endl;
+    o << "    source: " << s.getName() << "  target: " << t.getName()
+      << std::endl;
+    o << "    Using structure-of-arrays (SoA) data layout and OpenMP offload"
+      << std::endl;
+
+    if (sc == SUPERCELL_BULK) {
+        if (s.getLattice().DiagonalOnly) {
+            o << "    Distance computations use orthorhombic periodic cell in "
+                 "3D."
+              << std::endl;
+            dt = std::make_unique<
+                SoaDistanceTableABTOMPTarget<T, DIM, PPPO + SOA_OFFSET>>(s, t);
+        }
+        else {
+            if (s.getLattice().WignerSeitzRadius >
+                s.getLattice().SimulationCellRadius) {
+                o << "    Distance computations use general periodic cell in "
+                     "3D with corner image checks."
+                  << std::endl;
+                dt = std::make_unique<
+                    SoaDistanceTableABTOMPTarget<T, DIM, PPPG + SOA_OFFSET>>(
+                    s, t);
+            }
+            else {
+                o << "    Distance computations use general periodic cell in "
+                     "3D without corner image checks."
+                  << std::endl;
+                dt = std::make_unique<
+                    SoaDistanceTableABTOMPTarget<T, DIM, PPPS + SOA_OFFSET>>(
+                    s, t);
+            }
+        }
+    }
+    else if (sc == SUPERCELL_SLAB) {
+        if (s.getLattice().DiagonalOnly) {
+            o << "    Distance computations use orthorhombic code for periodic "
+                 "cell in 2D."
+              << std::endl;
+            dt = std::make_unique<
+                SoaDistanceTableABTOMPTarget<T, DIM, PPNO + SOA_OFFSET>>(s, t);
+        }
+        else {
+            if (s.getLattice().WignerSeitzRadius >
+                s.getLattice().SimulationCellRadius) {
+                o << "    Distance computations use general periodic cell in "
+                     "2D with corner image checks."
+                  << std::endl;
+                dt = std::make_unique<
+                    SoaDistanceTableABTOMPTarget<T, DIM, PPNG + SOA_OFFSET>>(
+                    s, t);
+            }
+            else {
+                o << "    Distance computations use general periodic cell in "
+                     "2D without corner image checks."
+                  << std::endl;
+                dt = std::make_unique<
+                    SoaDistanceTableABTOMPTarget<T, DIM, PPNS + SOA_OFFSET>>(
+                    s, t);
+            }
+        }
+    }
+    else if (sc == SUPERCELL_WIRE) {
+        o << "    Distance computations use periodic cell in one dimension."
+          << std::endl;
+        dt = std::make_unique<
+            SoaDistanceTableABTOMPTarget<T, DIM, SUPERCELL_WIRE + SOA_OFFSET>>(
+            s, t);
+    }
+    else // open boundary condition
+    {
+        o << "    Distance computations use open boundary conditions in 3D."
+          << std::endl;
+        dt = std::make_unique<
+            SoaDistanceTableABTOMPTarget<T, DIM, SUPERCELL_OPEN + SOA_OFFSET>>(
+            s, t);
+    }
+
+    description << o.str() << std::endl;
+    return dt;
+}
+
+template std::unique_ptr<DistanceTableT<double>>
+createDistanceTableABTOMPTarget<double>(const ParticleSetT<double>& s,
+    ParticleSetT<double>& t, std::ostream& description);
+template std::unique_ptr<DistanceTableT<float>>
+createDistanceTableABTOMPTarget<float>(const ParticleSetT<float>& s,
+    ParticleSetT<float>& t, std::ostream& description);
+template std::unique_ptr<DistanceTableT<std::complex<double>>>
+createDistanceTableABTOMPTarget<std::complex<double>>(
+    const ParticleSetT<std::complex<double>>& s,
+    ParticleSetT<std::complex<double>>& t, std::ostream& description);
+template std::unique_ptr<DistanceTableT<std::complex<float>>>
+createDistanceTableABTOMPTarget<std::complex<float>>(
+    const ParticleSetT<std::complex<float>>& s,
+    ParticleSetT<std::complex<float>>& t, std::ostream& description);
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/BasisSetBaseT.h b/src/QMCWaveFunctions/BasisSetBaseT.h
new file mode 100644
index 0000000000..e6c8bd9e99
--- /dev/null
+++ b/src/QMCWaveFunctions/BasisSetBaseT.h
@@ -0,0 +1,222 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at
+// Urbana-Champaign
+//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore
+//                    National Laboratory Jeremy McMinnis, jmcminis@gmail.com,
+//                    University of Illinois at Urbana-Champaign Jaron T.
+//                    Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
+//                    Illinois at Urbana-Champaign Mark A. Berrill,
+//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_BASISSETBASET_H
+#define QMCPLUSPLUS_BASISSETBASET_H
+
+#include "OMPTarget/OffloadAlignedAllocators.hpp"
+#include "Particle/ParticleSetT.h"
+#include "QMCWaveFunctions/OrbitalSetTraits.h"
+
+namespace qmcplusplus
+{
+/** base class for a basis set
+ *
+ * Define a common storage for the derived classes and
+ * provides  a minimal set of interfaces to get/set BasisSetSize.
+ */
+template <typename T>
+struct BasisSetBaseT : public OrbitalSetTraits<T>
+{
+    enum
+    {
+        MAXINDEX = 2 + OHMMS_DIM
+    };
+    using RealType = typename OrbitalSetTraits<T>::RealType;
+    using ValueType = typename OrbitalSetTraits<T>::ValueType;
+    using IndexType = typename OrbitalSetTraits<T>::IndexType;
+    using HessType = typename OrbitalSetTraits<T>::HessType;
+    using IndexVector = typename OrbitalSetTraits<T>::IndexVector;
+    using ValueVector = typename OrbitalSetTraits<T>::ValueVector;
+    using ValueMatrix = typename OrbitalSetTraits<T>::ValueMatrix;
+    using GradVector = typename OrbitalSetTraits<T>::GradVector;
+    using GradMatrix = typename OrbitalSetTraits<T>::GradMatrix;
+    using HessVector = typename OrbitalSetTraits<T>::HessVector;
+    using HessMatrix = typename OrbitalSetTraits<T>::HessMatrix;
+    using GGGType = TinyVector<HessType, OHMMS_DIM>;
+    using GGGVector = Vector<GGGType>;
+    using GGGMatrix = Matrix<GGGType>;
+
+    /// size of the basis set
+    IndexType BasisSetSize;
+    /// index of the particle
+    IndexType ActivePtcl;
+    /// counter to keep track
+    unsigned long Counter;
+    /// phi[i] the value of the i-th basis set
+    ValueVector Phi;
+    /// dphi[i] the gradient of the i-th basis set
+    GradVector dPhi;
+    /// d2phi[i] the laplacian of the i-th basis set
+    ValueVector d2Phi;
+    /// grad_grad_Phi[i] the full hessian of the i-th basis set
+    HessVector grad_grad_Phi;
+    /// grad_grad_grad_Phi[i] the gradient of the hessian of the i-th basis set
+    GGGVector grad_grad_grad_Phi;
+    /// container to store value, laplacian and gradient
+    ValueMatrix Temp;
+
+    ValueMatrix Y;
+    GradMatrix dY;
+    ValueMatrix d2Y;
+
+    /// default constructor
+    BasisSetBaseT() : BasisSetSize(0), ActivePtcl(-1), Counter(0)
+    {
+    }
+    /// virtual destructor
+    virtual ~BasisSetBaseT()
+    {
+    }
+    /** resize the container */
+    void
+    resize(int ntargets)
+    {
+        if (BasisSetSize) {
+            Phi.resize(BasisSetSize);
+            dPhi.resize(BasisSetSize);
+            d2Phi.resize(BasisSetSize);
+            grad_grad_Phi.resize(BasisSetSize);
+            grad_grad_grad_Phi.resize(BasisSetSize);
+            Temp.resize(BasisSetSize, MAXINDEX);
+            Y.resize(ntargets, BasisSetSize);
+            dY.resize(ntargets, BasisSetSize);
+            d2Y.resize(ntargets, BasisSetSize);
+        }
+        else {
+            app_error() << "  BasisSetBase::BasisSetSize == 0" << std::endl;
+        }
+    }
+
+    /// clone the basis set
+    virtual BasisSetBaseT*
+    makeClone() const = 0;
+    /** return the basis set size */
+    inline IndexType
+    getBasisSetSize() const
+    {
+        return BasisSetSize;
+    }
+
+    /// resize the basis set
+    virtual void
+    setBasisSetSize(int nbs) = 0;
+
+    virtual void
+    evaluateWithHessian(const ParticleSetT<T>& P, int iat) = 0;
+    virtual void
+    evaluateWithThirdDeriv(const ParticleSetT<T>& P, int iat) = 0;
+    virtual void
+    evaluateThirdDerivOnly(const ParticleSetT<T>& P, int iat) = 0;
+    virtual void
+    evaluateForWalkerMove(const ParticleSetT<T>& P) = 0;
+    virtual void
+    evaluateForWalkerMove(const ParticleSetT<T>& P, int iat) = 0;
+    virtual void
+    evaluateForPtclMove(const ParticleSetT<T>& P, int iat) = 0;
+    virtual void
+    evaluateAllForPtclMove(const ParticleSetT<T>& P, int iat) = 0;
+    virtual void
+    evaluateForPtclMoveWithHessian(const ParticleSetT<T>& P, int iat) = 0;
+};
+
+/** Base for real basis set
+ *
+ * Equivalent to BasisSetBase with minimum requirements
+ * Used by LCAO
+ */
+template <typename T>
+struct SoaBasisSetBaseT
+{
+    using value_type = T;
+    using vgl_type = VectorSoaContainer<T, OHMMS_DIM + 2>;
+    using vgh_type = VectorSoaContainer<T, 10>;
+    using vghgh_type = VectorSoaContainer<T, 20>;
+    using OffloadMWVGLArray =
+        Array<T, 3, OffloadPinnedAllocator<T>>; // [VGL, walker, Orbs]
+    using OffloadMWVArray =
+        Array<T, 2, OffloadPinnedAllocator<T>>; // [walker, Orbs]
+
+    /// size of the basis set
+    int BasisSetSize;
+
+    virtual ~SoaBasisSetBaseT() = default;
+    inline int
+    getBasisSetSize()
+    {
+        return BasisSetSize;
+    }
+
+    virtual SoaBasisSetBaseT<T>*
+    makeClone() const = 0;
+    virtual void
+    setBasisSetSize(int nbs) = 0;
+
+    // Evaluates value, gradient, and laplacian for electron "iat".  Parks them
+    // into a temporary data structure "vgl".
+    virtual void
+    evaluateVGL(const ParticleSetT<T>& P, int iat, vgl_type& vgl) = 0;
+    // Evaluates value, gradient, and laplacian for electron "iat".  Places them
+    // in an offload array for batched code.
+    virtual void
+    mw_evaluateVGL(const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
+        OffloadMWVGLArray& vgl) = 0;
+    // Evaluates value for electron "iat".  Places it in an offload array for
+    // batched code.
+    virtual void
+    mw_evaluateValue(const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+        int iat, OffloadMWVArray& v) = 0;
+    // Evaluates value, gradient, and Hessian for electron "iat".  Parks them
+    // into a temporary data structure "vgh".
+    virtual void
+    evaluateVGH(const ParticleSetT<T>& P, int iat, vgh_type& vgh) = 0;
+    // Evaluates value, gradient, Hessian, and gradient of the Hessian for
+    // electron "iat".  Parks them into a temporary data structure "vghgh".
+    virtual void
+    evaluateVGHGH(const ParticleSetT<T>& P, int iat, vghgh_type& vghgh) = 0;
+    // Evaluates the x,y, and z components of ionic gradient associated with
+    // "jion" of value.  Parks the raw data into "vgl" container.
+    virtual void
+    evaluateGradSourceV(const ParticleSetT<T>& P, int iat,
+        const ParticleSetT<T>& ions, int jion, vgl_type& vgl) = 0;
+    // Evaluates the x,y, and z components of ionic gradient associated with
+    // "jion" value, gradient, and laplacian.
+    //     Parks the raw data into "vghgh" container.
+    virtual void
+    evaluateGradSourceVGL(const ParticleSetT<T>& P, int iat,
+        const ParticleSetT<T>& ions, int jion, vghgh_type& vghgh) = 0;
+    virtual void
+    evaluateV(const ParticleSetT<T>& P, int iat, value_type* restrict vals) = 0;
+    virtual bool
+    is_S_orbital(int mo_idx, int ao_idx)
+    {
+        return false;
+    }
+
+    /// Determine which orbitals are S-type.  Used for cusp correction.
+    virtual void
+    queryOrbitalsForSType(const std::vector<bool>& corrCenter,
+        std::vector<bool>& is_s_orbital) const
+    {
+    }
+};
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h b/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h
index 35f1580d16..7d5d19b323 100644
--- a/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h
+++ b/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h
@@ -1,22 +1,25 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2019 QMCPACK developers.
 //
-// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
-//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
+// Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
+//                    Laboratory Jeongnim Kim, jeongnim.kim@gmail.com,
+//                    University of Illinois at Urbana-Champaign Mark A.
+//                    Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
-
 /** @file BsplineSetT.h
  *
- * BsplineSet is a SPOSet derived class and serves as a base class for B-spline SPO C2C/C2R/R2R implementation
+ * BsplineSet is a SPOSet derived class and serves as a base class for B-spline
+ * SPO C2C/C2R/R2R implementation
  */
 #ifndef QMCPLUSPLUS_BSPLINESETT_H
 #define QMCPLUSPLUS_BSPLINESETT_H
@@ -28,221 +31,226 @@
 namespace qmcplusplus
 {
 /** BsplineSet is the base class for SplineC2C, SplineC2R, SplineR2R.
- * Its derived template classes manage the storage and evaluation at given precision.
- * BsplineSet also implements a few fallback routines in case optimized implementation is not necessary in the derived class.
+ * Its derived template classes manage the storage and evaluation at given
+ * precision. BsplineSet also implements a few fallback routines in case
+ * optimized implementation is not necessary in the derived class.
  */
-template<class T>
+template <class T>
 class BsplineSetT : public SPOSetT<T>
 {
 public:
-  using PosType     = typename SPOSetT<T>::PosType;
-  using ValueVector = typename SPOSetT<T>::ValueVector;
-  using GradVector  = typename SPOSetT<T>::GradVector;
-  using HessVector  = typename SPOSetT<T>::HessVector;
-  using GGGVector   = typename SPOSetT<T>::GGGVector;
-  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
-  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
-  using HessMatrix  = typename SPOSetT<T>::HessMatrix;
-  using GGGMatrix   = typename SPOSetT<T>::GGGMatrix;
-
-  using value_type = typename SPOSetT<T>::ValueMatrix::value_type;
-  using grad_type  = typename SPOSetT<T>::GradMatrix::value_type;
-
-  // used in derived classes
-  using RealType  = typename SPOSetT<T>::RealType;
-  using ValueType = typename SPOSetT<T>::ValueType;
-
-  BsplineSetT(const std::string& my_name) : SPOSetT<T>(my_name), MyIndex(0), first_spo(0), last_spo(0) {}
-
-  virtual bool isComplex() const         = 0;
-  virtual std::string getKeyword() const = 0;
-
-  auto& getHalfG() const { return HalfG; }
-
-  inline void init_base(int n)
-  {
-    kPoints.resize(n);
-    MakeTwoCopies.resize(n);
-    BandIndexMap.resize(n);
-    for (int i = 0; i < n; i++)
-      BandIndexMap[i] = i;
-  }
-
-  ///remap kpoints to group general kpoints & special kpoints
-  int remap_kpoints()
-  {
-    std::vector<PosType> k_copy(kPoints);
-    const int nk = kPoints.size();
-    int nCB      = 0;
-    //two pass
-    for (int i = 0; i < nk; ++i)
+    using PosType = typename SPOSetT<T>::PosType;
+    using ValueVector = typename SPOSetT<T>::ValueVector;
+    using GradVector = typename SPOSetT<T>::GradVector;
+    using HessVector = typename SPOSetT<T>::HessVector;
+    using GGGVector = typename SPOSetT<T>::GGGVector;
+    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+    using GradMatrix = typename SPOSetT<T>::GradMatrix;
+    using HessMatrix = typename SPOSetT<T>::HessMatrix;
+    using GGGMatrix = typename SPOSetT<T>::GGGMatrix;
+
+    using value_type = typename SPOSetT<T>::ValueMatrix::value_type;
+    using grad_type = typename SPOSetT<T>::GradMatrix::value_type;
+
+    // used in derived classes
+    using RealType = typename SPOSetT<T>::RealType;
+    using ValueType = typename SPOSetT<T>::ValueType;
+
+    BsplineSetT(const std::string& my_name) :
+        SPOSetT<T>(my_name),
+        MyIndex(0),
+        first_spo(0),
+        last_spo(0)
+    {
+    }
+
+    virtual bool
+    isComplex() const = 0;
+    virtual std::string
+    getKeyword() const = 0;
+
+    auto&
+    getHalfG() const
+    {
+        return HalfG;
+    }
+
+    inline void
+    init_base(int n)
+    {
+        kPoints.resize(n);
+        MakeTwoCopies.resize(n);
+        BandIndexMap.resize(n);
+        for (int i = 0; i < n; i++)
+            BandIndexMap[i] = i;
+    }
+
+    /// remap kpoints to group general kpoints & special kpoints
+    int
+    remap_kpoints()
     {
-      if (MakeTwoCopies[i])
-      {
-        kPoints[nCB]        = k_copy[i];
-        BandIndexMap[nCB++] = i;
-      }
+        std::vector<PosType> k_copy(kPoints);
+        const int nk = kPoints.size();
+        int nCB = 0;
+        // two pass
+        for (int i = 0; i < nk; ++i) {
+            if (MakeTwoCopies[i]) {
+                kPoints[nCB] = k_copy[i];
+                BandIndexMap[nCB++] = i;
+            }
+        }
+        int nRealBands = nCB;
+        for (int i = 0; i < nk; ++i) {
+            if (!MakeTwoCopies[i]) {
+                kPoints[nRealBands] = k_copy[i];
+                BandIndexMap[nRealBands++] = i;
+            }
+        }
+        return nCB; // return the number of complex bands
     }
-    int nRealBands = nCB;
-    for (int i = 0; i < nk; ++i)
+
+    std::unique_ptr<SPOSetT<T>>
+    makeClone() const override = 0;
+
+    void
+    setOrbitalSetSize(int norbs) override
     {
-      if (!MakeTwoCopies[i])
-      {
-        kPoints[nRealBands]        = k_copy[i];
-        BandIndexMap[nRealBands++] = i;
-      }
+        this->OrbitalSetSize = norbs;
     }
-    return nCB; //return the number of complex bands
-  }
 
-  std::unique_ptr<SPOSetT<T>> makeClone() const override = 0;
+    void
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+        ValueMatrix& logdet, GradMatrix& dlogdet,
+        ValueMatrix& d2logdet) override
+    {
+        for (int iat = first, i = 0; iat < last; ++iat, ++i) {
+            ValueVector v(logdet[i], logdet.cols());
+            GradVector g(dlogdet[i], dlogdet.cols());
+            ValueVector l(d2logdet[i], d2logdet.cols());
+            this->evaluateVGL(P, iat, v, g, l);
+        }
+    }
+
+    void
+    mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int first, int last,
+        const RefVector<ValueMatrix>& logdet_list,
+        const RefVector<GradMatrix>& dlogdet_list,
+        const RefVector<ValueMatrix>& d2logdet_list) const override
+    {
+        assert(this == &spo_list.getLeader());
+        const size_t nw = spo_list.size();
+        std::vector<ValueVector> mw_psi_v;
+        std::vector<GradVector> mw_dpsi_v;
+        std::vector<ValueVector> mw_d2psi_v;
+        RefVector<ValueVector> psi_v_list;
+        RefVector<GradVector> dpsi_v_list;
+        RefVector<ValueVector> d2psi_v_list;
+        mw_psi_v.reserve(nw);
+        mw_dpsi_v.reserve(nw);
+        mw_d2psi_v.reserve(nw);
+        psi_v_list.reserve(nw);
+        dpsi_v_list.reserve(nw);
+        d2psi_v_list.reserve(nw);
 
-  void setOrbitalSetSize(int norbs) override { this->OrbitalSetSize = norbs; }
+        for (int iat = first, i = 0; iat < last; ++iat, ++i) {
+            mw_psi_v.clear();
+            mw_dpsi_v.clear();
+            mw_d2psi_v.clear();
+            psi_v_list.clear();
+            dpsi_v_list.clear();
+            d2psi_v_list.clear();
 
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            ValueMatrix& logdet,
-                            GradMatrix& dlogdet,
-                            ValueMatrix& d2logdet) override
-  {
-    for (int iat = first, i = 0; iat < last; ++iat, ++i)
+            for (int iw = 0; iw < nw; iw++) {
+                mw_psi_v.emplace_back(
+                    logdet_list[iw].get()[i], logdet_list[iw].get().cols());
+                mw_dpsi_v.emplace_back(
+                    dlogdet_list[iw].get()[i], dlogdet_list[iw].get().cols());
+                mw_d2psi_v.emplace_back(
+                    d2logdet_list[iw].get()[i], d2logdet_list[iw].get().cols());
+                psi_v_list.push_back(mw_psi_v.back());
+                dpsi_v_list.push_back(mw_dpsi_v.back());
+                d2psi_v_list.push_back(mw_d2psi_v.back());
+            }
+
+            this->mw_evaluateVGL(
+                spo_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list);
+        }
+    }
+
+    void
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+        ValueMatrix& logdet, GradMatrix& dlogdet,
+        HessMatrix& grad_grad_logdet) override
     {
-      ValueVector v(logdet[i], logdet.cols());
-      GradVector g(dlogdet[i], dlogdet.cols());
-      ValueVector l(d2logdet[i], d2logdet.cols());
-      this->evaluateVGL(P, iat, v, g, l);
+        for (int iat = first, i = 0; iat < last; ++iat, ++i) {
+            ValueVector v(logdet[i], logdet.cols());
+            GradVector g(dlogdet[i], dlogdet.cols());
+            HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols());
+            this->evaluateVGH(P, iat, v, g, h);
+        }
     }
-  }
-
-  void mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                               const RefVectorWithLeader<ParticleSet>& P_list,
-                               int first,
-                               int last,
-                               const RefVector<ValueMatrix>& logdet_list,
-                               const RefVector<GradMatrix>& dlogdet_list,
-                               const RefVector<ValueMatrix>& d2logdet_list) const override
-  {
-    assert(this == &spo_list.getLeader());
-    const size_t nw = spo_list.size();
-    std::vector<ValueVector> mw_psi_v;
-    std::vector<GradVector> mw_dpsi_v;
-    std::vector<ValueVector> mw_d2psi_v;
-    RefVector<ValueVector> psi_v_list;
-    RefVector<GradVector> dpsi_v_list;
-    RefVector<ValueVector> d2psi_v_list;
-    mw_psi_v.reserve(nw);
-    mw_dpsi_v.reserve(nw);
-    mw_d2psi_v.reserve(nw);
-    psi_v_list.reserve(nw);
-    dpsi_v_list.reserve(nw);
-    d2psi_v_list.reserve(nw);
-
-    for (int iat = first, i = 0; iat < last; ++iat, ++i)
+
+    void
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+        ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet,
+        GGGMatrix& grad_grad_grad_logdet) override
     {
-      mw_psi_v.clear();
-      mw_dpsi_v.clear();
-      mw_d2psi_v.clear();
-      psi_v_list.clear();
-      dpsi_v_list.clear();
-      d2psi_v_list.clear();
-
-      for (int iw = 0; iw < nw; iw++)
-      {
-        mw_psi_v.emplace_back(logdet_list[iw].get()[i], logdet_list[iw].get().cols());
-        mw_dpsi_v.emplace_back(dlogdet_list[iw].get()[i], dlogdet_list[iw].get().cols());
-        mw_d2psi_v.emplace_back(d2logdet_list[iw].get()[i], d2logdet_list[iw].get().cols());
-        psi_v_list.push_back(mw_psi_v.back());
-        dpsi_v_list.push_back(mw_dpsi_v.back());
-        d2psi_v_list.push_back(mw_d2psi_v.back());
-      }
-
-      this->mw_evaluateVGL(spo_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list);
+        for (int iat = first, i = 0; iat < last; ++iat, ++i) {
+            ValueVector v(logdet[i], logdet.cols());
+            GradVector g(dlogdet[i], dlogdet.cols());
+            HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols());
+            GGGVector gh(
+                grad_grad_grad_logdet[i], grad_grad_grad_logdet.cols());
+            this->evaluateVGHGH(P, iat, v, g, h, gh);
+        }
     }
-  }
-
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            ValueMatrix& logdet,
-                            GradMatrix& dlogdet,
-                            HessMatrix& grad_grad_logdet) override
-  {
-    for (int iat = first, i = 0; iat < last; ++iat, ++i)
+
+    void
+    evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
+        const ParticleSetT<T>& source, int iat_src,
+        GradMatrix& gradphi) override
     {
-      ValueVector v(logdet[i], logdet.cols());
-      GradVector g(dlogdet[i], dlogdet.cols());
-      HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols());
-      this->evaluateVGH(P, iat, v, g, h);
+        // Do nothing, since Einsplines don't explicitly depend on ion
+        // positions.
     }
-  }
-
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            ValueMatrix& logdet,
-                            GradMatrix& dlogdet,
-                            HessMatrix& grad_grad_logdet,
-                            GGGMatrix& grad_grad_grad_logdet) override
-  {
-    for (int iat = first, i = 0; iat < last; ++iat, ++i)
+
+    void
+    evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
+        const ParticleSetT<T>& source, int iat_src, GradMatrix& grad_phi,
+        HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi) override
     {
-      ValueVector v(logdet[i], logdet.cols());
-      GradVector g(dlogdet[i], dlogdet.cols());
-      HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols());
-      GGGVector gh(grad_grad_grad_logdet[i], grad_grad_grad_logdet.cols());
-      this->evaluateVGHGH(P, iat, v, g, h, gh);
+        // Do nothing, since Einsplines don't explicitly depend on ion
+        // positions.
     }
-  }
-
-  void evaluateGradSource(const ParticleSet& P,
-                          int first,
-                          int last,
-                          const ParticleSet& source,
-                          int iat_src,
-                          GradMatrix& gradphi) override
-  {
-    //Do nothing, since Einsplines don't explicitly depend on ion positions.
-  }
-
-  void evaluateGradSource(const ParticleSet& P,
-                          int first,
-                          int last,
-                          const ParticleSet& source,
-                          int iat_src,
-                          GradMatrix& grad_phi,
-                          HessMatrix& grad_grad_phi,
-                          GradMatrix& grad_lapl_phi) override
-  {
-    //Do nothing, since Einsplines don't explicitly depend on ion positions.
-  }
-
-  template<class BSPLINESPO>
-  friend struct SplineSetReader;
-  friend struct BsplineReaderBase;
 
+    template <class BSPLINESPO>
+    friend struct SplineSetReader;
+    friend struct BsplineReaderBase;
 
 protected:
-  static const int D = QMCTraits::DIM;
-  ///Index of this adoptor, when multiple adoptors are used for NUMA or distributed cases
-  size_t MyIndex;
-  ///first index of the SPOs this Spline handles
-  size_t first_spo;
-  ///last index of the SPOs this Spline handles
-  size_t last_spo;
-  ///sign bits at the G/2 boundaries
-  TinyVector<int, D> HalfG;
-  ///flags to unpack sin/cos
-  std::vector<bool> MakeTwoCopies;
-  /** kpoints for each unique orbitals.
-   * Note: for historic reason, this sign is opposite to what was used in DFT when orbitals were generated.
-   * Changing the sign requires updating all the evaluation code.
-   */
-  std::vector<PosType> kPoints;
-  ///remap splines to orbitals
-  aligned_vector<int> BandIndexMap;
-  ///band offsets used for communication
-  std::vector<int> offset;
+    static const int D = QMCTraits::DIM;
+    /// Index of this adoptor, when multiple adoptors are used for NUMA or
+    /// distributed cases
+    size_t MyIndex;
+    /// first index of the SPOs this Spline handles
+    size_t first_spo;
+    /// last index of the SPOs this Spline handles
+    size_t last_spo;
+    /// sign bits at the G/2 boundaries
+    TinyVector<int, D> HalfG;
+    /// flags to unpack sin/cos
+    std::vector<bool> MakeTwoCopies;
+    /** k-points for each unique orbital.
+     * Note: for historical reasons, this sign is opposite to what was used in
+     * DFT when the orbitals were generated. Changing the sign requires updating
+     * all the evaluation code.
+     */
+    std::vector<PosType> kPoints;
+    /// remap splines to orbitals
+    aligned_vector<int> BandIndexMap;
+    /// band offsets used for communication
+    std::vector<int> offset;
 };
 
 
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp
index 155dd8a220..e6b05e4cd3 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp
@@ -170,7 +170,7 @@ inline void SplineC2CT<T>::assign_v(const PointType& r,
 }
 
 template<class T>
-void SplineC2CT<T>::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi)
+void SplineC2CT<T>::evaluateValue(const ParticleSetT<T>& P, const int iat, ValueVector& psi)
 {
   const PointType& r = P.activeR(iat);
   PointType ru(PrimLattice.toUnit_floor(r));
@@ -187,7 +187,7 @@ void SplineC2CT<T>::evaluateValue(const ParticleSet& P, const int iat, ValueVect
 }
 
 template<class T>
-void SplineC2CT<T>::evaluateDetRatios(const VirtualParticleSet& VP,
+void SplineC2CT<T>::evaluateDetRatios(const VirtualParticleSetT<T>& VP,
                                       ValueVector& psi,
                                       const ValueVector& psiinv,
                                       std::vector<ValueType>& ratios)
@@ -376,7 +376,7 @@ inline void SplineC2CT<T>::assign_vgl_from_l(const PointType& r, ValueVector& ps
 }
 
 template<class T>
-void SplineC2CT<T>::evaluateVGL(const ParticleSet& P,
+void SplineC2CT<T>::evaluateVGL(const ParticleSetT<T>& P,
                                 const int iat,
                                 ValueVector& psi,
                                 GradVector& dpsi,
@@ -517,7 +517,7 @@ void SplineC2CT<T>::assign_vgh(const PointType& r,
 }
 
 template<class T>
-void SplineC2CT<T>::evaluateVGH(const ParticleSet& P,
+void SplineC2CT<T>::evaluateVGH(const ParticleSetT<T>& P,
                                 const int iat,
                                 ValueVector& psi,
                                 GradVector& dpsi,
@@ -774,7 +774,7 @@ void SplineC2CT<T>::assign_vghgh(const PointType& r,
 }
 
 template<class T>
-void SplineC2CT<T>::evaluateVGHGH(const ParticleSet& P,
+void SplineC2CT<T>::evaluateVGHGH(const ParticleSetT<T>& P,
                                   const int iat,
                                   ValueVector& psi,
                                   GradVector& dpsi,
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h
index fd55fcd9f2..a7ba99e272 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h
@@ -149,9 +149,9 @@ class SplineC2CT : public BsplineSetT<T>
 
   void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const;
 
-  void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override;
+  void evaluateValue(const ParticleSetT<T>& P, const int iat, ValueVector& psi) override;
 
-  void evaluateDetRatios(const VirtualParticleSet& VP,
+  void evaluateDetRatios(const VirtualParticleSetT<T>& VP,
                          ValueVector& psi,
                          const ValueVector& psiinv,
                          std::vector<ValueType>& ratios) override;
@@ -165,7 +165,7 @@ class SplineC2CT : public BsplineSetT<T>
    */
   void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
 
-  void evaluateVGL(const ParticleSet& P,
+  void evaluateVGL(const ParticleSetT<T>& P,
                    const int iat,
                    ValueVector& psi,
                    GradVector& dpsi,
@@ -178,7 +178,7 @@ class SplineC2CT : public BsplineSetT<T>
                   int first,
                   int last) const;
 
-  void evaluateVGH(const ParticleSet& P,
+  void evaluateVGH(const ParticleSetT<T>& P,
                    const int iat,
                    ValueVector& psi,
                    GradVector& dpsi,
@@ -192,7 +192,7 @@ class SplineC2CT : public BsplineSetT<T>
                     int first = 0,
                     int last  = -1) const;
 
-  void evaluateVGHGH(const ParticleSet& P,
+  void evaluateVGHGH(const ParticleSetT<T>& P,
                      const int iat,
                      ValueVector& psi,
                      GradVector& dpsi,
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp
index e4695e6c11..176cb5dee8 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp
@@ -1,64 +1,68 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2019 QMCPACK developers.
 //
-// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
-//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
+//                    Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
+//                    Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
+#include "SplineR2RT.h"
 
 #include "Concurrency/OpenMP.h"
-#include "SplineR2RT.h"
-#include "spline2/MultiBsplineEval.hpp"
 #include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp"
+#include "spline2/MultiBsplineEval.hpp"
 
 namespace qmcplusplus
 {
-template<typename ST>
+template <typename ST>
 SplineR2RT<ST>::SplineR2RT(const SplineR2RT& in) = default;
 
-template<typename ST>
-inline void SplineR2RT<ST>::set_spline(SingleSplineType* spline_r,
-                                      SingleSplineType* spline_i,
-                                      int twist,
-                                      int ispline,
-                                      int level)
+template <typename ST>
+inline void
+SplineR2RT<ST>::set_spline(SingleSplineType* spline_r,
+    SingleSplineType* spline_i, int twist, int ispline, int level)
 {
-  SplineInst->copy_spline(spline_r, ispline);
+    SplineInst->copy_spline(spline_r, ispline);
 }
 
-template<typename ST>
-bool SplineR2RT<ST>::read_splines(hdf_archive& h5f)
+template <typename ST>
+bool
+SplineR2RT<ST>::read_splines(hdf_archive& h5f)
 {
-  std::ostringstream o;
-  o << "spline_" << this->MyIndex;
-  einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
-  return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0");
+    std::ostringstream o;
+    o << "spline_" << this->MyIndex;
+    einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
+    return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0");
 }
 
-template<typename ST>
-bool SplineR2RT<ST>::write_splines(hdf_archive& h5f)
+template <typename ST>
+bool
+SplineR2RT<ST>::write_splines(hdf_archive& h5f)
 {
-  std::ostringstream o;
-  o << "spline_" << this->MyIndex;
-  einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
-  return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0");
+    std::ostringstream o;
+    o << "spline_" << this->MyIndex;
+    einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
+    return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0");
 }
 
-template<typename ST>
-void SplineR2RT<ST>::storeParamsBeforeRotation()
+template <typename ST>
+void
+SplineR2RT<ST>::storeParamsBeforeRotation()
 {
-  const auto spline_ptr     = SplineInst->getSplinePtr();
-  const auto coefs_tot_size = spline_ptr->coefs_size;
-  coef_copy_                = std::make_shared<std::vector<RealType>>(coefs_tot_size);
+    const auto spline_ptr = SplineInst->getSplinePtr();
+    const auto coefs_tot_size = spline_ptr->coefs_size;
+    coef_copy_ = std::make_shared<std::vector<RealType>>(coefs_tot_size);
 
-  std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin());
+    std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin());
 }
 
 /*
@@ -100,458 +104,497 @@ void SplineR2RT<ST>::storeParamsBeforeRotation()
   NB: For splines (typically) BasisSetSize >> OrbitalSetSize, so the spl_coefs
   "matrix" is very tall and skinny.
 */
-template<typename ST>
-void SplineR2RT<ST>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy)
+template <typename ST>
+void
+SplineR2RT<ST>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy)
 {
-  // SplineInst is a MultiBspline. See src/spline2/MultiBspline.hpp
-  const auto spline_ptr = SplineInst->getSplinePtr();
-  assert(spline_ptr != nullptr);
-  const auto spl_coefs      = spline_ptr->coefs;
-  const auto Nsplines       = spline_ptr->num_splines; // May include padding
-  const auto coefs_tot_size = spline_ptr->coefs_size;
-  const auto BasisSetSize   = coefs_tot_size / Nsplines;
-  const auto TrueNOrbs      = rot_mat.size1(); // == Nsplines - padding
-  assert(this->OrbitalSetSize == rot_mat.rows());
-  assert(this->OrbitalSetSize == rot_mat.cols());
-
-  if (!use_stored_copy)
-  {
-    assert(coef_copy_ != nullptr);
-    std::copy_n(spl_coefs, coefs_tot_size, coef_copy_->begin());
-  }
-
-  // Apply rotation the dumb way b/c I can't get BLAS::gemm to work...
-  for (auto i = 0; i < BasisSetSize; i++)
-  {
-    for (auto j = 0; j < this->OrbitalSetSize; j++)
-    {
-      const auto cur_elem = Nsplines * i + j;
-      auto newval{0.};
-      for (auto k = 0; k < this->OrbitalSetSize; k++)
-      {
-        const auto index = i * Nsplines + k;
-        newval += (*coef_copy_)[index] * rot_mat[k][j];
-      }
-      spl_coefs[cur_elem] = newval;
+    // SplineInst is a MultiBspline. See src/spline2/MultiBspline.hpp
+    const auto spline_ptr = SplineInst->getSplinePtr();
+    assert(spline_ptr != nullptr);
+    const auto spl_coefs = spline_ptr->coefs;
+    const auto Nsplines = spline_ptr->num_splines; // May include padding
+    const auto coefs_tot_size = spline_ptr->coefs_size;
+    const auto BasisSetSize = coefs_tot_size / Nsplines;
+    const auto TrueNOrbs = rot_mat.size1(); // == Nsplines - padding
+    assert(this->OrbitalSetSize == rot_mat.rows());
+    assert(this->OrbitalSetSize == rot_mat.cols());
+
+    if (!use_stored_copy) {
+        assert(coef_copy_ != nullptr);
+        std::copy_n(spl_coefs, coefs_tot_size, coef_copy_->begin());
     }
-  }
-}
 
+    // Apply rotation the dumb way b/c I can't get BLAS::gemm to work...
+    for (auto i = 0; i < BasisSetSize; i++) {
+        for (auto j = 0; j < this->OrbitalSetSize; j++) {
+            const auto cur_elem = Nsplines * i + j;
+            auto newval{0.};
+            for (auto k = 0; k < this->OrbitalSetSize; k++) {
+                const auto index = i * Nsplines + k;
+                newval += (*coef_copy_)[index] * rot_mat[k][j];
+            }
+            spl_coefs[cur_elem] = newval;
+        }
+    }
+}
 
-template<typename ST>
-inline void SplineR2RT<ST>::assign_v(int bc_sign, const vContainer_type& myV, ValueVector& psi, int first, int last)
-    const
+template <typename ST>
+inline void
+SplineR2RT<ST>::assign_v(int bc_sign, const vContainer_type& myV,
+    ValueVector& psi, int first, int last) const
 {
-  // protect last
-  last = last > this->kPoints.size() ? this->kPoints.size() : last;
+    // protect last
+    last = last > this->kPoints.size() ? this->kPoints.size() : last;
 
-  const ST signed_one = (bc_sign & 1) ? -1 : 1;
+    const ST signed_one = (bc_sign & 1) ? -1 : 1;
 #pragma omp simd
-  for (size_t j = first; j < last; ++j)
-    psi[this->first_spo + j] = signed_one * myV[j];
+    for (size_t j = first; j < last; ++j)
+        psi[this->first_spo + j] = signed_one * myV[j];
 }
 
-template<typename ST>
-void SplineR2RT<ST>::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi)
+template <typename ST>
+void
+SplineR2RT<ST>::evaluateValue(
+    const ParticleSetT<ST>& P, const int iat, ValueVector& psi)
 {
-  const PointType& r = P.activeR(iat);
-  PointType ru;
-  int bc_sign = convertPos(r, ru);
+    const PointType& r = P.activeR(iat);
+    PointType ru;
+    int bc_sign = convertPos(r, ru);
 
 #pragma omp parallel
-  {
-    int first, last;
-    FairDivideAligned(psi.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
+    {
+        int first, last;
+        FairDivideAligned(psi.size(), getAlignment<ST>(), omp_get_num_threads(),
+            omp_get_thread_num(), first, last);
 
-    spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last);
-    assign_v(bc_sign, myV, psi, first, last);
-  }
+        spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last);
+        assign_v(bc_sign, myV, psi, first, last);
+    }
 }
 
-template<typename ST>
-void SplineR2RT<ST>::evaluateDetRatios(const VirtualParticleSet& VP,
-                                      ValueVector& psi,
-                                      const ValueVector& psiinv,
-                                      std::vector<TT>& ratios)
+template <typename ST>
+void
+SplineR2RT<ST>::evaluateDetRatios(const VirtualParticleSetT<ST>& VP,
+    ValueVector& psi, const ValueVector& psiinv, std::vector<TT>& ratios)
 {
-  const bool need_resize = ratios_private.rows() < VP.getTotalNum();
+    const bool need_resize = ratios_private.rows() < VP.getTotalNum();
 
 #pragma omp parallel
-  {
-    int tid = omp_get_thread_num();
-    // initialize thread private ratios
-    if (need_resize)
     {
-      if (tid == 0) // just like #pragma omp master, but one fewer call to the runtime
-        ratios_private.resize(VP.getTotalNum(), omp_get_num_threads());
+        int tid = omp_get_thread_num();
+        // initialize thread private ratios
+        if (need_resize) {
+            if (tid == 0) // just like #pragma omp master, but one fewer call to
+                          // the runtime
+                ratios_private.resize(VP.getTotalNum(), omp_get_num_threads());
 #pragma omp barrier
+        }
+        int first, last;
+        FairDivideAligned(psi.size(), getAlignment<ST>(), omp_get_num_threads(),
+            tid, first, last);
+        const int last_real =
+            this->kPoints.size() < last ? this->kPoints.size() : last;
+
+        for (int iat = 0; iat < VP.getTotalNum(); ++iat) {
+            const PointType& r = VP.activeR(iat);
+            PointType ru;
+            int bc_sign = convertPos(r, ru);
+
+            spline2::evaluate3d(
+                SplineInst->getSplinePtr(), ru, myV, first, last);
+            assign_v(bc_sign, myV, psi, first, last_real);
+            ratios_private[iat][tid] = simd::dot(
+                psi.data() + first, psiinv.data() + first, last_real - first);
+        }
     }
-    int first, last;
-    FairDivideAligned(psi.size(), getAlignment<ST>(), omp_get_num_threads(), tid, first, last);
-    const int last_real = this->kPoints.size() < last ? this->kPoints.size() : last;
-
-    for (int iat = 0; iat < VP.getTotalNum(); ++iat)
-    {
-      const PointType& r = VP.activeR(iat);
-      PointType ru;
-      int bc_sign = convertPos(r, ru);
 
-      spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last);
-      assign_v(bc_sign, myV, psi, first, last_real);
-      ratios_private[iat][tid] = simd::dot(psi.data() + first, psiinv.data() + first, last_real - first);
+    // do the reduction manually
+    for (int iat = 0; iat < VP.getTotalNum(); ++iat) {
+        ratios[iat] = TT(0);
+        for (int tid = 0; tid < ratios_private.cols(); tid++)
+            ratios[iat] += ratios_private[iat][tid];
     }
-  }
-
-  // do the reduction manually
-  for (int iat = 0; iat < VP.getTotalNum(); ++iat)
-  {
-    ratios[iat] = TT(0);
-    for (int tid = 0; tid < ratios_private.cols(); tid++)
-      ratios[iat] += ratios_private[iat][tid];
-  }
 }
 
-template<typename ST>
-inline void SplineR2RT<ST>::assign_vgl(int bc_sign,
-                                      ValueVector& psi,
-                                      GradVector& dpsi,
-                                      ValueVector& d2psi,
-                                      int first,
-                                      int last) const
+template <typename ST>
+inline void
+SplineR2RT<ST>::assign_vgl(int bc_sign, ValueVector& psi, GradVector& dpsi,
+    ValueVector& d2psi, int first, int last) const
 {
-  // protect last
-  last = last > this->kPoints.size() ? this->kPoints.size() : last;
-
-  const ST signed_one = (bc_sign & 1) ? -1 : 1;
-  const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
-           g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
-           g22      = PrimLattice.G(8);
-  const ST symGG[6] = {GGt[0], GGt[1] + GGt[3], GGt[2] + GGt[6], GGt[4], GGt[5] + GGt[7], GGt[8]};
-
-  const ST* restrict g0  = myG.data(0);
-  const ST* restrict g1  = myG.data(1);
-  const ST* restrict g2  = myG.data(2);
-  const ST* restrict h00 = myH.data(0);
-  const ST* restrict h01 = myH.data(1);
-  const ST* restrict h02 = myH.data(2);
-  const ST* restrict h11 = myH.data(3);
-  const ST* restrict h12 = myH.data(4);
-  const ST* restrict h22 = myH.data(5);
+    // protect last
+    last = last > this->kPoints.size() ? this->kPoints.size() : last;
+
+    const ST signed_one = (bc_sign & 1) ? -1 : 1;
+    const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1),
+             g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
+             g11 = PrimLattice.G(4), g12 = PrimLattice.G(5),
+             g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
+             g22 = PrimLattice.G(8);
+    const ST symGG[6] = {GGt[0], GGt[1] + GGt[3], GGt[2] + GGt[6], GGt[4],
+        GGt[5] + GGt[7], GGt[8]};
+
+    const ST* restrict g0 = myG.data(0);
+    const ST* restrict g1 = myG.data(1);
+    const ST* restrict g2 = myG.data(2);
+    const ST* restrict h00 = myH.data(0);
+    const ST* restrict h01 = myH.data(1);
+    const ST* restrict h02 = myH.data(2);
+    const ST* restrict h11 = myH.data(3);
+    const ST* restrict h12 = myH.data(4);
+    const ST* restrict h22 = myH.data(5);
 
 #pragma omp simd
-  for (size_t j = first; j < last; ++j)
-  {
-    const size_t psiIndex = this->first_spo + j;
-    psi[psiIndex]         = signed_one * myV[j];
-    dpsi[psiIndex][0]     = signed_one * (g00 * g0[j] + g01 * g1[j] + g02 * g2[j]);
-    dpsi[psiIndex][1]     = signed_one * (g10 * g0[j] + g11 * g1[j] + g12 * g2[j]);
-    dpsi[psiIndex][2]     = signed_one * (g20 * g0[j] + g21 * g1[j] + g22 * g2[j]);
-    d2psi[psiIndex]       = signed_one * SymTrace(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], symGG);
-  }
+    for (size_t j = first; j < last; ++j) {
+        const size_t psiIndex = this->first_spo + j;
+        psi[psiIndex] = signed_one * myV[j];
+        dpsi[psiIndex][0] =
+            signed_one * (g00 * g0[j] + g01 * g1[j] + g02 * g2[j]);
+        dpsi[psiIndex][1] =
+            signed_one * (g10 * g0[j] + g11 * g1[j] + g12 * g2[j]);
+        dpsi[psiIndex][2] =
+            signed_one * (g20 * g0[j] + g21 * g1[j] + g22 * g2[j]);
+        d2psi[psiIndex] = signed_one *
+            SymTrace(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], symGG);
+    }
 }
 
-/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian
-   */
-template<typename ST>
-inline void SplineR2RT<ST>::assign_vgl_from_l(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+/** assign_vgl_from_l can be used when myL is precomputed and myV, myG and
+ * myL are in Cartesian coordinates.
+ */
+template <typename ST>
+inline void
+SplineR2RT<ST>::assign_vgl_from_l(
+    int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
 {
-  const ST signed_one   = (bc_sign & 1) ? -1 : 1;
-  const ST* restrict g0 = myG.data(0);
-  const ST* restrict g1 = myG.data(1);
-  const ST* restrict g2 = myG.data(2);
+    const ST signed_one = (bc_sign & 1) ? -1 : 1;
+    const ST* restrict g0 = myG.data(0);
+    const ST* restrict g1 = myG.data(1);
+    const ST* restrict g2 = myG.data(2);
 
 #pragma omp simd
-  for (int psiIndex = this->first_spo; psiIndex < this->last_spo; ++psiIndex)
-  {
-    const size_t j    = psiIndex - this->first_spo;
-    psi[psiIndex]     = signed_one * myV[j];
-    dpsi[psiIndex][0] = signed_one * g0[j];
-    dpsi[psiIndex][1] = signed_one * g1[j];
-    dpsi[psiIndex][2] = signed_one * g2[j];
-    d2psi[psiIndex]   = signed_one * myL[j];
-  }
+    for (int psiIndex = this->first_spo; psiIndex < this->last_spo;
+         ++psiIndex) {
+        const size_t j = psiIndex - this->first_spo;
+        psi[psiIndex] = signed_one * myV[j];
+        dpsi[psiIndex][0] = signed_one * g0[j];
+        dpsi[psiIndex][1] = signed_one * g1[j];
+        dpsi[psiIndex][2] = signed_one * g2[j];
+        d2psi[psiIndex] = signed_one * myL[j];
+    }
 }
 
-template<typename ST>
-void SplineR2RT<ST>::evaluateVGL(const ParticleSet& P,
-                                const int iat,
-                                ValueVector& psi,
-                                GradVector& dpsi,
-                                ValueVector& d2psi)
+template <typename ST>
+void
+SplineR2RT<ST>::evaluateVGL(const ParticleSetT<ST>& P, const int iat,
+    ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
 {
-  const PointType& r = P.activeR(iat);
-  PointType ru;
-  int bc_sign = convertPos(r, ru);
+    const PointType& r = P.activeR(iat);
+    PointType ru;
+    int bc_sign = convertPos(r, ru);
 
 #pragma omp parallel
-  {
-    int first, last;
-    FairDivideAligned(psi.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
+    {
+        int first, last;
+        FairDivideAligned(psi.size(), getAlignment<ST>(), omp_get_num_threads(),
+            omp_get_thread_num(), first, last);
 
-    spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last);
-    assign_vgl(bc_sign, psi, dpsi, d2psi, first, last);
-  }
+        spline2::evaluate3d_vgh(
+            SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last);
+        assign_vgl(bc_sign, psi, dpsi, d2psi, first, last);
+    }
 }
 
-template<typename ST>
-void SplineR2RT<ST>::assign_vgh(int bc_sign,
-                               ValueVector& psi,
-                               GradVector& dpsi,
-                               HessVector& grad_grad_psi,
-                               int first,
-                               int last) const
+template <typename ST>
+void
+SplineR2RT<ST>::assign_vgh(int bc_sign, ValueVector& psi, GradVector& dpsi,
+    HessVector& grad_grad_psi, int first, int last) const
 {
-  // protect last
-  last = last > this->kPoints.size() ? this->kPoints.size() : last;
-
-  const ST signed_one = (bc_sign & 1) ? -1 : 1;
-  const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
-           g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
-           g22 = PrimLattice.G(8);
-
-  const ST* restrict g0  = myG.data(0);
-  const ST* restrict g1  = myG.data(1);
-  const ST* restrict g2  = myG.data(2);
-  const ST* restrict h00 = myH.data(0);
-  const ST* restrict h01 = myH.data(1);
-  const ST* restrict h02 = myH.data(2);
-  const ST* restrict h11 = myH.data(3);
-  const ST* restrict h12 = myH.data(4);
-  const ST* restrict h22 = myH.data(5);
+    // protect last
+    last = last > this->kPoints.size() ? this->kPoints.size() : last;
+
+    const ST signed_one = (bc_sign & 1) ? -1 : 1;
+    const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1),
+             g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
+             g11 = PrimLattice.G(4), g12 = PrimLattice.G(5),
+             g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
+             g22 = PrimLattice.G(8);
+
+    const ST* restrict g0 = myG.data(0);
+    const ST* restrict g1 = myG.data(1);
+    const ST* restrict g2 = myG.data(2);
+    const ST* restrict h00 = myH.data(0);
+    const ST* restrict h01 = myH.data(1);
+    const ST* restrict h02 = myH.data(2);
+    const ST* restrict h11 = myH.data(3);
+    const ST* restrict h12 = myH.data(4);
+    const ST* restrict h22 = myH.data(5);
 
 #pragma omp simd
-  for (size_t j = first; j < last; ++j)
-  {
-    //dot(PrimLattice.G,myG[j])
-    const ST dX_r = g00 * g0[j] + g01 * g1[j] + g02 * g2[j];
-    const ST dY_r = g10 * g0[j] + g11 * g1[j] + g12 * g2[j];
-    const ST dZ_r = g20 * g0[j] + g21 * g1[j] + g22 * g2[j];
-
-    const size_t psiIndex = j + this->first_spo;
-    psi[psiIndex]         = signed_one * myV[j];
-    dpsi[psiIndex][0]     = signed_one * dX_r;
-    dpsi[psiIndex][1]     = signed_one * dY_r;
-    dpsi[psiIndex][2]     = signed_one * dZ_r;
-
-    const ST h_xx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g00, g01, g02);
-    const ST h_xy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g10, g11, g12);
-    const ST h_xz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g20, g21, g22);
-    const ST h_yx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g00, g01, g02);
-    const ST h_yy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g10, g11, g12);
-    const ST h_yz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g20, g21, g22);
-    const ST h_zx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g20, g21, g22, g00, g01, g02);
-    const ST h_zy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g20, g21, g22, g10, g11, g12);
-    const ST h_zz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g20, g21, g22, g20, g21, g22);
-
-    grad_grad_psi[psiIndex][0] = signed_one * h_xx_r;
-    grad_grad_psi[psiIndex][1] = signed_one * h_xy_r;
-    grad_grad_psi[psiIndex][2] = signed_one * h_xz_r;
-    grad_grad_psi[psiIndex][3] = signed_one * h_yx_r;
-    grad_grad_psi[psiIndex][4] = signed_one * h_yy_r;
-    grad_grad_psi[psiIndex][5] = signed_one * h_yz_r;
-    grad_grad_psi[psiIndex][6] = signed_one * h_zx_r;
-    grad_grad_psi[psiIndex][7] = signed_one * h_zy_r;
-    grad_grad_psi[psiIndex][8] = signed_one * h_zz_r;
-  }
+    for (size_t j = first; j < last; ++j) {
+        // dot(PrimLattice.G,myG[j])
+        const ST dX_r = g00 * g0[j] + g01 * g1[j] + g02 * g2[j];
+        const ST dY_r = g10 * g0[j] + g11 * g1[j] + g12 * g2[j];
+        const ST dZ_r = g20 * g0[j] + g21 * g1[j] + g22 * g2[j];
+
+        const size_t psiIndex = j + this->first_spo;
+        psi[psiIndex] = signed_one * myV[j];
+        dpsi[psiIndex][0] = signed_one * dX_r;
+        dpsi[psiIndex][1] = signed_one * dY_r;
+        dpsi[psiIndex][2] = signed_one * dZ_r;
+
+        const ST h_xx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j],
+            g00, g01, g02, g00, g01, g02);
+        const ST h_xy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j],
+            g00, g01, g02, g10, g11, g12);
+        const ST h_xz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j],
+            g00, g01, g02, g20, g21, g22);
+        const ST h_yx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j],
+            g10, g11, g12, g00, g01, g02);
+        const ST h_yy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j],
+            g10, g11, g12, g10, g11, g12);
+        const ST h_yz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j],
+            g10, g11, g12, g20, g21, g22);
+        const ST h_zx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j],
+            g20, g21, g22, g00, g01, g02);
+        const ST h_zy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j],
+            g20, g21, g22, g10, g11, g12);
+        const ST h_zz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j],
+            g20, g21, g22, g20, g21, g22);
+
+        grad_grad_psi[psiIndex][0] = signed_one * h_xx_r;
+        grad_grad_psi[psiIndex][1] = signed_one * h_xy_r;
+        grad_grad_psi[psiIndex][2] = signed_one * h_xz_r;
+        grad_grad_psi[psiIndex][3] = signed_one * h_yx_r;
+        grad_grad_psi[psiIndex][4] = signed_one * h_yy_r;
+        grad_grad_psi[psiIndex][5] = signed_one * h_yz_r;
+        grad_grad_psi[psiIndex][6] = signed_one * h_zx_r;
+        grad_grad_psi[psiIndex][7] = signed_one * h_zy_r;
+        grad_grad_psi[psiIndex][8] = signed_one * h_zz_r;
+    }
 }
 
-template<typename ST>
-void SplineR2RT<ST>::evaluateVGH(const ParticleSet& P,
-                                const int iat,
-                                ValueVector& psi,
-                                GradVector& dpsi,
-                                HessVector& grad_grad_psi)
+template <typename ST>
+void
+SplineR2RT<ST>::evaluateVGH(const ParticleSetT<ST>& P, const int iat,
+    ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi)
 {
-  const PointType& r = P.activeR(iat);
-  PointType ru;
-  int bc_sign = convertPos(r, ru);
+    const PointType& r = P.activeR(iat);
+    PointType ru;
+    int bc_sign = convertPos(r, ru);
 
 #pragma omp parallel
-  {
-    int first, last;
-    FairDivideAligned(psi.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
+    {
+        int first, last;
+        FairDivideAligned(psi.size(), getAlignment<ST>(), omp_get_num_threads(),
+            omp_get_thread_num(), first, last);
 
-    spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last);
-    assign_vgh(bc_sign, psi, dpsi, grad_grad_psi, first, last);
-  }
+        spline2::evaluate3d_vgh(
+            SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last);
+        assign_vgh(bc_sign, psi, dpsi, grad_grad_psi, first, last);
+    }
 }
 
-template<typename ST>
-void SplineR2RT<ST>::assign_vghgh(int bc_sign,
-                                 ValueVector& psi,
-                                 GradVector& dpsi,
-                                 HessVector& grad_grad_psi,
-                                 GGGVector& grad_grad_grad_psi,
-                                 int first,
-                                 int last) const
+template <typename ST>
+void
+SplineR2RT<ST>::assign_vghgh(int bc_sign, ValueVector& psi, GradVector& dpsi,
+    HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, int first,
+    int last) const
 {
-  // protect last
-  last = last < 0 ? this->kPoints.size() : (last > this->kPoints.size() ? this->kPoints.size() : last);
-
-  const ST signed_one = (bc_sign & 1) ? -1 : 1;
-  const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
-           g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
-           g22 = PrimLattice.G(8);
-
-  const ST* restrict g0  = myG.data(0);
-  const ST* restrict g1  = myG.data(1);
-  const ST* restrict g2  = myG.data(2);
-  const ST* restrict h00 = myH.data(0);
-  const ST* restrict h01 = myH.data(1);
-  const ST* restrict h02 = myH.data(2);
-  const ST* restrict h11 = myH.data(3);
-  const ST* restrict h12 = myH.data(4);
-  const ST* restrict h22 = myH.data(5);
-
-  const ST* restrict gh000 = mygH.data(0);
-  const ST* restrict gh001 = mygH.data(1);
-  const ST* restrict gh002 = mygH.data(2);
-  const ST* restrict gh011 = mygH.data(3);
-  const ST* restrict gh012 = mygH.data(4);
-  const ST* restrict gh022 = mygH.data(5);
-  const ST* restrict gh111 = mygH.data(6);
-  const ST* restrict gh112 = mygH.data(7);
-  const ST* restrict gh122 = mygH.data(8);
-  const ST* restrict gh222 = mygH.data(9);
-
-  //SIMD doesn't work quite right yet.  Comment out until further debugging.
-  //#pragma omp simd
-  for (size_t j = first; j < last; ++j)
-  {
-    const ST val_r = myV[j];
-
-
-    //dot(PrimLattice.G,myG[j])
-    const ST dX_r = g00 * g0[j] + g01 * g1[j] + g02 * g2[j];
-    const ST dY_r = g10 * g0[j] + g11 * g1[j] + g12 * g2[j];
-    const ST dZ_r = g20 * g0[j] + g21 * g1[j] + g22 * g2[j];
-
-    const size_t psiIndex = j + this->first_spo;
-    psi[psiIndex]         = signed_one * val_r;
-    dpsi[psiIndex][0]     = signed_one * dX_r;
-    dpsi[psiIndex][1]     = signed_one * dY_r;
-    dpsi[psiIndex][2]     = signed_one * dZ_r;
-
-    //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates.
-    const ST f_xx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g00, g01, g02);
-    const ST f_xy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g10, g11, g12);
-    const ST f_xz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g20, g21, g22);
-    const ST f_yy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g10, g11, g12);
-    const ST f_yz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g10, g11, g12, g20, g21, g22);
-    const ST f_zz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g20, g21, g22, g20, g21, g22);
-
-    /*    const ST h_xx_r=f_xx_r;
-      const ST h_xy_r=f_xy_r+(kX*dY_i+kY*dX_i)-kX*kY*val_r;
-      const ST h_xz_r=f_xz_r+(kX*dZ_i+kZ*dX_i)-kX*kZ*val_r;
-      const ST h_yy_r=f_yy_r+2*kY*dY_i-kY*kY*val_r;
-      const ST h_yz_r=f_yz_r+(kY*dZ_i+kZ*dY_i)-kY*kZ*val_r;
-      const ST h_zz_r=f_zz_r+2*kZ*dZ_i-kZ*kZ*val_r; */
-
-    grad_grad_psi[psiIndex][0] = f_xx_r * signed_one;
-    grad_grad_psi[psiIndex][1] = f_xy_r * signed_one;
-    grad_grad_psi[psiIndex][2] = f_xz_r * signed_one;
-    grad_grad_psi[psiIndex][4] = f_yy_r * signed_one;
-    grad_grad_psi[psiIndex][5] = f_yz_r * signed_one;
-    grad_grad_psi[psiIndex][8] = f_zz_r * signed_one;
-
-    //symmetry:
-    grad_grad_psi[psiIndex][3] = grad_grad_psi[psiIndex][1];
-    grad_grad_psi[psiIndex][6] = grad_grad_psi[psiIndex][2];
-    grad_grad_psi[psiIndex][7] = grad_grad_psi[psiIndex][5];
-    //These are the real and imaginary components of the third SPO derivative.  _xxx denotes
-    // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on.
-
-    const ST f3_xxx_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j],
-                                    gh122[j], gh222[j], g00, g01, g02, g00, g01, g02, g00, g01, g02);
-    const ST f3_xxy_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j],
-                                    gh122[j], gh222[j], g00, g01, g02, g00, g01, g02, g10, g11, g12);
-    const ST f3_xxz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j],
-                                    gh122[j], gh222[j], g00, g01, g02, g00, g01, g02, g20, g21, g22);
-    const ST f3_xyy_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j],
-                                    gh122[j], gh222[j], g00, g01, g02, g10, g11, g12, g10, g11, g12);
-    const ST f3_xyz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j],
-                                    gh122[j], gh222[j], g00, g01, g02, g10, g11, g12, g20, g21, g22);
-    const ST f3_xzz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j],
-                                    gh122[j], gh222[j], g00, g01, g02, g20, g21, g22, g20, g21, g22);
-    const ST f3_yyy_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j],
-                                    gh122[j], gh222[j], g10, g11, g12, g10, g11, g12, g10, g11, g12);
-    const ST f3_yyz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j],
-                                    gh122[j], gh222[j], g10, g11, g12, g10, g11, g12, g20, g21, g22);
-    const ST f3_yzz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j],
-                                    gh122[j], gh222[j], g10, g11, g12, g20, g21, g22, g20, g21, g22);
-    const ST f3_zzz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j],
-                                    gh122[j], gh222[j], g20, g21, g22, g20, g21, g22, g20, g21, g22);
-
-    //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r)
-    /*     const ST gh_xxx_r= f3_xxx_r + 3*kX*f_xx_i - 3*kX*kX*dX_r - kX*kX*kX*val_i;
-      const ST gh_xxy_r= f3_xxy_r +(kY*f_xx_i+2*kX*f_xy_i) - (kX*kX*dY_r+2*kX*kY*dX_r)-kX*kX*kY*val_i; 
-      const ST gh_xxz_r= f3_xxz_r +(kZ*f_xx_i+2*kX*f_xz_i) - (kX*kX*dZ_r+2*kX*kZ*dX_r)-kX*kX*kZ*val_i; 
-      const ST gh_xyy_r= f3_xyy_r +(2*kY*f_xy_i+kX*f_yy_i) - (2*kX*kY*dY_r+kY*kY*dX_r)-kX*kY*kY*val_i;
-      const ST gh_xyz_r= f3_xyz_r +(kX*f_yz_i+kY*f_xz_i+kZ*f_xy_i)-(kX*kY*dZ_r+kY*kZ*dX_r+kZ*kX*dY_r) - kX*kY*kZ*val_i;
-      const ST gh_xzz_r= f3_xzz_r +(2*kZ*f_xz_i+kX*f_zz_i) - (2*kX*kZ*dZ_r+kZ*kZ*dX_r)-kX*kZ*kZ*val_i;
-      const ST gh_yyy_r= f3_yyy_r + 3*kY*f_yy_i - 3*kY*kY*dY_r - kY*kY*kY*val_i;
-      const ST gh_yyz_r= f3_yyz_r +(kZ*f_yy_i+2*kY*f_yz_i) - (kY*kY*dZ_r+2*kY*kZ*dY_r)-kY*kY*kZ*val_i; 
-      const ST gh_yzz_r= f3_yzz_r +(2*kZ*f_yz_i+kY*f_zz_i) - (2*kY*kZ*dZ_r+kZ*kZ*dY_r)-kY*kZ*kZ*val_i;
-      const ST gh_zzz_r= f3_zzz_r + 3*kZ*f_zz_i - 3*kZ*kZ*dZ_r - kZ*kZ*kZ*val_i;*/
-    //[x][xx] //These are the unique entries
-    grad_grad_grad_psi[psiIndex][0][0] = signed_one * f3_xxx_r;
-    grad_grad_grad_psi[psiIndex][0][1] = signed_one * f3_xxy_r;
-    grad_grad_grad_psi[psiIndex][0][2] = signed_one * f3_xxz_r;
-    grad_grad_grad_psi[psiIndex][0][4] = signed_one * f3_xyy_r;
-    grad_grad_grad_psi[psiIndex][0][5] = signed_one * f3_xyz_r;
-    grad_grad_grad_psi[psiIndex][0][8] = signed_one * f3_xzz_r;
-
-    //filling in the symmetric terms.  Filling out the xij terms
-    grad_grad_grad_psi[psiIndex][0][3] = grad_grad_grad_psi[psiIndex][0][1];
-    grad_grad_grad_psi[psiIndex][0][6] = grad_grad_grad_psi[psiIndex][0][2];
-    grad_grad_grad_psi[psiIndex][0][7] = grad_grad_grad_psi[psiIndex][0][5];
-
-    //Now for everything that's a permutation of the above:
-    grad_grad_grad_psi[psiIndex][1][0] = grad_grad_grad_psi[psiIndex][0][1];
-    grad_grad_grad_psi[psiIndex][1][1] = grad_grad_grad_psi[psiIndex][0][4];
-    grad_grad_grad_psi[psiIndex][1][2] = grad_grad_grad_psi[psiIndex][0][5];
-    grad_grad_grad_psi[psiIndex][1][3] = grad_grad_grad_psi[psiIndex][0][4];
-    grad_grad_grad_psi[psiIndex][1][6] = grad_grad_grad_psi[psiIndex][0][5];
-
-    grad_grad_grad_psi[psiIndex][2][0] = grad_grad_grad_psi[psiIndex][0][2];
-    grad_grad_grad_psi[psiIndex][2][1] = grad_grad_grad_psi[psiIndex][0][5];
-    grad_grad_grad_psi[psiIndex][2][2] = grad_grad_grad_psi[psiIndex][0][8];
-    grad_grad_grad_psi[psiIndex][2][3] = grad_grad_grad_psi[psiIndex][0][5];
-    grad_grad_grad_psi[psiIndex][2][6] = grad_grad_grad_psi[psiIndex][0][8];
-
-    grad_grad_grad_psi[psiIndex][1][4] = signed_one * f3_yyy_r;
-    grad_grad_grad_psi[psiIndex][1][5] = signed_one * f3_yyz_r;
-    grad_grad_grad_psi[psiIndex][1][8] = signed_one * f3_yzz_r;
-
-    grad_grad_grad_psi[psiIndex][1][7] = grad_grad_grad_psi[psiIndex][1][5];
-    grad_grad_grad_psi[psiIndex][2][4] = grad_grad_grad_psi[psiIndex][1][5];
-    grad_grad_grad_psi[psiIndex][2][5] = grad_grad_grad_psi[psiIndex][1][8];
-    grad_grad_grad_psi[psiIndex][2][7] = grad_grad_grad_psi[psiIndex][1][8];
-
-    grad_grad_grad_psi[psiIndex][2][8] = signed_one * f3_zzz_r;
-  }
+    // protect last
+    last = last < 0 ?
+        this->kPoints.size() :
+        (last > this->kPoints.size() ? this->kPoints.size() : last);
+
+    const ST signed_one = (bc_sign & 1) ? -1 : 1;
+    const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1),
+             g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
+             g11 = PrimLattice.G(4), g12 = PrimLattice.G(5),
+             g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
+             g22 = PrimLattice.G(8);
+
+    const ST* restrict g0 = myG.data(0);
+    const ST* restrict g1 = myG.data(1);
+    const ST* restrict g2 = myG.data(2);
+    const ST* restrict h00 = myH.data(0);
+    const ST* restrict h01 = myH.data(1);
+    const ST* restrict h02 = myH.data(2);
+    const ST* restrict h11 = myH.data(3);
+    const ST* restrict h12 = myH.data(4);
+    const ST* restrict h22 = myH.data(5);
+
+    const ST* restrict gh000 = mygH.data(0);
+    const ST* restrict gh001 = mygH.data(1);
+    const ST* restrict gh002 = mygH.data(2);
+    const ST* restrict gh011 = mygH.data(3);
+    const ST* restrict gh012 = mygH.data(4);
+    const ST* restrict gh022 = mygH.data(5);
+    const ST* restrict gh111 = mygH.data(6);
+    const ST* restrict gh112 = mygH.data(7);
+    const ST* restrict gh122 = mygH.data(8);
+    const ST* restrict gh222 = mygH.data(9);
+
+    // SIMD doesn't work quite right yet.  Comment out until further debugging.
+    // #pragma omp simd
+    for (size_t j = first; j < last; ++j) {
+        const ST val_r = myV[j];
+
+        // dot(PrimLattice.G,myG[j])
+        const ST dX_r = g00 * g0[j] + g01 * g1[j] + g02 * g2[j];
+        const ST dY_r = g10 * g0[j] + g11 * g1[j] + g12 * g2[j];
+        const ST dZ_r = g20 * g0[j] + g21 * g1[j] + g22 * g2[j];
+
+        const size_t psiIndex = j + this->first_spo;
+        psi[psiIndex] = signed_one * val_r;
+        dpsi[psiIndex][0] = signed_one * dX_r;
+        dpsi[psiIndex][1] = signed_one * dY_r;
+        dpsi[psiIndex][2] = signed_one * dZ_r;
+
+        // intermediates for computation of hessian. \partial_i \partial_j phi
+        // in cartesian coordinates.
+        const ST f_xx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j],
+            g00, g01, g02, g00, g01, g02);
+        const ST f_xy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j],
+            g00, g01, g02, g10, g11, g12);
+        const ST f_xz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j],
+            g00, g01, g02, g20, g21, g22);
+        const ST f_yy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j],
+            g10, g11, g12, g10, g11, g12);
+        const ST f_yz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j],
+            g10, g11, g12, g20, g21, g22);
+        const ST f_zz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j],
+            g20, g21, g22, g20, g21, g22);
+
+        /*    const ST h_xx_r=f_xx_r;
+          const ST h_xy_r=f_xy_r+(kX*dY_i+kY*dX_i)-kX*kY*val_r;
+          const ST h_xz_r=f_xz_r+(kX*dZ_i+kZ*dX_i)-kX*kZ*val_r;
+          const ST h_yy_r=f_yy_r+2*kY*dY_i-kY*kY*val_r;
+          const ST h_yz_r=f_yz_r+(kY*dZ_i+kZ*dY_i)-kY*kZ*val_r;
+          const ST h_zz_r=f_zz_r+2*kZ*dZ_i-kZ*kZ*val_r; */
+
+        grad_grad_psi[psiIndex][0] = f_xx_r * signed_one;
+        grad_grad_psi[psiIndex][1] = f_xy_r * signed_one;
+        grad_grad_psi[psiIndex][2] = f_xz_r * signed_one;
+        grad_grad_psi[psiIndex][4] = f_yy_r * signed_one;
+        grad_grad_psi[psiIndex][5] = f_yz_r * signed_one;
+        grad_grad_psi[psiIndex][8] = f_zz_r * signed_one;
+
+        // symmetry:
+        grad_grad_psi[psiIndex][3] = grad_grad_psi[psiIndex][1];
+        grad_grad_psi[psiIndex][6] = grad_grad_psi[psiIndex][2];
+        grad_grad_psi[psiIndex][7] = grad_grad_psi[psiIndex][5];
+        // These are the real and imaginary components of the third SPO
+        // derivative.
+        // _xxx denotes the third derivative w.r.t. x, _xyz a derivative with
+        // respect to x, y, and z, and so on.
+
+        const ST f3_xxx_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j],
+            gh012[j], gh022[j], gh111[j], gh112[j], gh122[j], gh222[j], g00,
+            g01, g02, g00, g01, g02, g00, g01, g02);
+        const ST f3_xxy_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j],
+            gh012[j], gh022[j], gh111[j], gh112[j], gh122[j], gh222[j], g00,
+            g01, g02, g00, g01, g02, g10, g11, g12);
+        const ST f3_xxz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j],
+            gh012[j], gh022[j], gh111[j], gh112[j], gh122[j], gh222[j], g00,
+            g01, g02, g00, g01, g02, g20, g21, g22);
+        const ST f3_xyy_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j],
+            gh012[j], gh022[j], gh111[j], gh112[j], gh122[j], gh222[j], g00,
+            g01, g02, g10, g11, g12, g10, g11, g12);
+        const ST f3_xyz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j],
+            gh012[j], gh022[j], gh111[j], gh112[j], gh122[j], gh222[j], g00,
+            g01, g02, g10, g11, g12, g20, g21, g22);
+        const ST f3_xzz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j],
+            gh012[j], gh022[j], gh111[j], gh112[j], gh122[j], gh222[j], g00,
+            g01, g02, g20, g21, g22, g20, g21, g22);
+        const ST f3_yyy_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j],
+            gh012[j], gh022[j], gh111[j], gh112[j], gh122[j], gh222[j], g10,
+            g11, g12, g10, g11, g12, g10, g11, g12);
+        const ST f3_yyz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j],
+            gh012[j], gh022[j], gh111[j], gh112[j], gh122[j], gh222[j], g10,
+            g11, g12, g10, g11, g12, g20, g21, g22);
+        const ST f3_yzz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j],
+            gh012[j], gh022[j], gh111[j], gh112[j], gh122[j], gh222[j], g10,
+            g11, g12, g20, g21, g22, g20, g21, g22);
+        const ST f3_zzz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j],
+            gh012[j], gh022[j], gh111[j], gh112[j], gh122[j], gh222[j], g20,
+            g21, g22, g20, g21, g22, g20, g21, g22);
+
+        // Here is where we build up the components of the physical hessian
+        // gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r)).
+        // Disabled reference formulas, one statement per line:
+        // clang-format off
+        /*
+          const ST gh_xxx_r= f3_xxx_r + 3*kX*f_xx_i - 3*kX*kX*dX_r - kX*kX*kX*val_i;
+          const ST gh_xxy_r= f3_xxy_r +(kY*f_xx_i+2*kX*f_xy_i) - (kX*kX*dY_r+2*kX*kY*dX_r)-kX*kX*kY*val_i;
+          const ST gh_xxz_r= f3_xxz_r +(kZ*f_xx_i+2*kX*f_xz_i) - (kX*kX*dZ_r+2*kX*kZ*dX_r)-kX*kX*kZ*val_i;
+          const ST gh_xyy_r= f3_xyy_r +(2*kY*f_xy_i+kX*f_yy_i) - (2*kX*kY*dY_r+kY*kY*dX_r)-kX*kY*kY*val_i;
+          const ST gh_xyz_r= f3_xyz_r +(kX*f_yz_i+kY*f_xz_i+kZ*f_xy_i)-(kX*kY*dZ_r+kY*kZ*dX_r+kZ*kX*dY_r) - kX*kY*kZ*val_i;
+          const ST gh_xzz_r= f3_xzz_r +(2*kZ*f_xz_i+kX*f_zz_i) - (2*kX*kZ*dZ_r+kZ*kZ*dX_r)-kX*kZ*kZ*val_i;
+          const ST gh_yyy_r= f3_yyy_r + 3*kY*f_yy_i - 3*kY*kY*dY_r - kY*kY*kY*val_i;
+          const ST gh_yyz_r= f3_yyz_r +(kZ*f_yy_i+2*kY*f_yz_i) - (kY*kY*dZ_r+2*kY*kZ*dY_r)-kY*kY*kZ*val_i;
+          const ST gh_yzz_r= f3_yzz_r +(2*kZ*f_yz_i+kY*f_zz_i) - (2*kY*kZ*dZ_r+kZ*kZ*dY_r)-kY*kZ*kZ*val_i;
+          const ST gh_zzz_r= f3_zzz_r + 3*kZ*f_zz_i - 3*kZ*kZ*dZ_r - kZ*kZ*kZ*val_i;
+        */
+        // clang-format on
+        //[x][xx] //These are the unique entries
+        grad_grad_grad_psi[psiIndex][0][0] = signed_one * f3_xxx_r;
+        grad_grad_grad_psi[psiIndex][0][1] = signed_one * f3_xxy_r;
+        grad_grad_grad_psi[psiIndex][0][2] = signed_one * f3_xxz_r;
+        grad_grad_grad_psi[psiIndex][0][4] = signed_one * f3_xyy_r;
+        grad_grad_grad_psi[psiIndex][0][5] = signed_one * f3_xyz_r;
+        grad_grad_grad_psi[psiIndex][0][8] = signed_one * f3_xzz_r;
+
+        // filling in the symmetric terms.  Filling out the xij terms
+        grad_grad_grad_psi[psiIndex][0][3] = grad_grad_grad_psi[psiIndex][0][1];
+        grad_grad_grad_psi[psiIndex][0][6] = grad_grad_grad_psi[psiIndex][0][2];
+        grad_grad_grad_psi[psiIndex][0][7] = grad_grad_grad_psi[psiIndex][0][5];
+
+        // Now for everything that's a permutation of the above:
+        grad_grad_grad_psi[psiIndex][1][0] = grad_grad_grad_psi[psiIndex][0][1];
+        grad_grad_grad_psi[psiIndex][1][1] = grad_grad_grad_psi[psiIndex][0][4];
+        grad_grad_grad_psi[psiIndex][1][2] = grad_grad_grad_psi[psiIndex][0][5];
+        grad_grad_grad_psi[psiIndex][1][3] = grad_grad_grad_psi[psiIndex][0][4];
+        grad_grad_grad_psi[psiIndex][1][6] = grad_grad_grad_psi[psiIndex][0][5];
+
+        grad_grad_grad_psi[psiIndex][2][0] = grad_grad_grad_psi[psiIndex][0][2];
+        grad_grad_grad_psi[psiIndex][2][1] = grad_grad_grad_psi[psiIndex][0][5];
+        grad_grad_grad_psi[psiIndex][2][2] = grad_grad_grad_psi[psiIndex][0][8];
+        grad_grad_grad_psi[psiIndex][2][3] = grad_grad_grad_psi[psiIndex][0][5];
+        grad_grad_grad_psi[psiIndex][2][6] = grad_grad_grad_psi[psiIndex][0][8];
+
+        grad_grad_grad_psi[psiIndex][1][4] = signed_one * f3_yyy_r;
+        grad_grad_grad_psi[psiIndex][1][5] = signed_one * f3_yyz_r;
+        grad_grad_grad_psi[psiIndex][1][8] = signed_one * f3_yzz_r;
+
+        grad_grad_grad_psi[psiIndex][1][7] = grad_grad_grad_psi[psiIndex][1][5];
+        grad_grad_grad_psi[psiIndex][2][4] = grad_grad_grad_psi[psiIndex][1][5];
+        grad_grad_grad_psi[psiIndex][2][5] = grad_grad_grad_psi[psiIndex][1][8];
+        grad_grad_grad_psi[psiIndex][2][7] = grad_grad_grad_psi[psiIndex][1][8];
+
+        grad_grad_grad_psi[psiIndex][2][8] = signed_one * f3_zzz_r;
+    }
 }
 
-template<typename ST>
-void SplineR2RT<ST>::evaluateVGHGH(const ParticleSet& P,
-                                  const int iat,
-                                  ValueVector& psi,
-                                  GradVector& dpsi,
-                                  HessVector& grad_grad_psi,
-                                  GGGVector& grad_grad_grad_psi)
+template <typename ST>
+void
+SplineR2RT<ST>::evaluateVGHGH(const ParticleSetT<ST>& P, const int iat,
+    ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi,
+    GGGVector& grad_grad_grad_psi)
 {
-  const PointType& r = P.activeR(iat);
-  PointType ru;
-  int bc_sign = convertPos(r, ru);
+    const PointType& r = P.activeR(iat);
+    PointType ru;
+    int bc_sign = convertPos(r, ru);
 
 #pragma omp parallel
-  {
-    int first, last;
-    FairDivideAligned(psi.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
-
-    spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last);
-    assign_vghgh(bc_sign, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first, last);
-  }
+    {
+        int first, last;
+        FairDivideAligned(psi.size(), getAlignment<ST>(), omp_get_num_threads(),
+            omp_get_thread_num(), first, last);
+
+        spline2::evaluate3d_vghgh(
+            SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last);
+        assign_vghgh(
+            bc_sign, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first, last);
+    }
 }
 
 template class SplineR2RT<float>;
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h
index 302d745f05..f265561e18 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h
@@ -1,222 +1,269 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2019 QMCPACK developers.
 //
-// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
-//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
+//                    Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
+//                    Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
-
 #ifndef QMCPLUSPLUS_SPLINE_R2R_H
 #define QMCPLUSPLUS_SPLINE_R2R_H
 
-#include <memory>
-#include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h"
 #include "OhmmsSoA/VectorSoaContainer.h"
-#include "spline2/MultiBspline.hpp"
+#include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h"
 #include "Utilities/FairDivide.h"
+#include "spline2/MultiBspline.hpp"
+
+#include <memory>
 
 namespace qmcplusplus
 {
 /** class to match ST real spline with BsplineSet::ValueType (real) SPOs
  * @tparam ST precision of spline
  *
- * Requires temporage storage and multiplication of the sign of the real part of the phase
- * Internal storage ST type arrays are aligned and padded.
+ * Requires temporary storage and multiplication of the sign of the real part of
+ * the phase. Internal storage ST type arrays are aligned and padded.
  */
-template<typename ST>
+template <typename ST>
 class SplineR2RT : public BsplineSetT<ST>
 {
 public:
-  using SplineType       = typename bspline_traits<ST, 3>::SplineType;
-  using BCType           = typename bspline_traits<ST, 3>::BCType;
-  using DataType         = ST;
-  using PointType        = TinyVector<ST, 3>;
-  using SingleSplineType = UBspline_3d_d;
-  // types for evaluation results
-  using TT = typename BsplineSetT<ST>::ValueType;
-  using GGGVector = typename BsplineSetT<ST>::GGGVector;
-  using ValueMatrix = typename BsplineSetT<ST>::ValueMatrix;
-  using GradVector = typename BsplineSetT<ST>::GradVector;
-  using HessVector = typename BsplineSetT<ST>::HessVector;
-  using ValueVector = typename BsplineSetT<ST>::ValueVector;
-
-  using vContainer_type  = Vector<ST, aligned_allocator<ST>>;
-  using gContainer_type  = VectorSoaContainer<ST, 3>;
-  using hContainer_type  = VectorSoaContainer<ST, 6>;
-  using ghContainer_type = VectorSoaContainer<ST, 10>;
-
-  using RealType  = typename SPOSetT<ST>::RealType;
+    using SplineType = typename bspline_traits<ST, 3>::SplineType;
+    using BCType = typename bspline_traits<ST, 3>::BCType;
+    using DataType = ST;
+    using PointType = TinyVector<ST, 3>;
+    using SingleSplineType = UBspline_3d_d;
+    // types for evaluation results
+    using TT = typename BsplineSetT<ST>::ValueType;
+    using GGGVector = typename BsplineSetT<ST>::GGGVector;
+    using ValueMatrix = typename BsplineSetT<ST>::ValueMatrix;
+    using GradVector = typename BsplineSetT<ST>::GradVector;
+    using HessVector = typename BsplineSetT<ST>::HessVector;
+    using ValueVector = typename BsplineSetT<ST>::ValueVector;
+
+    using vContainer_type = Vector<ST, aligned_allocator<ST>>;
+    using gContainer_type = VectorSoaContainer<ST, 3>;
+    using hContainer_type = VectorSoaContainer<ST, 6>;
+    using ghContainer_type = VectorSoaContainer<ST, 10>;
+
+    using RealType = typename SPOSetT<ST>::RealType;
 
 private:
-  bool IsGamma;
-  ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian
-  Tensor<ST, 3> GGt;
-  ///multi bspline set
-  std::shared_ptr<MultiBspline<ST>> SplineInst;
-
-  ///Copy of original splines for orbital rotation
-  std::shared_ptr<std::vector<RealType>> coef_copy_;
+    bool IsGamma;
+    ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to
+    /// CartesianUnit, e.g. Hessian
+    Tensor<ST, 3> GGt;
+    /// multi bspline set
+    std::shared_ptr<MultiBspline<ST>> SplineInst;
 
-  ///thread private ratios for reduction when using nested threading, numVP x numThread
-  Matrix<TT> ratios_private;
+    /// Copy of original splines for orbital rotation
+    std::shared_ptr<std::vector<RealType>> coef_copy_;
 
+    /// thread private ratios for reduction when using nested threading, numVP x
+    /// numThread
+    Matrix<TT> ratios_private;
 
 protected:
-  ///primitive cell
-  CrystalLattice<ST, 3> PrimLattice;
-  /// intermediate result vectors
-  vContainer_type myV;
-  vContainer_type myL;
-  gContainer_type myG;
-  hContainer_type myH;
-  ghContainer_type mygH;
+    /// primitive cell
+    CrystalLattice<ST, 3> PrimLattice;
+    /// intermediate result vectors
+    vContainer_type myV;
+    vContainer_type myL;
+    gContainer_type myG;
+    hContainer_type myH;
+    ghContainer_type mygH;
 
 public:
-  SplineR2RT(const std::string& my_name) : BsplineSetT<ST>(my_name) {}
-
-  SplineR2RT(const SplineR2RT& in);
-  virtual std::string getClassName() const override { return "SplineR2RT"; }
-  virtual std::string getKeyword() const override { return "SplineR2RT"; }
-  bool isComplex() const override { return false; };
-  bool isRotationSupported() const override { return true; }
-
-  std::unique_ptr<SPOSetT<ST>> makeClone() const override { return std::make_unique<SplineR2RT<ST>>(*this); }
-
-  /// Store an original copy of the spline coefficients for orbital rotation
-  void storeParamsBeforeRotation() override;
-
-  /*
-     Implements orbital rotations via [1,2].
-     Should be called by RotatedSPOs::apply_rotation()
-
-     This implementation requires that NSPOs > Nelec. In other words,
-     if you want to run a orbopt wfn, you must include some virtual orbitals!
-
-     Some results (using older Berkeley branch) were published in [3].
-
-     [1] Filippi & Fahy, JCP 112, (2000)
-     [2] Toulouse & Umrigar, JCP 126, (2007)
-     [3] Townsend et al., PRB 102, (2020)
-  */
-  void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) override;
-
-  inline void resizeStorage(size_t n, size_t nvals)
-  {
-    this->init_base(n);
-    const size_t npad = getAlignedSize<ST>(n);
-    this->myV.resize(npad);
-    this->myG.resize(npad);
-    this->myL.resize(npad);
-    this->myH.resize(npad);
-    this->mygH.resize(npad);
-
-    IsGamma = ((this->HalfG[0] == 0) && (this->HalfG[1] == 0) && (this->HalfG[2] == 0));
-  }
-
-  void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); }
-
-  void gather_tables(Communicate* comm)
-  {
-    if (comm->size() == 1)
-      return;
-    const int Nbands      = this->kPoints.size();
-    const int Nbandgroups = comm->size();
-    this->offset.resize(Nbandgroups + 1, 0);
-    FairDivideLow(Nbands, Nbandgroups, this->offset);
-    gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, this->offset);
-  }
-
-  template<typename GT, typename BCT>
-  void create_spline(GT& xyz_g, BCT& xyz_bc)
-  {
-    GGt        = dot(transpose(PrimLattice.G), PrimLattice.G);
-    SplineInst = std::make_shared<MultiBspline<ST>>();
-    SplineInst->create(xyz_g, xyz_bc, myV.size());
-
-    app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated "
-              << "for the coefficients in 3D spline orbital representation" << std::endl;
-  }
-
-  inline void flush_zero() { SplineInst->flush_zero(); }
-
-  void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level);
-
-  bool read_splines(hdf_archive& h5f);
-
-  bool write_splines(hdf_archive& h5f);
-
-  /** convert position in PrimLattice unit and return sign */
-  inline int convertPos(const PointType& r, PointType& ru)
-  {
-    ru          = PrimLattice.toUnit(r);
-    int bc_sign = 0;
-    for (int i = 0; i < this->D; i++)
-      if (-std::numeric_limits<ST>::epsilon() < ru[i] && ru[i] < 0)
-        ru[i] = ST(0.0);
-      else
-      {
-        ST img = std::floor(ru[i]);
-        ru[i] -= img;
-        bc_sign += this->HalfG[i] * (int)img;
-      }
-    return bc_sign;
-  }
-
-  void assign_v(int bc_sign, const vContainer_type& myV, ValueVector& psi, int first, int last) const;
-
-  void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override;
-
-  void evaluateDetRatios(const VirtualParticleSet& VP,
-                         ValueVector& psi,
-                         const ValueVector& psiinv,
-                         std::vector<TT>& ratios) override;
-
-  void assign_vgl(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last) const;
-
-  /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian
-   */
-  void assign_vgl_from_l(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
-
-  void evaluateVGL(const ParticleSet& P,
-                   const int iat,
-                   ValueVector& psi,
-                   GradVector& dpsi,
-                   ValueVector& d2psi) override;
-
-  void assign_vgh(int bc_sign, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi, int first, int last)
-      const;
-
-  void evaluateVGH(const ParticleSet& P,
-                   const int iat,
-                   ValueVector& psi,
-                   GradVector& dpsi,
-                   HessVector& grad_grad_psi) override;
-
-  void assign_vghgh(int bc_sign,
-                    ValueVector& psi,
-                    GradVector& dpsi,
-                    HessVector& grad_grad_psi,
-                    GGGVector& grad_grad_grad_psi,
-                    int first = 0,
-                    int last  = -1) const;
-
-  void evaluateVGHGH(const ParticleSet& P,
-                     const int iat,
-                     ValueVector& psi,
-                     GradVector& dpsi,
-                     HessVector& grad_grad_psi,
-                     GGGVector& grad_grad_grad_psi) override;
-
-  template<class BSPLINESPO>
-  friend struct SplineSetReader;
-  friend struct BsplineReaderBase;
+    SplineR2RT(const std::string& my_name) : BsplineSetT<ST>(my_name)
+    {
+    }
+
+    SplineR2RT(const SplineR2RT& in);
+    virtual std::string
+    getClassName() const override
+    {
+        return "SplineR2RT";
+    }
+    virtual std::string
+    getKeyword() const override
+    {
+        return "SplineR2RT";
+    }
+    bool
+    isComplex() const override
+    {
+        return false;
+    };
+    bool
+    isRotationSupported() const override
+    {
+        return true;
+    }
+
+    std::unique_ptr<SPOSetT<ST>>
+    makeClone() const override
+    {
+        return std::make_unique<SplineR2RT<ST>>(*this);
+    }
+
+    /// Store an original copy of the spline coefficients for orbital rotation
+    void
+    storeParamsBeforeRotation() override;
+
+    /*
+       Implements orbital rotations via [1,2].
+       Should be called by RotatedSPOs::apply_rotation()
+
+       This implementation requires that NSPOs > Nelec. In other words,
+       if you want to run a orbopt wfn, you must include some virtual orbitals!
+
+       Some results (using older Berkeley branch) were published in [3].
+
+       [1] Filippi & Fahy, JCP 112, (2000)
+       [2] Toulouse & Umrigar, JCP 126, (2007)
+       [3] Townsend et al., PRB 102, (2020)
+    */
+    void
+    applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) override;
+
+    inline void
+    resizeStorage(size_t n, size_t nvals)
+    {
+        this->init_base(n);
+        const size_t npad = getAlignedSize<ST>(n);
+        this->myV.resize(npad);
+        this->myG.resize(npad);
+        this->myL.resize(npad);
+        this->myH.resize(npad);
+        this->mygH.resize(npad);
+
+        IsGamma = ((this->HalfG[0] == 0) && (this->HalfG[1] == 0) &&
+            (this->HalfG[2] == 0));
+    }
+
+    void
+    bcast_tables(Communicate* comm)
+    {
+        chunked_bcast(comm, SplineInst->getSplinePtr());
+    }
+
+    void
+    gather_tables(Communicate* comm)
+    {
+        if (comm->size() == 1)
+            return;
+        const int Nbands = this->kPoints.size();
+        const int Nbandgroups = comm->size();
+        this->offset.resize(Nbandgroups + 1, 0);
+        FairDivideLow(Nbands, Nbandgroups, this->offset);
+        gatherv(comm, SplineInst->getSplinePtr(),
+            SplineInst->getSplinePtr()->z_stride, this->offset);
+    }
+
+    template <typename GT, typename BCT>
+    void
+    create_spline(GT& xyz_g, BCT& xyz_bc)
+    {
+        GGt = dot(transpose(PrimLattice.G), PrimLattice.G);
+        SplineInst = std::make_shared<MultiBspline<ST>>();
+        SplineInst->create(xyz_g, xyz_bc, myV.size());
+
+        app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20)
+                  << " MB allocated "
+                  << "for the coefficients in 3D spline orbital representation"
+                  << std::endl;
+    }
+
+    inline void
+    flush_zero()
+    {
+        SplineInst->flush_zero();
+    }
+
+    void
+    set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i,
+        int twist, int ispline, int level);
+
+    bool
+    read_splines(hdf_archive& h5f);
+
+    bool
+    write_splines(hdf_archive& h5f);
+
+    /** convert position in PrimLattice unit and return sign */
+    inline int
+    convertPos(const PointType& r, PointType& ru)
+    {
+        ru = PrimLattice.toUnit(r);
+        int bc_sign = 0;
+        for (int i = 0; i < this->D; i++)
+            if (-std::numeric_limits<ST>::epsilon() < ru[i] && ru[i] < 0)
+                ru[i] = ST(0.0);
+            else {
+                ST img = std::floor(ru[i]);
+                ru[i] -= img;
+                bc_sign += this->HalfG[i] * (int)img;
+            }
+        return bc_sign;
+    }
+
+    void
+    assign_v(int bc_sign, const vContainer_type& myV, ValueVector& psi,
+        int first, int last) const;
+
+    void
+    evaluateValue(
+        const ParticleSetT<ST>& P, const int iat, ValueVector& psi) override;
+
+    void
+    evaluateDetRatios(const VirtualParticleSetT<ST>& VP, ValueVector& psi,
+        const ValueVector& psiinv, std::vector<TT>& ratios) override;
+
+    void
+    assign_vgl(int bc_sign, ValueVector& psi, GradVector& dpsi,
+        ValueVector& d2psi, int first, int last) const;
+
+    /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in
+     * cartesian
+     */
+    void
+    assign_vgl_from_l(
+        int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
+
+    void
+    evaluateVGL(const ParticleSetT<ST>& P, const int iat, ValueVector& psi,
+        GradVector& dpsi, ValueVector& d2psi) override;
+
+    void
+    assign_vgh(int bc_sign, ValueVector& psi, GradVector& dpsi,
+        HessVector& grad_grad_psi, int first, int last) const;
+
+    void
+    evaluateVGH(const ParticleSetT<ST>& P, const int iat, ValueVector& psi,
+        GradVector& dpsi, HessVector& grad_grad_psi) override;
+
+    void
+    assign_vghgh(int bc_sign, ValueVector& psi, GradVector& dpsi,
+        HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, int first = 0,
+        int last = -1) const;
+
+    void
+    evaluateVGHGH(const ParticleSetT<ST>& P, const int iat, ValueVector& psi,
+        GradVector& dpsi, HessVector& grad_grad_psi,
+        GGGVector& grad_grad_grad_psi) override;
+
+    template <class BSPLINESPO>
+    friend struct SplineSetReader;
+    friend struct BsplineReaderBase;
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt
index 61fa6756fc..8e7c080bd6 100644
--- a/src/QMCWaveFunctions/CMakeLists.txt
+++ b/src/QMCWaveFunctions/CMakeLists.txt
@@ -24,6 +24,7 @@ add_subdirectory(detail)
 set(WFBASE_SRCS
     OptimizableFunctorBase.cpp
     VariableSet.cpp
+    VariableSetT.cpp
     WaveFunctionPool.cpp
     WaveFunctionComponent.cpp
     WaveFunctionComponentBuilder.cpp
@@ -41,14 +42,16 @@ set(WFBASE_SRCS
     HarmonicOscillator/SHOSetBuilder.cpp
     HarmonicOscillator/SHOSetBuilderT.cpp
     ExampleHeBuilder.cpp
-    ExampleHeComponent.cpp)
+    ExampleHeComponent.cpp
+    RotatedSPOsT.cpp
+    SpinorSetT.cpp)
 
 if(NOT QMC_COMPLEX)
-  set(WFBASE_SRCS ${WFBASE_SRCS} RotatedSPOs.cpp RotatedSPOsT.cpp)
+  set(WFBASE_SRCS ${WFBASE_SRCS} RotatedSPOs.cpp)
 endif(NOT QMC_COMPLEX)
 
 if(QMC_COMPLEX)
-  set(WFBASE_SRCS ${WFBASE_SRCS} SpinorSet.cpp SpinorSetT.cpp)
+  set(WFBASE_SRCS ${WFBASE_SRCS} SpinorSet.cpp)
 endif(QMC_COMPLEX)
 ########################
 # build jastrows
@@ -85,18 +88,21 @@ if(OHMMS_DIM MATCHES 3)
       LCAO/LCAOrbitalBuilderT.cpp
       LCAO/MultiQuinticSpline1D.cpp
       LCAO/AOBasisBuilder.cpp
-      LCAO/SoaLocalizedBasisSet.cpp)
+      LCAO/AOBasisBuilderT.cpp
+      LCAO/SoaLocalizedBasisSet.cpp
+      LCAO/SoaLocalizedBasisSetT.cpp
+      LCAO/LCAOSpinorBuilderT.cpp
+      LCAO/LCAOrbitalSetWithCorrectionT.cpp
+      LCAO/CuspCorrectionConstructionT.cpp
+      LCAO/SoaCuspCorrectionT.cpp)
   if(QMC_COMPLEX)
-    set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOSpinorBuilder.cpp LCAO/LCAOSpinorBuilder.cpp)
+    set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOSpinorBuilder.cpp)
   else(QMC_COMPLEX)
     #LCAO cusp correction is not ready for complex
     set(FERMION_SRCS ${FERMION_SRCS}
         LCAO/LCAOrbitalSetWithCorrection.cpp
-        LCAO/LCAOrbitalSetWithCorrectionT.cpp
 	    LCAO/CuspCorrectionConstruction.cpp
-	    LCAO/CuspCorrectionConstructionT.cpp
-        LCAO/SoaCuspCorrection.cpp
-        LCAO/SoaCuspCorrectionT.cpp)
+        LCAO/SoaCuspCorrection.cpp)
   endif(QMC_COMPLEX)
 
   if(HAVE_EINSPLINE)
diff --git a/src/QMCWaveFunctions/CompositeSPOSetT.cpp b/src/QMCWaveFunctions/CompositeSPOSetT.cpp
index 1a0c574b5b..31a3f71399 100644
--- a/src/QMCWaveFunctions/CompositeSPOSetT.cpp
+++ b/src/QMCWaveFunctions/CompositeSPOSetT.cpp
@@ -103,7 +103,7 @@ CompositeSPOSetT<T>::makeClone() const
 template <typename T>
 void
 CompositeSPOSetT<T>::evaluateValue(
-    const ParticleSet& P, int iat, ValueVector& psi)
+    const ParticleSetT<T>& P, int iat, ValueVector& psi)
 {
     int n = 0;
     for (int c = 0; c < components.size(); ++c) {
@@ -117,7 +117,7 @@ CompositeSPOSetT<T>::evaluateValue(
 
 template <typename T>
 void
-CompositeSPOSetT<T>::evaluateVGL(const ParticleSet& P, int iat,
+CompositeSPOSetT<T>::evaluateVGL(const ParticleSetT<T>& P, int iat,
     ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
 {
     int n = 0;
@@ -136,7 +136,7 @@ CompositeSPOSetT<T>::evaluateVGL(const ParticleSet& P, int iat,
 
 template <typename T>
 void
-CompositeSPOSetT<T>::evaluate_notranspose(const ParticleSet& P, int first,
+CompositeSPOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
     int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet)
 {
     const int nat = last - first;
@@ -155,7 +155,7 @@ CompositeSPOSetT<T>::evaluate_notranspose(const ParticleSet& P, int first,
 
 template <typename T>
 void
-CompositeSPOSetT<T>::evaluate_notranspose(const ParticleSet& P, int first,
+CompositeSPOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
     int last, ValueMatrix& logdet, GradMatrix& dlogdet,
     HessMatrix& grad_grad_logdet)
 {
@@ -175,7 +175,7 @@ CompositeSPOSetT<T>::evaluate_notranspose(const ParticleSet& P, int first,
 
 template <typename T>
 void
-CompositeSPOSetT<T>::evaluate_notranspose(const ParticleSet& P, int first,
+CompositeSPOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
     int last, ValueMatrix& logdet, GradMatrix& dlogdet,
     HessMatrix& grad_grad_logdet, GGGMatrix& grad_grad_grad_logdet)
 {
diff --git a/src/QMCWaveFunctions/CompositeSPOSetT.h b/src/QMCWaveFunctions/CompositeSPOSetT.h
index db7344bff9..ec597a7eb4 100644
--- a/src/QMCWaveFunctions/CompositeSPOSetT.h
+++ b/src/QMCWaveFunctions/CompositeSPOSetT.h
@@ -79,10 +79,10 @@ class CompositeSPOSetT : public SPOSetT<T>
     makeClone() const override;
 
     void
-    evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override;
+    evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) override;
 
     void
-    evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi,
+    evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
         GradVector& dpsi, ValueVector& d2psi) override;
 
     /// unimplemented functions call this to abort
@@ -94,15 +94,15 @@ class CompositeSPOSetT : public SPOSetT<T>
 
     // methods to be implemented in the future (possibly)
     void
-    evaluate_notranspose(const ParticleSet& P, int first, int last,
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
         ValueMatrix& logdet, GradMatrix& dlogdet,
         ValueMatrix& d2logdet) override;
     void
-    evaluate_notranspose(const ParticleSet& P, int first, int last,
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
         ValueMatrix& logdet, GradMatrix& dlogdet,
         HessMatrix& ddlogdet) override;
     void
-    evaluate_notranspose(const ParticleSet& P, int first, int last,
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
         ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& ddlogdet,
         GGGMatrix& dddlogdet) override;
 };
diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.cpp b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.cpp
index 4257021557..b028eb039b 100644
--- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.cpp
+++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.cpp
@@ -1,13 +1,13 @@
 #include "OhmmsData/AttributeSet.h"
 #include "LongRange/StructFact.h"
-#include "LongRange/KContainer.h"
+#include "LongRange/KContainerT.h"
 #include "QMCWaveFunctions/ElectronGas/FreeOrbitalT.h"
 #include "QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h"
 
 namespace qmcplusplus
 {
 template <typename T>
-FreeOrbitalBuilderT<T>::FreeOrbitalBuilderT(ParticleSet& els, Communicate* comm, xmlNodePtr cur)
+FreeOrbitalBuilderT<T>::FreeOrbitalBuilderT(ParticleSetT<T>& els, Communicate* comm, xmlNodePtr cur)
     : SPOSetBuilderT<T>("PW", comm), targetPtcl(els)
 {}
 
@@ -55,7 +55,7 @@ std::unique_ptr<SPOSetT<T>> FreeOrbitalBuilderT<T>::createSPOSetFromXML(xmlNodeP
   // extract npw k-points from container
   // kpts_cart is sorted by magnitude
   std::vector<PosType> kpts(npw);
-  KContainer klists;
+  KContainerT<T> klists;
   RealType kcut = lattice.LR_kc; // to-do: reduce kcut to >~ kf
   klists.updateKLists(lattice, kcut, lattice.ndim, twist);
 
diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h
index dcd69fd4b8..06e4b730a3 100644
--- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h
+++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h
@@ -12,13 +12,13 @@ class FreeOrbitalBuilderT : public SPOSetBuilderT<T>
     using RealType = typename SPOSetBuilderT<T>::RealType;
     using PosType = typename SPOSetBuilderT<T>::PosType;
 
-  FreeOrbitalBuilderT(ParticleSet& els, Communicate* comm, xmlNodePtr cur);
+  FreeOrbitalBuilderT(ParticleSetT<T>& els, Communicate* comm, xmlNodePtr cur);
   ~FreeOrbitalBuilderT() {}
 
   std::unique_ptr<SPOSetT<T>> createSPOSetFromXML(xmlNodePtr cur) override;
 
 private:
-  ParticleSet& targetPtcl;
+  ParticleSetT<T>& targetPtcl;
   bool in_list(const int j, const std::vector<int> l);
 };
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp
index 82428ebfe1..497f65227e 100644
--- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp
+++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp
@@ -1,18 +1,23 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2022 QMCPACK developers.
 //
-// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
-//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
-//                    Yubo "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron
-//                    William F Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory
+// File developed by:
+//   Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National
+//     Laboratory
+//   Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at
+//     Urbana-Champaign
+//   Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at
+//     Urbana-Champaign
+//   Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//   Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//   Yubo "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron
+//   William F Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "FreeOrbitalT.h"
@@ -20,689 +25,653 @@
 namespace qmcplusplus
 {
 
-
-template<class T>
-void FreeOrbitalT<T>::evaluateVGL(const ParticleSet& P,
-                                  int iat,
-                                  ValueVector& pvec,
-                                  GradVector& dpvec,
-                                  ValueVector& d2pvec)
-{}
-
-template<>
-void FreeOrbitalT<float>::evaluateVGL(const ParticleSet& P,
-                                      int iat,
-                                      ValueVector& pvec,
-                                      GradVector& dpvec,
-                                      ValueVector& d2pvec)
+template <class T>
+void
+FreeOrbitalT<T>::evaluateVGL(const ParticleSetT<T>& P, int iat,
+    ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec)
 {
-  const PosType& r = P.activeR(iat);
-  RealType sinkr, coskr;
-  for (int ik = mink; ik < maxk; ik++)
-  {
-    sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-    const int j2 = 2 * ik;
-    const int j1 = j2 - 1;
-    pvec[j1]     = coskr;
-    pvec[j2]     = sinkr;
-    dpvec[j1]    = -sinkr * kvecs[ik];
-    dpvec[j2]    = coskr * kvecs[ik];
-    d2pvec[j1]   = k2neg[ik] * coskr;
-    d2pvec[j2]   = k2neg[ik] * sinkr;
-  }
-  pvec[0]   = 1.0;
-  dpvec[0]  = 0.0;
-  d2pvec[0] = 0.0;
 }
 
-template<>
-void FreeOrbitalT<double>::evaluateVGL(const ParticleSet& P,
-                                       int iat,
-                                       ValueVector& pvec,
-                                       GradVector& dpvec,
-                                       ValueVector& d2pvec)
+template <>
+void
+FreeOrbitalT<float>::evaluateVGL(const ParticleSetT<float>& P, int iat,
+    ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec)
 {
-  const PosType& r = P.activeR(iat);
-  RealType sinkr, coskr;
-  for (int ik = mink; ik < maxk; ik++)
-  {
-    sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-    const int j2 = 2 * ik;
-    const int j1 = j2 - 1;
-    pvec[j1]     = coskr;
-    pvec[j2]     = sinkr;
-    dpvec[j1]    = -sinkr * kvecs[ik];
-    dpvec[j2]    = coskr * kvecs[ik];
-    d2pvec[j1]   = k2neg[ik] * coskr;
-    d2pvec[j2]   = k2neg[ik] * sinkr;
-  }
-  pvec[0]   = 1.0;
-  dpvec[0]  = 0.0;
-  d2pvec[0] = 0.0;
+    const PosType& r = P.activeR(iat);
+    RealType sinkr, coskr;
+    for (int ik = mink; ik < maxk; ik++) {
+        sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+        const int j2 = 2 * ik;
+        const int j1 = j2 - 1;
+        pvec[j1] = coskr;
+        pvec[j2] = sinkr;
+        dpvec[j1] = -sinkr * kvecs[ik];
+        dpvec[j2] = coskr * kvecs[ik];
+        d2pvec[j1] = k2neg[ik] * coskr;
+        d2pvec[j2] = k2neg[ik] * sinkr;
+    }
+    pvec[0] = 1.0;
+    dpvec[0] = 0.0;
+    d2pvec[0] = 0.0;
 }
 
-
-template<>
-void FreeOrbitalT<std::complex<float>>::evaluateVGL(const ParticleSet& P,
-                                                    int iat,
-                                                    ValueVector& pvec,
-                                                    GradVector& dpvec,
-                                                    ValueVector& d2pvec)
+template <>
+void
+FreeOrbitalT<double>::evaluateVGL(const ParticleSetT<double>& P, int iat,
+    ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec)
 {
-  const PosType& r = P.activeR(iat);
-  RealType sinkr, coskr;
-  for (int ik = mink; ik < maxk; ik++)
-  {
-    sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-
-    pvec[ik]   = ValueType(coskr, sinkr);
-    dpvec[ik]  = ValueType(-sinkr, coskr) * kvecs[ik];
-    d2pvec[ik] = ValueType(k2neg[ik] * coskr, k2neg[ik] * sinkr);
-  }
+    const PosType& r = P.activeR(iat);
+    RealType sinkr, coskr;
+    for (int ik = mink; ik < maxk; ik++) {
+        sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+        const int j2 = 2 * ik;
+        const int j1 = j2 - 1;
+        pvec[j1] = coskr;
+        pvec[j2] = sinkr;
+        dpvec[j1] = -sinkr * kvecs[ik];
+        dpvec[j2] = coskr * kvecs[ik];
+        d2pvec[j1] = k2neg[ik] * coskr;
+        d2pvec[j2] = k2neg[ik] * sinkr;
+    }
+    pvec[0] = 1.0;
+    dpvec[0] = 0.0;
+    d2pvec[0] = 0.0;
 }
 
-template<>
-void FreeOrbitalT<std::complex<double>>::evaluateVGL(const ParticleSet& P,
-                                                     int iat,
-                                                     ValueVector& pvec,
-                                                     GradVector& dpvec,
-                                                     ValueVector& d2pvec)
+template <>
+void
+FreeOrbitalT<std::complex<float>>::evaluateVGL(
+    const ParticleSetT<std::complex<float>>& P, int iat, ValueVector& pvec,
+    GradVector& dpvec, ValueVector& d2pvec)
 {
-  const PosType& r = P.activeR(iat);
-  RealType sinkr, coskr;
-  for (int ik = mink; ik < maxk; ik++)
-  {
-    sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-
-    pvec[ik]   = ValueType(coskr, sinkr);
-    dpvec[ik]  = ValueType(-sinkr, coskr) * kvecs[ik];
-    d2pvec[ik] = ValueType(k2neg[ik] * coskr, k2neg[ik] * sinkr);
-  }
-}
+    const PosType& r = P.activeR(iat);
+    RealType sinkr, coskr;
+    for (int ik = mink; ik < maxk; ik++) {
+        sincos(dot(kvecs[ik], r), &sinkr, &coskr);
 
+        pvec[ik] = ValueType(coskr, sinkr);
+        dpvec[ik] = ValueType(-sinkr, coskr) * kvecs[ik];
+        d2pvec[ik] = ValueType(k2neg[ik] * coskr, k2neg[ik] * sinkr);
+    }
+}
 
-template<>
-void FreeOrbitalT<float>::evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec)
+template <>
+void
+FreeOrbitalT<std::complex<double>>::evaluateVGL(
+    const ParticleSetT<std::complex<double>>& P, int iat, ValueVector& pvec,
+    GradVector& dpvec, ValueVector& d2pvec)
 {
-  const PosType& r = P.activeR(iat);
-  RealType sinkr, coskr;
-  for (int ik = mink; ik < maxk; ik++)
-  {
-    sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-    const int j2 = 2 * ik;
-    const int j1 = j2 - 1;
-    pvec[j1]     = coskr;
-    pvec[j2]     = sinkr;
-  }
-  pvec[0] = 1.0;
+    const PosType& r = P.activeR(iat);
+    RealType sinkr, coskr;
+    for (int ik = mink; ik < maxk; ik++) {
+        sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+
+        pvec[ik] = ValueType(coskr, sinkr);
+        dpvec[ik] = ValueType(-sinkr, coskr) * kvecs[ik];
+        d2pvec[ik] = ValueType(k2neg[ik] * coskr, k2neg[ik] * sinkr);
+    }
 }
 
-template<>
-void FreeOrbitalT<double>::evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec)
+template <>
+void
+FreeOrbitalT<float>::evaluateValue(
+    const ParticleSetT<float>& P, int iat, ValueVector& pvec)
 {
-  const PosType& r = P.activeR(iat);
-  RealType sinkr, coskr;
-  for (int ik = mink; ik < maxk; ik++)
-  {
-    sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-    const int j2 = 2 * ik;
-    const int j1 = j2 - 1;
-    pvec[j1]     = coskr;
-    pvec[j2]     = sinkr;
-  }
-  pvec[0] = 1.0;
+    const PosType& r = P.activeR(iat);
+    RealType sinkr, coskr;
+    for (int ik = mink; ik < maxk; ik++) {
+        sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+        const int j2 = 2 * ik;
+        const int j1 = j2 - 1;
+        pvec[j1] = coskr;
+        pvec[j2] = sinkr;
+    }
+    pvec[0] = 1.0;
 }
 
-template<>
-void FreeOrbitalT<std::complex<float>>::evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec)
+template <>
+void
+FreeOrbitalT<double>::evaluateValue(
+    const ParticleSetT<double>& P, int iat, ValueVector& pvec)
 {
-  const PosType& r = P.activeR(iat);
-  RealType sinkr, coskr;
-  for (int ik = mink; ik < maxk; ik++)
-  {
-    sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-
-    pvec[ik]     = std::complex<float>(coskr, sinkr);
-    const int j2 = 2 * ik;
-    const int j1 = j2 - 1;
-    pvec[j1]     = coskr;
-    pvec[j2]     = sinkr;
-  }
+    const PosType& r = P.activeR(iat);
+    RealType sinkr, coskr;
+    for (int ik = mink; ik < maxk; ik++) {
+        sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+        const int j2 = 2 * ik;
+        const int j1 = j2 - 1;
+        pvec[j1] = coskr;
+        pvec[j2] = sinkr;
+    }
+    pvec[0] = 1.0;
 }
 
-template<>
-void FreeOrbitalT<std::complex<double>>::evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec)
+template <>
+void
+FreeOrbitalT<std::complex<float>>::evaluateValue(
+    const ParticleSetT<std::complex<float>>& P, int iat, ValueVector& pvec)
 {
-  const PosType& r = P.activeR(iat);
-  RealType sinkr, coskr;
-  for (int ik = mink; ik < maxk; ik++)
-  {
-    sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-
-    pvec[ik]     = std::complex<double>(coskr, sinkr);
-    const int j2 = 2 * ik;
-    const int j1 = j2 - 1;
-    pvec[j1]     = coskr;
-    pvec[j2]     = sinkr;
-  }
+    const PosType& r = P.activeR(iat);
+    RealType sinkr, coskr;
+    for (int ik = mink; ik < maxk; ik++) {
+        sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+        // Complex values: orbital ik is the single plane wave
+        // exp(i k.r) = cos(k.r) + i sin(k.r), matching the complex
+        // evaluateVGL specializations. The paired cos/sin slots
+        // (2*ik-1, 2*ik) belong to the real specializations only and
+        // must not be written here.
+        pvec[ik] = std::complex<float>(coskr, sinkr);
+    }
 }
 
-template<class T>
-void FreeOrbitalT<T>::evaluate_notranspose(const ParticleSet& P,
-                                           int first,
-                                           int last,
-                                           ValueMatrix& phi,
-                                           GradMatrix& dphi,
-                                           HessMatrix& d2phi_mat)
-{}
-
-
-template<>
-void FreeOrbitalT<float>::evaluate_notranspose(const ParticleSet& P,
-                                               int first,
-                                               int last,
-                                               ValueMatrix& phi,
-                                               GradMatrix& dphi,
-                                               HessMatrix& d2phi_mat)
+template <>
+void
+FreeOrbitalT<std::complex<double>>::evaluateValue(
+    const ParticleSetT<std::complex<double>>& P, int iat, ValueVector& pvec)
 {
-  RealType sinkr, coskr;
-  float phi_of_r;
-  for (int iat = first, i = 0; iat < last; iat++, i++)
-  {
-    ValueVector p(phi[i], this->OrbitalSetSize);
-    GradVector dp(dphi[i], this->OrbitalSetSize);
-    HessVector hess(d2phi_mat[i], this->OrbitalSetSize);
-
     const PosType& r = P.activeR(iat);
-    for (int ik = mink; ik < maxk; ik++)
-    {
-      sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-      const int j2 = 2 * ik;
-      const int j1 = j2 - 1;
-      p[j1]        = coskr;
-      p[j2]        = sinkr;
-      dp[j1]       = -sinkr * kvecs[ik];
-      dp[j2]       = coskr * kvecs[ik];
-      for (int la = 0; la < OHMMS_DIM; la++)
-      {
-        hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la];
-        hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la];
-        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
-        {
-          hess[j1](la, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb];
-          hess[j2](la, lb) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb];
-          hess[j1](lb, la) = hess[j1](la, lb);
-          hess[j2](lb, la) = hess[j2](la, lb);
-        }
-      }
+    RealType sinkr, coskr;
+    for (int ik = mink; ik < maxk; ik++) {
+        sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+        // Complex values: orbital ik is the single plane wave
+        // exp(i k.r) = cos(k.r) + i sin(k.r), matching the complex
+        // evaluateVGL specializations. The paired cos/sin slots
+        // (2*ik-1, 2*ik) belong to the real specializations only and
+        // must not be written here.
+        pvec[ik] = std::complex<double>(coskr, sinkr);
     }
-    p[0]    = 1.0;
-    dp[0]   = 0.0;
-    hess[0] = 0.0;
-  }
 }
 
-template<>
-void FreeOrbitalT<double>::evaluate_notranspose(const ParticleSet& P,
-                                                int first,
-                                                int last,
-                                                ValueMatrix& phi,
-                                                GradMatrix& dphi,
-                                                HessMatrix& d2phi_mat)
+template <class T>
+void
+FreeOrbitalT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
+    int last, ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat)
 {
-  RealType sinkr, coskr;
-  double phi_of_r;
-  for (int iat = first, i = 0; iat < last; iat++, i++)
-  {
-    ValueVector p(phi[i], this->OrbitalSetSize);
-    GradVector dp(dphi[i], this->OrbitalSetSize);
-    HessVector hess(d2phi_mat[i], this->OrbitalSetSize);
+}
 
-    const PosType& r = P.activeR(iat);
-    for (int ik = mink; ik < maxk; ik++)
-    {
-      sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-      const int j2 = 2 * ik;
-      const int j1 = j2 - 1;
-      p[j1]        = coskr;
-      p[j2]        = sinkr;
-      dp[j1]       = -sinkr * kvecs[ik];
-      dp[j2]       = coskr * kvecs[ik];
-      for (int la = 0; la < OHMMS_DIM; la++)
-      {
-        hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la];
-        hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la];
-        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
-        {
-          hess[j1](la, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb];
-          hess[j2](la, lb) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb];
-          hess[j1](lb, la) = hess[j1](la, lb);
-          hess[j2](lb, la) = hess[j2](la, lb);
+template <>
+void
+FreeOrbitalT<float>::evaluate_notranspose(const ParticleSetT<float>& P,
+    int first, int last, ValueMatrix& phi, GradMatrix& dphi,
+    HessMatrix& d2phi_mat)
+{
+    RealType sinkr, coskr;
+    float phi_of_r;
+    for (int iat = first, i = 0; iat < last; iat++, i++) {
+        ValueVector p(phi[i], this->OrbitalSetSize);
+        GradVector dp(dphi[i], this->OrbitalSetSize);
+        HessVector hess(d2phi_mat[i], this->OrbitalSetSize);
+
+        const PosType& r = P.activeR(iat);
+        for (int ik = mink; ik < maxk; ik++) {
+            sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+            const int j2 = 2 * ik;
+            const int j1 = j2 - 1;
+            p[j1] = coskr;
+            p[j2] = sinkr;
+            dp[j1] = -sinkr * kvecs[ik];
+            dp[j2] = coskr * kvecs[ik];
+            for (int la = 0; la < OHMMS_DIM; la++) {
+                hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la];
+                hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la];
+                for (int lb = la + 1; lb < OHMMS_DIM; lb++) {
+                    hess[j1](la, lb) =
+                        -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb];
+                    hess[j2](la, lb) =
+                        -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb];
+                    hess[j1](lb, la) = hess[j1](la, lb);
+                    hess[j2](lb, la) = hess[j2](la, lb);
+                }
+            }
         }
-      }
+        p[0] = 1.0;
+        dp[0] = 0.0;
+        hess[0] = 0.0;
     }
-    p[0]    = 1.0;
-    dp[0]   = 0.0;
-    hess[0] = 0.0;
-  }
 }
 
-
-template<>
-void FreeOrbitalT<std::complex<float>>::evaluate_notranspose(const ParticleSet& P,
-                                                             int first,
-                                                             int last,
-                                                             ValueMatrix& phi,
-                                                             GradMatrix& dphi,
-                                                             HessMatrix& d2phi_mat)
+template <>
+void
+FreeOrbitalT<double>::evaluate_notranspose(const ParticleSetT<double>& P,
+    int first, int last, ValueMatrix& phi, GradMatrix& dphi,
+    HessMatrix& d2phi_mat)
 {
-  RealType sinkr, coskr;
-  std::complex<float> phi_of_r;
-  for (int iat = first, i = 0; iat < last; iat++, i++)
-  {
-    ValueVector p(phi[i], this->OrbitalSetSize);
-    GradVector dp(dphi[i], this->OrbitalSetSize);
-    HessVector hess(d2phi_mat[i], this->OrbitalSetSize);
-
-    const PosType& r = P.activeR(iat);
-    for (int ik = mink; ik < maxk; ik++)
-    {
-      sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-
-      phi_of_r = std::complex<float>(coskr, sinkr);
-      p[ik]    = phi_of_r;
-
-      dp[ik] = std::complex<float>(-sinkr, coskr) * kvecs[ik];
-      for (int la = 0; la < OHMMS_DIM; la++)
-      {
-        hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la];
-        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
-        {
-          hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb];
-          hess[ik](lb, la) = hess[ik](la, lb);
+    RealType sinkr, coskr;
+    double phi_of_r;
+    for (int iat = first, i = 0; iat < last; iat++, i++) {
+        ValueVector p(phi[i], this->OrbitalSetSize);
+        GradVector dp(dphi[i], this->OrbitalSetSize);
+        HessVector hess(d2phi_mat[i], this->OrbitalSetSize);
+
+        const PosType& r = P.activeR(iat);
+        for (int ik = mink; ik < maxk; ik++) {
+            sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+            const int j2 = 2 * ik;
+            const int j1 = j2 - 1;
+            p[j1] = coskr;
+            p[j2] = sinkr;
+            dp[j1] = -sinkr * kvecs[ik];
+            dp[j2] = coskr * kvecs[ik];
+            for (int la = 0; la < OHMMS_DIM; la++) {
+                hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la];
+                hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la];
+                for (int lb = la + 1; lb < OHMMS_DIM; lb++) {
+                    hess[j1](la, lb) =
+                        -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb];
+                    hess[j2](la, lb) =
+                        -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb];
+                    hess[j1](lb, la) = hess[j1](la, lb);
+                    hess[j2](lb, la) = hess[j2](la, lb);
+                }
+            }
         }
-      }
+        p[0] = 1.0;
+        dp[0] = 0.0;
+        hess[0] = 0.0;
     }
-  }
 }
 
-template<>
-void FreeOrbitalT<std::complex<double>>::evaluate_notranspose(const ParticleSet& P,
-                                                              int first,
-                                                              int last,
-                                                              ValueMatrix& phi,
-                                                              GradMatrix& dphi,
-                                                              HessMatrix& d2phi_mat)
+template <>
+void
+FreeOrbitalT<std::complex<float>>::evaluate_notranspose(
+    const ParticleSetT<std::complex<float>>& P, int first, int last,
+    ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat)
 {
-  RealType sinkr, coskr;
-  std::complex<double> phi_of_r;
-  for (int iat = first, i = 0; iat < last; iat++, i++)
-  {
-    ValueVector p(phi[i], this->OrbitalSetSize);
-    GradVector dp(dphi[i], this->OrbitalSetSize);
-    HessVector hess(d2phi_mat[i], this->OrbitalSetSize);
-
-    const PosType& r = P.activeR(iat);
-    for (int ik = mink; ik < maxk; ik++)
-    {
-      sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-
-      phi_of_r = std::complex<double>(coskr, sinkr);
-      p[ik]    = phi_of_r;
-
-      dp[ik] = std::complex<double>(-sinkr, coskr) * kvecs[ik];
-      for (int la = 0; la < OHMMS_DIM; la++)
-      {
-        hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la];
-        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
-        {
-          hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb];
-          hess[ik](lb, la) = hess[ik](la, lb);
+    RealType sinkr, coskr;
+    std::complex<float> phi_of_r;
+    for (int iat = first, i = 0; iat < last; iat++, i++) {
+        ValueVector p(phi[i], this->OrbitalSetSize);
+        GradVector dp(dphi[i], this->OrbitalSetSize);
+        HessVector hess(d2phi_mat[i], this->OrbitalSetSize);
+
+        const PosType& r = P.activeR(iat);
+        for (int ik = mink; ik < maxk; ik++) {
+            sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+
+            phi_of_r = std::complex<float>(coskr, sinkr);
+            p[ik] = phi_of_r;
+
+            dp[ik] = std::complex<float>(-sinkr, coskr) * kvecs[ik];
+            for (int la = 0; la < OHMMS_DIM; la++) {
+                hess[ik](la, la) =
+                    -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la];
+                for (int lb = la + 1; lb < OHMMS_DIM; lb++) {
+                    hess[ik](la, lb) =
+                        -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb];
+                    hess[ik](lb, la) = hess[ik](la, lb);
+                }
+            }
         }
-      }
     }
-  }
 }
 
-template<class T>
-void FreeOrbitalT<T>::evaluate_notranspose(const ParticleSet& P,
-                                           int first,
-                                           int last,
-                                           ValueMatrix& phi,
-                                           GradMatrix& dphi,
-                                           HessMatrix& d2phi_mat,
-                                           GGGMatrix& d3phi_mat)
-{}
-
-template<>
-void FreeOrbitalT<float>::evaluate_notranspose(const ParticleSet& P,
-                                               int first,
-                                               int last,
-                                               ValueMatrix& phi,
-                                               GradMatrix& dphi,
-                                               HessMatrix& d2phi_mat,
-                                               GGGMatrix& d3phi_mat)
+template <>
+void
+FreeOrbitalT<std::complex<double>>::evaluate_notranspose(
+    const ParticleSetT<std::complex<double>>& P, int first, int last,
+    ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat)
 {
-  RealType sinkr, coskr;
-  ValueType phi_of_r;
-  for (int iat = first, i = 0; iat < last; iat++, i++)
-  {
-    ValueVector p(phi[i], OrbitalSetSize);
-    GradVector dp(dphi[i], OrbitalSetSize);
-    HessVector hess(d2phi_mat[i], OrbitalSetSize);
-    GGGVector ggg(d3phi_mat[i], OrbitalSetSize);
-
-    const PosType& r = P.activeR(iat);
-    for (int ik = mink; ik < maxk; ik++)
-    {
-      sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-      const int j2 = 2 * ik;
-      const int j1 = j2 - 1;
-      p[j1]        = coskr;
-      p[j2]        = sinkr;
-      dp[j1]       = -sinkr * kvecs[ik];
-      dp[j2]       = coskr * kvecs[ik];
-      for (int la = 0; la < OHMMS_DIM; la++)
-      {
-        hess[j1](la, la)    = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la];
-        hess[j2](la, la)    = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la];
-        ggg[j1][la](la, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la];
-        ggg[j2][la](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la];
-        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
-        {
-          hess[j1](la, lb)    = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb];
-          hess[j2](la, lb)    = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb];
-          hess[j1](lb, la)    = hess[j1](la, lb);
-          hess[j2](lb, la)    = hess[j2](la, lb);
-          ggg[j1][la](lb, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la];
-          ggg[j2][la](lb, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la];
-          ggg[j1][la](la, lb) = ggg[j1][la](lb, la);
-          ggg[j2][la](la, lb) = ggg[j2][la](lb, la);
-          ggg[j1][lb](la, la) = ggg[j1][la](lb, la);
-          ggg[j2][lb](la, la) = ggg[j2][la](lb, la);
-          ggg[j1][la](lb, lb) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb];
-          ggg[j2][la](lb, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb];
-          ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb);
-          ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb);
-          ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb);
-          ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb);
-          for (int lc = lb + 1; lc < OHMMS_DIM; lc++)
-          {
-            ggg[j1][la](lb, lc) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc];
-            ggg[j2][la](lb, lc) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc];
-            ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc);
-            ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc);
-            ggg[j1][lb](la, lc) = ggg[j1][la](lb, lc);
-            ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc);
-            ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc);
-            ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc);
-            ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc);
-            ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc);
-            ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc);
-            ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc);
-          }
+    RealType sinkr, coskr;
+    std::complex<double> phi_of_r;
+    for (int iat = first, i = 0; iat < last; iat++, i++) {
+        ValueVector p(phi[i], this->OrbitalSetSize);
+        GradVector dp(dphi[i], this->OrbitalSetSize);
+        HessVector hess(d2phi_mat[i], this->OrbitalSetSize);
+
+        const PosType& r = P.activeR(iat);
+        for (int ik = mink; ik < maxk; ik++) {
+            sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+
+            phi_of_r = std::complex<double>(coskr, sinkr);
+            p[ik] = phi_of_r;
+
+            dp[ik] = std::complex<double>(-sinkr, coskr) * kvecs[ik];
+            for (int la = 0; la < OHMMS_DIM; la++) {
+                hess[ik](la, la) =
+                    -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la];
+                for (int lb = la + 1; lb < OHMMS_DIM; lb++) {
+                    hess[ik](la, lb) =
+                        -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb];
+                    hess[ik](lb, la) = hess[ik](la, lb);
+                }
+            }
         }
-      }
     }
-
-    p[0]    = 1.0;
-    dp[0]   = 0.0;
-    hess[0] = 0.0;
-    ggg[0]  = 0.0;
-  }
 }
 
-template<>
-void FreeOrbitalT<double>::evaluate_notranspose(const ParticleSet& P,
-                                                int first,
-                                                int last,
-                                                ValueMatrix& phi,
-                                                GradMatrix& dphi,
-                                                HessMatrix& d2phi_mat,
-                                                GGGMatrix& d3phi_mat)
+template <class T>
+void
+FreeOrbitalT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
+    int last, ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat,
+    GGGMatrix& d3phi_mat)
 {
-  RealType sinkr, coskr;
-  ValueType phi_of_r;
-  for (int iat = first, i = 0; iat < last; iat++, i++)
-  {
-    ValueVector p(phi[i], OrbitalSetSize);
-    GradVector dp(dphi[i], OrbitalSetSize);
-    HessVector hess(d2phi_mat[i], OrbitalSetSize);
-    GGGVector ggg(d3phi_mat[i], OrbitalSetSize);
+}
 
-    const PosType& r = P.activeR(iat);
-    for (int ik = mink; ik < maxk; ik++)
-    {
-      sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-      const int j2 = 2 * ik;
-      const int j1 = j2 - 1;
-      p[j1]        = coskr;
-      p[j2]        = sinkr;
-      dp[j1]       = -sinkr * kvecs[ik];
-      dp[j2]       = coskr * kvecs[ik];
-      for (int la = 0; la < OHMMS_DIM; la++)
-      {
-        hess[j1](la, la)    = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la];
-        hess[j2](la, la)    = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la];
-        ggg[j1][la](la, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la];
-        ggg[j2][la](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la];
-        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
-        {
-          hess[j1](la, lb)    = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb];
-          hess[j2](la, lb)    = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb];
-          hess[j1](lb, la)    = hess[j1](la, lb);
-          hess[j2](lb, la)    = hess[j2](la, lb);
-          ggg[j1][la](lb, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la];
-          ggg[j2][la](lb, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la];
-          ggg[j1][la](la, lb) = ggg[j1][la](lb, la);
-          ggg[j2][la](la, lb) = ggg[j2][la](lb, la);
-          ggg[j1][lb](la, la) = ggg[j1][la](lb, la);
-          ggg[j2][lb](la, la) = ggg[j2][la](lb, la);
-          ggg[j1][la](lb, lb) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb];
-          ggg[j2][la](lb, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb];
-          ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb);
-          ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb);
-          ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb);
-          ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb);
-          for (int lc = lb + 1; lc < OHMMS_DIM; lc++)
-          {
-            ggg[j1][la](lb, lc) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc];
-            ggg[j2][la](lb, lc) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc];
-            ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc);
-            ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc);
-            ggg[j1][lb](la, lc) = ggg[j1][la](lb, lc);
-            ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc);
-            ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc);
-            ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc);
-            ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc);
-            ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc);
-            ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc);
-            ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc);
-          }
+template <>
+void
+FreeOrbitalT<float>::evaluate_notranspose(const ParticleSetT<float>& P,
+    int first, int last, ValueMatrix& phi, GradMatrix& dphi,
+    HessMatrix& d2phi_mat, GGGMatrix& d3phi_mat)
+{
+    RealType sinkr, coskr;
+    ValueType phi_of_r;
+    for (int iat = first, i = 0; iat < last; iat++, i++) {
+        ValueVector p(phi[i], OrbitalSetSize);
+        GradVector dp(dphi[i], OrbitalSetSize);
+        HessVector hess(d2phi_mat[i], OrbitalSetSize);
+        GGGVector ggg(d3phi_mat[i], OrbitalSetSize);
+
+        const PosType& r = P.activeR(iat);
+        for (int ik = mink; ik < maxk; ik++) {
+            sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+            const int j2 = 2 * ik;
+            const int j1 = j2 - 1;
+            p[j1] = coskr;
+            p[j2] = sinkr;
+            dp[j1] = -sinkr * kvecs[ik];
+            dp[j2] = coskr * kvecs[ik];
+            for (int la = 0; la < OHMMS_DIM; la++) {
+                hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la];
+                hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la];
+                ggg[j1][la](la, la) =
+                    sinkr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la];
+                ggg[j2][la](la, la) = -coskr * (kvecs[ik])[la] *
+                    (kvecs[ik])[la] * (kvecs[ik])[la];
+                for (int lb = la + 1; lb < OHMMS_DIM; lb++) {
+                    hess[j1](la, lb) =
+                        -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb];
+                    hess[j2](la, lb) =
+                        -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb];
+                    hess[j1](lb, la) = hess[j1](la, lb);
+                    hess[j2](lb, la) = hess[j2](la, lb);
+                    ggg[j1][la](lb, la) = sinkr * (kvecs[ik])[la] *
+                        (kvecs[ik])[lb] * (kvecs[ik])[la];
+                    ggg[j2][la](lb, la) = -coskr * (kvecs[ik])[la] *
+                        (kvecs[ik])[lb] * (kvecs[ik])[la];
+                    ggg[j1][la](la, lb) = ggg[j1][la](lb, la);
+                    ggg[j2][la](la, lb) = ggg[j2][la](lb, la);
+                    ggg[j1][lb](la, la) = ggg[j1][la](lb, la);
+                    ggg[j2][lb](la, la) = ggg[j2][la](lb, la);
+                    ggg[j1][la](lb, lb) = sinkr * (kvecs[ik])[la] *
+                        (kvecs[ik])[lb] * (kvecs[ik])[lb];
+                    ggg[j2][la](lb, lb) = -coskr * (kvecs[ik])[la] *
+                        (kvecs[ik])[lb] * (kvecs[ik])[lb];
+                    ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb);
+                    ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb);
+                    ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb);
+                    ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb);
+                    for (int lc = lb + 1; lc < OHMMS_DIM; lc++) {
+                        ggg[j1][la](lb, lc) = sinkr * (kvecs[ik])[la] *
+                            (kvecs[ik])[lb] * (kvecs[ik])[lc];
+                        ggg[j2][la](lb, lc) = -coskr * (kvecs[ik])[la] *
+                            (kvecs[ik])[lb] * (kvecs[ik])[lc];
+                        ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc);
+                        ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc);
+                        ggg[j1][lb](la, lc) = ggg[j1][la](lb, lc);
+                        ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc);
+                        ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc);
+                        ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc);
+                        ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc);
+                        ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc);
+                        ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc);
+                        ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc);
+                    }
+                }
+            }
         }
-      }
-    }
 
-    p[0]    = 1.0;
-    dp[0]   = 0.0;
-    hess[0] = 0.0;
-    ggg[0]  = 0.0;
-  }
+        p[0] = 1.0;
+        dp[0] = 0.0;
+        hess[0] = 0.0;
+        ggg[0] = 0.0;
+    }
 }
 
-template<>
-void FreeOrbitalT<std::complex<float>>::evaluate_notranspose(const ParticleSet& P,
-                                                             int first,
-                                                             int last,
-                                                             ValueMatrix& phi,
-                                                             GradMatrix& dphi,
-                                                             HessMatrix& d2phi_mat,
-                                                             GGGMatrix& d3phi_mat)
+template <>
+void
+FreeOrbitalT<double>::evaluate_notranspose(const ParticleSetT<double>& P,
+    int first, int last, ValueMatrix& phi, GradMatrix& dphi,
+    HessMatrix& d2phi_mat, GGGMatrix& d3phi_mat)
 {
-  RealType sinkr, coskr;
-  ValueType phi_of_r;
-  for (int iat = first, i = 0; iat < last; iat++, i++)
-  {
-    ValueVector p(phi[i], OrbitalSetSize);
-    GradVector dp(dphi[i], OrbitalSetSize);
-    HessVector hess(d2phi_mat[i], OrbitalSetSize);
-    GGGVector ggg(d3phi_mat[i], OrbitalSetSize);
-
-    const PosType& r = P.activeR(iat);
-    for (int ik = mink; ik < maxk; ik++)
-    {
-      sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-      const ValueType compi(0, 1);
-      phi_of_r = ValueType(coskr, sinkr);
-      p[ik]    = phi_of_r;
-      dp[ik]   = compi * phi_of_r * kvecs[ik];
-      for (int la = 0; la < OHMMS_DIM; la++)
-      {
-        hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la];
-        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
-        {
-          hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb];
-          hess[ik](lb, la) = hess[ik](la, lb);
+    RealType sinkr, coskr;
+    ValueType phi_of_r;
+    for (int iat = first, i = 0; iat < last; iat++, i++) {
+        ValueVector p(phi[i], OrbitalSetSize);
+        GradVector dp(dphi[i], OrbitalSetSize);
+        HessVector hess(d2phi_mat[i], OrbitalSetSize);
+        GGGVector ggg(d3phi_mat[i], OrbitalSetSize);
+
+        const PosType& r = P.activeR(iat);
+        for (int ik = mink; ik < maxk; ik++) {
+            sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+            const int j2 = 2 * ik;
+            const int j1 = j2 - 1;
+            p[j1] = coskr;
+            p[j2] = sinkr;
+            dp[j1] = -sinkr * kvecs[ik];
+            dp[j2] = coskr * kvecs[ik];
+            for (int la = 0; la < OHMMS_DIM; la++) {
+                hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la];
+                hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la];
+                ggg[j1][la](la, la) =
+                    sinkr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la];
+                ggg[j2][la](la, la) = -coskr * (kvecs[ik])[la] *
+                    (kvecs[ik])[la] * (kvecs[ik])[la];
+                for (int lb = la + 1; lb < OHMMS_DIM; lb++) {
+                    hess[j1](la, lb) =
+                        -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb];
+                    hess[j2](la, lb) =
+                        -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb];
+                    hess[j1](lb, la) = hess[j1](la, lb);
+                    hess[j2](lb, la) = hess[j2](la, lb);
+                    ggg[j1][la](lb, la) = sinkr * (kvecs[ik])[la] *
+                        (kvecs[ik])[lb] * (kvecs[ik])[la];
+                    ggg[j2][la](lb, la) = -coskr * (kvecs[ik])[la] *
+                        (kvecs[ik])[lb] * (kvecs[ik])[la];
+                    ggg[j1][la](la, lb) = ggg[j1][la](lb, la);
+                    ggg[j2][la](la, lb) = ggg[j2][la](lb, la);
+                    ggg[j1][lb](la, la) = ggg[j1][la](lb, la);
+                    ggg[j2][lb](la, la) = ggg[j2][la](lb, la);
+                    ggg[j1][la](lb, lb) = sinkr * (kvecs[ik])[la] *
+                        (kvecs[ik])[lb] * (kvecs[ik])[lb];
+                    ggg[j2][la](lb, lb) = -coskr * (kvecs[ik])[la] *
+                        (kvecs[ik])[lb] * (kvecs[ik])[lb];
+                    ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb);
+                    ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb);
+                    ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb);
+                    ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb);
+                    for (int lc = lb + 1; lc < OHMMS_DIM; lc++) {
+                        ggg[j1][la](lb, lc) = sinkr * (kvecs[ik])[la] *
+                            (kvecs[ik])[lb] * (kvecs[ik])[lc];
+                        ggg[j2][la](lb, lc) = -coskr * (kvecs[ik])[la] *
+                            (kvecs[ik])[lb] * (kvecs[ik])[lc];
+                        ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc);
+                        ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc);
+                        ggg[j1][lb](la, lc) = ggg[j1][la](lb, lc);
+                        ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc);
+                        ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc);
+                        ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc);
+                        ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc);
+                        ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc);
+                        ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc);
+                        ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc);
+                    }
+                }
+            }
         }
-      }
-      for (int la = 0; la < OHMMS_DIM; la++)
-      {
-        ggg[ik][la] = compi * (kvecs[ik])[la] * hess[ik];
-      }
+
+        p[0] = 1.0;
+        dp[0] = 0.0;
+        hess[0] = 0.0;
+        ggg[0] = 0.0;
     }
-  }
 }
 
-template<>
-void FreeOrbitalT<std::complex<double>>::evaluate_notranspose(const ParticleSet& P,
-                                                              int first,
-                                                              int last,
-                                                              ValueMatrix& phi,
-                                                              GradMatrix& dphi,
-                                                              HessMatrix& d2phi_mat,
-                                                              GGGMatrix& d3phi_mat)
+template <>
+void
+FreeOrbitalT<std::complex<float>>::evaluate_notranspose(
+    const ParticleSetT<std::complex<float>>& P, int first, int last,
+    ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat,
+    GGGMatrix& d3phi_mat)
 {
-  RealType sinkr, coskr;
-  ValueType phi_of_r;
-  for (int iat = first, i = 0; iat < last; iat++, i++)
-  {
-    ValueVector p(phi[i], OrbitalSetSize);
-    GradVector dp(dphi[i], OrbitalSetSize);
-    HessVector hess(d2phi_mat[i], OrbitalSetSize);
-    GGGVector ggg(d3phi_mat[i], OrbitalSetSize);
+    RealType sinkr, coskr;
+    ValueType phi_of_r;
+    for (int iat = first, i = 0; iat < last; iat++, i++) {
+        ValueVector p(phi[i], OrbitalSetSize);
+        GradVector dp(dphi[i], OrbitalSetSize);
+        HessVector hess(d2phi_mat[i], OrbitalSetSize);
+        GGGVector ggg(d3phi_mat[i], OrbitalSetSize);
+
+        const PosType& r = P.activeR(iat);
+        for (int ik = mink; ik < maxk; ik++) {
+            sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+            const ValueType compi(0, 1);
+            phi_of_r = ValueType(coskr, sinkr);
+            p[ik] = phi_of_r;
+            dp[ik] = compi * phi_of_r * kvecs[ik];
+            for (int la = 0; la < OHMMS_DIM; la++) {
+                hess[ik](la, la) =
+                    -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la];
+                for (int lb = la + 1; lb < OHMMS_DIM; lb++) {
+                    hess[ik](la, lb) =
+                        -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb];
+                    hess[ik](lb, la) = hess[ik](la, lb);
+                }
+            }
+            for (int la = 0; la < OHMMS_DIM; la++) {
+                ggg[ik][la] = compi * (kvecs[ik])[la] * hess[ik];
+            }
+        }
+    }
+}
 
-    const PosType& r = P.activeR(iat);
-    for (int ik = mink; ik < maxk; ik++)
-    {
-      sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-      const ValueType compi(0, 1);
-      phi_of_r = ValueType(coskr, sinkr);
-      p[ik]    = phi_of_r;
-      dp[ik]   = compi * phi_of_r * kvecs[ik];
-      for (int la = 0; la < OHMMS_DIM; la++)
-      {
-        hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la];
-        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
-        {
-          hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb];
-          hess[ik](lb, la) = hess[ik](la, lb);
+template <>
+void
+FreeOrbitalT<std::complex<double>>::evaluate_notranspose(
+    const ParticleSetT<std::complex<double>>& P, int first, int last,
+    ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat,
+    GGGMatrix& d3phi_mat)
+{
+    RealType sinkr, coskr;
+    ValueType phi_of_r;
+    for (int iat = first, i = 0; iat < last; iat++, i++) {
+        ValueVector p(phi[i], OrbitalSetSize);
+        GradVector dp(dphi[i], OrbitalSetSize);
+        HessVector hess(d2phi_mat[i], OrbitalSetSize);
+        GGGVector ggg(d3phi_mat[i], OrbitalSetSize);
+
+        const PosType& r = P.activeR(iat);
+        for (int ik = mink; ik < maxk; ik++) {
+            sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+            const ValueType compi(0, 1);
+            phi_of_r = ValueType(coskr, sinkr);
+            p[ik] = phi_of_r;
+            dp[ik] = compi * phi_of_r * kvecs[ik];
+            for (int la = 0; la < OHMMS_DIM; la++) {
+                hess[ik](la, la) =
+                    -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la];
+                for (int lb = la + 1; lb < OHMMS_DIM; lb++) {
+                    hess[ik](la, lb) =
+                        -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb];
+                    hess[ik](lb, la) = hess[ik](la, lb);
+                }
+            }
+            for (int la = 0; la < OHMMS_DIM; la++) {
+                ggg[ik][la] = compi * (kvecs[ik])[la] * hess[ik];
+            }
         }
-      }
-      for (int la = 0; la < OHMMS_DIM; la++)
-      {
-        ggg[ik][la] = compi * (kvecs[ik])[la] * hess[ik];
-      }
     }
-  }
 }
 
 // generic implementation
 
-template<class T>
+template <class T>
 FreeOrbitalT<T>::~FreeOrbitalT()
-{}
-
-template<class T>
-void FreeOrbitalT<T>::evaluate_notranspose(const ParticleSet& P,
-                                           int first,
-                                           int last,
-                                           ValueMatrix& phi,
-                                           GradMatrix& dphi,
-                                           ValueMatrix& d2phi)
 {
-  for (int iat = first, i = 0; iat < last; iat++, i++)
-  {
-    ValueVector p(phi[i], this->OrbitalSetSize);
-    GradVector dp(dphi[i], this->OrbitalSetSize);
-    ValueVector d2p(d2phi[i], this->OrbitalSetSize);
-    evaluateVGL(P, iat, p, dp, d2p);
-  }
 }
 
-//Explicit template specialization
-template<>
-FreeOrbitalT<float>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
-    : SPOSetT<float>(my_name),
-      kvecs(kpts_cart),
-      mink(1), // treat k=0 as special case
-      maxk(kpts_cart.size()),
-      k2neg(maxk)
+template <class T>
+void
+FreeOrbitalT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
+    int last, ValueMatrix& phi, GradMatrix& dphi, ValueMatrix& d2phi)
 {
-  this->OrbitalSetSize = 2 * maxk - 1; // k=0 has no (cos, sin) split, SPOSet member
-  for (int ik = 0; ik < maxk; ik++)
-    k2neg[ik] = -dot(kvecs[ik], kvecs[ik]);
+    for (int iat = first, i = 0; iat < last; iat++, i++) {
+        ValueVector p(phi[i], this->OrbitalSetSize);
+        GradVector dp(dphi[i], this->OrbitalSetSize);
+        ValueVector d2p(d2phi[i], this->OrbitalSetSize);
+        evaluateVGL(P, iat, p, dp, d2p);
+    }
 }
 
-template<>
-FreeOrbitalT<double>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
-    : SPOSetT<double>(my_name),
-      kvecs(kpts_cart),
-      mink(1), // treat k=0 as special case
-      maxk(kpts_cart.size()),
-      k2neg(maxk)
+// Explicit template specialization
+template <>
+FreeOrbitalT<float>::FreeOrbitalT(
+    const std::string& my_name, const std::vector<PosType>& kpts_cart) :
+    SPOSetT<float>(my_name),
+    kvecs(kpts_cart),
+    mink(1), // treat k=0 as special case
+    maxk(kpts_cart.size()),
+    k2neg(maxk)
 {
-  this->OrbitalSetSize = 2 * maxk - 1; // k=0 has no (cos, sin) split, SPOSet member
-  for (int ik = 0; ik < maxk; ik++)
-    k2neg[ik] = -dot(kvecs[ik], kvecs[ik]);
+    this->OrbitalSetSize =
+        2 * maxk - 1; // k=0 has no (cos, sin) split, SPOSet member
+    for (int ik = 0; ik < maxk; ik++)
+        k2neg[ik] = -dot(kvecs[ik], kvecs[ik]);
 }
 
-template<>
-FreeOrbitalT<std::complex<float>>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
-    : SPOSetT<std::complex<float>>(my_name),
-      kvecs(kpts_cart),
-      mink(0), // treat k=0 as special case
-      maxk(kpts_cart.size()),
-      k2neg(maxk)
+template <>
+FreeOrbitalT<double>::FreeOrbitalT(
+    const std::string& my_name, const std::vector<PosType>& kpts_cart) :
+    SPOSetT<double>(my_name),
+    kvecs(kpts_cart),
+    mink(1), // treat k=0 as special case
+    maxk(kpts_cart.size()),
+    k2neg(maxk)
 {
-  this->OrbitalSetSize = maxk; // SPOSet member
-  for (int ik = 0; ik < maxk; ik++)
-    k2neg[ik] = -dot(kvecs[ik], kvecs[ik]);
+    this->OrbitalSetSize =
+        2 * maxk - 1; // k=0 has no (cos, sin) split, SPOSet member
+    for (int ik = 0; ik < maxk; ik++)
+        k2neg[ik] = -dot(kvecs[ik], kvecs[ik]);
 }
 
-template<>
-FreeOrbitalT<std::complex<double>>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
-    : SPOSetT<std::complex<double>>(my_name),
-      kvecs(kpts_cart),
-      mink(0), // treat k=0 as special case
-      maxk(kpts_cart.size()),
-      k2neg(maxk)
+template <>
+FreeOrbitalT<std::complex<float>>::FreeOrbitalT(
+    const std::string& my_name, const std::vector<PosType>& kpts_cart) :
+    SPOSetT<std::complex<float>>(my_name),
+    kvecs(kpts_cart),
+    mink(0), // complex case: k=0 is not special, one orbital per k
+    maxk(kpts_cart.size()),
+    k2neg(maxk)
 {
-  this->OrbitalSetSize = maxk; // SPOSet member
-  for (int ik = 0; ik < maxk; ik++)
-    k2neg[ik] = -dot(kvecs[ik], kvecs[ik]);
+    this->OrbitalSetSize = maxk; // SPOSet member
+    for (int ik = 0; ik < maxk; ik++)
+        k2neg[ik] = -dot(kvecs[ik], kvecs[ik]);
 }
 
+template <>
+FreeOrbitalT<std::complex<double>>::FreeOrbitalT(
+    const std::string& my_name, const std::vector<PosType>& kpts_cart) :
+    SPOSetT<std::complex<double>>(my_name),
+    kvecs(kpts_cart),
+    mink(0), // complex case: k=0 is not special, one orbital per k
+    maxk(kpts_cart.size()),
+    k2neg(maxk)
+{
+    this->OrbitalSetSize = maxk; // SPOSet member
+    for (int ik = 0; ik < maxk; ik++)
+        k2neg[ik] = -dot(kvecs[ik], kvecs[ik]);
+}
 
-template<class T>
-void FreeOrbitalT<T>::report(const std::string& pad) const
+template <class T>
+void
+FreeOrbitalT<T>::report(const std::string& pad) const
 {
-  app_log() << pad << "FreeOrbital report" << std::endl;
-  for (int ik = 0; ik < kvecs.size(); ik++)
-  {
-    app_log() << pad << ik << " " << kvecs[ik] << std::endl;
-  }
-  app_log() << pad << "end FreeOrbital report" << std::endl;
-  app_log().flush();
+    app_log() << pad << "FreeOrbital report" << std::endl;
+    for (int ik = 0; ik < kvecs.size(); ik++) {
+        app_log() << pad << ik << " " << kvecs[ik] << std::endl;
+    }
+    app_log() << pad << "end FreeOrbital report" << std::endl;
+    app_log().flush();
 }
 
 template class FreeOrbitalT<float>;
@@ -710,5 +679,4 @@ template class FreeOrbitalT<double>;
 template class FreeOrbitalT<std::complex<float>>;
 template class FreeOrbitalT<std::complex<double>>;
 
-
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h
index d2f2f450b8..18e8899cca 100644
--- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h
+++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h
@@ -1,18 +1,23 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2022 QMCPACK developers.
 //
-// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
-//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
-//                    Yubo "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron
-//                    William F Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory
+// File developed by:
+//   Miguel Morales, moralessilva2@llnl.gov,
+//     Lawrence Livermore National Laboratory
+//   Jeremy McMinnis, jmcminis@gmail.com,
+//     University of Illinois at Urbana-Champaign
+//   Jeongnim Kim, jeongnim.kim@gmail.com,
+//     University of Illinois at Urbana-Champaign
+//   Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//   Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//   Yubo "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron
+//   William F Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_FREE_ORBITALT_H
@@ -22,66 +27,74 @@
 
 namespace qmcplusplus
 {
-template<class T>
+template <class T>
 class FreeOrbitalT : public SPOSetT<T>
 {
 public:
-  using ValueVector = typename SPOSetT<T>::ValueVector;
-  using GradVector  = typename SPOSetT<T>::GradVector;
-  using HessVector  = typename SPOSetT<T>::HessVector;
-  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
-  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
-  using HessMatrix  = typename SPOSetT<T>::HessMatrix;
-  using GGGMatrix   = typename SPOSetT<T>::GGGMatrix;
-  using RealType    = typename SPOSetT<T>::RealType;
-  using PosType     = typename SPOSetT<T>::PosType;
-  using ValueType   = typename SPOSetT<T>::ValueType;
+    using ValueVector = typename SPOSetT<T>::ValueVector;
+    using GradVector = typename SPOSetT<T>::GradVector;
+    using HessVector = typename SPOSetT<T>::HessVector;
+    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+    using GradMatrix = typename SPOSetT<T>::GradMatrix;
+    using HessMatrix = typename SPOSetT<T>::HessMatrix;
+    using GGGMatrix = typename SPOSetT<T>::GGGMatrix;
+    using RealType = typename SPOSetT<T>::RealType;
+    using PosType = typename SPOSetT<T>::PosType;
+    using ValueType = typename SPOSetT<T>::ValueType;
 
-  FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart);
-  ~FreeOrbitalT();
+    FreeOrbitalT(
+        const std::string& my_name, const std::vector<PosType>& kpts_cart);
+    ~FreeOrbitalT();
 
-  inline std::string getClassName() const final { return "FreeOrbital"; }
+    inline std::string
+    getClassName() const final
+    {
+        return "FreeOrbital";
+    }
 
-  // phi[i][j] is phi_j(r_i), i.e. electron i in orbital j
-  //  i \in [first, last)
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            ValueMatrix& phi,
-                            GradMatrix& dphi,
-                            ValueMatrix& d2phi) final;
+    // phi[i][j] is phi_j(r_i), i.e. electron i in orbital j
+    //  i \in [first, last)
+    void
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+        ValueMatrix& phi, GradMatrix& dphi, ValueMatrix& d2phi) final;
 
-  // plug r_i into all orbitals
-  void evaluateVGL(const ParticleSet& P, int i, ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec) final;
-  void evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec) final;
+    // plug r_i into all orbitals
+    void
+    evaluateVGL(const ParticleSetT<T>& P, int i, ValueVector& pvec,
+        GradVector& dpvec, ValueVector& d2pvec) final;
+    void
+    evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& pvec) final;
 
-  // hessian matrix is needed by backflow
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            ValueMatrix& phi,
-                            GradMatrix& dphi,
-                            HessMatrix& d2phi_mat) final;
+    // hessian matrix is needed by backflow
+    void
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+        ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat) final;
 
-  // derivative of hessian is needed to optimize backflow
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            ValueMatrix& phi,
-                            GradMatrix& dphi,
-                            HessMatrix& d2phi_mat,
-                            GGGMatrix& d3phi_mat) override;
+    // derivative of hessian is needed to optimize backflow
+    void
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+        ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat,
+        GGGMatrix& d3phi_mat) override;
 
-  void report(const std::string& pad) const override;
-  // ---- begin required overrides
-  std::unique_ptr<SPOSetT<T>> makeClone() const final { return std::make_unique<FreeOrbitalT<T>>(*this); }
-  void setOrbitalSetSize(int norbs) final { throw std::runtime_error("not implemented"); }
-  // required overrides end ----
+    void
+    report(const std::string& pad) const override;
+    // ---- begin required overrides
+    std::unique_ptr<SPOSetT<T>>
+    makeClone() const final
+    {
+        return std::make_unique<FreeOrbitalT<T>>(*this);
+    }
+    void
+    setOrbitalSetSize(int norbs) final
+    {
+        throw std::runtime_error("not implemented");
+    }
+    // required overrides end ----
 private:
-  const std::vector<PosType> kvecs; // kvecs vectors
-  const int mink;                   // minimum k index
-  const int maxk;                   // maximum number of kvecs vectors
-  std::vector<RealType> k2neg;      // minus kvecs^2
+    const std::vector<PosType> kvecs; // kvecs vectors
+    const int mink; // minimum k index
+    const int maxk; // maximum number of kvecs vectors
+    std::vector<RealType> k2neg; // minus kvecs^2
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp
index 0e1638f765..77ae1eda5a 100644
--- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp
+++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp
@@ -1,204 +1,211 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
+//                    Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National
+//                    Laboratory
 //
-// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
+// Laboratory
 //////////////////////////////////////////////////////////////////////////////////////
 
-
 #include "SHOSetBuilderT.h"
-#include "QMCWaveFunctions/SPOSetInputInfo.h"
+
 #include "OhmmsData/AttributeSet.h"
+#include "QMCWaveFunctions/SPOSetInputInfo.h"
 #include "Utilities/IteratorUtility.h"
 #include "Utilities/string_utils.h"
 
-
 namespace qmcplusplus
 {
-template<class T>
-SHOSetBuilderT<T>::SHOSetBuilderT(ParticleSet& P, Communicate* comm) : SPOSetBuilderT<T>("SHO", comm), Ps(P)
+template <class T>
+SHOSetBuilderT<T>::SHOSetBuilderT(ParticleSetT<T>& P, Communicate* comm) :
+    SPOSetBuilderT<T>("SHO", comm),
+    Ps(P)
 {
-  this->ClassName = "SHOSetBuilderT";
-  this->legacy    = false;
-  app_log() << "Constructing SHOSetBuilderT" << std::endl;
-  reset();
+    this->ClassName = "SHOSetBuilderT";
+    this->legacy = false;
+    app_log() << "Constructing SHOSetBuilderT" << std::endl;
+    reset();
 }
 
-template<class T>
+template <class T>
 SHOSetBuilderT<T>::~SHOSetBuilderT() = default;
 
-template<class T>
-void SHOSetBuilderT<T>::reset()
+template <class T>
+void
+SHOSetBuilderT<T>::reset()
 {
-  nstates = 0;
-  mass    = -1.0;
-  energy  = -1.0;
-  length  = -1.0;
-  center  = 0.0;
+    nstates = 0;
+    mass = -1.0;
+    energy = -1.0;
+    length = -1.0;
+    center = 0.0;
 }
 
-template<class T>
-std::unique_ptr<SPOSetT<T>> SHOSetBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
+template <class T>
+std::unique_ptr<SPOSetT<T>>
+SHOSetBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
 {
-  APP_ABORT("SHOSetBuilderT::createSPOSetFromXML  SHOSetBuilder should not use legacy interface");
+    APP_ABORT("SHOSetBuilderT::createSPOSetFromXML  SHOSetBuilder should not "
+              "use legacy interface");
 
-  app_log() << "SHOSetBuilderT::createSHOSet(xml) " << std::endl;
+    app_log() << "SHOSetBuilderT::createSHOSet(xml) " << std::endl;
 
-  SPOSetInputInfo input(cur);
+    SPOSetInputInfo input(cur);
 
-  return createSPOSet(cur, input);
+    return createSPOSet(cur, input);
 }
 
-template<class T>
-std::unique_ptr<SPOSetT<T>> SHOSetBuilderT<T>::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input)
+template <class T>
+std::unique_ptr<SPOSetT<T>>
+SHOSetBuilderT<T>::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input)
 {
-  app_log() << "SHOSetBuilderT::createSHOSet(indices) " << std::endl;
-  reset();
-
-  // read parameters
-  std::string spo_name = "sho";
-  OhmmsAttributeSet attrib;
-  attrib.add(spo_name, "name");
-  attrib.add(spo_name, "id");
-  attrib.add(mass, "mass");
-  attrib.add(energy, "energy");
-  attrib.add(energy, "frequency");
-  attrib.add(length, "length");
-  attrib.add(center, "center");
-  attrib.add(nstates, "size");
-  attrib.put(cur);
-
-  if (energy < 0.0)
-    energy = 1.0;
-  if (mass < 0.0 && length < 0.0)
-    length = 1.0;
-  if (mass < 0.0)
-    mass = 1.0 / (energy * length * length);
-  else if (length < 0.0)
-    length = 1.0 / std::sqrt(mass * energy);
-
-  // initialize states and/or adjust basis
-  int smax = -1;
-  if (input.has_index_info)
-    smax = std::max(smax, input.max_index());
-  if (input.has_energy_info)
-  {
-    smax = std::max(smax, (int)std::ceil(input.max_energy() / energy));
-  }
-  if (smax < 0)
-    APP_ABORT("SHOSetBuilderT::Initialize\n  invalid basis size");
-  update_basis_states(smax);
-
-  // create sho state request
-  indices_t& indices = input.get_indices(this->states);
-  std::vector<SHOState*> sho_states;
-  for (int i = 0; i < indices.size(); ++i)
-    sho_states.push_back(basis_states[indices[i]]);
-
-  // make the sposet
-  auto sho = std::make_unique<SHOSetT<T>>(spo_name, length, center, sho_states);
-
-  sho->report("  ");
-  return sho;
+    app_log() << "SHOSetBuilderT::createSHOSet(indices) " << std::endl;
+    reset();
+
+    // read parameters
+    std::string spo_name = "sho";
+    OhmmsAttributeSet attrib;
+    attrib.add(spo_name, "name");
+    attrib.add(spo_name, "id");
+    attrib.add(mass, "mass");
+    attrib.add(energy, "energy");
+    attrib.add(energy, "frequency");
+    attrib.add(length, "length");
+    attrib.add(center, "center");
+    attrib.add(nstates, "size");
+    attrib.put(cur);
+
+    if (energy < 0.0)
+        energy = 1.0;
+    if (mass < 0.0 && length < 0.0)
+        length = 1.0;
+    if (mass < 0.0)
+        mass = 1.0 / (energy * length * length);
+    else if (length < 0.0)
+        length = 1.0 / std::sqrt(mass * energy);
+
+    // initialize states and/or adjust basis
+    int smax = -1;
+    if (input.has_index_info)
+        smax = std::max(smax, input.max_index());
+    if (input.has_energy_info) {
+        smax = std::max(smax, (int)std::ceil(input.max_energy() / energy));
+    }
+    if (smax < 0)
+        APP_ABORT("SHOSetBuilderT::Initialize\n  invalid basis size");
+    update_basis_states(smax);
+
+    // create sho state request
+    indices_t& indices = input.get_indices(this->states);
+    std::vector<SHOState*> sho_states;
+    for (int i = 0; i < indices.size(); ++i)
+        sho_states.push_back(basis_states[indices[i]]);
+
+    // make the sposet
+    auto sho =
+        std::make_unique<SHOSetT<T>>(spo_name, length, center, sho_states);
+
+    sho->report("  ");
+    return sho;
 }
 
-template<class T>
-void SHOSetBuilderT<T>::update_basis_states(int smax)
+template <class T>
+void
+SHOSetBuilderT<T>::update_basis_states(int smax)
 {
-  int states_required = smax - basis_states.size() + 1;
-  if (states_required > 0)
-  {
-    RealType N = smax + 1;
-    if (QMCTraits::DIM == 1)
-      nmax = smax;
-    else if (QMCTraits::DIM == 2)
-      nmax = std::ceil(.5 * std::sqrt(8. * N + 1.) - 1.5);
-    else if (QMCTraits::DIM == 3)
-    {
-      RealType f = std::exp(1.0 / 3.0 * std::log(81. * N + 3. * std::sqrt(729. * N * N - 3.)));
-      nmax       = std::ceil(f / 3. + 1. / f - 2.);
-    }
-    else
-      APP_ABORT("SHOSetBuilderT::update_basis_states  dimensions other than 1, 2, or 3 are not supported");
-    int ndim                     = nmax + 1;
-    ind_dims[QMCTraits::DIM - 1] = 1;
-    for (int d = QMCTraits::DIM - 2; d > -1; --d)
-      ind_dims[d] = ind_dims[d + 1] * ndim;
-    int s    = 0;
-    int ntot = pow(ndim, QMCTraits::DIM);
-    TinyVector<int, QMCTraits::DIM> qnumber;
-    for (int m = 0; m < ntot; ++m)
-    {
-      int n    = 0; // principal quantum number
-      int nrem = m;
-      for (int d = 0; d < QMCTraits::DIM; ++d)
-      {
-        int i = nrem / ind_dims[d];
-        nrem -= i * ind_dims[d];
-        qnumber[d] = i;
-        n += i;
-      }
-      if (n <= nmax)
-      {
-        SHOState* st;
-        if (s < basis_states.size())
-          st = basis_states[s];
+    int states_required = smax - basis_states.size() + 1;
+    if (states_required > 0) {
+        RealType N = smax + 1;
+        if (QMCTraits::DIM == 1)
+            nmax = smax;
+        else if (QMCTraits::DIM == 2)
+            nmax = std::ceil(.5 * std::sqrt(8. * N + 1.) - 1.5);
+        else if (QMCTraits::DIM == 3) {
+            RealType f = std::exp(1.0 / 3.0 *
+                std::log(81. * N + 3. * std::sqrt(729. * N * N - 3.)));
+            nmax = std::ceil(f / 3. + 1. / f - 2.);
+        }
         else
-        {
-          st = new SHOState();
-          basis_states.add(st);
+            APP_ABORT("SHOSetBuilderT::update_basis_states  dimensions other "
+                      "than 1, 2, or 3 are not supported");
+        int ndim = nmax + 1;
+        ind_dims[QMCTraits::DIM - 1] = 1;
+        for (int d = QMCTraits::DIM - 2; d > -1; --d)
+            ind_dims[d] = ind_dims[d + 1] * ndim;
+        int s = 0;
+        int ntot = pow(ndim, QMCTraits::DIM);
+        TinyVector<int, QMCTraits::DIM> qnumber;
+        for (int m = 0; m < ntot; ++m) {
+            int n = 0; // principal quantum number
+            int nrem = m;
+            for (int d = 0; d < QMCTraits::DIM; ++d) {
+                int i = nrem / ind_dims[d];
+                nrem -= i * ind_dims[d];
+                qnumber[d] = i;
+                n += i;
+            }
+            if (n <= nmax) {
+                SHOState* st;
+                if (s < basis_states.size())
+                    st = basis_states[s];
+                else {
+                    st = new SHOState();
+                    basis_states.add(st);
+                }
+                RealType e = energy * (n + .5 * QMCTraits::DIM);
+                st->set(qnumber, e);
+                s++;
+            }
         }
-        RealType e = energy * (n + .5 * QMCTraits::DIM);
-        st->set(qnumber, e);
-        s++;
-      }
+        basis_states.energy_sort(1e-6, true);
+    }
+
+    // reset energy scale even if no states need to be added
+    for (int i = 0; i < basis_states.size(); ++i) {
+        SHOState& state = *basis_states[i];
+        const TinyVector<int, QMCTraits::DIM>& qnumber = state.quantum_number;
+        int n = 0;
+        for (int d = 0; d < QMCTraits::DIM; ++d)
+            n += qnumber[d];
+        state.energy = energy * (n + .5 * QMCTraits::DIM);
     }
-    basis_states.energy_sort(1e-6, true);
-  }
-
-  // reset energy scale even if no states need to be added
-  for (int i = 0; i < basis_states.size(); ++i)
-  {
-    SHOState& state                                = *basis_states[i];
-    const TinyVector<int, QMCTraits::DIM>& qnumber = state.quantum_number;
-    int n                                          = 0;
-    for (int d = 0; d < QMCTraits::DIM; ++d)
-      n += qnumber[d];
-    state.energy = energy * (n + .5 * QMCTraits::DIM);
-  }
-
-  //somewhat redundant, but necessary
-  this->clear_states(0);
-  this->states[0]->finish(basis_states.states);
-
-  if (basis_states.size() <= smax)
-    APP_ABORT("SHOSetBuilderT::update_basis_states  failed to make enough states");
+
+    // somewhat redundant, but necessary
+    this->clear_states(0);
+    this->states[0]->finish(basis_states.states);
+
+    if (basis_states.size() <= smax)
+        APP_ABORT("SHOSetBuilderT::update_basis_states  failed to make enough "
+                  "states");
 }
 
-template<class T>
-void SHOSetBuilderT<T>::report(const std::string& pad)
+template <class T>
+void
+SHOSetBuilderT<T>::report(const std::string& pad)
 {
-  app_log() << pad << "SHOSetBuilderT report" << std::endl;
-  app_log() << pad << "  dimension = " << QMCTraits::DIM << std::endl;
-  app_log() << pad << "  mass      = " << mass << std::endl;
-  app_log() << pad << "  frequency = " << energy << std::endl;
-  app_log() << pad << "  energy    = " << energy << std::endl;
-  app_log() << pad << "  length    = " << length << std::endl;
-  app_log() << pad << "  center    = " << center << std::endl;
-  app_log() << pad << "  nstates   = " << nstates << std::endl;
-  app_log() << pad << "  nmax      = " << nmax << std::endl;
-  app_log() << pad << "  ind_dims  = " << ind_dims << std::endl;
-  app_log() << pad << "  # basis states = " << basis_states.size() << std::endl;
-  app_log() << pad << "  basis_states" << std::endl;
-  for (int s = 0; s < basis_states.size(); ++s)
-    basis_states[s]->report(pad + "  " + int2string(s) + " ");
-  app_log() << pad << "end SHOSetBuilderT report" << std::endl;
-  app_log().flush();
+    app_log() << pad << "SHOSetBuilderT report" << std::endl;
+    app_log() << pad << "  dimension = " << QMCTraits::DIM << std::endl;
+    app_log() << pad << "  mass      = " << mass << std::endl;
+    app_log() << pad << "  frequency = " << energy << std::endl;
+    app_log() << pad << "  energy    = " << energy << std::endl;
+    app_log() << pad << "  length    = " << length << std::endl;
+    app_log() << pad << "  center    = " << center << std::endl;
+    app_log() << pad << "  nstates   = " << nstates << std::endl;
+    app_log() << pad << "  nmax      = " << nmax << std::endl;
+    app_log() << pad << "  ind_dims  = " << ind_dims << std::endl;
+    app_log() << pad << "  # basis states = " << basis_states.size()
+              << std::endl;
+    app_log() << pad << "  basis_states" << std::endl;
+    for (int s = 0; s < basis_states.size(); ++s)
+        basis_states[s]->report(pad + "  " + int2string(s) + " ");
+    app_log() << pad << "end SHOSetBuilderT report" << std::endl;
+    app_log().flush();
 }
 
 template class SHOSetBuilderT<double>;
diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h
index 7b3e9430d8..96237ab55e 100644
--- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h
+++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h
@@ -1,16 +1,18 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
+//                    Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National
+//                    Laboratory
 //
-// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
+// Laboratory
 //////////////////////////////////////////////////////////////////////////////////////
 
-
 #ifndef QMCPLUSPLUS_SHO_BASIS_BUILDERT_H
 #define QMCPLUSPLUS_SHO_BASIS_BUILDERT_H
 
@@ -20,43 +22,48 @@
 
 namespace qmcplusplus
 {
-template<class T>
+template <class T>
 class SHOSetBuilderT : public SPOSetBuilderT<T>
 {
 public:
-  using RealType  = typename SPOSetT<T>::RealType;
-  using PosType   = typename SPOSetT<T>::PosType;
-  using indices_t = typename SPOSetBuilderT<T>::indices_t;
+    using RealType = typename SPOSetT<T>::RealType;
+    using PosType = typename SPOSetT<T>::PosType;
+    using indices_t = typename SPOSetBuilderT<T>::indices_t;
 
-  ParticleSet& Ps;
+    ParticleSetT<T>& Ps;
 
-  RealType length;
-  RealType mass;
-  RealType energy;
-  PosType center;
+    RealType length;
+    RealType mass;
+    RealType energy;
+    PosType center;
 
-  int nstates;
-  int nmax;
-  TinyVector<int, QMCTraits::DIM> ind_dims;
+    int nstates;
+    int nmax;
+    TinyVector<int, QMCTraits::DIM> ind_dims;
 
-  SPOSetInfoSimple<SHOState> basis_states;
+    SPOSetInfoSimple<SHOState> basis_states;
 
-  //construction/destruction
-  SHOSetBuilderT(ParticleSet& P, Communicate* comm);
+    // construction/destruction
+    SHOSetBuilderT(ParticleSetT<T>& P, Communicate* comm);
 
-  ~SHOSetBuilderT() override;
+    ~SHOSetBuilderT() override;
 
-  //reset parameters
-  void reset();
+    // reset parameters
+    void
+    reset();
 
-  //SPOSetBuilder interface
-  std::unique_ptr<SPOSetT<T>> createSPOSetFromXML(xmlNodePtr cur) override;
+    // SPOSetBuilder interface
+    std::unique_ptr<SPOSetT<T>>
+    createSPOSetFromXML(xmlNodePtr cur) override;
 
-  std::unique_ptr<SPOSetT<T>> createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) override;
+    std::unique_ptr<SPOSetT<T>>
+    createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) override;
 
-  //local functions
-  void update_basis_states(int smax);
-  void report(const std::string& pad = "");
+    // local functions
+    void
+    update_basis_states(int smax);
+    void
+    report(const std::string& pad = "");
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp
index 76a606151d..b4e55a258d 100644
--- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp
+++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp
@@ -1,571 +1,555 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
+//                    Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National
+//                    Laboratory
 //
-// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
+// Laboratory
 //////////////////////////////////////////////////////////////////////////////////////
 
-
 #include "SHOSetT.h"
+
 #include "Utilities/string_utils.h"
 
 namespace qmcplusplus
 {
 template <typename T>
-SHOSetT<T>::SHOSetT(const std::string& my_name, RealType l, PosType c, const std::vector<SHOState*>& sho_states)
-    : SPOSetT<T>(my_name), length(l), center(c)
+SHOSetT<T>::SHOSetT(const std::string& my_name, RealType l, PosType c,
+    const std::vector<SHOState*>& sho_states) :
+    SPOSetT<T>(my_name),
+    length(l),
+    center(c)
 {
-  state_info.resize(sho_states.size());
-  for (int s = 0; s < sho_states.size(); ++s)
-    state_info[s] = *sho_states[s];
-  initialize();
+    state_info.resize(sho_states.size());
+    for (int s = 0; s < sho_states.size(); ++s)
+        state_info[s] = *sho_states[s];
+    initialize();
 }
 
 template <typename T>
-void SHOSetT<T>::initialize()
+void
+SHOSetT<T>::initialize()
 {
-  using std::sqrt;
+    using std::sqrt;
 
-  this->OrbitalSetSize = state_info.size();
+    this->OrbitalSetSize = state_info.size();
 
-  qn_max = -1;
-  for (int s = 0; s < state_info.size(); ++s)
+    qn_max = -1;
+    for (int s = 0; s < state_info.size(); ++s)
+        for (int d = 0; d < QMCTraits::DIM; ++d)
+            qn_max[d] = std::max(qn_max[d], state_info[s].quantum_number[d]);
+    qn_max += 1;
+
+    nmax = -1;
     for (int d = 0; d < QMCTraits::DIM; ++d)
-      qn_max[d] = std::max(qn_max[d], state_info[s].quantum_number[d]);
-  qn_max += 1;
-
-  nmax = -1;
-  for (int d = 0; d < QMCTraits::DIM; ++d)
-    nmax = std::max(nmax, qn_max[d]);
-
-  prefactors.resize(nmax);
-  hermite.resize(QMCTraits::DIM, nmax);
-  bvalues.resize(QMCTraits::DIM, nmax);
-
-  if (nmax > 0)
-  {
-    prefactors[0] = 1.0 / (sqrt(sqrt(M_PI) * length));
-    for (int n = 1; n < nmax; ++n)
-      prefactors[n] = prefactors[n - 1] / sqrt(2. * n);
-  }
+        nmax = std::max(nmax, qn_max[d]);
+
+    prefactors.resize(nmax);
+    hermite.resize(QMCTraits::DIM, nmax);
+    bvalues.resize(QMCTraits::DIM, nmax);
+
+    if (nmax > 0) {
+        prefactors[0] = 1.0 / (sqrt(sqrt(M_PI) * length));
+        for (int n = 1; n < nmax; ++n)
+            prefactors[n] = prefactors[n - 1] / sqrt(2. * n);
+    }
 }
 
 template <typename T>
 SHOSetT<T>::~SHOSetT() = default;
 
 template <typename T>
-std::unique_ptr<SPOSetT<T>> SHOSetT<T>::makeClone() const { return std::make_unique<SHOSetT<T>>(*this); }
-
-template <typename T>
-void SHOSetT<T>::report(const std::string& pad) const
+std::unique_ptr<SPOSetT<T>>
+SHOSetT<T>::makeClone() const
 {
-  app_log() << pad << "SHOSet report" << std::endl;
-  app_log() << pad << "  length    = " << length << std::endl;
-  app_log() << pad << "  center    = " << center << std::endl;
-  app_log() << pad << "  nmax      = " << nmax << std::endl;
-  app_log() << pad << "  qn_max    = " << qn_max << std::endl;
-  app_log() << pad << "  # states  = " << state_info.size() << std::endl;
-  app_log() << pad << "  states" << std::endl;
-  for (int s = 0; s < state_info.size(); ++s)
-    state_info[s].sho_report(pad + "    " + int2string(s) + " ");
-  app_log() << pad << "end SHOSet report" << std::endl;
-  app_log().flush();
+    return std::make_unique<SHOSetT<T>>(*this);
 }
 
 template <typename T>
-void SHOSetT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+void
+SHOSetT<T>::report(const std::string& pad) const
 {
-  const PosType& r(P.activeR(iat));
-  ValueVector p(&psi[0], this->size());
-  evaluate_v(r, p);
+    app_log() << pad << "SHOSet report" << std::endl;
+    app_log() << pad << "  length    = " << length << std::endl;
+    app_log() << pad << "  center    = " << center << std::endl;
+    app_log() << pad << "  nmax      = " << nmax << std::endl;
+    app_log() << pad << "  qn_max    = " << qn_max << std::endl;
+    app_log() << pad << "  # states  = " << state_info.size() << std::endl;
+    app_log() << pad << "  states" << std::endl;
+    for (int s = 0; s < state_info.size(); ++s)
+        state_info[s].sho_report(pad + "    " + int2string(s) + " ");
+    app_log() << pad << "end SHOSet report" << std::endl;
+    app_log().flush();
 }
 
 template <typename T>
-void SHOSetT<T>::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+void
+SHOSetT<T>::evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi)
 {
-  const PosType& r(P.activeR(iat));
-  ValueVector p(&psi[0], this->size());
-  GradVector dp(&dpsi[0], this->size());
-  ValueVector d2p(&d2psi[0], this->size());
-  evaluate_vgl(r, p, dp, d2p);
+    const PosType& r(P.activeR(iat));
+    ValueVector p(&psi[0], this->size());
+    evaluate_v(r, p);
 }
 
 template <typename T>
-void SHOSetT<T>::evaluate_notranspose(const ParticleSet& P,
-                                  int first,
-                                  int last,
-                                  ValueMatrix& logdet,
-                                  GradMatrix& dlogdet,
-                                  ValueMatrix& d2logdet)
+void
+SHOSetT<T>::evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
+    GradVector& dpsi, ValueVector& d2psi)
 {
-  for (int iat = first, i = 0; iat < last; ++iat, ++i)
-  {
-    ValueVector p(logdet[i], this->size());
-    GradVector dp(dlogdet[i], this->size());
-    ValueVector d2p(d2logdet[i], this->size());
-    evaluate_vgl(P.R[iat], p, dp, d2p);
-  }
+    const PosType& r(P.activeR(iat));
+    ValueVector p(&psi[0], this->size());
+    GradVector dp(&dpsi[0], this->size());
+    ValueVector d2p(&d2psi[0], this->size());
+    evaluate_vgl(r, p, dp, d2p);
 }
 
 template <typename T>
-void SHOSetT<T>::evaluate_v(PosType r, ValueVector& psi)
+void // One row of logdet/dlogdet/d2logdet per particle index in [first, last).
+SHOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+    ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet)
 {
-  PosType x = (r - center) / length;
-  evaluate_hermite(x);
-  evaluate_d0(x, psi);
+    for (int iat = first, i = 0; iat < last; ++iat, ++i) { // row i holds particle first + i
+        ValueVector p(logdet[i], this->size());
+        GradVector dp(dlogdet[i], this->size());
+        ValueVector d2p(d2logdet[i], this->size());
+        evaluate_vgl(P.R[iat], p, dp, d2p); // reads P.R[iat], not P.activeR(iat)
+    }
 }
 
 template <typename T>
-void SHOSetT<T>::evaluate_vgl(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+void // Orbital values at position r, written to psi.
+SHOSetT<T>::evaluate_v(PosType r, ValueVector& psi)
 {
-  PosType x = (r - center) / length;
-  evaluate_hermite(x);
-  evaluate_d0(x, psi);
-  evaluate_d1(x, psi, dpsi);
-  evaluate_d2(x, psi, d2psi);
+    PosType x = (r - center) / length; // offset from center, divided by length
+    evaluate_hermite(x); // must precede evaluate_d0, which reads hermite(d, n)
+    evaluate_d0(x, psi);
 }
 
 template <typename T>
-void SHOSetT<T>::evaluate_hermite(const PosType& xpos)
+void // Values, gradients and Laplacians of every orbital at position r.
+SHOSetT<T>::evaluate_vgl(
+    PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
 {
-  for (int d = 0; d < QMCTraits::DIM; ++d)
-  {
-    int nh = qn_max[d];
-    if (nh > 0)
-    {
-      RealType x    = xpos[d];
-      hermite(d, 0) = 1.0;
-      RealType Hnm2 = 0.0;
-      RealType Hnm1 = 1.0;
-      for (int n = 1; n < nh; ++n)
-      {
-        RealType Hn   = 2 * (x * Hnm1 - (n - 1) * Hnm2);
-        hermite(d, n) = Hn;
-        Hnm2          = Hnm1;
-        Hnm1          = Hn;
-      }
-    }
-  }
+    PosType x = (r - center) / length; // offset from center, divided by length
+    evaluate_hermite(x); // fills hermite(d, n), read by evaluate_d0 and evaluate_d1
+    evaluate_d0(x, psi); // psi must be filled first: d1 and d2 scale by psi[s]
+    evaluate_d1(x, psi, dpsi); // each stage overwrites the shared bvalues table
+    evaluate_d2(x, psi, d2psi);
 }
 
 template <typename T>
-void SHOSetT<T>::evaluate_d0(const PosType& xpos, ValueVector& psi)
+void // Tabulate hermite(d, n) for n < qn_max[d] in each dimension d.
+SHOSetT<T>::evaluate_hermite(const PosType& xpos)
 {
-  using std::exp;
-  for (int d = 0; d < QMCTraits::DIM; ++d)
-  {
-    RealType x = xpos[d];
-    RealType g = exp(-.5 * x * x);
-    for (int n = 0; n < qn_max[d]; ++n)
-    {
-      bvalues(d, n) = prefactors[n] * g * hermite(d, n);
+    for (int d = 0; d < QMCTraits::DIM; ++d) {
+        int nh = qn_max[d];
+        if (nh > 0) { // nothing is written for a dimension with qn_max[d] <= 0
+            RealType x = xpos[d];
+            hermite(d, 0) = 1.0;
+            RealType Hnm2 = 0.0; // its coefficient (n - 1) is 0 at n = 1, so this seed is never used
+            RealType Hnm1 = 1.0;
+            for (int n = 1; n < nh; ++n) {
+                RealType Hn = 2 * (x * Hnm1 - (n - 1) * Hnm2); // upward recurrence in n
+                hermite(d, n) = Hn;
+                Hnm2 = Hnm1;
+                Hnm1 = Hn;
+            }
+        }
     }
-  }
-  for (int s = 0; s < state_info.size(); ++s)
-  {
-    const SHOState& state = state_info[s];
-    RealType phi          = 1.0;
-    for (int d = 0; d < QMCTraits::DIM; ++d)
-      phi *= bvalues(d, state.quantum_number[d]);
-    psi[s] = phi;
-  }
 }
 
 template <typename T>
-void SHOSetT<T>::evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi)
+void // Values: psi[s] = product over d of bvalues(d, quantum_number[d]).
+SHOSetT<T>::evaluate_d0(const PosType& xpos, ValueVector& psi)
 {
-  RealType ol = 1.0 / length;
-  for (int d = 0; d < QMCTraits::DIM; ++d)
-  {
-    RealType x    = xpos[d];
-    RealType Hnm1 = 0.0;
-    for (int n = 0; n < qn_max[d]; ++n)
-    {
-      RealType Hn   = hermite(d, n);
-      bvalues(d, n) = (-x + 2 * n * Hnm1 / Hn) * ol;
-      Hnm1          = Hn;
+    using std::exp;
+    for (int d = 0; d < QMCTraits::DIM; ++d) {
+        RealType x = xpos[d];
+        RealType g = exp(-.5 * x * x); // Gaussian factor shared by every n in this dimension
+        for (int n = 0; n < qn_max[d]; ++n) {
+            bvalues(d, n) = prefactors[n] * g * hermite(d, n); // reads hermite as left by evaluate_hermite
+        }
+    }
+    for (int s = 0; s < state_info.size(); ++s) {
+        const SHOState& state = state_info[s];
+        RealType phi = 1.0;
+        for (int d = 0; d < QMCTraits::DIM; ++d)
+            phi *= bvalues(d, state.quantum_number[d]);
+        psi[s] = phi;
     }
-  }
-  for (int s = 0; s < state_info.size(); ++s)
-  {
-    const SHOState& state = state_info[s];
-    TinyVector<T, QMCTraits::DIM> dphi;
-    for (int d = 0; d < QMCTraits::DIM; ++d)
-      dphi[d] = bvalues(d, state.quantum_number[d]);
-    dphi *= psi[s];
-    dpsi[s] = dphi;
-  }
 }
 
 template <typename T>
-void SHOSetT<T>::evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& d2psi)
+void // Gradients: dpsi[s][d] = psi[s] * bvalues(d, quantum_number[d]).
+SHOSetT<T>::evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi)
 {
-  RealType ol2 = 1.0 / (length * length);
-  for (int d = 0; d < QMCTraits::DIM; ++d)
-  {
-    RealType x  = xpos[d];
-    RealType x2 = x * x;
-    for (int n = 0; n < qn_max[d]; ++n)
-    {
-      bvalues(d, n) = (-1.0 + x2 - 2 * n) * ol2;
+    RealType ol = 1.0 / length; // presumably the chain-rule factor for x = (r - center) / length - confirm
+    for (int d = 0; d < QMCTraits::DIM; ++d) {
+        RealType x = xpos[d];
+        RealType Hnm1 = 0.0; // multiplied by n == 0 on the first pass
+        for (int n = 0; n < qn_max[d]; ++n) {
+            RealType Hn = hermite(d, n); // reads hermite as left by evaluate_hermite
+            bvalues(d, n) = (-x + 2 * n * Hnm1 / Hn) * ol; // NOTE(review): divides by Hn with no zero guard
+            Hnm1 = Hn;
+        }
+    }
+    for (int s = 0; s < state_info.size(); ++s) {
+        const SHOState& state = state_info[s];
+        TinyVector<T, QMCTraits::DIM> dphi;
+        for (int d = 0; d < QMCTraits::DIM; ++d)
+            dphi[d] = bvalues(d, state.quantum_number[d]);
+        dphi *= psi[s]; // psi must already hold the values from evaluate_d0
+        dpsi[s] = dphi;
     }
-  }
-  for (int s = 0; s < state_info.size(); ++s)
-  {
-    const SHOState& state = state_info[s];
-    T d2phi       = 0.0;
-    for (int d = 0; d < QMCTraits::DIM; ++d)
-      d2phi += bvalues(d, state.quantum_number[d]);
-    d2phi *= psi[s];
-    d2psi[s] = d2phi;
-  }
 }
 
 template <typename T>
-void SHOSetT<T>::evaluate_check(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+void // Laplacians: d2psi[s] = psi[s] * sum over d of bvalues(d, quantum_number[d]).
+SHOSetT<T>::evaluate_d2(
+    const PosType& xpos, ValueVector& psi, ValueVector& d2psi)
 {
-  using std::exp;
-  using std::sqrt;
-
-  evaluate_vgl(r, psi, dpsi, d2psi);
-
-  const int N = 6;
-  RealType H[N], dH[N], d2H[N], pre[N];
-  RealType p[N], dp[N], d2p[N];
-
-  pre[0] = 1.0 / (sqrt(sqrt(M_PI) * length));
-  for (int n = 1; n < N; ++n)
-    pre[n] = pre[n - 1] / sqrt(2. * n);
-
-  for (int d = 0; d < QMCTraits::DIM; ++d)
-  {
-    RealType x  = (r[d] - center[d]) / length;
-    RealType x2 = x * x, x3 = x * x * x, x4 = x * x * x * x, x5 = x * x * x * x * x;
-    H[0]       = 1;
-    dH[0]      = 0;
-    d2H[0]     = 0;
-    H[1]       = 2 * x;
-    dH[1]      = 2;
-    d2H[1]     = 0;
-    H[2]       = 4 * x2 - 2;
-    dH[2]      = 8 * x;
-    d2H[2]     = 8;
-    H[3]       = 8 * x3 - 12 * x;
-    dH[3]      = 24 * x2 - 12;
-    d2H[3]     = 48 * x;
-    H[4]       = 16 * x4 - 48 * x2 + 12;
-    dH[4]      = 64 * x3 - 96 * x;
-    d2H[4]     = 192 * x2 - 96;
-    H[5]       = 32 * x5 - 160 * x3 + 120 * x;
-    dH[5]      = 160 * x4 - 480 * x2 + 120;
-    d2H[5]     = 640 * x3 - 960 * x;
-    RealType g = exp(-x2 / 2);
-    for (int n = 0; n < N; ++n)
-    {
-      p[n]   = pre[n] * g * H[n];
-      dp[n]  = pre[n] * g * (-x * H[n] + dH[n]);
-      d2p[n] = pre[n] * g * ((x2 - 1) * H[n] - 2 * x * dH[n] + d2H[n]);
-    }
-    app_log() << "eval check dim = " << d << "  x = " << x << std::endl;
-    app_log() << "  hermite check" << std::endl;
-    for (int n = 0; n < qn_max[d]; ++n)
-    {
-      app_log() << "    " << n << " " << H[n] << std::endl;
-      app_log() << "    " << n << " " << hermite(d, n) << std::endl;
-    }
-    app_log() << "  phi d0 check" << std::endl;
-    for (int n = 0; n < qn_max[d]; ++n)
-    {
-      app_log() << "    " << n << " " << p[n] << std::endl;
-      app_log() << "    " << n << " " << d0_values(d, n) << std::endl;
-    }
-    app_log() << "  phi d1 check" << std::endl;
-    for (int n = 0; n < qn_max[d]; ++n)
-    {
-      app_log() << "    " << n << " " << dp[n] / p[n] << std::endl;
-      app_log() << "    " << n << " " << d1_values(d, n) << std::endl;
+    RealType ol2 = 1.0 / (length * length);
+    for (int d = 0; d < QMCTraits::DIM; ++d) {
+        RealType x = xpos[d];
+        RealType x2 = x * x;
+        for (int n = 0; n < qn_max[d]; ++n) {
+            bvalues(d, n) = (-1.0 + x2 - 2 * n) * ol2; // overwrites whatever bvalues held before this call
+        }
     }
-    app_log() << "  phi d2 check" << std::endl;
-    for (int n = 0; n < qn_max[d]; ++n)
-    {
-      app_log() << "    " << n << " " << d2p[n] / p[n] << std::endl;
-      app_log() << "    " << n << " " << d2_values(d, n) << std::endl;
+    for (int s = 0; s < state_info.size(); ++s) {
+        const SHOState& state = state_info[s];
+        T d2phi = 0.0;
+        for (int d = 0; d < QMCTraits::DIM; ++d)
+            d2phi += bvalues(d, state.quantum_number[d]);
+        d2phi *= psi[s]; // psi must already hold the values from evaluate_d0
+        d2psi[s] = d2phi;
     }
-  }
 }
 
 template <typename T>
-void SHOSetT<T>::test_derivatives()
+void // Debug aid: log closed-form 1D references (n < N) beside the stored tables.
+SHOSetT<T>::evaluate_check(
+    PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
 {
-  int n       = 3;
-  PosType c   = 5.123;
-  PosType L   = 1.0;
-  PosType drg = L / n;
-  PosType dr  = L / 1000;
-  int nphi    = state_info.size();
-
-  PosType o2dr, odr2;
-
-  ValueVector vpsi, vpsitmp;
-  GradVector vdpsi, vdpsin;
-  ValueVector vd2psi, vd2psin;
-
-
-  vpsi.resize(nphi);
-  vdpsi.resize(nphi);
-  vd2psi.resize(nphi);
-
-  vpsitmp.resize(nphi);
-  vdpsin.resize(nphi);
-  vd2psin.resize(nphi);
-
-
-  ValueVector psi(&vpsi[0], this->size());
-  GradVector dpsi(&vdpsi[0], this->size());
-  ValueVector d2psi(&vd2psi[0], this->size());
-
-  ValueVector psitmp(&vpsitmp[0], this->size());
-  GradVector dpsin(&vdpsin[0], this->size());
-  ValueVector d2psin(&vd2psin[0], this->size());
-
-
-  app_log() << " loading dr" << std::endl;
-
-  RealType odr2sum = 0.0;
-  for (int d = 0; d < QMCTraits::DIM; ++d)
-  {
-    RealType odr = 1.0 / dr[d];
-    o2dr[d]      = .5 * odr;
-    odr2[d]      = odr * odr;
-    odr2sum += odr2[d];
-  }
-
-  app_log() << "SHOSet::test_derivatives" << std::endl;
-
-  const SimulationCell simulation_cell;
-  ParticleSet Ps(simulation_cell);
-
-  int p = 0;
-  PosType r, rtmp;
-  for (int i = 0; i < n; ++i)
-  {
-    r[0] = c[0] + i * drg[0];
-    for (int j = 0; j < n; ++j)
-    {
-      r[1] = c[1] + j * drg[1];
-      for (int k = 0; k < n; ++k)
-      {
-        r[2] = c[2] + k * drg[2];
-
-        evaluate_vgl(r, psi, dpsi, d2psi);
-
-        for (int m = 0; m < nphi; ++m)
-          d2psin[m] = -2 * odr2sum * psi[m];
-        for (int d = 0; d < QMCTraits::DIM; ++d)
-        {
-          rtmp = r;
-          rtmp[d] += dr[d];
-          evaluate_v(rtmp, psitmp);
-          for (int m = 0; m < nphi; ++m)
-          {
-            T phi = psitmp[m];
-            dpsin[m][d]   = phi * o2dr[d];
-            d2psin[m] += phi * odr2[d];
-          }
-          rtmp = r;
-          rtmp[d] -= dr[d];
-          evaluate_v(rtmp, psitmp);
-          for (int m = 0; m < nphi; ++m)
-          {
-            T phi = psitmp[m];
-            dpsin[m][d] -= phi * o2dr[d];
-            d2psin[m] += phi * odr2[d];
-          }
+    using std::exp;
+    using std::sqrt;
+
+    evaluate_vgl(r, psi, dpsi, d2psi); // refreshes hermite(d, n) for this r before it is printed
+
+    const int N = 6; // number of hard-coded reference polynomials H[0..5]
+    RealType H[N], dH[N], d2H[N], pre[N];
+    RealType p[N], dp[N], d2p[N];
+
+    pre[0] = 1.0 / (sqrt(sqrt(M_PI) * length));
+    for (int n = 1; n < N; ++n)
+        pre[n] = pre[n - 1] / sqrt(2. * n);
+
+    for (int d = 0; d < QMCTraits::DIM; ++d) {
+        RealType x = (r[d] - center[d]) / length;
+        RealType x2 = x * x, x3 = x * x * x, x4 = x * x * x * x,
+                 x5 = x * x * x * x * x;
+        H[0] = 1; // H[n] with its first (dH) and second (d2H) derivatives in x
+        dH[0] = 0;
+        d2H[0] = 0;
+        H[1] = 2 * x;
+        dH[1] = 2;
+        d2H[1] = 0;
+        H[2] = 4 * x2 - 2;
+        dH[2] = 8 * x;
+        d2H[2] = 8;
+        H[3] = 8 * x3 - 12 * x;
+        dH[3] = 24 * x2 - 12;
+        d2H[3] = 48 * x;
+        H[4] = 16 * x4 - 48 * x2 + 12;
+        dH[4] = 64 * x3 - 96 * x;
+        d2H[4] = 192 * x2 - 96;
+        H[5] = 32 * x5 - 160 * x3 + 120 * x;
+        dH[5] = 160 * x4 - 480 * x2 + 120;
+        d2H[5] = 640 * x3 - 960 * x;
+        RealType g = exp(-x2 / 2);
+        for (int n = 0; n < N; ++n) {
+            p[n] = pre[n] * g * H[n];
+            dp[n] = pre[n] * g * (-x * H[n] + dH[n]);
+            d2p[n] = pre[n] * g * ((x2 - 1) * H[n] - 2 * x * dH[n] + d2H[n]);
         }
-
-        RealType dphi_diff  = 0.0;
-        RealType d2phi_diff = 0.0;
-        for (int m = 0; m < nphi; ++m)
-          for (int d = 0; d < QMCTraits::DIM; ++d)
-            dphi_diff = std::max<RealType>(dphi_diff, std::abs(dpsi[m][d] - dpsin[m][d]) / std::abs(dpsin[m][d]));
-        for (int m = 0; m < nphi; ++m)
-          d2phi_diff = std::max<RealType>(d2phi_diff, std::abs(d2psi[m] - d2psin[m]) / std::abs(d2psin[m]));
-        app_log() << "  " << p << " " << dphi_diff << " " << d2phi_diff << std::endl;
-        app_log() << "    derivatives" << std::endl;
-        for (int m = 0; m < nphi; ++m)
-        {
-          std::string qn = "";
-          for (int d = 0; d < QMCTraits::DIM; ++d)
-            qn += int2string(state_info[m].quantum_number[d]) + " ";
-          app_log() << "    " << qn;
-          for (int d = 0; d < QMCTraits::DIM; ++d)
-            app_log() << real(dpsi[m][d]) << " ";
-          app_log() << std::endl;
-          app_log() << "    " << qn;
-          for (int d = 0; d < QMCTraits::DIM; ++d)
-            app_log() << real(dpsin[m][d]) << " ";
-          app_log() << std::endl;
+        app_log() << "eval check dim = " << d << "  x = " << x << std::endl;
+        app_log() << "  hermite check" << std::endl;
+        for (int n = 0; n < qn_max[d] && n < N; ++n) { // references exist only for n < N
+            app_log() << "    " << n << " " << H[n] << std::endl;
+            app_log() << "    " << n << " " << hermite(d, n) << std::endl;
         }
-        app_log() << "    laplacians" << std::endl;
-        PosType x = r / length;
-        for (int m = 0; m < nphi; ++m)
-        {
-          std::string qn = "";
-          for (int d = 0; d < QMCTraits::DIM; ++d)
-            qn += int2string(state_info[m].quantum_number[d]) + " ";
-          app_log() << "    " << qn << real(d2psi[m] / psi[m]) << std::endl;
-          app_log() << "    " << qn << real(d2psin[m] / psi[m]) << std::endl;
+        app_log() << "  phi d0 check" << std::endl;
+        for (int n = 0; n < qn_max[d] && n < N; ++n) {
+            app_log() << "    " << n << " " << p[n] << std::endl;
+            app_log() << "    " << n << " " << d0_values(d, n) << std::endl; // NOTE(review): no visible code writes d0/d1/d2_values
+        }
+        app_log() << "  phi d1 check" << std::endl;
+        for (int n = 0; n < qn_max[d] && n < N; ++n) {
+            app_log() << "    " << n << " " << dp[n] / p[n] << std::endl;
+            app_log() << "    " << n << " " << d1_values(d, n) << std::endl;
+        }
+        app_log() << "  phi d2 check" << std::endl;
+        for (int n = 0; n < qn_max[d] && n < N; ++n) {
+            app_log() << "    " << n << " " << d2p[n] / p[n] << std::endl;
+            app_log() << "    " << n << " " << d2_values(d, n) << std::endl;
         }
-        p++;
-      }
     }
-  }
-
-  app_log() << "end SHOSet::test_derivatives" << std::endl;
 }
 
 template <typename T>
-void SHOSetT<T>::test_overlap()
+void // Debug aid: log evaluate_vgl derivatives beside central finite differences of evaluate_v.
+SHOSetT<T>::test_derivatives()
 {
-  app_log() << "SHOSet::test_overlap" << std::endl;
+    int n = 3; // sample points per axis; n * n * n positions are tested
+    PosType c = 5.123; // presumably assigns every component - confirm against PosType
+    PosType L = 1.0;
+    PosType drg = L / n; // spacing between sample positions
+    PosType dr = L / 1000; // finite-difference displacement
+    int nphi = state_info.size();
+
+    PosType o2dr, odr2;
+
+    ValueVector vpsi, vpsitmp;
+    GradVector vdpsi, vdpsin;
+    ValueVector vd2psi, vd2psin;
+
+    vpsi.resize(nphi);
+    vdpsi.resize(nphi);
+    vd2psi.resize(nphi);
+
+    vpsitmp.resize(nphi);
+    vdpsin.resize(nphi);
+    vd2psin.resize(nphi);
+
+    ValueVector psi(&vpsi[0], this->size()); // analytic results from evaluate_vgl
+    GradVector dpsi(&vdpsi[0], this->size());
+    ValueVector d2psi(&vd2psi[0], this->size());
+
+    ValueVector psitmp(&vpsitmp[0], this->size()); // values at the displaced positions
+    GradVector dpsin(&vdpsin[0], this->size()); // finite-difference gradient
+    ValueVector d2psin(&vd2psin[0], this->size()); // finite-difference Laplacian
+
+    app_log() << " loading dr" << std::endl;
+
+    RealType odr2sum = 0.0;
+    for (int d = 0; d < QMCTraits::DIM; ++d) {
+        RealType odr = 1.0 / dr[d];
+        o2dr[d] = .5 * odr; // 1 / (2 dr): central first-difference weight
+        odr2[d] = odr * odr; // 1 / dr^2: second-difference weight
+        odr2sum += odr2[d];
+    }
 
+    app_log() << "SHOSet::test_derivatives" << std::endl;
+
+    const SimulationCellT<T> simulation_cell;
+    ParticleSetT<T> Ps(simulation_cell); // NOTE(review): Ps is not referenced again in this function
+
+    int p = 0; // running index of the sample position, used only in the log
+    PosType r, rtmp;
+    for (int i = 0; i < n; ++i) {
+        r[0] = c[0] + i * drg[0];
+        for (int j = 0; j < n; ++j) {
+            r[1] = c[1] + j * drg[1];
+            for (int k = 0; k < n; ++k) {
+                r[2] = c[2] + k * drg[2];
+
+                evaluate_vgl(r, psi, dpsi, d2psi);
+
+                for (int m = 0; m < nphi; ++m)
+                    d2psin[m] = -2 * odr2sum * psi[m]; // centre term of the second difference, all dims
+                for (int d = 0; d < QMCTraits::DIM; ++d) {
+                    rtmp = r;
+                    rtmp[d] += dr[d]; // forward-displaced sample
+                    evaluate_v(rtmp, psitmp);
+                    for (int m = 0; m < nphi; ++m) {
+                        T phi = psitmp[m];
+                        dpsin[m][d] = phi * o2dr[d];
+                        d2psin[m] += phi * odr2[d];
+                    }
+                    rtmp = r;
+                    rtmp[d] -= dr[d]; // backward-displaced sample
+                    evaluate_v(rtmp, psitmp);
+                    for (int m = 0; m < nphi; ++m) {
+                        T phi = psitmp[m];
+                        dpsin[m][d] -= phi * o2dr[d];
+                        d2psin[m] += phi * odr2[d];
+                    }
+                }
+
+                RealType dphi_diff = 0.0; // largest relative gradient deviation at this position
+                RealType d2phi_diff = 0.0; // largest relative Laplacian deviation at this position
+                for (int m = 0; m < nphi; ++m)
+                    for (int d = 0; d < QMCTraits::DIM; ++d)
+                        dphi_diff = std::max<RealType>(dphi_diff,
+                            std::abs(dpsi[m][d] - dpsin[m][d]) /
+                                std::abs(dpsin[m][d])); // NOTE(review): no guard for a zero denominator
+                for (int m = 0; m < nphi; ++m)
+                    d2phi_diff = std::max<RealType>(d2phi_diff,
+                        std::abs(d2psi[m] - d2psin[m]) / std::abs(d2psin[m]));
+                app_log() << "  " << p << " " << dphi_diff << " " << d2phi_diff
+                          << std::endl;
+                app_log() << "    derivatives" << std::endl;
+                for (int m = 0; m < nphi; ++m) { // analytic line first, finite-difference line second
+                    std::string qn = "";
+                    for (int d = 0; d < QMCTraits::DIM; ++d)
+                        qn += int2string(state_info[m].quantum_number[d]) + " ";
+                    app_log() << "    " << qn;
+                    for (int d = 0; d < QMCTraits::DIM; ++d)
+                        app_log() << real(dpsi[m][d]) << " ";
+                    app_log() << std::endl;
+                    app_log() << "    " << qn;
+                    for (int d = 0; d < QMCTraits::DIM; ++d)
+                        app_log() << real(dpsin[m][d]) << " ";
+                    app_log() << std::endl;
+                }
+                app_log() << "    laplacians" << std::endl;
+                PosType x = r / length; // NOTE(review): x is not used below
+                for (int m = 0; m < nphi; ++m) {
+                    std::string qn = "";
+                    for (int d = 0; d < QMCTraits::DIM; ++d)
+                        qn += int2string(state_info[m].quantum_number[d]) + " ";
+                    app_log()
+                        << "    " << qn << real(d2psi[m] / psi[m]) << std::endl;
+                    app_log() << "    " << qn << real(d2psin[m] / psi[m])
+                              << std::endl;
+                }
+                p++;
+            }
+        }
+    }
 
-  //linear
-  int d = 0;
+    app_log() << "end SHOSet::test_derivatives" << std::endl;
+}
 
-  app_log() << "  length = " << length << std::endl;
-  app_log() << "  prefactors" << std::endl;
-  for (int n = 0; n < qn_max[d]; ++n)
-    app_log() << "    " << n << " " << prefactors[n] << std::endl;
+template <typename T>
+void // Debug aid: log overlap matrices accumulated as sums on a uniform grid (1D, then 3D).
+SHOSetT<T>::test_overlap()
+{
+    app_log() << "SHOSet::test_overlap" << std::endl;
 
-  app_log() << "  1d overlap" << std::endl;
+    // linear
+    int d = 0; // the 1D pass uses dimension 0 only
 
-  ValueVector vpsi;
-  vpsi.resize(this->size());
-  ValueVector psi(&vpsi[0], this->size());
+    app_log() << "  length = " << length << std::endl;
+    app_log() << "  prefactors" << std::endl;
+    for (int n = 0; n < qn_max[d]; ++n)
+        app_log() << "    " << n << " " << prefactors[n] << std::endl;
 
-  double xmax = 4.0;
-  double dx   = .1;
-  double dr   = length * dx;
+    app_log() << "  1d overlap" << std::endl;
 
-  int nphi = qn_max[d];
-  Array<double, 2> omat;
-  omat.resize(nphi, nphi);
-  for (int i = 0; i < nphi; ++i)
-    for (int j = 0; j < nphi; ++j)
-      omat(i, j) = 0.0;
+    ValueVector vpsi;
+    vpsi.resize(this->size());
+    ValueVector psi(&vpsi[0], this->size());
 
-  PosType xp = 0.0;
-  for (double x = -xmax; x < xmax; x += dx)
-  {
-    xp[d] = x;
-    evaluate_hermite(xp);
-    evaluate_d0(xp, psi);
+    double xmax = 4.0; // grid covers [-xmax, xmax) in the scaled coordinate
+    double dx = .1; // grid step in the scaled coordinate
+    double dr = length * dx; // the same step multiplied by length
 
+    int nphi = qn_max[d];
+    Array<double, 2> omat;
+    omat.resize(nphi, nphi);
     for (int i = 0; i < nphi; ++i)
-      for (int j = 0; j < nphi; ++j)
-        omat(i, j) += bvalues(d, i) * bvalues(d, j) * dr;
-  }
+        for (int j = 0; j < nphi; ++j)
+            omat(i, j) = 0.0;
 
-  for (int i = 0; i < nphi; ++i)
-  {
-    app_log() << std::endl;
-    for (int j = 0; j < nphi; ++j)
-      app_log() << omat(i, j) << " ";
-  }
-  app_log() << std::endl;
-
-
-  //volumetric
-  app_log() << "  3d overlap" << std::endl;
-  double dV = dr * dr * dr;
-  nphi      = this->size();
-  omat.resize(nphi, nphi);
-  for (int i = 0; i < nphi; ++i)
-    for (int j = 0; j < nphi; ++j)
-      omat(i, j) = 0.0;
-  for (double x = -xmax; x < xmax; x += dx)
-    for (double y = -xmax; y < xmax; y += dx)
-      for (double z = -xmax; z < xmax; z += dx)
-      {
-        xp[0] = x;
-        xp[1] = y;
-        xp[2] = z;
+    PosType xp = 0.0;
+    for (double x = -xmax; x < xmax; x += dx) {
+        xp[d] = x; // xp is passed as the already-scaled position
         evaluate_hermite(xp);
         evaluate_d0(xp, psi);
 
         for (int i = 0; i < nphi; ++i)
-          for (int j = 0; j < nphi; ++j)
-            omat(i, j) += std::abs(psi[i] * psi[j]) * dV;
-      }
-  for (int i = 0; i < nphi; ++i)
-  {
+            for (int j = 0; j < nphi; ++j)
+                omat(i, j) += bvalues(d, i) * bvalues(d, j) * dr; // 1D factors left in bvalues by evaluate_d0
+    }
+
+    for (int i = 0; i < nphi; ++i) {
+        app_log() << std::endl;
+        for (int j = 0; j < nphi; ++j)
+            app_log() << omat(i, j) << " ";
+    }
     app_log() << std::endl;
-    for (int j = 0; j < nphi; ++j)
-      app_log() << omat(i, j) << " ";
-  }
-  app_log() << std::endl;
 
+    // volumetric
+    app_log() << "  3d overlap" << std::endl;
+    double dV = dr * dr * dr; // volume element; hard-codes three dimensions
+    nphi = this->size(); // the 3D pass covers every orbital in the set
+    omat.resize(nphi, nphi);
+    for (int i = 0; i < nphi; ++i)
+        for (int j = 0; j < nphi; ++j)
+            omat(i, j) = 0.0;
+    for (double x = -xmax; x < xmax; x += dx)
+        for (double y = -xmax; y < xmax; y += dx)
+            for (double z = -xmax; z < xmax; z += dx) {
+                xp[0] = x;
+                xp[1] = y;
+                xp[2] = z;
+                evaluate_hermite(xp);
+                evaluate_d0(xp, psi);
+
+                for (int i = 0; i < nphi; ++i)
+                    for (int j = 0; j < nphi; ++j)
+                        omat(i, j) += std::abs(psi[i] * psi[j]) * dV; // accumulates the magnitude of the product
+            }
+    for (int i = 0; i < nphi; ++i) {
+        app_log() << std::endl;
+        for (int j = 0; j < nphi; ++j)
+            app_log() << omat(i, j) << " ";
+    }
+    app_log() << std::endl;
 
-  app_log() << "end SHOSet::test_overlap" << std::endl;
+    app_log() << "end SHOSet::test_overlap" << std::endl;
 }
 
 template <typename T>
-void SHOSetT<T>::evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& grad_grad_grad_logdet)
+void // Stub: only calls not_implemented(); no argument is read or written.
+SHOSetT<T>::evaluateThirdDeriv(const ParticleSetT<T>& P, int first, int last,
+    GGGMatrix& grad_grad_grad_logdet)
 {
-  not_implemented("evaluateThirdDeriv(P,first,last,dddlogdet)");
+    not_implemented("evaluateThirdDeriv(P,first,last,dddlogdet)");
 }
 
 template <typename T>
-void SHOSetT<T>::evaluate_notranspose(const ParticleSet& P,
-                                  int first,
-                                  int last,
-                                  ValueMatrix& logdet,
-                                  GradMatrix& dlogdet,
-                                  HessMatrix& grad_grad_logdet)
+void // Stub (Hessian overload): only calls not_implemented(); outputs are untouched.
+SHOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+    ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet)
 {
-  not_implemented("evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet)");
+    not_implemented(
+        "evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet)");
 }
 
 template <typename T>
-void SHOSetT<T>::evaluate_notranspose(const ParticleSet& P,
-                                  int first,
-                                  int last,
-                                  ValueMatrix& logdet,
-                                  GradMatrix& dlogdet,
-                                  HessMatrix& grad_grad_logdet,
-                                  GGGMatrix& grad_grad_grad_logdet)
+void // Stub (Hessian + third-derivative overload): only calls not_implemented().
+SHOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+    ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet,
+    GGGMatrix& grad_grad_grad_logdet)
 {
-  not_implemented("evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet,dddlogdet)");
+    not_implemented(
+        "evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet,dddlogdet)");
 }
 
 template <typename T>
-void SHOSetT<T>::evaluateGradSource(const ParticleSet& P,
-                                int first,
-                                int last,
-                                const ParticleSet& source,
-                                int iat_src,
-                                GradMatrix& gradphi)
+void // Stub (gradient-only overload): only calls not_implemented(); gradphi is untouched.
+SHOSetT<T>::evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
+    const ParticleSetT<T>& source, int iat_src, GradMatrix& gradphi)
 {
-  not_implemented("evaluateGradSource(P,first,last,source,iat,dphi)");
+    not_implemented("evaluateGradSource(P,first,last,source,iat,dphi)");
 }
 
 template <typename T>
-void SHOSetT<T>::evaluateGradSource(const ParticleSet& P,
-                                int first,
-                                int last,
-                                const ParticleSet& source,
-                                int iat_src,
-                                GradMatrix& grad_phi,
-                                HessMatrix& grad_grad_phi,
-                                GradMatrix& grad_lapl_phi)
+void // Stub (gradient/Hessian/grad-Laplacian overload): only calls not_implemented().
+SHOSetT<T>::evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
+    const ParticleSetT<T>& source, int iat_src, GradMatrix& grad_phi,
+    HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi)
 {
-  not_implemented("evaluateGradSource(P,first,last,source,iat,dphi,ddphi,dd2phi)");
+    not_implemented(
+        "evaluateGradSource(P,first,last,source,iat,dphi,ddphi,dd2phi)");
 }
 
 // Class concrete types from ValueType
diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h
index 6ef256df92..d8e89e9e0e 100644
--- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h
+++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h
@@ -1,158 +1,177 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
+// Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National
+//                    Laboratory
 //
-// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
+// Laboratory
 //////////////////////////////////////////////////////////////////////////////////////
 
-
 #ifndef QMCPLUSPLUS_SHOSETT_H
 #define QMCPLUSPLUS_SHOSETT_H
 
-#include "QMCWaveFunctions/SPOSetT.h"
 #include "QMCWaveFunctions/SPOInfo.h"
+#include "QMCWaveFunctions/SPOSetT.h"
 
 namespace qmcplusplus
 {
 struct SHOState : public SPOInfo
 {
-  TinyVector<int, QMCTraits::DIM> quantum_number;
-
-  SHOState()
-  {
-    quantum_number = -1;
-    energy         = 0.0;
-  }
-
-  ~SHOState() override {}
-
-  inline void set(TinyVector<int, QMCTraits::DIM> qn, RealType e)
-  {
-    quantum_number = qn;
-    energy         = e;
-  }
-
-  inline void sho_report(const std::string& pad = "") const
-  {
-    app_log() << pad << "qn=" << quantum_number << "  e=" << energy << std::endl;
-  }
+    TinyVector<int, QMCTraits::DIM> quantum_number; // QMCTraits::DIM integer components
+
+    SHOState() // starts from the placeholder values assigned below
+    {
+        quantum_number = -1; // NOTE(review): presumably assigns -1 to every component - confirm TinyVector semantics
+        energy = 0.0; // energy is not declared in this struct; presumably inherited from SPOInfo
+    }
+
+    ~SHOState() override
+    {
+    }
+
+    inline void
+    set(TinyVector<int, QMCTraits::DIM> qn, RealType e) // overwrite both stored values
+    {
+        quantum_number = qn;
+        energy = e;
+    }
+
+    inline void
+    sho_report(const std::string& pad = "") const // one log line, prefixed with pad
+    {
+        app_log() << pad << "qn=" << quantum_number << "  e=" << energy
+                  << std::endl;
+    }
 };
 
-template<typename T>
+template <typename T> // T is forwarded to SPOSetT<T>, ParticleSetT<T> and OrbitalSetTraits<T>
 class SHOSetT : public SPOSetT<T>
 {
 public:
-  using GradVector  = typename SPOSetT<T>::GradVector;
-  using ValueVector = typename SPOSetT<T>::ValueVector;
-  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
-  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
-  using value_type  = typename ValueMatrix::value_type;
-  using grad_type   = typename GradMatrix::value_type;
-  using RealType    = typename SPOSetT<T>::RealType;
-  using PosType     = TinyVector<RealType, QMCTraits::DIM>;
-  using HessType    = typename OrbitalSetTraits<T>::HessType;
-  using HessMatrix  = typename OrbitalSetTraits<T>::HessMatrix;
-  using GGGType     = TinyVector<HessType, OHMMS_DIM>;
-  using GGGVector   = Vector<GGGType>;
-  using GGGMatrix   = Matrix<GGGType>;
-
-  RealType length;
-  PosType center;
-
-  int nmax;
-  TinyVector<int, QMCTraits::DIM> qn_max;
-  std::vector<SHOState> state_info;
-  std::vector<RealType> prefactors;
-  Array<RealType, 2> hermite;
-  Array<RealType, 2> bvalues;
-  Array<RealType, 2> d0_values;
-  Array<RealType, 2> d1_values;
-  Array<RealType, 2> d2_values;
-
-  //construction/destruction
-  SHOSetT(const std::string& my_name, RealType l, PosType c, const std::vector<SHOState*>& sho_states);
-
-  ~SHOSetT() override;
-
-  std::string getClassName() const override { return "SHOSet"; }
-
-  void initialize();
-
-  //SPOSet interface methods
-  std::unique_ptr<SPOSetT<T>> makeClone() const override;
-
-  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override;
-
-  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override;
-
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            ValueMatrix& logdet,
-                            GradMatrix& dlogdet,
-                            ValueMatrix& d2logdet) override;
-
-
-  //local functions
-  void evaluate_v(PosType r, ValueVector& psi);
-  void evaluate_vgl(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
-  void evaluate_hermite(const PosType& xpos);
-  void evaluate_d0(const PosType& xpos, ValueVector& psi);
-  void evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi);
-  void evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& d2psi);
-  void report(const std::string& pad = "") const override;
-  void test_derivatives();
-  void test_overlap();
-  void evaluate_check(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
-
-  //empty methods
-  /// number of orbitals is determined only by initial request
-  inline void setOrbitalSetSize(int norbs) override {}
-
-  ///unimplemented functions call this to abort
-  inline void not_implemented(const std::string& method)
-  {
-    APP_ABORT("SHOSet::" + method + " has not been implemented.");
-  }
-
-
-  //methods to be implemented in the future (possibly)
-  void evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& dddlogdet) override;
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            ValueMatrix& logdet,
-                            GradMatrix& dlogdet,
-                            HessMatrix& ddlogdet) override;
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            ValueMatrix& logdet,
-                            GradMatrix& dlogdet,
-                            HessMatrix& ddlogdet,
-                            GGGMatrix& dddlogdet) override;
-  void evaluateGradSource(const ParticleSet& P,
-                          int first,
-                          int last,
-                          const ParticleSet& source,
-                          int iat_src,
-                          GradMatrix& gradphi) override;
-  void evaluateGradSource(const ParticleSet& P,
-                          int first,
-                          int last,
-                          const ParticleSet& source,
-                          int iat_src,
-                          GradMatrix& dphi,
-                          HessMatrix& ddphi,
-                          GradMatrix& dlapl_phi) override;
+    using GradVector = typename SPOSetT<T>::GradVector; // container aliases are taken from the base class
+    using ValueVector = typename SPOSetT<T>::ValueVector;
+    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+    using GradMatrix = typename SPOSetT<T>::GradMatrix;
+    using value_type = typename ValueMatrix::value_type;
+    using grad_type = typename GradMatrix::value_type;
+    using RealType = typename SPOSetT<T>::RealType;
+    using PosType = TinyVector<RealType, QMCTraits::DIM>;
+    using HessType = typename OrbitalSetTraits<T>::HessType;
+    using HessMatrix = typename OrbitalSetTraits<T>::HessMatrix;
+    using GGGType = TinyVector<HessType, OHMMS_DIM>; // one HessType per OHMMS_DIM component
+    using GGGVector = Vector<GGGType>;
+    using GGGMatrix = Matrix<GGGType>;
+
+    RealType length; // NOTE(review): presumably set from constructor argument l - confirm in SHOSetT.cpp
+    PosType center; // NOTE(review): presumably set from constructor argument c - confirm in SHOSetT.cpp
+
+    int nmax;
+    TinyVector<int, QMCTraits::DIM> qn_max;
+    std::vector<SHOState> state_info; // stored by value, while the constructor receives SHOState pointers
+    std::vector<RealType> prefactors;
+    Array<RealType, 2> hermite; // rank-2 work arrays; sizes are not fixed in this header
+    Array<RealType, 2> bvalues;
+    Array<RealType, 2> d0_values;
+    Array<RealType, 2> d1_values;
+    Array<RealType, 2> d2_values;
+
+    // construction/destruction
+    SHOSetT(const std::string& my_name, RealType l, PosType c,
+        const std::vector<SHOState*>& sho_states);
+
+    ~SHOSetT() override;
+
+    std::string
+    getClassName() const override
+    {
+        return "SHOSet"; // the reported name has no trailing T
+    }
+
+    void
+    initialize();
+
+    // SPOSet interface methods
+    std::unique_ptr<SPOSetT<T>>
+    makeClone() const override;
+
+    void
+    evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) override;
+
+    void
+    evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
+        GradVector& dpsi, ValueVector& d2psi) override;
+
+    void
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+        ValueMatrix& logdet, GradMatrix& dlogdet,
+        ValueMatrix& d2logdet) override;
+
+    // local functions (not overrides, except report)
+    void
+    evaluate_v(PosType r, ValueVector& psi);
+    void
+    evaluate_vgl(
+        PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
+    void
+    evaluate_hermite(const PosType& xpos);
+    void
+    evaluate_d0(const PosType& xpos, ValueVector& psi);
+    void
+    evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi);
+    void
+    evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& d2psi);
+    void
+    report(const std::string& pad = "") const override;
+    void
+    test_derivatives();
+    void
+    test_overlap();
+    void
+    evaluate_check(
+        PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
+
+    // empty methods
+    /// number of orbitals is determined only by initial request
+    inline void
+    setOrbitalSetSize(int norbs) override // norbs is ignored
+    {
+    }
+
+    /// unimplemented functions call this to abort
+    inline void
+    not_implemented(const std::string& method) // method is spliced into the abort message
+    {
+        APP_ABORT("SHOSet::" + method + " has not been implemented.");
+    }
+
+    // methods to be implemented in the future (possibly)
+    void
+    evaluateThirdDeriv(const ParticleSetT<T>& P, int first, int last,
+        GGGMatrix& dddlogdet) override;
+    void
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+        ValueMatrix& logdet, GradMatrix& dlogdet,
+        HessMatrix& ddlogdet) override;
+    void
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+        ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& ddlogdet,
+        GGGMatrix& dddlogdet) override;
+    void
+    evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
+        const ParticleSetT<T>& source, int iat_src,
+        GradMatrix& gradphi) override;
+    void
+    evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
+        const ParticleSetT<T>& source, int iat_src, GradMatrix& dphi,
+        HessMatrix& ddphi, GradMatrix& dlapl_phi) override;
 };
 
 } // namespace qmcplusplus
 
-
 #endif
diff --git a/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp
new file mode 100644
index 0000000000..022d6db4a5
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp
@@ -0,0 +1,923 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2020 QMCPACK developers.
+//
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
+// Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
+//                    Illinois at Urbana-Champaign Jaron T. Krogel,
+//                    krogeljt@ornl.gov, Oak Ridge National Laboratory Mark A.
+//                    Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore
+//                    National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "AOBasisBuilderT.h"
+
+#include "MultiFunctorAdapter.h"
+#include "MultiQuinticSpline1D.h"
+#include "Numerics/SoaCartesianTensor.h"
+#include "Numerics/SoaSphericalTensor.h"
+#include "OhmmsData/AttributeSet.h"
+#include "RadialOrbitalSetBuilder.h"
+#include "SoaAtomicBasisSetT.h"
+#include "Utilities/ProgressReportEngine.h"
+
+namespace qmcplusplus
+{
+template <typename COT> // Constructor: stores eName and sets the option defaults listed below
+AOBasisBuilderT<COT>::AOBasisBuilderT(
+    const std::string& eName, Communicate* comm) :
+    MPIObjectBase(comm),
+    addsignforM(false),
+    expandlm(GAUSSIAN_EXPAND),
+    Morder("gaussian"),
+    sph("default"),
+    basisType("Numerical"), // put() aborts if the input leaves this default in place
+    elementType(eName),
+    Normalized("yes")
+{
+    // mmorales: for "Cartesian Gaussian", m is an integer that maps
+    //           the component to Gamess notation, see
+    //           Numerics/CartesianTensor.h
+    nlms_id["n"] = q_n; // attribute name -> slot of the nlms array filled in createAOSet()
+    nlms_id["l"] = q_l;
+    nlms_id["m"] = q_m;
+    nlms_id["s"] = q_s;
+}
+
+template <class COT> // put(): configure the builder from the attributes of XML node cur
+bool
+AOBasisBuilderT<COT>::put(xmlNodePtr cur)
+{
+    ReportEngine PRE("AtomicBasisBuilder", "put(xmlNodePtr)");
+    // Register valid attributes
+    OhmmsAttributeSet aAttrib;
+    aAttrib.add(basisType, "type");
+    aAttrib.add(sph, "angular");
+    aAttrib.add(addsignforM, "expM");
+    aAttrib.add(Morder, "expandYlm");
+    aAttrib.add(Normalized, "normalized");
+    aAttrib.put(cur); // NOTE(review): presumably fills the registered members from cur - confirm in OhmmsAttributeSet
+    PRE.echo(cur);
+    if (sph == "spherical")
+        addsignforM = 1; // include (-1)^m
+
+    if (Morder == "gaussian") // any other Morder value leaves expandlm unchanged
+        expandlm = GAUSSIAN_EXPAND;
+    else if (Morder == "natural")
+        expandlm = NATURAL_EXPAND;
+    else if (Morder == "no")
+        expandlm = DONOT_EXPAND;
+    else if (Morder == "pyscf") {
+        expandlm = MOD_NATURAL_EXPAND;
+        addsignforM = 1;
+        if (sph != "spherical") {
+            myComm->barrier_and_abort(
+                " Error: expandYlm='pyscf' only compatible with "
+                "angular='spherical'. Aborting.\n");
+        }
+    }
+
+    if (sph == "cartesian" || Morder == "Gamess") { // evaluated after the chain above, so it overrides that choice
+        expandlm = CARTESIAN_EXPAND;
+        addsignforM = 0;
+    }
+
+    if (Morder == "Dirac") { // evaluated last, so it overrides the cartesian setting above
+        expandlm = DIRAC_CARTESIAN_EXPAND;
+        addsignforM = 0;
+        if (sph != "cartesian")
+            myComm->barrier_and_abort(
+                " Error: expandYlm='Dirac' only compatible with "
+                "angular='cartesian'. Aborting\n");
+    }
+
+    // Numerical basis is a special case
+    if (basisType == "Numerical") // "Numerical" is also the constructor default
+        myComm->barrier_and_abort(
+            "Purely numerical atomic orbitals are not supported any longer.");
+
+    return true; // no path returns false; rejected input goes through barrier_and_abort
+}
+
+template <class COT> // putH5(): same option handling as put(), with the values read from an HDF5 archive
+bool
+AOBasisBuilderT<COT>::putH5(hdf_archive& hin)
+{
+    ReportEngine PRE("AtomicBasisBuilder", "putH5(hin)");
+    std::string CenterID, basisName; // read on rank 0, then broadcast below
+
+    if (myComm->rank() == 0) { // only rank 0 reads from hin
+        hin.read(sph, "angular");
+        hin.read(CenterID, "elementType");
+        hin.read(Normalized, "normalized");
+        hin.read(Morder, "expandYlm");
+        hin.read(basisName, "name");
+    }
+
+    myComm->bcast(sph);
+    myComm->bcast(Morder);
+    myComm->bcast(CenterID);
+    myComm->bcast(Normalized);
+    myComm->bcast(basisName);
+    myComm->bcast(basisType); // not read from hin above: the current member value is broadcast
+    myComm->bcast(addsignforM); // not read from hin above: the current member value is broadcast
+
+    if (sph == "spherical")
+        addsignforM = 1; // include (-1)^m
+
+    if (Morder == "gaussian") // any other Morder value leaves expandlm unchanged
+        expandlm = GAUSSIAN_EXPAND;
+    else if (Morder == "natural")
+        expandlm = NATURAL_EXPAND;
+    else if (Morder == "no")
+        expandlm = DONOT_EXPAND;
+    else if (Morder == "pyscf") {
+        expandlm = MOD_NATURAL_EXPAND;
+        addsignforM = 1;
+        if (sph != "spherical") {
+            myComm->barrier_and_abort(
+                " Error: expandYlm='pyscf' only compatible with "
+                "angular='spherical'. Aborting.\n");
+        }
+    }
+
+    if (sph == "cartesian" || Morder == "Gamess") { // evaluated after the chain above, so it overrides that choice
+        expandlm = CARTESIAN_EXPAND;
+        addsignforM = 0;
+    }
+
+    if (Morder == "Dirac") { // evaluated last, so it overrides the cartesian setting above
+        expandlm = DIRAC_CARTESIAN_EXPAND;
+        addsignforM = 0;
+        if (sph != "cartesian")
+            myComm->barrier_and_abort(
+                " Error: expandYlm='Dirac' only compatible with "
+                "angular='cartesian'. Aborting\n");
+    }
+    app_log() << R"(<input node="atomicBasisSet" name=")" << basisName // log the effective settings as an XML-like line
+              << "\" expandYlm=\"" << Morder << "\" angular=\"" << sph
+              << "\" elementType=\"" << CenterID << "\" normalized=\""
+              << Normalized << "\" type=\"" << basisType << "\" expM=\""
+              << addsignforM << "\" />" << std::endl;
+
+    return true; // no path returns false; unlike put(), basisType is not checked here
+}
+
+template <typename COT> // createAOSet(): build the basis set of one center from the children of XML node cur
+std::unique_ptr<COT>
+AOBasisBuilderT<COT>::createAOSet(xmlNodePtr cur)
+{
+    ReportEngine PRE("AtomicBasisBuilder", "createAOSet(xmlNodePtr)");
+    app_log() << "  AO BasisSet for " << elementType << "\n";
+
+    if (expandlm != CARTESIAN_EXPAND) {
+        if (addsignforM)
+            app_log() << "   Spherical Harmonics contain (-1)^m factor"
+                      << std::endl;
+        else
+            app_log() << "   Spherical Harmonics  DO NOT contain (-1)^m factor"
+                      << std::endl;
+    }
+
+    switch (expandlm) { // log-only: describes the selected m ordering
+    case (GAUSSIAN_EXPAND):
+        app_log() << "   Angular momentum m expanded according to Gaussian"
+                  << std::endl;
+        break;
+    case (NATURAL_EXPAND):
+        app_log() << "   Angular momentum m expanded as -l, ... ,l"
+                  << std::endl;
+        break;
+    case (MOD_NATURAL_EXPAND):
+        app_log() << "   Angular momentum m expanded as -l, ... ,l, with the "
+                     "exception of L=1 (1,-1,0)"
+                  << std::endl;
+        break;
+    case (CARTESIAN_EXPAND):
+        app_log() << "   Angular momentum expanded in cartesian functions x^lx "
+                     "y^ly z^lz according to Gamess"
+                  << std::endl;
+        break;
+    case (DIRAC_CARTESIAN_EXPAND):
+        app_log() << "   Angular momentum expanded in cartesian functions in "
+                     "DIRAC ordering"
+                  << std::endl;
+        break;
+    default:
+        app_log() << "   Angular momentum m is explicitly given." << std::endl;
+    }
+
+    QuantumNumberType nlms; // declared once: entries persist across basisGroups until overwritten
+    std::string rnl; // likewise keeps the previous id when a group has no rid/id attribute
+    int Lmax(0); // maximum angular momentum of this center
+    int num(0); // the number of localized basis functions of this center
+    // process the basic property: maximum angular momentum, the number of basis
+    // functions to be added
+    std::vector<xmlNodePtr> radGroup;
+    xmlNodePtr cur1 = cur->xmlChildrenNode;
+    xmlNodePtr gptr = 0; // stays null when no "grid" child is found
+    while (cur1 != NULL) {
+        std::string cname1((const char*)(cur1->name));
+        if (cname1 == "basisGroup") {
+            radGroup.push_back(cur1);
+            const int l = std::stoi(getXMLAttributeValue(cur1, "l")); // std::stoi throws if the value is not an integer
+            Lmax = std::max(Lmax, l);
+            // expect that only Rnl is given
+            if (expandlm == CARTESIAN_EXPAND ||
+                expandlm == DIRAC_CARTESIAN_EXPAND)
+                num += (l + 1) * (l + 2) / 2;
+            else if (expandlm) // any other non-zero mode: 2l+1 functions per group
+                num += 2 * l + 1;
+            else // expandlm == 0: one function per group
+                num++;
+        }
+        else if (cname1 == "grid") {
+            gptr = cur1; // a later "grid" child replaces an earlier one
+        }
+        cur1 = cur1->next;
+    }
+
+    // create a new set of atomic orbitals sharing a center with (Lmax, num)
+    // if(addsignforM) the basis function has (-1)^m sqrt(2)Re(Ylm)
+    auto aos = std::make_unique<COT>(Lmax, addsignforM);
+    aos->LM.resize(num);
+    aos->NL.resize(num);
+
+    // Now, add distinct Radial Orbitals and (l,m) channels
+    RadialOrbitalSetBuilder<COT> radFuncBuilder(myComm, *aos);
+    radFuncBuilder.Normalized = (Normalized == "yes");
+    radFuncBuilder.addGrid(
+        gptr, basisType); // assign a radial grid for the new center
+    std::vector<xmlNodePtr>::iterator it(radGroup.begin());
+    std::vector<xmlNodePtr>::iterator it_end(radGroup.end());
+    std::vector<int> all_nl; // one entry is appended per basisGroup
+    while (it != it_end) {
+        cur1 = (*it);
+        xmlAttrPtr att = cur1->properties; // walk every attribute of this basisGroup
+        while (att != NULL) {
+            std::string aname((const char*)(att->name));
+            if (aname == "rid" || aname == "id")
+            // accept id/rid
+            {
+                rnl = (const char*)(att->children->content);
+            }
+            else {
+                std::map<std::string, int>::iterator iit = nlms_id.find(aname);
+                if (iit != nlms_id.end())
+                // valid for n,l,m,s
+                {
+                    nlms[(*iit).second] =
+                        atoi((const char*)(att->children->content));
+                }
+            }
+            att = att->next;
+        }
+        // add Ylm channels
+        app_log() << "   R(n,l,m,s) " << nlms[0] << " " << nlms[1] << " "
+                  << nlms[2] << " " << nlms[3] << std::endl;
+        std::map<std::string, int>::iterator rnl_it = RnlID.find(rnl);
+        if (rnl_it == RnlID.end()) { // this radial id has not been registered yet
+            int nl = aos->RnlID.size(); // index the new radial function would take
+            if (radFuncBuilder.addRadialOrbital(cur1, basisType, nlms))
+                RnlID[rnl] = nl;
+            all_nl.push_back(nl); // outside the if: appended even when addRadialOrbital returned false
+        }
+        else {
+            all_nl.push_back((*rnl_it).second); // reuse the index registered earlier
+        }
+        ++it;
+    }
+
+    if (expandYlm(aos.get(), all_nl, expandlm) != num) // must agree with the count accumulated above
+        myComm->barrier_and_abort(
+            "expandYlm doesn't match the number of basis.");
+    radFuncBuilder.finalize();
+    // aos->Rmax can be set small
+    // aos->setRmax(0);
+    aos->setBasisSetSize(-1); // NOTE(review): -1 presumably lets the set derive its own size - confirm in COT
+    app_log() << "   Maximum Angular Momentum  = " << aos->Ylm.lmax()
+              << std::endl
+              << "   Number of Radial functors = " << aos->RnlID.size()
+              << std::endl
+              << "   Basis size                = " << aos->getBasisSetSize()
+              << "\n\n";
+    return aos;
+}
+
+template <typename COT> // createAOSetH5(): build the basis set of one center from an HDF5 archive
+std::unique_ptr<COT>
+AOBasisBuilderT<COT>::createAOSetH5(hdf_archive& hin)
+{
+    ReportEngine PRE("AOBasisBuilderT:", "createAOSetH5(std::string)");
+    app_log() << "  AO BasisSet for " << elementType << "\n";
+
+    if (expandlm != CARTESIAN_EXPAND) {
+        if (addsignforM)
+            app_log() << "   Spherical Harmonics contain (-1)^m factor"
+                      << std::endl;
+        else
+            app_log() << "   Spherical Harmonics  DO NOT contain (-1)^m factor"
+                      << std::endl;
+    }
+
+    switch (expandlm) { // log-only: describes the selected m ordering
+    case (GAUSSIAN_EXPAND):
+        app_log() << "   Angular momentum m expanded according to Gaussian"
+                  << std::endl;
+        break;
+    case (NATURAL_EXPAND):
+        app_log() << "   Angular momentum m expanded as -l, ... ,l"
+                  << std::endl;
+        break;
+    case (MOD_NATURAL_EXPAND):
+        app_log() << "   Angular momentum m expanded as -l, ... ,l, with the "
+                     "exception of L=1 (1,-1,0)"
+                  << std::endl;
+        break;
+    case (CARTESIAN_EXPAND):
+        app_log() << "   Angular momentum expanded in cartesian functions x^lx "
+                     "y^ly z^lz according to Gamess"
+                  << std::endl;
+        break;
+    case (DIRAC_CARTESIAN_EXPAND):
+        app_log() << "   Angular momentum expanded in cartesian functions in "
+                     "DIRAC ordering"
+                  << std::endl;
+        break;
+    default:
+        app_log() << "   Angular momentum m is explicitly given." << std::endl;
+    }
+
+    QuantumNumberType nlms; // only nlms[0] and nlms[1] are assigned in this function
+    std::string rnl;
+    int Lmax(0); // maximum angular momentum of this center
+    int num(0); // the number of localized basis functions of this center
+
+    int numbasisgroups(0);
+    if (myComm->rank() == 0) { // only rank 0 reads from hin; the value is broadcast below
+        if (!hin.readEntry(numbasisgroups, "NbBasisGroups"))
+            PRE.error(
+                "Could not read NbBasisGroups in H5; Probably Corrupt H5 file",
+                true);
+    }
+    myComm->bcast(numbasisgroups);
+
+    for (int i = 0; i < numbasisgroups; i++) { // first pass: find Lmax and count the basis functions
+        std::string basisGroupID = "basisGroup" + std::to_string(i);
+        int l(0);
+        if (myComm->rank() == 0) {
+            hin.push(basisGroupID); // enter the group only long enough to read l
+            hin.read(l, "l");
+            hin.pop();
+        }
+        myComm->bcast(l);
+
+        Lmax = std::max(Lmax, l);
+        // expect that only Rnl is given
+        if (expandlm == CARTESIAN_EXPAND || expandlm == DIRAC_CARTESIAN_EXPAND)
+            num += (l + 1) * (l + 2) / 2;
+        else if (expandlm) // any other non-zero mode: 2l+1 functions per group
+            num += 2 * l + 1;
+        else // expandlm == 0: one function per group
+            num++;
+    }
+
+    // create a new set of atomic orbitals sharing a center with (Lmax, num)
+    // if(addsignforM) the basis function has (-1)^m sqrt(2)Re(Ylm)
+    auto aos = std::make_unique<COT>(Lmax, addsignforM);
+    aos->LM.resize(num);
+    aos->NL.resize(num);
+
+    // Now, add distinct Radial Orbitals and (l,m) channels
+    RadialOrbitalSetBuilder<COT> radFuncBuilder(myComm, *aos);
+    radFuncBuilder.Normalized = (Normalized == "yes");
+    radFuncBuilder.addGridH5(hin); // assign a radial grid for the new center
+    std::vector<int> all_nl; // one entry is appended per basis group
+    for (int i = 0; i < numbasisgroups; i++) { // second pass: register the radial functions
+        std::string basisGroupID = "basisGroup" + std::to_string(i);
+        if (myComm->rank() == 0) {
+            hin.push(basisGroupID); // stays pushed until the hin.pop() at the end of this iteration
+            hin.read(rnl, "rid");
+            hin.read(nlms[0], "n");
+            hin.read(nlms[1], "l");
+        }
+        myComm->bcast(rnl);
+        myComm->bcast(nlms[0]);
+        myComm->bcast(nlms[1]);
+
+        // add Ylm channels
+        app_log() << "   R(n,l,m,s) " << nlms[0] << " " << nlms[1] << " "
+                  << nlms[2] << " " << nlms[3] << std::endl;
+        std::map<std::string, int>::iterator rnl_it = RnlID.find(rnl);
+        if (rnl_it == RnlID.end()) { // this radial id has not been registered yet
+            int nl = aos->RnlID.size(); // index the new radial function would take
+            if (radFuncBuilder.addRadialOrbitalH5(hin, basisType, nlms))
+                RnlID[rnl] = nl;
+            all_nl.push_back(nl); // outside the if: appended even when addRadialOrbitalH5 returned false
+        }
+        else {
+            all_nl.push_back((*rnl_it).second); // reuse the index registered earlier
+        }
+
+        if (myComm->rank() == 0)
+            hin.pop(); // matches the push at the top of this iteration
+    }
+
+    if (expandYlm(aos.get(), all_nl, expandlm) != num) // must agree with the count accumulated above
+        myComm->barrier_and_abort(
+            "expandYlm doesn't match the number of basis.");
+    radFuncBuilder.finalize();
+    // aos->Rmax can be set small
+    // aos->setRmax(0);
+    aos->setBasisSetSize(-1); // NOTE(review): -1 presumably lets the set derive its own size - confirm in COT
+    app_log() << "   Maximum Angular Momentum  = " << aos->Ylm.lmax()
+              << std::endl
+              << "   Number of Radial functors = " << aos->RnlID.size()
+              << std::endl
+              << "   Basis size                = " << aos->getBasisSetSize()
+              << "\n\n";
+    return aos;
+}
+
+template <typename COT>
+int
+AOBasisBuilderT<COT>::expandYlm(
+    COT* aos, std::vector<int>& all_nl, int expandlm)
+{
+    int num = 0;
+    if (expandlm == GAUSSIAN_EXPAND) {
+        app_log() << "Expanding Ylm according to Gaussian98" << std::endl;
+        for (int nl = 0; nl < aos->RnlID.size(); nl++) {
+            int l = aos->RnlID[nl][q_l];
+            app_log() << "Adding " << 2 * l + 1
+                      << " spherical orbitals for l= " << l << std::endl;
+            switch (l) {
+            case (0):
+                aos->LM[num] = aos->Ylm.index(0, 0);
+                aos->NL[num] = nl;
+                num++;
+                break;
+            case (1): // px(1),py(-1),pz(0)
+                aos->LM[num] = aos->Ylm.index(1, 1);
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = aos->Ylm.index(1, -1);
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = aos->Ylm.index(1, 0);
+                aos->NL[num] = nl;
+                num++;
+                break;
+            default: // 0,1,-1,2,-2,...,l,-l
+                aos->LM[num] = aos->Ylm.index(l, 0);
+                aos->NL[num] = nl;
+                num++;
+                for (int tm = 1; tm <= l; tm++) {
+                    aos->LM[num] = aos->Ylm.index(l, tm);
+                    aos->NL[num] = nl;
+                    num++;
+                    aos->LM[num] = aos->Ylm.index(l, -tm);
+                    aos->NL[num] = nl;
+                    num++;
+                }
+                break;
+            }
+        }
+    }
+    else if (expandlm == MOD_NATURAL_EXPAND) {
+        app_log()
+            << "Expanding Ylm as L=1 as (1,-1,0) and L>1 as -l,-l+1,...,l-1,l"
+            << std::endl;
+        for (int nl = 0; nl < aos->RnlID.size(); nl++) {
+            int l = aos->RnlID[nl][q_l];
+            app_log() << "   Adding " << 2 * l + 1 << " spherical orbitals"
+                      << std::endl;
+            if (l == 1) {
+                // px(1),py(-1),pz(0)
+                aos->LM[num] = aos->Ylm.index(1, 1);
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = aos->Ylm.index(1, -1);
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = aos->Ylm.index(1, 0);
+                aos->NL[num] = nl;
+                num++;
+            }
+            else {
+                for (int tm = -l; tm <= l; tm++, num++) {
+                    aos->LM[num] = aos->Ylm.index(l, tm);
+                    aos->NL[num] = nl;
+                }
+            }
+        }
+    }
+    else if (expandlm == NATURAL_EXPAND) {
+        app_log() << "Expanding Ylm as -l,-l+1,...,l-1,l" << std::endl;
+        for (int nl = 0; nl < aos->RnlID.size(); nl++) {
+            int l = aos->RnlID[nl][q_l];
+            app_log() << "   Adding " << 2 * l + 1 << " spherical orbitals"
+                      << std::endl;
+            for (int tm = -l; tm <= l; tm++, num++) {
+                aos->LM[num] = aos->Ylm.index(l, tm);
+                aos->NL[num] = nl;
+            }
+        }
+    }
+    else if (expandlm == CARTESIAN_EXPAND) {
+        app_log() << "Expanding Ylm (angular function) according to Gamess "
+                     "using cartesian gaussians"
+                  << std::endl;
+        for (int nl = 0; nl < aos->RnlID.size(); nl++) {
+            int l = aos->RnlID[nl][q_l];
+            app_log() << "Adding " << (l + 1) * (l + 2) / 2
+                      << " cartesian gaussian orbitals for l= " << l
+                      << std::endl;
+            int nbefore = 0;
+            for (int i = 0; i < l; i++)
+                nbefore += (i + 1) * (i + 2) / 2;
+            for (int i = 0; i < (l + 1) * (l + 2) / 2; i++) {
+                aos->LM[num] = nbefore + i;
+                aos->NL[num] = nl;
+                num++;
+            }
+        }
+    }
+    else if (expandlm == DIRAC_CARTESIAN_EXPAND) {
+        app_log() << "Expanding Ylm (angular function) according to DIRAC "
+                     "using cartesian gaussians"
+                  << std::endl;
+        for (int nl = 0; nl < aos->RnlID.size(); nl++) {
+            int l = aos->RnlID[nl][q_l];
+            app_log() << "Adding " << (l + 1) * (l + 2) / 2
+                      << " cartesian gaussian orbitals for l= " << l
+                      << std::endl;
+            int nbefore = 0;
+            for (int i = 0; i < l; i++)
+                nbefore += (i + 1) * (i + 2) / 2;
+            switch (l) {
+            case (0):
+                aos->LM[num] = nbefore + 0;
+                aos->NL[num] = nl;
+                num++;
+                break;
+            case (1):
+                aos->LM[num] = nbefore + 0;
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 1;
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 2;
+                aos->NL[num] = nl;
+                num++;
+                break;
+            case (2):
+                aos->LM[num] = nbefore + 0; // xx
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 3; // xy
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 4; // xz
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 1; // yy
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 5; // yz
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 2; // zz
+                aos->NL[num] = nl;
+                num++;
+                break;
+            case (3):
+                aos->LM[num] = nbefore + 0; // xxx
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 3; // xxy
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 4; // xxz
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 5; // xyy
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 9; // xyz
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 7; // xzz
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 1; // yyy
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 6; // yyz
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 8; // yzz
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 2; // zzz
+                aos->NL[num] = nl;
+                num++;
+                break;
+            case (4):
+                aos->LM[num] = nbefore + 0; // 400
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 3; // 310
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 4; // 301
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 9; // 220
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 12; // 211
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 10; // 202
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 5; // 130
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 13; // 121
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 14; // 112
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 7; // 103
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 1; // 040
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 6; // 031
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 11; // 022
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 8; // 013
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 2; // 004
+                aos->NL[num] = nl;
+                num++;
+                break;
+            case (5):
+                aos->LM[num] = nbefore + 0; // 500
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 3; // 410
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 4; // 401
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 9; // 320
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 15; // 311
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 10; // 302
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 11; // 230
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 18; // 221
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 19; // 212
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 13; // 203
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 5; // 140
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 16; // 131
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 20; // 122
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 17; // 113
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 7; // 104
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 1; // 050
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 6; // 041
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 12; // 032
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 14; // 023
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 8; // 014
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 2; // 005
+                aos->NL[num] = nl;
+                num++;
+                break;
+            case (6):
+                aos->LM[num] = nbefore + 0; // 600
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 3; // 510
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 4; // 501
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 9; // 420
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 15; // 411
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 10; // 402
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 18; // 330
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 21; // 321
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 22; // 312
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 19; // 303
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 11; // 240
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 23; // 231
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 27; // 222
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 25; // 213
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 13; // 204
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 5; // 150
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 16; // 141
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 24; // 132
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 26; // 123
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 17; // 114
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 7; // 105
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 1; // 060
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 6; // 051
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 12; // 042
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 20; // 033
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 14; // 024
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 8; // 015
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 2; // 006
+                aos->NL[num] = nl;
+                num++;
+                break;
+            default:
+                myComm->barrier_and_abort(
+                    "Cartesian Tensor only defined up to Lmax=6. Aborting\n");
+                break;
+            }
+        }
+    }
+    else {
+        for (int ind = 0; ind < all_nl.size(); ind++) {
+            int nl = all_nl[ind];
+            int l = aos->RnlID[nl][q_l];
+            int m = aos->RnlID[nl][q_m];
+            // assign the index for real Spherical Harmonic with (l,m)
+            aos->LM[num] = aos->Ylm.index(l, m);
+            // assign the index for radial orbital with (n,l)
+            aos->NL[num] = nl;
+            // increment number of basis functions
+            num++;
+        }
+    }
+    return num;
+}
+// Explicit instantiations: radial functor {MultiQuinticSpline1D, GaussianCombo, SlaterCombo} x tensor {Cartesian, Spherical} x value {real, complex} x precision {double, float}.
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<double>,
+    SoaCartesianTensor<double>, double>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<double>,
+    SoaCartesianTensor<double>, std::complex<double>>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<float>,
+    SoaCartesianTensor<float>, float>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<float>,
+    SoaCartesianTensor<float>, std::complex<float>>>;
+
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<double>,
+    SoaSphericalTensor<double>, double>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<double>,
+    SoaSphericalTensor<double>, std::complex<double>>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<float>,
+    SoaSphericalTensor<float>, float>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<float>,
+    SoaSphericalTensor<float>, std::complex<float>>>;
+
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaCartesianTensor<double>, double>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaCartesianTensor<double>, std::complex<double>>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaCartesianTensor<float>, float>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaCartesianTensor<float>, std::complex<float>>>;
+
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaSphericalTensor<double>, double>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaSphericalTensor<double>, std::complex<double>>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaSphericalTensor<float>, float>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaSphericalTensor<float>, std::complex<float>>>;
+
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaCartesianTensor<double>, double>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaCartesianTensor<double>, std::complex<double>>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<
+    MultiFunctorAdapter<SlaterCombo<float>>, SoaCartesianTensor<float>, float>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
+        SoaCartesianTensor<float>, std::complex<float>>>;
+
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaSphericalTensor<double>, double>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaSphericalTensor<double>, std::complex<double>>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<
+    MultiFunctorAdapter<SlaterCombo<float>>, SoaSphericalTensor<float>, float>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
+        SoaSphericalTensor<float>, std::complex<float>>>;
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.h b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.h
new file mode 100644
index 0000000000..144b2b4dc9
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.h
@@ -0,0 +1,75 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2020 QMCPACK developers.
+//
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef QMCPLUSPLUS_ATOMICORBITALBUILDERT_H
+#define QMCPLUSPLUS_ATOMICORBITALBUILDERT_H
+
+
+#include "Message/MPIObjectBase.h"
+#include "hdf/hdf_archive.h"
+#include "QMCWaveFunctions/SPOSet.h"
+
+namespace qmcplusplus
+{
+/** atomic basisset builder
+   * @tparam COT, CenteredOrbitalType = SoaAtomicBasisSet<RF,SH>
+   *
+   * Reimplement AtomiSPOSetBuilder.h
+   */
+template<typename COT>
+class AOBasisBuilderT : public MPIObjectBase
+{
+public:
+  enum // angular-expansion modes; expandYlm() takes one of these as its expandlm argument
+  {
+    DONOT_EXPAND    = 0,
+    GAUSSIAN_EXPAND = 1,
+    NATURAL_EXPAND,
+    CARTESIAN_EXPAND,
+    MOD_NATURAL_EXPAND,
+    DIRAC_CARTESIAN_EXPAND
+  };
+
+private:
+  bool addsignforM;
+  int expandlm;
+  std::string Morder;
+  std::string sph;
+  std::string basisType;
+  std::string elementType;
+  std::string Normalized;
+
+  ///map for the radial orbitals
+  std::map<std::string, int> RnlID;
+
+  ///map for (n,l,m,s) to its quantum number index
+  std::map<std::string, int> nlms_id;
+
+public:
+  AOBasisBuilderT(const std::string& eName, Communicate* comm);
+
+  bool put(xmlNodePtr cur);
+  bool putH5(hdf_archive& hin);
+  /// placeholder that builds nothing: always returns a null pointer
+  SPOSet* createSPOSetFromXML(xmlNodePtr cur) { return nullptr; }
+
+  std::unique_ptr<COT> createAOSet(xmlNodePtr cur);
+  std::unique_ptr<COT> createAOSetH5(hdf_archive& hin);
+  /// fills aos->LM and aos->NL (see AOBasisBuilderT.cpp); NOTE: the expandlm parameter shadows the member of the same name
+  int expandYlm(COT* aos, std::vector<int>& all_nl, int expandlm = DONOT_EXPAND);
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp
index 1178491533..d41624e9db 100644
--- a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp
+++ b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp
@@ -81,8 +81,8 @@ CuspCorrectionConstructionT<T>::removeSTypeOrbitals(
 // rc
 template <typename T>
 void
-CuspCorrectionConstructionT<T>::computeRadialPhiBar(ParticleSet* targetP,
-    ParticleSet* sourceP, int curOrb_, int curCenter_, SPOSetT<T>* Phi,
+CuspCorrectionConstructionT<T>::computeRadialPhiBar(ParticleSetT<T>* targetP,
+    ParticleSetT<T>* sourceP, int curOrb_, int curCenter_, SPOSetT<T>* Phi,
     Vector<RealType>& xgrid, Vector<RealType>& rad_orb,
     const CuspCorrectionParametersT<T>& data)
 {
@@ -363,9 +363,9 @@ CuspCorrectionConstructionT<T>::minimizeForRc(CuspCorrectionT<T>& cusp,
 template <typename T>
 void
 CuspCorrectionConstructionT<T>::applyCuspCorrection(
-    const Matrix<CuspCorrectionParametersT<T>>& info, ParticleSet& targetPtcl,
-    ParticleSet& sourcePtcl, LCAOrbitalSetT<T>& lcao,
-    SoaCuspCorrectionT<T>& cusp, const std::string& id)
+    const Matrix<CuspCorrectionParametersT<T>>& info,
+    ParticleSetT<T>& targetPtcl, ParticleSetT<T>& sourcePtcl,
+    LCAOrbitalSetT<T>& lcao, SoaCuspCorrectionT<T>& cusp, const std::string& id)
 {
     const int num_centers = info.rows();
     const int orbital_set_size = info.cols();
@@ -459,9 +459,9 @@ CuspCorrectionConstructionT<T>::applyCuspCorrection(
 template <typename T>
 void
 CuspCorrectionConstructionT<T>::generateCuspInfo(
-    Matrix<CuspCorrectionParametersT<T>>& info, const ParticleSet& targetPtcl,
-    const ParticleSet& sourcePtcl, const LCAOrbitalSetT<T>& lcao,
-    const std::string& id, Communicate& Comm)
+    Matrix<CuspCorrectionParametersT<T>>& info,
+    const ParticleSetT<T>& targetPtcl, const ParticleSetT<T>& sourcePtcl,
+    const LCAOrbitalSetT<T>& lcao, const std::string& id, Communicate& Comm)
 {
     const int num_centers = info.rows();
     const int orbital_set_size = info.cols();
@@ -507,8 +507,8 @@ CuspCorrectionConstructionT<T>::generateCuspInfo(
 #pragma omp parallel for schedule(dynamic) collapse(2)
     for (int center_idx = 0; center_idx < num_centers; center_idx++) {
         for (int mo_idx = start_mo; mo_idx < end_mo; mo_idx++) {
-            ParticleSet localTargetPtcl(targetPtcl);
-            ParticleSet localSourcePtcl(sourcePtcl);
+            ParticleSetT<T> localTargetPtcl(targetPtcl);
+            ParticleSetT<T> localSourcePtcl(sourcePtcl);
 
             LCAOrbitalSetT<T> local_phi("local_phi",
                 std::unique_ptr<typename LCAOrbitalSetT<T>::basis_type>(
@@ -684,7 +684,7 @@ CuspCorrectionConstructionT<T>::readCuspInfo(const std::string& cuspInfoFile,
                 if (cname == "orbital") {
                     int orb = -1;
                     OhmmsAttributeSet orbAttrib;
-                    QMCTraits::RealType a1(0.0), a2, a3, a4, a5, a6, a7, a8, a9;
+                    RealType a1(0.0), a2, a3, a4, a5, a6, a7, a8, a9;
                     orbAttrib.add(orb, "num");
                     orbAttrib.add(a1, "redo");
                     orbAttrib.add(a2, "C");
diff --git a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h
index 300443c4a0..497898bfe8 100644
--- a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h
+++ b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h
@@ -22,7 +22,8 @@ class Communicate;
 namespace qmcplusplus
 {
 
-class ParticleSet;
+template <typename T>
+class ParticleSetT;
 
 template <typename T>
 class OneMolecularOrbitalT
@@ -64,7 +65,7 @@ class OneMolecularOrbitalT
     }
 
     OneMolecularOrbitalT(
-        ParticleSet* targetP, ParticleSet* sourceP, SPOSetPtr Phi) :
+        ParticleSetT<T>* targetP, ParticleSetT<T>* sourceP, SPOSetPtr Phi) :
         targetPtcl(targetP),
         sourcePtcl(sourceP),
         curOrb(0),
@@ -91,9 +92,9 @@ class OneMolecularOrbitalT
     ValueVector lap1;
 
     /// target ParticleSet
-    ParticleSet* targetPtcl;
+    ParticleSetT<T>* targetPtcl;
     /// source ParticleSet
-    ParticleSet* sourcePtcl;
+    ParticleSetT<T>* sourcePtcl;
 
     /// Index of orbital
     int curOrb;
@@ -134,8 +135,8 @@ class CuspCorrectionConstructionT
 
     /// Compute the radial part of the corrected wavefunction
     static void
-    computeRadialPhiBar(ParticleSet* targetP, ParticleSet* sourceP, int curOrb_,
-        int curCenter_, SPOSetT<T>* Phi, Vector<RealType>& xgrid,
+    computeRadialPhiBar(ParticleSetT<T>* targetP, ParticleSetT<T>* sourceP,
+        int curOrb_, int curCenter_, SPOSetT<T>* Phi, Vector<RealType>& xgrid,
         Vector<RealType>& rad_orb, const CuspCorrectionParametersT<T>& data);
 
     /** Ideal local energy at one point
@@ -274,13 +275,13 @@ class CuspCorrectionConstructionT
     // Modifies orbital set lcwc
     static void
     applyCuspCorrection(const Matrix<CuspCorrectionParametersT<T>>& info,
-        ParticleSet& targetPtcl, ParticleSet& sourcePtcl,
+        ParticleSetT<T>& targetPtcl, ParticleSetT<T>& sourcePtcl,
         LCAOrbitalSetT<T>& lcao, SoaCuspCorrectionT<T>& cusp,
         const std::string& id);
 
     static void
     generateCuspInfo(Matrix<CuspCorrectionParametersT<T>>& info,
-        const ParticleSet& targetPtcl, const ParticleSet& sourcePtcl,
+        const ParticleSetT<T>& targetPtcl, const ParticleSetT<T>& sourcePtcl,
         const LCAOrbitalSetT<T>& lcao, const std::string& id,
         Communicate& Comm);
 
diff --git a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp
index 4e5a3fd2b0..6b71c88bd3 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp
+++ b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp
@@ -1,205 +1,208 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2020 QMCPACK developers.
 //
-// File developed by: Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories
+// File developed by: Cody A. Melton, cmelton@sandia.gov, Sandia National
+// Laboratories
 //
-// File created by: Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories
+// File created by: Cody A. Melton, cmelton@sandia.gov, Sandia National
+// Laboratories
 //////////////////////////////////////////////////////////////////////////////////////
 
-#include "LCAOSpinorBuilder.h"
-#include "QMCWaveFunctions/SpinorSet.h"
+#include "LCAOSpinorBuilderT.h"
+
+#include "Message/CommOperators.h"
 #include "OhmmsData/AttributeSet.h"
+#include "QMCWaveFunctions/SpinorSetT.h"
 #include "Utilities/ProgressReportEngine.h"
 #include "hdf/hdf_archive.h"
-#include "Message/CommOperators.h"
 
 namespace qmcplusplus
 {
-template<class T>
-LCAOSpinorBuilderT<T>::LCAOSpinorBuilderT(ParticleSet& els, ParticleSet& ions, Communicate* comm, xmlNodePtr cur)
-    : LCAOrbitalBuilder(els, ions, comm, cur)
+template <class T> // Constructor: forwards to LCAOrbitalBuilderT<T>, then aborts unless h5_path is non-empty
+LCAOSpinorBuilderT<T>::LCAOSpinorBuilderT(ParticleSetT<T>& els,
+    ParticleSetT<T>& ions, Communicate* comm, xmlNodePtr cur) :
+    LCAOrbitalBuilderT<T>(els, ions, comm, cur) // the base receives every argument unchanged
 {
-  ClassName = "LCAOSpinorBuilder";
+    this->ClassName = "LCAOSpinorBuilder"; // this-> is needed: members inherited via the dependent base are not found by unqualified lookup
 
-  if (h5_path == "")
-    myComm->barrier_and_abort("LCAOSpinorBuilder only works with href");
+    if (this->h5_path == "") // an empty HDF5 path is fatal for this builder
+        this->myComm->barrier_and_abort(
+            "LCAOSpinorBuilder only works with href");
 }
 
-template<class T>
-std::unique_ptr<SPOSetT<T>> LCAOSpinorBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
+template <class T>
+std::unique_ptr<SPOSetT<T>>
+LCAOSpinorBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur) // builds "<name>_up"/"<name>_dn" LCAO sets and wraps them in a SpinorSetT<T>
 {
-  ReportEngine PRE(ClassName, "createSPO(xmlNodePtr)");
-  std::string spo_name(""), optimize("no");
-  std::string basisset_name("LCAOBSet");
-  OhmmsAttributeSet spoAttrib;
-  spoAttrib.add(spo_name, "name");
-  spoAttrib.add(optimize, "optimize");
-  spoAttrib.add(basisset_name, "basisset");
-  spoAttrib.put(cur);
-
-  BasisSet_t* myBasisSet = nullptr;
-  if (basisset_map_.find(basisset_name) == basisset_map_.end())
-    myComm->barrier_and_abort("basisset \"" + basisset_name + "\" cannot be found\n");
-  else
-    myBasisSet = basisset_map_[basisset_name].get();
-
-  if (optimize == "yes")
-    app_log() << "  SPOSet " << spo_name << " is optimizable\n";
-
-  std::unique_ptr<LCAOrbitalSet> upspo =
-      std::make_unique<LCAOrbitalSet>(spo_name + "_up", std::unique_ptr<BasisSet_t>(myBasisSet->makeClone()));
-  std::unique_ptr<LCAOrbitalSet> dnspo =
-      std::make_unique<LCAOrbitalSet>(spo_name + "_dn", std::unique_ptr<BasisSet_t>(myBasisSet->makeClone()));
-
-  loadMO(*upspo, *dnspo, cur);
-
-  //create spinor and register up/dn
-  auto spinor_set = std::make_unique<SpinorSet>(spo_name);
-  spinor_set->set_spos(std::move(upspo), std::move(dnspo));
-  return spinor_set;
+    ReportEngine PRE(this->ClassName, "createSPO(xmlNodePtr)");
+    std::string spo_name(""), optimize("no");
+    std::string basisset_name("LCAOBSet");
+    OhmmsAttributeSet spoAttrib; // presumably put(cur) fills the registered variables from the XML attributes -- confirm
+    spoAttrib.add(spo_name, "name");
+    spoAttrib.add(optimize, "optimize");
+    spoAttrib.add(basisset_name, "basisset");
+    spoAttrib.put(cur);
+    // Look up the requested basis set by name; abort when it is not in basisset_map_.
+    BasisSet_t* myBasisSet = nullptr;
+    if (this->basisset_map_.find(basisset_name) == this->basisset_map_.end())
+        this->myComm->barrier_and_abort(
+            "basisset \"" + basisset_name + "\" cannot be found\n");
+    else
+        myBasisSet = this->basisset_map_[basisset_name].get();
+
+    if (optimize == "yes") // within this function the flag only triggers this log line
+        app_log() << "  SPOSet " << spo_name << " is optimizable\n";
+    // Each spin channel gets its own LCAO set built on a separate clone of the basis.
+    auto upspo = std::make_unique<LCAOrbitalSetT<T>>(
+        spo_name + "_up", std::unique_ptr<BasisSet_t>(myBasisSet->makeClone()));
+    auto dnspo = std::make_unique<LCAOrbitalSetT<T>>(
+        spo_name + "_dn", std::unique_ptr<BasisSet_t>(myBasisSet->makeClone()));
+    // Load the coefficients of both channels; the bool returned by loadMO is not checked here.
+    loadMO(*upspo, *dnspo, cur);
+
+    // create spinor and register up/dn
+    auto spinor_set = std::make_unique<SpinorSetT<T>>(spo_name);
+    spinor_set->set_spos(std::move(upspo), std::move(dnspo));
+    return spinor_set;
 }
 
-template<class T>
-bool LCAOSpinorBuilderT<T>::loadMO(LCAOrbitalSetT<T>& up, LCAOrbitalSetT<T>& dn, xmlNodePtr cur)
+template <class T>
+bool
+LCAOSpinorBuilderT<T>::loadMO( // sizes both channels, rejects PBC files, then delegates the coefficient read to putFromH5
+    LCAOrbitalSetT<T>& up, LCAOrbitalSetT<T>& dn, xmlNodePtr cur)
 {
-  bool PBC = false;
-  int norb = up.getBasisSetSize();
-  std::string debugc("no");
-  OhmmsAttributeSet aAttrib;
-  aAttrib.add(norb, "size");
-  aAttrib.add(debugc, "debug");
-  aAttrib.put(cur);
-
-  up.setOrbitalSetSize(norb);
-  dn.setOrbitalSetSize(norb);
-
-  xmlNodePtr occ_ptr = nullptr;
-  cur                = cur->xmlChildrenNode;
-  while (cur != nullptr)
-  {
-    std::string cname((const char*)(cur->name));
-    if (cname == "occupation")
-    {
-      occ_ptr = cur;
+    bool PBC = false;
+    int norb = up.getBasisSetSize(); // starting value; presumably overridden by the "size" attribute when present -- confirm
+    std::string debugc("no");
+    OhmmsAttributeSet aAttrib;
+    aAttrib.add(norb, "size");
+    aAttrib.add(debugc, "debug");
+    aAttrib.put(cur);
+    // Both spin channels are given the same orbital-set size.
+    up.setOrbitalSetSize(norb);
+    dn.setOrbitalSetSize(norb);
+    // Scan the children for an "occupation" node; there is no break, so the last match wins.
+    xmlNodePtr occ_ptr = nullptr;
+    cur = cur->xmlChildrenNode;
+    while (cur != nullptr) {
+        std::string cname((const char*)(cur->name));
+        if (cname == "occupation") {
+            occ_ptr = cur;
+        }
+        cur = cur->next;
     }
-    cur = cur->next;
-  }
-
-  hdf_archive hin(myComm);
-  if (myComm->rank() == 0)
-  {
-    if (!hin.open(h5_path, H5F_ACC_RDONLY))
-      myComm->barrier_and_abort("LCAOSpinorBuilder::loadMO missing or incorrect path to H5 file.");
-    hin.push("PBC");
-    PBC = false;
-    hin.read(PBC, "PBC");
-    hin.close();
-  }
-  myComm->bcast(PBC);
-  if (PBC)
-    myComm->barrier_and_abort("LCAOSpinorBuilder::loadMO lcao spinors not implemented in PBC");
-
-  bool success = putFromH5(up, dn, occ_ptr);
-
-
-  if (debugc == "yes")
-  {
-    app_log() << "UP:  Single-particle orbital coefficients dims=" << up.C->rows() << " x " << up.C->cols()
-              << std::endl;
-    app_log() << *up.C << std::endl;
-    app_log() << "DN:  Single-particle orbital coefficients dims=" << dn.C->rows() << " x " << dn.C->cols()
-              << std::endl;
-    app_log() << *dn.C << std::endl;
-  }
-  return success;
-}
 
-template<class T>
-bool LCAOSpinorBuilderT<T>::putFromH5(LCAOrbitalSetT<T>& up, LCAOrbitalSetT<T>& dn, xmlNodePtr occ_ptr)
-{
-  if (up.getBasisSetSize() == 0 || dn.getBasisSetSize() == 0)
-  {
-    myComm->barrier_and_abort("LCASpinorBuilder::loadMO  detected ZERO BasisSetSize");
-    return false;
-  }
-
-  bool success = true;
-  hdf_archive hin(myComm);
-  if (myComm->rank() == 0)
-  {
-    istd::string setname = "/Super_Twist/eigenset_0";
-    readRealMatrixFromH5(hin, setname, upReal);
-    setname += "_imag";
-    readRealMatrixFromH5(hin, setname, upImag);
-
-    af(!hin.open(h5_path, H5F_ACC_RDONLY))
-        myComm->barrier_and_abort("LCAOSpinorBuilder::putFromH5 missing or incorrect path to H5 file");
-
-    Matrix<RealType> upReal;
-    Matrix<RealType> upImag;
-    ssert(upReal.rows() == upImag.rows());
-    assert(upReal.cols() == upImag.cols());
-
-    Matrix<ValueType> upTemp(upReal.rows(), upReal.cols());
-    for (int i = 0; i < upTemp.rows(); i++)
-    {
-      for (int j = 0; j < upTemp.cols(); j++)
-      {
-        upTemp[i][j] = ValueType(upReal[i][j], upImag[i][j]);
-      }
+    hdf_archive hin(this->myComm);
+    if (this->myComm->rank() == 0) { // only rank 0 touches the file; the flag is broadcast below
+        if (!hin.open(this->h5_path, H5F_ACC_RDONLY))
+            this->myComm->barrier_and_abort("LCAOSpinorBuilder::loadMO missing "
+                                            "or incorrect path to H5 file.");
+        hin.push("PBC");
+        PBC = false;
+        hin.read(PBC, "PBC");
+        hin.close();
     }
-
-    Matrix<RealType> dnReal;
-    Matrix<RealType> dnImag;
-    setname = "/Super_Twist/eigenset_1";
-    readRealMatrixFromH5(hin, setname, dnReal);
-    setname += "_imag";
-    readRealMatrixFromH5(hin, setname, dnImag);
-
-    assert(dnReal.rows() == dnImag.rows());
-    assert(dnReal.cols() == dnImag.cols());
-
-    Matrix<ValueType> dnTemp(dnReal.rows(), dnReal.cols());
-    for (int i = 0; i < dnTemp.rows(); i++)
-    {
-      for (int j = 0; j < dnTemp.cols(); j++)
-      {
-        dnTemp[i][j] = ValueType(dnReal[i][j], dnImag[i][j]);
-      }
+    this->myComm->bcast(PBC);
+    if (PBC)
+        this->myComm->barrier_and_abort(
+            "LCAOSpinorBuilder::loadMO lcao spinors not implemented in PBC");
+    // The coefficient matrices themselves are read by putFromH5.
+    bool success = putFromH5(up, dn, occ_ptr);
+
+    if (debugc == "yes") { // optional dump of both coefficient matrices to the application log
+        app_log() << "UP:  Single-particle orbital coefficients dims="
+                  << up.C->rows() << " x " << up.C->cols() << std::endl;
+        app_log() << *up.C << std::endl;
+        app_log() << "DN:  Single-particle orbital coefficients dims="
+                  << dn.C->rows() << " x " << dn.C->cols() << std::endl;
+        app_log() << *dn.C << std::endl;
     }
+    return success;
+}
 
-    assert(upReal.rows() == dnReal.rows());
-    assert(upReal.cols() == dnReal.cols());
-
-    Occ.resize(upReal.rows());
-    success = putOccupation(up, occ_ptr);
-
-    int norbs = up.getOrbitalSetSize();
-
-    int n = 0, i = 0;
-    while (i < norbs)
-    {
-      if (Occ[n] > 0.0)
-      {
-        std::copy(upTemp[n], upTemp[n + 1], (*up.C)[i]);
-        std::copy(dnTemp[n], dnTemp[n + 1], (*dn.C)[i]);
-        i++;
-      }
-      n++;
+/** Load the up/dn spinor coefficients from the HDF5 file at h5_path.
+ * Rank 0 opens the file, reads the real and "_imag" parts of
+ * /Super_Twist/eigenset_0 (up) and eigenset_1 (dn), and copies the rows with
+ * Occ > 0 into up.C and dn.C, which are then broadcast when HAVE_MPI is set.
+ * @return false on an empty basis set, else putOccupation's result (rank 0) or true. */
+template <class T>
+bool
+LCAOSpinorBuilderT<T>::putFromH5(
+    LCAOrbitalSetT<T>& up, LCAOrbitalSetT<T>& dn, xmlNodePtr occ_ptr)
+{
+    if (up.getBasisSetSize() == 0 || dn.getBasisSetSize() == 0) {
+        this->myComm->barrier_and_abort("LCAOSpinorBuilder::putFromH5 detected ZERO BasisSetSize");
+        return false;
     }
 
-    hin.close();
-  }
+    bool success = true;
+    hdf_archive hin(this->myComm);
+    if (this->myComm->rank() == 0) {
+        // Open the archive first: every read below needs an open file.
+        if (!hin.open(this->h5_path, H5F_ACC_RDONLY))
+            this->myComm->barrier_and_abort(
+                "LCAOSpinorBuilder::putFromH5 missing or incorrect path to H5 file");
+
+        Matrix<RealType> upReal;
+        Matrix<RealType> upImag;
+        std::string setname = "/Super_Twist/eigenset_0";
+        this->readRealMatrixFromH5(hin, setname, upReal);
+        setname += "_imag";
+        this->readRealMatrixFromH5(hin, setname, upImag);
+
+        assert(upReal.rows() == upImag.rows());
+        assert(upReal.cols() == upImag.cols());
+
+        Matrix<ValueType> upTemp(upReal.rows(), upReal.cols());
+        for (int i = 0; i < upTemp.rows(); i++)
+            for (int j = 0; j < upTemp.cols(); j++)
+                upTemp[i][j] = ValueType(upReal[i][j], upImag[i][j]);
+
+        Matrix<RealType> dnReal;
+        Matrix<RealType> dnImag;
+        setname = "/Super_Twist/eigenset_1";
+        this->readRealMatrixFromH5(hin, setname, dnReal);
+        setname += "_imag";
+        this->readRealMatrixFromH5(hin, setname, dnImag);
+
+        assert(dnReal.rows() == dnImag.rows());
+        assert(dnReal.cols() == dnImag.cols());
+
+        Matrix<ValueType> dnTemp(dnReal.rows(), dnReal.cols());
+        for (int i = 0; i < dnTemp.rows(); i++)
+            for (int j = 0; j < dnTemp.cols(); j++)
+                dnTemp[i][j] = ValueType(dnReal[i][j], dnImag[i][j]);
+
+        assert(upReal.rows() == dnReal.rows());
+        assert(upReal.cols() == dnReal.cols());
+
+        this->Occ.resize(upReal.rows());
+        success = this->putOccupation(up, occ_ptr);
+
+        // Pack the rows whose occupation is positive into consecutive rows of C.
+        int norbs = up.getOrbitalSetSize();
+        int n = 0, i = 0;
+        while (i < norbs) {
+            if (this->Occ[n] > 0.0) {
+                std::copy(upTemp[n], upTemp[n + 1], (*up.C)[i]);
+                std::copy(dnTemp[n], dnTemp[n + 1], (*dn.C)[i]);
+                i++;
+            }
+            n++;
+        }
+
+        hin.close();
+    }
 
 #ifdef HAVE_MPI
-  myComm->comm.broadcast_n(up.C->data(), up.C->size());
-  myComm->comm.broadcast_n(dn.C->data(), dn.C->size());
+    this->myComm->comm.broadcast_n(up.C->data(), up.C->size());
+    this->myComm->comm.broadcast_n(dn.C->data(), dn.C->size());
 #endif
 
-  return success;
+    return success;
 }
 
 template class LCAOSpinorBuilderT<std::complex<double>>;
diff --git a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h
index 62b40b43b1..e23014f44d 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h
+++ b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h
@@ -1,64 +1,74 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2020 QMCPACK developers.
 //
-// File developed by: Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories
+// File developed by: Cody A. Melton, cmelton@sandia.gov, Sandia National
+// Laboratories
 //
-// File created by: Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories
+// File created by: Cody A. Melton, cmelton@sandia.gov, Sandia National
+// Laboratories
 //////////////////////////////////////////////////////////////////////////////////////
 
-
-#ifndef QMCPLUSPLUS_SOA_LCAO_SPINOR_BUILDER_H
-#define QMCPLUSPLUS_SOA_LCAO_SPINOR_BUILDER_H
+#ifndef QMCPLUSPLUS_SOA_LCAO_SPINOR_BUILDERT_H
+#define QMCPLUSPLUS_SOA_LCAO_SPINOR_BUILDERT_H
 
 #include "QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h"
 
 namespace qmcplusplus
 {
 /** @file LCAOSpinorBuidler.h
-   *
-   * Derives from LCAOrbitalBuilder.h. Overrides createSPOSetFromXML method to read up and
-   * down channel from HDF5 and construct SpinorSet
-   * 
-   */
-template<class T>
+ *
+ * Derives from LCAOrbitalBuilder.h. Overrides createSPOSetFromXML method to
+ * read up and down channel from HDF5 and construct SpinorSet
+ *
+ */
+template <class T>
 class LCAOSpinorBuilderT : public LCAOrbitalBuilderT<T>
 {
 public:
-  /** constructor
+    using BasisSet_t = typename LCAOrbitalBuilderT<T>::BasisSet_t;
+    using RealType = typename LCAOrbitalBuilderT<T>::RealType;
+    using ValueType = typename LCAOrbitalBuilderT<T>::ValueType;
+
+    /** constructor
      * \param els reference to the electrons
      * \param ions reference to the ions
      *
      * Derives from LCAOrbitalBuilder, but will require an h5_path to be set
      */
-  LCAOSpinorBuilderT(ParticleSet& els, ParticleSet& ions, Communicate* comm, xmlNodePtr cur);
+    LCAOSpinorBuilderT(ParticleSetT<T>& els, ParticleSetT<T>& ions,
+        Communicate* comm, xmlNodePtr cur);
 
-  /** creates and returns SpinorSet
-   *
-   * Creates an up and down LCAOrbitalSet
-   * calls LCAOSpinorBuilder::loadMO to build up and down from the H5 file
-   * registers up and down into a SpinorSet and returns
-   */
-  std::unique_ptr<SPOSetT<T>> createSPOSetFromXML(xmlNodePtr cur) override;
+    /** creates and returns SpinorSet
+     *
+     * Creates an up and down LCAOrbitalSet
+     * calls LCAOSpinorBuilder::loadMO to build up and down from the H5 file
+     * registers up and down into a SpinorSet and returns
+     */
+    std::unique_ptr<SPOSetT<T>>
+    createSPOSetFromXML(xmlNodePtr cur) override;
 
 private:
-  /** load the up and down MO sets
-   *
-   * checks to make sure not PBC and initialize the Occ vector.
-   * call putFromH5 to parse the up and down MO coefficients 
-   */
-  bool loadMO(LCAOrbitalSetT<T>& up, LCAOrbitalSetT<T>& dn, xmlNodePtr cur);
+    /** load the up and down MO sets
+     *
+     * checks to make sure not PBC and initialize the Occ vector.
+     * call putFromH5 to parse the up and down MO coefficients
+     */
+    bool
+    loadMO(LCAOrbitalSetT<T>& up, LCAOrbitalSetT<T>& dn, xmlNodePtr cur);
 
-  /** parse h5 file for spinor info
-   *
-   * assumes the h5 file has KPTS_0/eigenset_0(_imag) for the real/imag part of up component of spinor
-   * assumes the h5 file as KPTS_0/eigenset_1(_imag) for the real/imag part of dn component of spinor
-   * reads the various coefficient matricies and broadcast
-   * after this, we have up/dn LCAOrbitalSet that can be registered to the SpinorSet
-   */
-  bool putFromH5(LCAOrbitalSetT<T>& up, LCAOrbitalSetT<T>& dn, xmlNodePtr);
+    /** parse h5 file for spinor info
+     *
+     * assumes the h5 file has KPTS_0/eigenset_0(_imag) for the real/imag part
+     * of the up component of the spinor, and KPTS_0/eigenset_1(_imag) for the
+     * real/imag part of the dn component. Reads the various coefficient
+     * matrices and broadcasts them. After this, we have up/dn LCAOrbitalSet
+     * objects that can be registered to the SpinorSet
+     */
+    bool
+    putFromH5(LCAOrbitalSetT<T>& up, LCAOrbitalSetT<T>& dn, xmlNodePtr);
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp
index 4e1e4f6bd1..39ea3953ee 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp
@@ -19,22 +19,20 @@
 
 #include "LCAOrbitalBuilderT.h"
 
-#include "AOBasisBuilder.h"
+#include "AOBasisBuilderT.h"
+#include "CPU/math.hpp"
+#include "CuspCorrectionConstructionT.h"
 #include "LCAOrbitalSetT.h"
+#include "LCAOrbitalSetWithCorrectionT.h"
+#include "Message/CommOperators.h"
 #include "MultiFunctorAdapter.h"
 #include "MultiQuinticSpline1D.h"
 #include "Numerics/SoaCartesianTensor.h"
 #include "Numerics/SoaSphericalTensor.h"
 #include "OhmmsData/AttributeSet.h"
 #include "QMCWaveFunctions/SPOSetT.h"
-#include "SoaAtomicBasisSet.h"
-#include "SoaLocalizedBasisSet.h"
-#if !defined(QMC_COMPLEX)
-#include "CuspCorrectionConstructionT.h"
-#include "LCAOrbitalSetWithCorrectionT.h"
-#endif
-#include "CPU/math.hpp"
-#include "Message/CommOperators.h"
+#include "SoaAtomicBasisSetT.h"
+#include "SoaLocalizedBasisSetT.h"
 #include "Utilities/ProgressReportEngine.h"
 #include "hdf/hdf_archive.h"
 
@@ -61,8 +59,8 @@ struct ao_traits<T, ORBT, 0, 0>
 {
     using radial_type = MultiQuinticSpline1D<T>;
     using angular_type = SoaCartesianTensor<T>;
-    using ao_type = SoaAtomicBasisSet<radial_type, angular_type>;
-    using basis_type = SoaLocalizedBasisSet<ao_type, ORBT>;
+    using ao_type = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
+    using basis_type = SoaLocalizedBasisSetT<ao_type, ORBT>;
 };
 
 /** specialization for numerical-spherical AO */
@@ -71,8 +69,8 @@ struct ao_traits<T, ORBT, 0, 1>
 {
     using radial_type = MultiQuinticSpline1D<T>;
     using angular_type = SoaSphericalTensor<T>;
-    using ao_type = SoaAtomicBasisSet<radial_type, angular_type>;
-    using basis_type = SoaLocalizedBasisSet<ao_type, ORBT>;
+    using ao_type = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
+    using basis_type = SoaLocalizedBasisSetT<ao_type, ORBT>;
 };
 
 /** specialization for GTO-cartesian AO */
@@ -81,8 +79,8 @@ struct ao_traits<T, ORBT, 1, 0>
 {
     using radial_type = MultiFunctorAdapter<GaussianCombo<T>>;
     using angular_type = SoaCartesianTensor<T>;
-    using ao_type = SoaAtomicBasisSet<radial_type, angular_type>;
-    using basis_type = SoaLocalizedBasisSet<ao_type, ORBT>;
+    using ao_type = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
+    using basis_type = SoaLocalizedBasisSetT<ao_type, ORBT>;
 };
 
 /** specialization for GTO-cartesian AO */
@@ -91,8 +89,8 @@ struct ao_traits<T, ORBT, 1, 1>
 {
     using radial_type = MultiFunctorAdapter<GaussianCombo<T>>;
     using angular_type = SoaSphericalTensor<T>;
-    using ao_type = SoaAtomicBasisSet<radial_type, angular_type>;
-    using basis_type = SoaLocalizedBasisSet<ao_type, ORBT>;
+    using ao_type = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
+    using basis_type = SoaLocalizedBasisSetT<ao_type, ORBT>;
 };
 
 /** specialization for STO-spherical AO */
@@ -101,8 +99,8 @@ struct ao_traits<T, ORBT, 2, 1>
 {
     using radial_type = MultiFunctorAdapter<SlaterCombo<T>>;
     using angular_type = SoaSphericalTensor<T>;
-    using ao_type = SoaAtomicBasisSet<radial_type, angular_type>;
-    using basis_type = SoaLocalizedBasisSet<ao_type, ORBT>;
+    using ao_type = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
+    using basis_type = SoaLocalizedBasisSetT<ao_type, ORBT>;
 };
 
 inline bool
@@ -112,8 +110,8 @@ is_same(const xmlChar* a, const char* b)
 }
 
 template <typename T>
-LCAOrbitalBuilderT<T>::LCAOrbitalBuilderT(
-    ParticleSet& els, ParticleSet& ions, Communicate* comm, xmlNodePtr cur) :
+LCAOrbitalBuilderT<T>::LCAOrbitalBuilderT(ParticleSetT<T>& els,
+    ParticleSetT<T>& ions, Communicate* comm, xmlNodePtr cur) :
     SPOSetBuilderT<T>("LCAO", comm),
     targetPtcl(els),
     sourcePtcl(ions),
@@ -243,7 +241,7 @@ LCAOrbitalBuilderT<T>::loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent)
     /** process atomicBasisSet per ion species */
     switch (radialOrbType) {
     case (0): // numerical
-        app_log() << "  LCAO: SoaAtomicBasisSet<MultiQuintic," << ylm << ">"
+        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiQuintic," << ylm << ">"
                   << std::endl;
         if (ylm)
             myBasisSet = createBasisSet<0, 1>(cur);
@@ -251,7 +249,7 @@ LCAOrbitalBuilderT<T>::loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent)
             myBasisSet = createBasisSet<0, 0>(cur);
         break;
     case (1): // gto
-        app_log() << "  LCAO: SoaAtomicBasisSet<MultiGTO," << ylm << ">"
+        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiGTO," << ylm << ">"
                   << std::endl;
         if (ylm)
             myBasisSet = createBasisSet<1, 1>(cur);
@@ -259,12 +257,12 @@ LCAOrbitalBuilderT<T>::loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent)
             myBasisSet = createBasisSet<1, 0>(cur);
         break;
     case (2): // sto
-        app_log() << "  LCAO: SoaAtomicBasisSet<MultiSTO," << ylm << ">"
+        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiSTO," << ylm << ">"
                   << std::endl;
         myBasisSet = createBasisSet<2, 1>(cur);
         break;
     default:
-        PRE.error("Cannot construct SoaAtomicBasisSet<ROT,YLM>.", true);
+        PRE.error("Cannot construct SoaAtomicBasisSetT<ROT,YLM>.", true);
         break;
     }
 
@@ -312,7 +310,7 @@ LCAOrbitalBuilderT<T>::loadBasisSetFromH5(xmlNodePtr parent)
     /** process atomicBasisSet per ion species */
     switch (radialOrbType) {
     case (0): // numerical
-        app_log() << "  LCAO: SoaAtomicBasisSet<MultiQuintic," << ylm << ">"
+        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiQuintic," << ylm << ">"
                   << std::endl;
         if (ylm)
             myBasisSet = createBasisSetH5<0, 1>();
@@ -320,7 +318,7 @@ LCAOrbitalBuilderT<T>::loadBasisSetFromH5(xmlNodePtr parent)
             myBasisSet = createBasisSetH5<0, 0>();
         break;
     case (1): // gto
-        app_log() << "  LCAO: SoaAtomicBasisSet<MultiGTO," << ylm << ">"
+        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiGTO," << ylm << ">"
                   << std::endl;
         if (ylm)
             myBasisSet = createBasisSetH5<1, 1>();
@@ -328,12 +326,12 @@ LCAOrbitalBuilderT<T>::loadBasisSetFromH5(xmlNodePtr parent)
             myBasisSet = createBasisSetH5<1, 0>();
         break;
     case (2): // sto
-        app_log() << "  LCAO: SoaAtomicBasisSet<MultiSTO," << ylm << ">"
+        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiSTO," << ylm << ">"
                   << std::endl;
         myBasisSet = createBasisSetH5<2, 1>();
         break;
     default:
-        PRE.error("Cannot construct SoaAtomicBasisSet<ROT,YLM>.", true);
+        PRE.error("Cannot construct SoaAtomicBasisSetT<ROT,YLM>.", true);
         break;
     }
     return std::unique_ptr<BasisSet_t>(myBasisSet);
@@ -374,7 +372,7 @@ LCAOrbitalBuilderT<T>::createBasisSet(xmlNodePtr cur)
             auto it = std::find(
                 ao_built_centers.begin(), ao_built_centers.end(), elementType);
             if (it == ao_built_centers.end()) {
-                AOBasisBuilder<ao_type> any(elementType, this->myComm);
+                AOBasisBuilderT<ao_type> any(elementType, this->myComm);
                 any.put(cur);
                 auto aoBasis = any.createAOSet(cur);
                 if (aoBasis) {
@@ -453,7 +451,7 @@ LCAOrbitalBuilderT<T>::createBasisSetH5()
         auto it = std::find(
             ao_built_centers.begin(), ao_built_centers.end(), elementType);
         if (it == ao_built_centers.end()) {
-            AOBasisBuilder<ao_type> any(elementType, this->myComm);
+            AOBasisBuilderT<ao_type> any(elementType, this->myComm);
             any.putH5(hin);
             auto aoBasis = any.createAOSetH5(hin);
             if (aoBasis) {
@@ -478,6 +476,176 @@ LCAOrbitalBuilderT<T>::createBasisSetH5()
     return mBasisSet;
 }
 
+template <>
+std::unique_ptr<SPOSetT<double>>
+LCAOrbitalBuilderT<double>::createWithCuspCorrection(xmlNodePtr cur,
+    const std::string& spo_name, std::string cusp_file,
+    std::unique_ptr<BasisSet_t>&& myBasisSet)
+{
+    app_summary() << "        Using cusp correction." << std::endl;
+    std::unique_ptr<SPOSetT<double>> sposet;
+    {
+        auto lcwc = std::make_unique<LCAOrbitalSetWithCorrectionT<double>>(
+            spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet));
+        loadMO(lcwc->lcao, cur);
+        lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize());
+        sposet = std::move(lcwc);
+    }
+
+    // Create a temporary particle set to use for cusp initialization.
+    // The particle coordinates left at the end are unsuitable for further
+    // computations. The coordinates get set to nuclear positions, which
+    // leads to zero e-N distance, which causes a NaN in SoaAtomicBasisSet.h
+    // This problem only appears when the electron positions are specified
+    // in the input. The random particle placement step executes after this
+    // part of the code, overwriting the leftover positions from the cusp
+    // initialization.
+    ParticleSetT<double> tmp_targetPtcl(targetPtcl);
+
+    const int num_centers = sourcePtcl.getTotalNum();
+    auto& lcwc = dynamic_cast<LCAOrbitalSetWithCorrectionT<double>&>(*sposet);
+
+    const int orbital_set_size = lcwc.getOrbitalSetSize();
+    Matrix<CuspCorrectionParametersT<double>> info(
+        num_centers, orbital_set_size);
+
+    // set a default file name if not given
+    if (cusp_file.empty())
+        cusp_file = spo_name + ".cuspInfo.xml";
+
+    bool file_exists(
+        this->myComm->rank() == 0 && std::ifstream(cusp_file).good());
+    this->myComm->bcast(file_exists);
+    app_log() << "  Cusp correction file " << cusp_file
+              << (file_exists ? " exists." : " doesn't exist.") << std::endl;
+
+    // validate file if it exists
+    if (file_exists) {
+        bool valid = false;
+        if (this->myComm->rank() == 0)
+            valid = CuspCorrectionConstructionT<double>::readCuspInfo(
+                cusp_file, spo_name, orbital_set_size, info);
+        this->myComm->bcast(valid);
+        if (!valid)
+            this->myComm->barrier_and_abort(
+                "Invalid cusp correction file " + cusp_file);
+#ifdef HAVE_MPI
+        for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++)
+            for (int center_idx = 0; center_idx < num_centers; center_idx++)
+                CuspCorrectionConstructionT<double>::broadcastCuspInfo(
+                    info(center_idx, orb_idx), *this->myComm, 0);
+#endif
+    }
+    else {
+        CuspCorrectionConstructionT<double>::generateCuspInfo(info,
+            tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name, *this->myComm);
+        if (this->myComm->rank() == 0)
+            CuspCorrectionConstructionT<double>::saveCusp(
+                cusp_file, info, spo_name);
+    }
+
+    CuspCorrectionConstructionT<double>::applyCuspCorrection(
+        info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp, spo_name);
+
+    return sposet;
+}
+
+template <>
+std::unique_ptr<SPOSetT<float>>
+LCAOrbitalBuilderT<float>::createWithCuspCorrection(xmlNodePtr cur,
+    const std::string& spo_name, std::string cusp_file,
+    std::unique_ptr<BasisSet_t>&& myBasisSet)
+{
+    app_summary() << "        Using cusp correction." << std::endl;
+    std::unique_ptr<SPOSetT<float>> sposet;
+    {
+        auto lcwc = std::make_unique<LCAOrbitalSetWithCorrectionT<float>>(
+            spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet));
+        loadMO(lcwc->lcao, cur);
+        lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize());
+        sposet = std::move(lcwc);
+    }
+
+    // Create a temporary particle set to use for cusp initialization.
+    // The particle coordinates left at the end are unsuitable for further
+    // computations. The coordinates get set to nuclear positions, which
+    // leads to zero e-N distance, which causes a NaN in SoaAtomicBasisSet.h
+    // This problem only appears when the electron positions are specified
+    // in the input. The random particle placement step executes after this
+    // part of the code, overwriting the leftover positions from the cusp
+    // initialization.
+    ParticleSetT<float> tmp_targetPtcl(targetPtcl);
+
+    const int num_centers = sourcePtcl.getTotalNum();
+    auto& lcwc = dynamic_cast<LCAOrbitalSetWithCorrectionT<float>&>(*sposet);
+
+    const int orbital_set_size = lcwc.getOrbitalSetSize();
+    Matrix<CuspCorrectionParametersT<float>> info(
+        num_centers, orbital_set_size);
+
+    // set a default file name if not given
+    if (cusp_file.empty())
+        cusp_file = spo_name + ".cuspInfo.xml";
+
+    bool file_exists(
+        this->myComm->rank() == 0 && std::ifstream(cusp_file).good());
+    this->myComm->bcast(file_exists);
+    app_log() << "  Cusp correction file " << cusp_file
+              << (file_exists ? " exists." : " doesn't exist.") << std::endl;
+
+    // validate file if it exists
+    if (file_exists) {
+        bool valid = false;
+        if (this->myComm->rank() == 0)
+            valid = CuspCorrectionConstructionT<float>::readCuspInfo(
+                cusp_file, spo_name, orbital_set_size, info);
+        this->myComm->bcast(valid);
+        if (!valid)
+            this->myComm->barrier_and_abort(
+                "Invalid cusp correction file " + cusp_file);
+#ifdef HAVE_MPI
+        for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++)
+            for (int center_idx = 0; center_idx < num_centers; center_idx++)
+                CuspCorrectionConstructionT<float>::broadcastCuspInfo(
+                    info(center_idx, orb_idx), *this->myComm, 0);
+#endif
+    }
+    else {
+        CuspCorrectionConstructionT<float>::generateCuspInfo(info,
+            tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name, *this->myComm);
+        if (this->myComm->rank() == 0)
+            CuspCorrectionConstructionT<float>::saveCusp(
+                cusp_file, info, spo_name);
+    }
+
+    CuspCorrectionConstructionT<float>::applyCuspCorrection(
+        info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp, spo_name);
+
+    return sposet;
+}
+
+template <>
+std::unique_ptr<SPOSetT<std::complex<double>>>
+LCAOrbitalBuilderT<std::complex<double>>::createWithCuspCorrection(
+    xmlNodePtr, const std::string&, std::string, std::unique_ptr<BasisSet_t>&&)
+{
+    this->myComm->barrier_and_abort(
+        "LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not "
+        "supported on complex LCAO.");
+    return std::unique_ptr<SPOSetT<std::complex<double>>>{};
+}
+
+template <>
+std::unique_ptr<SPOSetT<std::complex<float>>>
+LCAOrbitalBuilderT<std::complex<float>>::createWithCuspCorrection(
+    xmlNodePtr, const std::string&, std::string, std::unique_ptr<BasisSet_t>&&)
+{
+    this->myComm->barrier_and_abort(
+        "LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not "
+        "supported on complex LCAO.");
+    return std::unique_ptr<SPOSetT<std::complex<float>>>{};
+}
+
 template <typename T>
 std::unique_ptr<SPOSetT<T>>
 LCAOrbitalBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
@@ -501,18 +669,8 @@ LCAOrbitalBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
 
     std::unique_ptr<SPOSetT<T>> sposet;
     if (doCuspCorrection) {
-#if defined(QMC_COMPLEX)
-        this->myComm->barrier_and_abort(
-            "LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not "
-            "supported on complex LCAO.");
-#else
-        app_summary() << "        Using cusp correction." << std::endl;
-        auto lcwc = std::make_unique<LCAOrbitalSetWithCorrectionT<T>>(
-            spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet));
-        loadMO(lcwc->lcao, cur);
-        lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize());
-        sposet = std::move(lcwc);
-#endif
+        sposet = createWithCuspCorrection(
+            cur, spo_name, cusp_file, std::move(myBasisSet));
     }
     else {
         auto lcos = std::make_unique<LCAOrbitalSetT<T>>(
@@ -521,65 +679,6 @@ LCAOrbitalBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
         sposet = std::move(lcos);
     }
 
-#if !defined(QMC_COMPLEX)
-    if (doCuspCorrection) {
-        // Create a temporary particle set to use for cusp initialization.
-        // The particle coordinates left at the end are unsuitable for further
-        // computations. The coordinates get set to nuclear positions, which
-        // leads to zero e-N distance, which causes a NaN in SoaAtomicBasisSet.h
-        // This problem only appears when the electron positions are specified
-        // in the input. The random particle placement step executes after this
-        // part of the code, overwriting the leftover positions from the cusp
-        // initialization.
-        ParticleSet tmp_targetPtcl(targetPtcl);
-
-        const int num_centers = sourcePtcl.getTotalNum();
-        auto& lcwc = dynamic_cast<LCAOrbitalSetWithCorrectionT<T>&>(*sposet);
-
-        const int orbital_set_size = lcwc.getOrbitalSetSize();
-        Matrix<CuspCorrectionParametersT<T>> info(
-            num_centers, orbital_set_size);
-
-        // set a default file name if not given
-        if (cusp_file.empty())
-            cusp_file = spo_name + ".cuspInfo.xml";
-
-        bool file_exists(
-            this->myComm->rank() == 0 && std::ifstream(cusp_file).good());
-        this->myComm->bcast(file_exists);
-        app_log() << "  Cusp correction file " << cusp_file
-                  << (file_exists ? " exits." : " doesn't exist.") << std::endl;
-
-        // validate file if it exists
-        if (file_exists) {
-            bool valid = 0;
-            if (this->myComm->rank() == 0)
-                valid = CuspCorrectionConstructionT<T>::readCuspInfo(
-                    cusp_file, spo_name, orbital_set_size, info);
-            this->myComm->bcast(valid);
-            if (!valid)
-                this->myComm->barrier_and_abort(
-                    "Invalid cusp correction file " + cusp_file);
-#ifdef HAVE_MPI
-            for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++)
-                for (int center_idx = 0; center_idx < num_centers; center_idx++)
-                    CuspCorrectionConstructionT<T>::broadcastCuspInfo(
-                        info(center_idx, orb_idx), *this->myComm, 0);
-#endif
-        }
-        else {
-            CuspCorrectionConstructionT<T>::generateCuspInfo(info,
-                tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name, *this->myComm);
-            if (this->myComm->rank() == 0)
-                CuspCorrectionConstructionT<T>::saveCusp(
-                    cusp_file, info, spo_name);
-        }
-
-        CuspCorrectionConstructionT<T>::applyCuspCorrection(
-            info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp, spo_name);
-    }
-#endif
-
     return sposet;
 }
 
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h
index a746326df7..5cff3a5612 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h
@@ -20,7 +20,6 @@
 #ifndef QMCPLUSPLUS_SOA_LCAO_ORBITAL_BUILDERT_H
 #define QMCPLUSPLUS_SOA_LCAO_ORBITAL_BUILDERT_H
 
-#include "QMCWaveFunctions/BasisSetBase.h"
 #include "QMCWaveFunctions/LCAO/LCAOrbitalSetT.h"
 #include "QMCWaveFunctions/SPOSetBuilderT.h"
 
@@ -39,23 +38,24 @@ class LCAOrbitalBuilderT : public SPOSetBuilderT<T>
 public:
     using BasisSet_t = typename LCAOrbitalSetT<T>::basis_type;
     using RealType = typename LCAOrbitalSetT<T>::RealType;
+    using ValueType = typename LCAOrbitalSetT<T>::ValueType;
     using PosType = typename LCAOrbitalSetT<T>::PosType;
 
     /** constructor
      * \param els reference to the electrons
      * \param ions reference to the ions
      */
-    LCAOrbitalBuilderT(
-        ParticleSet& els, ParticleSet& ions, Communicate* comm, xmlNodePtr cur);
+    LCAOrbitalBuilderT(ParticleSetT<T>& els, ParticleSetT<T>& ions,
+        Communicate* comm, xmlNodePtr cur);
     ~LCAOrbitalBuilderT() override;
     std::unique_ptr<SPOSetT<T>>
     createSPOSetFromXML(xmlNodePtr cur) override;
 
 protected:
     /// target ParticleSet
-    ParticleSet& targetPtcl;
+    ParticleSetT<T>& targetPtcl;
     /// source ParticleSet
-    ParticleSet& sourcePtcl;
+    ParticleSetT<T>& sourcePtcl;
     /// localized basis set map
     std::map<std::string, std::unique_ptr<BasisSet_t>> basisset_map_;
     /// if true, add cusp correction to orbitals
@@ -125,6 +125,10 @@ class LCAOrbitalBuilderT : public SPOSetBuilderT<T>
         Matrix<RealType>& Creal) const;
 
 private:
+    /// enable cusp correction
+    std::unique_ptr<SPOSetT<T>>
+    createWithCuspCorrection(xmlNodePtr cur, const std::string& spo_name,
+        std::string cusp_file, std::unique_ptr<BasisSet_t>&& myBasisSet);
     /// load a basis set from XML input
     std::unique_ptr<BasisSet_t>
     loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent);
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp
index dba20478b7..6abd2d8b22 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp
@@ -1,6 +1,6 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
@@ -9,952 +9,948 @@
 // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
 //////////////////////////////////////////////////////////////////////////////////////
 
-
 #include "LCAOrbitalSetT.h"
-#include "Numerics/MatrixOperators.h"
+
 #include "CPU/BLAS.hpp"
+#include "Numerics/MatrixOperators.h"
 #include <ResourceCollection.h>
 
 namespace qmcplusplus
 {
 
-template<class T>
+template <class T>
 struct LCAOrbitalSetT<T>::LCAOMultiWalkerMem : public Resource
 {
-  LCAOMultiWalkerMem() : Resource("LCAOrbitalSetT") {}
-  LCAOMultiWalkerMem(const LCAOMultiWalkerMem&) : LCAOMultiWalkerMem() {}
+    LCAOMultiWalkerMem() : Resource("LCAOrbitalSetT")
+    {
+    }
+    LCAOMultiWalkerMem(const LCAOMultiWalkerMem&) : LCAOMultiWalkerMem()
+    { // delegates to the default constructor: buffer contents are not copied
+    }
 
-  std::unique_ptr<Resource> makeClone() const override { return std::make_unique<LCAOMultiWalkerMem>(*this); }
+    std::unique_ptr<Resource>
+    makeClone() const override
+    {
+        return std::make_unique<LCAOMultiWalkerMem>(*this);
+    }
 
-  OffloadMWVGLArray phi_vgl_v; // [5][NW][NumMO]
-  OffloadMWVGLArray basis_mw;  // [5][NW][NumAO]
-  OffloadMWVArray phi_v;       // [NW][NumMO]
-  OffloadMWVArray basis_v_mw;  // [NW][NumMO]
+    OffloadMWVGLArray phi_vgl_v; // [5][NW][NumMO]
+    OffloadMWVGLArray basis_mw; // [5][NW][NumAO]
+    OffloadMWVArray phi_v; // [NW][NumMO]
+    OffloadMWVArray basis_v_mw; // [NW][NumAO] (resized to nw x BasisSetSize)
 };
 
-template<class T>
-LCAOrbitalSetT<T>::LCAOrbitalSetT(const std::string& my_name, std::unique_ptr<basis_type>&& bs)
-    : SPOSetT<T>(my_name),
-      BasisSetSize(bs ? bs->getBasisSetSize() : 0),
-      Identity(true),
-      basis_timer_(createGlobalTimer("LCAOrbitalSetT::Basis", timer_level_fine)),
-      mo_timer_(createGlobalTimer("LCAOrbitalSetT::MO", timer_level_fine))
-{
-  if (!bs)
-    throw std::runtime_error("LCAOrbitalSetT cannot take nullptr as its  basis set!");
-  myBasisSet = std::move(bs);
-  Temp.resize(BasisSetSize);
-  Temph.resize(BasisSetSize);
-  Tempgh.resize(BasisSetSize);
-  this->OrbitalSetSize = BasisSetSize;
-  LCAOrbitalSetT<T>::checkObject();
-}
-
-template<class T>
-LCAOrbitalSetT<T>::LCAOrbitalSetT(const LCAOrbitalSetT<T>& in)
-    : SPOSetT<T>(in),
-      myBasisSet(in.myBasisSet->makeClone()),
-      C(in.C),
-      BasisSetSize(in.BasisSetSize),
-      C_copy(in.C_copy),
-      Identity(in.Identity),
-      basis_timer_(in.basis_timer_),
-      mo_timer_(in.mo_timer_)
-{
-  Temp.resize(BasisSetSize);
-  Temph.resize(BasisSetSize);
-  Tempgh.resize(BasisSetSize);
-  if (!in.Identity)
-  {
+template <class T>
+LCAOrbitalSetT<T>::LCAOrbitalSetT(
+    const std::string& my_name, std::unique_ptr<basis_type>&& bs) :
+    SPOSetT<T>(my_name),
+    BasisSetSize(bs ? bs->getBasisSetSize() : 0), // 0 only for a null bs
+    Identity(true), // starts without a coefficient matrix C
+    basis_timer_(createGlobalTimer("LCAOrbitalSetT::Basis", timer_level_fine)),
+    mo_timer_(createGlobalTimer("LCAOrbitalSetT::MO", timer_level_fine))
+{
+    if (!bs) // a null basis set is rejected before ownership is taken
+        throw std::runtime_error(
+            "LCAOrbitalSetT cannot take nullptr as its  basis set!");
+    myBasisSet = std::move(bs); // this object now owns the basis set
+    Temp.resize(BasisSetSize); // scratch buffers sized by the basis-set size
+    Temph.resize(BasisSetSize);
+    Tempgh.resize(BasisSetSize);
+    this->OrbitalSetSize = BasisSetSize; // checkObject() requires this while Identity
+    LCAOrbitalSetT<T>::checkObject(); // throws if the invariants do not hold
+}
+
+template <class T>
+LCAOrbitalSetT<T>::LCAOrbitalSetT(const LCAOrbitalSetT<T>& in) :
+    SPOSetT<T>(in),
+    myBasisSet(in.myBasisSet->makeClone()), // the copy gets its own basis-set clone
+    C(in.C), // C is built with std::make_shared, so this copies the handle
+    BasisSetSize(in.BasisSetSize),
+    C_copy(in.C_copy),
+    Identity(in.Identity),
+    basis_timer_(in.basis_timer_),
+    mo_timer_(in.mo_timer_)
+{
+    Temp.resize(BasisSetSize); // scratch buffers are re-sized, not copied from `in`
+    Temph.resize(BasisSetSize);
+    Tempgh.resize(BasisSetSize);
+    if (!in.Identity) { // orbital-sized scratch is only set up when C is in use
+        Tempv.resize(this->OrbitalSetSize);
+        Temphv.resize(this->OrbitalSetSize);
+        Tempghv.resize(this->OrbitalSetSize);
+    }
+    LCAOrbitalSetT<T>::checkObject(); // throws if the copied state is inconsistent
+}
+
+template <class T>
+void
+LCAOrbitalSetT<T>::setOrbitalSetSize(int norbs)
+{
+    if (C)
+        throw std::runtime_error("LCAOrbitalSetT::setOrbitalSetSize cannot "
+                                 "reset existing MO coefficients");
+
+    Identity = false;
+    this->OrbitalSetSize = norbs;
+    C = std::make_shared<ValueMatrix>(this->OrbitalSetSize, BasisSetSize);
     Tempv.resize(this->OrbitalSetSize);
     Temphv.resize(this->OrbitalSetSize);
     Tempghv.resize(this->OrbitalSetSize);
-  }
-  LCAOrbitalSetT<T>::checkObject();
+    LCAOrbitalSetT<T>::checkObject();
 }
 
-template<class T>
-void LCAOrbitalSetT<T>::setOrbitalSetSize(int norbs)
+template <class T>
+void
+LCAOrbitalSetT<T>::checkObject() const
 {
-  if (C)
-    throw std::runtime_error("LCAOrbitalSetT::setOrbitalSetSize cannot reset existing MO coefficients");
+    if (Identity) {
+        if (this->OrbitalSetSize != BasisSetSize)
+            throw std::runtime_error(
+                "LCAOrbitalSetT::checkObject OrbitalSetSize and BasisSetSize "
+                "must be equal if Identity = true!");
+        if (C)
+            throw std::runtime_error("LCAOrbitalSetT::checkObject C should be "
+                                     "nullptr if Identity = true!");
+    }
+    else {
+        if (!C)
+            throw std::runtime_error("LCAOrbitalSetT::checkObject C should not "
+                                     "be nullptr if Identity = false!");
+        if (this->OrbitalSetSize != C->rows())
+            throw std::runtime_error("LCAOrbitalSetT::checkObject C rows "
+                                     "doesn't match OrbitalSetSize.");
+        if (BasisSetSize != C->cols())
+            throw std::runtime_error("LCAOrbitalSetT::checkObject C columns "
+                                     "doesn't match BasisSetSize.");
+    }
+}
 
-  Identity       = false;
-  this->OrbitalSetSize = norbs;
-  C              = std::make_shared<ValueMatrix>(this->OrbitalSetSize, BasisSetSize);
-  Tempv.resize(this->OrbitalSetSize);
-  Temphv.resize(this->OrbitalSetSize);
-  Tempghv.resize(this->OrbitalSetSize);
-  LCAOrbitalSetT<T>::checkObject();
+template <class T>
+void
+LCAOrbitalSetT<T>::createResource(ResourceCollection& collection) const
+{ // registers one LCAOMultiWalkerMem instance with the collection
+    auto resource_index = // the returned index is not used in this function
+        collection.addResource(std::make_unique<LCAOMultiWalkerMem>());
 }
 
-template<class T>
-void LCAOrbitalSetT<T>::checkObject() const
+template <class T>
+void
+LCAOrbitalSetT<T>::acquireResource(ResourceCollection& collection,
+    const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
 {
-  if (Identity)
-  {
-    if (this->OrbitalSetSize != BasisSetSize)
-      throw std::runtime_error(
-          "LCAOrbitalSetT::checkObject OrbitalSetSize and BasisSetSize must be equal if Identity = true!");
-    if (C)
-      throw std::runtime_error("LCAOrbitalSetT::checkObject C should be nullptr if Identity = true!");
-  }
-  else
-  {
-    if (!C)
-      throw std::runtime_error("LCAOrbitalSetT::checkObject C should not be nullptr if Identity = false!");
-    if (this->OrbitalSetSize != C->rows())
-      throw std::runtime_error("LCAOrbitalSetT::checkObject C rows doesn't match OrbitalSetSize.");
-    if (BasisSetSize != C->cols())
-      throw std::runtime_error("LCAOrbitalSetT::checkObject C columns doesn't match BasisSetSize.");
-  }
+    assert(this == &spo_list.getLeader());
+    auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+    spo_leader.mw_mem_handle_ = collection.lendResource<LCAOMultiWalkerMem>();
 }
 
-template<class T>
-void LCAOrbitalSetT<T>::createResource(ResourceCollection& collection) const
+template <class T>
+void
+LCAOrbitalSetT<T>::releaseResource(ResourceCollection& collection,
+    const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
 {
-  auto resource_index = collection.addResource(std::make_unique<LCAOMultiWalkerMem>());
+    assert(this == &spo_list.getLeader());
+    auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+    collection.takebackResource(spo_leader.mw_mem_handle_);
 }
 
-template<class T>
-void LCAOrbitalSetT<T>::acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+template <class T>
+std::unique_ptr<SPOSetT<T>>
+LCAOrbitalSetT<T>::makeClone() const
 {
-  assert(this == &spo_list.getLeader());
-  auto& spo_leader          = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
-  spo_leader.mw_mem_handle_ = collection.lendResource<LCAOMultiWalkerMem>();
+    return std::make_unique<LCAOrbitalSetT<T>>(*this);
 }
 
-template<class T>
-void LCAOrbitalSetT<T>::releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+template <class T>
+void
+LCAOrbitalSetT<T>::evaluateValue(
+    const ParticleSetT<T>& P, int iat, ValueVector& psi)
 {
-  assert(this == &spo_list.getLeader());
-  auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
-  collection.takebackResource(spo_leader.mw_mem_handle_);
+    if (Identity) { // PAY ATTENTION TO COMPLEX
+        myBasisSet->evaluateV(P, iat, psi.data());
+    }
+    else {
+        Vector<T> vTemp(Temp.data(0), BasisSetSize);
+        this->myBasisSet->evaluateV(P, iat, vTemp.data());
+        assert(psi.size() <= this->OrbitalSetSize);
+        ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize);
+        MatrixOperators::product(C_partial_view, vTemp, psi);
+    }
+}
+
+/** One GEMM call over all D components of A, writing the result into C.
+ * TODO: find a better place for other user classes; Matrix should be padded. */
+template <typename T, unsigned D>
+static void
+Product_ABt(const VectorSoaContainer<T, D>& A, const Matrix<T>& B,
+    VectorSoaContainer<T, D>& C)
+{
+    constexpr char transa = 't';
+    constexpr char transb = 'n';
+    constexpr T zone(1); // passed in the alpha position of gemm
+    constexpr T zero(0); // passed in the beta position of gemm
+    BLAS::gemm(transa, transb, B.rows(), D, B.cols(), zone, B.data(), B.cols(),
+        A.data(), A.capacity(), zero, C.data(), C.capacity());
+}
+
+template <class T>
+void
+LCAOrbitalSetT<T>::evaluate_vgl_impl(const vgl_type& temp, ValueVector& psi,
+    GradVector& dpsi, ValueVector& d2psi) const
+{
+    const size_t output_size = psi.size(); // psi decides how many entries are unpacked
+    std::copy_n(temp.data(0), output_size, psi.data()); // component 0 -> psi
+    const T* restrict gx = temp.data(1); // components 1-3 -> dpsi x/y/z
+    const T* restrict gy = temp.data(2);
+    const T* restrict gz = temp.data(3);
+    for (size_t j = 0; j < output_size; j++) {
+        dpsi[j][0] = gx[j];
+        dpsi[j][1] = gy[j];
+        dpsi[j][2] = gz[j];
+    }
+    std::copy_n(temp.data(4), output_size, d2psi.data()); // component 4 -> d2psi
+}
+
+template <class T>
+void
+LCAOrbitalSetT<T>::evaluate_vgh_impl(const vgh_type& temp, ValueVector& psi,
+    GradVector& dpsi, HessVector& d2psi) const
+{
+    const size_t output_size = psi.size(); // psi decides how many entries are unpacked
+    std::copy_n(temp.data(0), output_size, psi.data()); // component 0 -> psi
+    const T* restrict gx = temp.data(1);
+    const T* restrict gy = temp.data(2);
+    const T* restrict gz = temp.data(3);
+    const T* restrict hxx = temp.data(4);
+    const T* restrict hxy = temp.data(5);
+    const T* restrict hxz = temp.data(6);
+    const T* restrict hyy = temp.data(7);
+    const T* restrict hyz = temp.data(8);
+    const T* restrict hzz = temp.data(9);
+    // Components 1-3 fill the gradient, components 4-9 fill the Hessian.
+    for (size_t j = 0; j < output_size; j++) {
+        dpsi[j][0] = gx[j];
+        dpsi[j][1] = gy[j];
+        dpsi[j][2] = gz[j];
+        // Six stored components are mirrored into the symmetric 3x3 Hessian.
+        d2psi[j](0, 0) = hxx[j];
+        d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j];
+        d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j];
+        d2psi[j](1, 1) = hyy[j];
+        d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j];
+        d2psi[j](2, 2) = hzz[j];
+    }
+}
+
+template <class T>
+void
+LCAOrbitalSetT<T>::evaluate_vghgh_impl(const vghgh_type& temp, int i,
+    ValueMatrix& psi, GradMatrix& dpsi, HessMatrix& d2psi,
+    GGGMatrix& dghpsi) const
+{
+    const size_t output_size = psi.cols(); // one entry per column of row i
+    std::copy_n(temp.data(0), output_size, psi[i]); // component 0 -> psi row i
+    const T* restrict gx = temp.data(1);
+    const T* restrict gy = temp.data(2);
+    const T* restrict gz = temp.data(3);
+    const T* restrict hxx = temp.data(4);
+    const T* restrict hxy = temp.data(5);
+    const T* restrict hxz = temp.data(6);
+    const T* restrict hyy = temp.data(7);
+    const T* restrict hyz = temp.data(8);
+    const T* restrict hzz = temp.data(9);
+    const T* restrict gh_xxx = temp.data(10);
+    const T* restrict gh_xxy = temp.data(11);
+    const T* restrict gh_xxz = temp.data(12);
+    const T* restrict gh_xyy = temp.data(13);
+    const T* restrict gh_xyz = temp.data(14);
+    const T* restrict gh_xzz = temp.data(15);
+    const T* restrict gh_yyy = temp.data(16);
+    const T* restrict gh_yyz = temp.data(17);
+    const T* restrict gh_yzz = temp.data(18);
+    const T* restrict gh_zzz = temp.data(19);
+    // Row i of every output matrix is filled, one column j per unpacked entry.
+    for (size_t j = 0; j < output_size; j++) {
+        dpsi[i][j][0] = gx[j];
+        dpsi[i][j][1] = gy[j];
+        dpsi[i][j][2] = gz[j];
+        // Six stored components are mirrored into the symmetric 3x3 Hessian.
+        d2psi[i][j](0, 0) = hxx[j];
+        d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j];
+        d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j];
+        d2psi[i][j](1, 1) = hyy[j];
+        d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j];
+        d2psi[i][j](2, 2) = hzz[j];
+        // dghpsi[i][j][a](b, c) takes the a|bc component; block a = x:
+        dghpsi[i][j][0](0, 0) = gh_xxx[j]; // x|xx
+        dghpsi[i][j][0](0, 1) = gh_xxy[j]; // x|xy
+        dghpsi[i][j][0](0, 2) = gh_xxz[j]; // x|xz
+        dghpsi[i][j][0](1, 0) = gh_xxy[j]; // x|yx = xxy
+        dghpsi[i][j][0](1, 1) = gh_xyy[j]; // x|yy
+        dghpsi[i][j][0](1, 2) = gh_xyz[j]; // x|yz
+        dghpsi[i][j][0](2, 0) = gh_xxz[j]; // x|zx = xxz
+        dghpsi[i][j][0](2, 1) = gh_xyz[j]; // x|zy = xyz
+        dghpsi[i][j][0](2, 2) = gh_xzz[j]; // x|zz
+        // block a = y:
+        dghpsi[i][j][1](0, 0) = gh_xxy[j]; // y|xx = xxy
+        dghpsi[i][j][1](0, 1) = gh_xyy[j]; // y|xy = xyy
+        dghpsi[i][j][1](0, 2) = gh_xyz[j]; // y|xz = xyz
+        dghpsi[i][j][1](1, 0) = gh_xyy[j]; // y|yx = xyy
+        dghpsi[i][j][1](1, 1) = gh_yyy[j]; // y|yy
+        dghpsi[i][j][1](1, 2) = gh_yyz[j]; // y|yz
+        dghpsi[i][j][1](2, 0) = gh_xyz[j]; // y|zx = xyz
+        dghpsi[i][j][1](2, 1) = gh_yyz[j]; // y|zy = yyz
+        dghpsi[i][j][1](2, 2) = gh_yzz[j]; // y|zz
+        // block a = z:
+        dghpsi[i][j][2](0, 0) = gh_xxz[j]; // z|xx = xxz
+        dghpsi[i][j][2](0, 1) = gh_xyz[j]; // z|xy = xyz
+        dghpsi[i][j][2](0, 2) = gh_xzz[j]; // z|xz = xzz
+        dghpsi[i][j][2](1, 0) = gh_xyz[j]; // z|yx = xyz
+        dghpsi[i][j][2](1, 1) = gh_yyz[j]; // z|yy = yyz
+        dghpsi[i][j][2](1, 2) = gh_yzz[j]; // z|yz = yzz
+        dghpsi[i][j][2](2, 0) = gh_xzz[j]; // z|zx = xzz
+        dghpsi[i][j][2](2, 1) = gh_yzz[j]; // z|zy = yzz
+        dghpsi[i][j][2](2, 2) = gh_zzz[j]; // z|zz
+    }
+}
+
+template <class T>
+void
+LCAOrbitalSetT<T>::evaluate_vghgh_impl(const vghgh_type& temp, ValueVector& psi,
+    GradVector& dpsi, HessVector& d2psi, GGGVector& dghpsi) const
+{
+    const size_t output_size = psi.size(); // psi decides how many entries are unpacked
+    std::copy_n(temp.data(0), output_size, psi.data()); // component 0 -> psi
+    const T* restrict gx = temp.data(1);
+    const T* restrict gy = temp.data(2);
+    const T* restrict gz = temp.data(3);
+    const T* restrict hxx = temp.data(4);
+    const T* restrict hxy = temp.data(5);
+    const T* restrict hxz = temp.data(6);
+    const T* restrict hyy = temp.data(7);
+    const T* restrict hyz = temp.data(8);
+    const T* restrict hzz = temp.data(9);
+    const T* restrict gh_xxx = temp.data(10);
+    const T* restrict gh_xxy = temp.data(11);
+    const T* restrict gh_xxz = temp.data(12);
+    const T* restrict gh_xyy = temp.data(13);
+    const T* restrict gh_xyz = temp.data(14);
+    const T* restrict gh_xzz = temp.data(15);
+    const T* restrict gh_yyy = temp.data(16);
+    const T* restrict gh_yyz = temp.data(17);
+    const T* restrict gh_yzz = temp.data(18);
+    const T* restrict gh_zzz = temp.data(19);
+    // Vector counterpart of the matrix overload; the index mapping must match it.
+    for (size_t j = 0; j < output_size; j++) {
+        dpsi[j][0] = gx[j];
+        dpsi[j][1] = gy[j];
+        dpsi[j][2] = gz[j];
+        // Six stored components are mirrored into the symmetric 3x3 Hessian.
+        d2psi[j](0, 0) = hxx[j];
+        d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j];
+        d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j];
+        d2psi[j](1, 1) = hyy[j];
+        d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j];
+        d2psi[j](2, 2) = hzz[j];
+        // dghpsi[j][a](b, c) takes the a|bc component; block a = x:
+        dghpsi[j][0](0, 0) = gh_xxx[j]; // x|xx
+        dghpsi[j][0](0, 1) = gh_xxy[j]; // x|xy
+        dghpsi[j][0](0, 2) = gh_xxz[j]; // x|xz
+        dghpsi[j][0](1, 0) = gh_xxy[j]; // x|yx = xxy
+        dghpsi[j][0](1, 1) = gh_xyy[j]; // x|yy
+        dghpsi[j][0](1, 2) = gh_xyz[j]; // x|yz
+        dghpsi[j][0](2, 0) = gh_xxz[j]; // x|zx = xxz
+        dghpsi[j][0](2, 1) = gh_xyz[j]; // x|zy = xyz
+        dghpsi[j][0](2, 2) = gh_xzz[j]; // x|zz
+        // block a = y:
+        dghpsi[j][1](0, 0) = gh_xxy[j]; // y|xx = xxy
+        dghpsi[j][1](0, 1) = gh_xyy[j]; // y|xy = xyy
+        dghpsi[j][1](0, 2) = gh_xyz[j]; // y|xz = xyz
+        dghpsi[j][1](1, 0) = gh_xyy[j]; // y|yx = xyy
+        dghpsi[j][1](1, 1) = gh_yyy[j]; // y|yy
+        dghpsi[j][1](1, 2) = gh_yyz[j]; // y|yz
+        dghpsi[j][1](2, 0) = gh_xyz[j]; // y|zx = xyz
+        dghpsi[j][1](2, 1) = gh_yyz[j]; // y|zy = yyz (was wrongly gh_xyy)
+        dghpsi[j][1](2, 2) = gh_yzz[j]; // y|zz
+        // block a = z:
+        dghpsi[j][2](0, 0) = gh_xxz[j]; // z|xx = xxz (was wrongly gh_xzz)
+        dghpsi[j][2](0, 1) = gh_xyz[j]; // z|xy = xyz
+        dghpsi[j][2](0, 2) = gh_xzz[j]; // z|xz = xzz
+        dghpsi[j][2](1, 0) = gh_xyz[j]; // z|yx = xyz
+        dghpsi[j][2](1, 1) = gh_yyz[j]; // z|yy = yyz
+        dghpsi[j][2](1, 2) = gh_yzz[j]; // z|yz = yzz
+        dghpsi[j][2](2, 0) = gh_xzz[j]; // z|zx = xzz
+        dghpsi[j][2](2, 1) = gh_yzz[j]; // z|zy = yzz
+        dghpsi[j][2](2, 2) = gh_zzz[j]; // z|zz
+    }
 }
 
-template<class T>
-std::unique_ptr<SPOSetT<T>> LCAOrbitalSetT<T>::makeClone() const { return std::make_unique<LCAOrbitalSetT<T>>(*this); }
+template <class T>
+void
+LCAOrbitalSetT<T>::evaluate_ionderiv_v_row_impl(
+    const vgl_type& temp, GradVector& dpsi) const
+{
+    const size_t output_size = dpsi.size(); // dpsi decides how many entries are written
+    const T* restrict gx = temp.data(1);
+    const T* restrict gy = temp.data(2);
+    const T* restrict gz = temp.data(3);
+
+    for (size_t j = 0; j < output_size; j++) {
+        // As mentioned in SoaLocalizedBasisSet, LCAOs have a nice property:
+        // for an atomic center, the ion gradient is the negative of the
+        // electron gradient. Hence the minus sign on each component below
+        // (gx, gy, gz are components 1-3 of temp).
+        dpsi[j][0] = -gx[j];
+        dpsi[j][1] = -gy[j];
+        dpsi[j][2] = -gz[j];
+    }
+}
 
-template<class T>
-void LCAOrbitalSetT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+template <class T>
+void
+LCAOrbitalSetT<T>::evaluateVGL(const ParticleSetT<T>& P, int iat,
+    ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
 {
-  if (Identity)
-  { //PAY ATTENTION TO COMPLEX
-    myBasisSet->evaluateV(P, iat, psi.data());
-  }
-  else
-  {
-    Vector<T> vTemp(Temp.data(0), BasisSetSize);
-    this->myBasisSet->evaluateV(P, iat, vTemp.data());
-    assert(psi.size() <= this->OrbitalSetSize);
-    ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize);
-    MatrixOperators::product(C_partial_view, vTemp, psi);
-  }
-}
-
-/** Find a better place for other user classes, Matrix should be padded as well */
-template<typename T, unsigned D>
-static void Product_ABt(const VectorSoaContainer<T, D>& A, const Matrix<T>& B, VectorSoaContainer<T, D>& C)
-{
-  constexpr char transa = 't';
-  constexpr char transb = 'n';
-  constexpr T zone(1);
-  constexpr T zero(0);
-  BLAS::gemm(transa, transb, B.rows(), D, B.cols(), zone, B.data(), B.cols(), A.data(), A.capacity(), zero, C.data(),
-             C.capacity());
-}
-
-template<class T>
-void LCAOrbitalSetT<T>::evaluate_vgl_impl(const vgl_type& temp,
-                                             ValueVector& psi,
-                                             GradVector& dpsi,
-                                             ValueVector& d2psi) const
-{
-  const size_t output_size = psi.size();
-  std::copy_n(temp.data(0), output_size, psi.data());
-  const T* restrict gx = temp.data(1);
-  const T* restrict gy = temp.data(2);
-  const T* restrict gz = temp.data(3);
-  for (size_t j = 0; j < output_size; j++)
-  {
-    dpsi[j][0] = gx[j];
-    dpsi[j][1] = gy[j];
-    dpsi[j][2] = gz[j];
-  }
-  std::copy_n(temp.data(4), output_size, d2psi.data());
-}
-
-template<class T>
-void LCAOrbitalSetT<T>::evaluate_vgh_impl(const vgh_type& temp,
-                                             ValueVector& psi,
-                                             GradVector& dpsi,
-                                             HessVector& d2psi) const
-{
-  const size_t output_size = psi.size();
-  std::copy_n(temp.data(0), output_size, psi.data());
-  const T* restrict gx  = temp.data(1);
-  const T* restrict gy  = temp.data(2);
-  const T* restrict gz  = temp.data(3);
-  const T* restrict hxx = temp.data(4);
-  const T* restrict hxy = temp.data(5);
-  const T* restrict hxz = temp.data(6);
-  const T* restrict hyy = temp.data(7);
-  const T* restrict hyz = temp.data(8);
-  const T* restrict hzz = temp.data(9);
-
-  for (size_t j = 0; j < output_size; j++)
-  {
-    dpsi[j][0] = gx[j];
-    dpsi[j][1] = gy[j];
-    dpsi[j][2] = gz[j];
-
-    d2psi[j](0, 0) = hxx[j];
-    d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j];
-    d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j];
-    d2psi[j](1, 1)                  = hyy[j];
-    d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j];
-    d2psi[j](2, 2)                  = hzz[j];
-  }
-}
-
-template<class T>
-void LCAOrbitalSetT<T>::evaluate_vghgh_impl(const vghgh_type& temp,
-                                               int i,
-                                               ValueMatrix& psi,
-                                               GradMatrix& dpsi,
-                                               HessMatrix& d2psi,
-                                               GGGMatrix& dghpsi) const
-{
-  const size_t output_size = psi.cols();
-  std::copy_n(temp.data(0), output_size, psi[i]);
-  const T* restrict gx     = temp.data(1);
-  const T* restrict gy     = temp.data(2);
-  const T* restrict gz     = temp.data(3);
-  const T* restrict hxx    = temp.data(4);
-  const T* restrict hxy    = temp.data(5);
-  const T* restrict hxz    = temp.data(6);
-  const T* restrict hyy    = temp.data(7);
-  const T* restrict hyz    = temp.data(8);
-  const T* restrict hzz    = temp.data(9);
-  const T* restrict gh_xxx = temp.data(10);
-  const T* restrict gh_xxy = temp.data(11);
-  const T* restrict gh_xxz = temp.data(12);
-  const T* restrict gh_xyy = temp.data(13);
-  const T* restrict gh_xyz = temp.data(14);
-  const T* restrict gh_xzz = temp.data(15);
-  const T* restrict gh_yyy = temp.data(16);
-  const T* restrict gh_yyz = temp.data(17);
-  const T* restrict gh_yzz = temp.data(18);
-  const T* restrict gh_zzz = temp.data(19);
-
-  for (size_t j = 0; j < output_size; j++)
-  {
-    dpsi[i][j][0] = gx[j];
-    dpsi[i][j][1] = gy[j];
-    dpsi[i][j][2] = gz[j];
-
-    d2psi[i][j](0, 0) = hxx[j];
-    d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j];
-    d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j];
-    d2psi[i][j](1, 1)                     = hyy[j];
-    d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j];
-    d2psi[i][j](2, 2)                     = hzz[j];
-
-    dghpsi[i][j][0](0, 0) = gh_xxx[j]; //x|xx
-    dghpsi[i][j][0](0, 1) = gh_xxy[j]; //x|xy
-    dghpsi[i][j][0](0, 2) = gh_xxz[j]; //x|xz
-    dghpsi[i][j][0](1, 0) = gh_xxy[j]; //x|yx = xxy
-    dghpsi[i][j][0](1, 1) = gh_xyy[j]; //x|yy
-    dghpsi[i][j][0](1, 2) = gh_xyz[j]; //x|yz
-    dghpsi[i][j][0](2, 0) = gh_xxz[j]; //x|zx = xxz
-    dghpsi[i][j][0](2, 1) = gh_xyz[j]; //x|zy = xyz
-    dghpsi[i][j][0](2, 2) = gh_xzz[j]; //x|zz
-
-    dghpsi[i][j][1](0, 0) = gh_xxy[j]; //y|xx = xxy
-    dghpsi[i][j][1](0, 1) = gh_xyy[j]; //y|xy = xyy
-    dghpsi[i][j][1](0, 2) = gh_xyz[j]; //y|xz = xyz
-    dghpsi[i][j][1](1, 0) = gh_xyy[j]; //y|yx = xyy
-    dghpsi[i][j][1](1, 1) = gh_yyy[j]; //y|yy
-    dghpsi[i][j][1](1, 2) = gh_yyz[j]; //y|yz
-    dghpsi[i][j][1](2, 0) = gh_xyz[j]; //y|zx = xyz
-    dghpsi[i][j][1](2, 1) = gh_yyz[j]; //y|zy = yyz
-    dghpsi[i][j][1](2, 2) = gh_yzz[j]; //y|zz
-
-    dghpsi[i][j][2](0, 0) = gh_xxz[j]; //z|xx = xxz
-    dghpsi[i][j][2](0, 1) = gh_xyz[j]; //z|xy = xyz
-    dghpsi[i][j][2](0, 2) = gh_xzz[j]; //z|xz = xzz
-    dghpsi[i][j][2](1, 0) = gh_xyz[j]; //z|yx = xyz
-    dghpsi[i][j][2](1, 1) = gh_yyz[j]; //z|yy = yyz
-    dghpsi[i][j][2](1, 2) = gh_yzz[j]; //z|yz = yzz
-    dghpsi[i][j][2](2, 0) = gh_xzz[j]; //z|zx = xzz
-    dghpsi[i][j][2](2, 1) = gh_yzz[j]; //z|zy = yzz
-    dghpsi[i][j][2](2, 2) = gh_zzz[j]; //z|zz
-  }
-}
-
-template<class T>
-void LCAOrbitalSetT<T>::evaluate_vghgh_impl(const vghgh_type& temp,
-                                               ValueVector& psi,
-                                               GradVector& dpsi,
-                                               HessVector& d2psi,
-                                               GGGVector& dghpsi) const
-{
-  const size_t output_size = psi.size();
-  std::copy_n(temp.data(0), output_size, psi.data());
-  const T* restrict gx     = temp.data(1);
-  const T* restrict gy     = temp.data(2);
-  const T* restrict gz     = temp.data(3);
-  const T* restrict hxx    = temp.data(4);
-  const T* restrict hxy    = temp.data(5);
-  const T* restrict hxz    = temp.data(6);
-  const T* restrict hyy    = temp.data(7);
-  const T* restrict hyz    = temp.data(8);
-  const T* restrict hzz    = temp.data(9);
-  const T* restrict gh_xxx = temp.data(10);
-  const T* restrict gh_xxy = temp.data(11);
-  const T* restrict gh_xxz = temp.data(12);
-  const T* restrict gh_xyy = temp.data(13);
-  const T* restrict gh_xyz = temp.data(14);
-  const T* restrict gh_xzz = temp.data(15);
-  const T* restrict gh_yyy = temp.data(16);
-  const T* restrict gh_yyz = temp.data(17);
-  const T* restrict gh_yzz = temp.data(18);
-  const T* restrict gh_zzz = temp.data(19);
-
-  for (size_t j = 0; j < output_size; j++)
-  {
-    dpsi[j][0] = gx[j];
-    dpsi[j][1] = gy[j];
-    dpsi[j][2] = gz[j];
-
-    d2psi[j](0, 0) = hxx[j];
-    d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j];
-    d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j];
-    d2psi[j](1, 1)                  = hyy[j];
-    d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j];
-    d2psi[j](2, 2)                  = hzz[j];
-
-    dghpsi[j][0](0, 0) = gh_xxx[j]; //x|xx
-    dghpsi[j][0](0, 1) = gh_xxy[j]; //x|xy
-    dghpsi[j][0](0, 2) = gh_xxz[j]; //x|xz
-    dghpsi[j][0](1, 0) = gh_xxy[j]; //x|yx = xxy
-    dghpsi[j][0](1, 1) = gh_xyy[j]; //x|yy
-    dghpsi[j][0](1, 2) = gh_xyz[j]; //x|yz
-    dghpsi[j][0](2, 0) = gh_xxz[j]; //x|zx = xxz
-    dghpsi[j][0](2, 1) = gh_xyz[j]; //x|zy = xyz
-    dghpsi[j][0](2, 2) = gh_xzz[j]; //x|zz
-
-    dghpsi[j][1](0, 0) = gh_xxy[j]; //y|xx = xxy
-    dghpsi[j][1](0, 1) = gh_xyy[j]; //y|xy = xyy
-    dghpsi[j][1](0, 2) = gh_xyz[j]; //y|xz = xyz
-    dghpsi[j][1](1, 0) = gh_xyy[j]; //y|yx = xyy
-    dghpsi[j][1](1, 1) = gh_yyy[j]; //y|yy
-    dghpsi[j][1](1, 2) = gh_yyz[j]; //y|yz
-    dghpsi[j][1](2, 0) = gh_xyz[j]; //y|zx = xyz
-    dghpsi[j][1](2, 1) = gh_xyy[j]; //y|xy = xyy
-    dghpsi[j][1](2, 2) = gh_yzz[j]; //y|zz
-
-    dghpsi[j][2](0, 0) = gh_xzz[j]; //z|xx = xzz
-    dghpsi[j][2](0, 1) = gh_xyz[j]; //z|xy = xyz
-    dghpsi[j][2](0, 2) = gh_xzz[j]; //z|xz = xzz
-    dghpsi[j][2](1, 0) = gh_xyz[j]; //z|yx = xyz
-    dghpsi[j][2](1, 1) = gh_yyz[j]; //z|yy = yyz
-    dghpsi[j][2](1, 2) = gh_yzz[j]; //z|yz = yzz
-    dghpsi[j][2](2, 0) = gh_xzz[j]; //z|zx = xzz
-    dghpsi[j][2](2, 1) = gh_yzz[j]; //z|zy = yzz
-    dghpsi[j][2](2, 2) = gh_zzz[j]; //z|zz
-  }
-}
-
-template<class T>
-void LCAOrbitalSetT<T>::evaluate_ionderiv_v_row_impl(const vgl_type& temp, GradVector& dpsi) const
-{
-  const size_t output_size     = dpsi.size();
-  const T* restrict gx = temp.data(1);
-  const T* restrict gy = temp.data(2);
-  const T* restrict gz = temp.data(3);
-
-  for (size_t j = 0; j < output_size; j++)
-  {
-    //As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property that
-    // for an atomic center, the ion gradient is the negative of the elecron gradient.
-    // Hence minus signs for each of these.
-    dpsi[j][0] = -gx[j];
-    dpsi[j][1] = -gy[j];
-    dpsi[j][2] = -gz[j];
-  }
-}
-
-template<class T>
-void LCAOrbitalSetT<T>::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
-{
-  //TAKE CARE OF IDENTITY
-  {
-    ScopedTimer local(basis_timer_);
-    myBasisSet->evaluateVGL(P, iat, Temp);
-  }
-
-  if (Identity)
-    evaluate_vgl_impl(Temp, psi, dpsi, d2psi);
-  else
-  {
-    assert(psi.size() <= this->OrbitalSetSize);
-    {
-      ScopedTimer local(mo_timer_);
-      ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize);
-      Product_ABt(Temp, C_partial_view, Tempv);
-    }
-    evaluate_vgl_impl(Tempv, psi, dpsi, d2psi);
-  }
-}
-
-template<class T>
-void LCAOrbitalSetT<T>::mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                   const RefVectorWithLeader<ParticleSet>& P_list,
-                                   int iat,
-                                   const RefVector<ValueVector>& psi_v_list,
-                                   const RefVector<GradVector>& dpsi_v_list,
-                                   const RefVector<ValueVector>& d2psi_v_list) const
-{
-  assert(this == &spo_list.getLeader());
-  auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
-  auto& phi_vgl_v  = spo_leader.mw_mem_handle_.getResource().phi_vgl_v;
-
-  phi_vgl_v.resize(QMCTraits::DIM_VGL, spo_list.size(), this->OrbitalSetSize);
-  mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v);
-
-  const size_t nw = phi_vgl_v.size(1);
-
-  //TODO: make this cleaner?
-  for (int iw = 0; iw < nw; iw++)
-  {
-    const size_t output_size = psi_v_list[iw].get().size();
-    std::copy_n(phi_vgl_v.data_at(0, iw, 0), output_size, psi_v_list[iw].get().data());
-    std::copy_n(phi_vgl_v.data_at(4, iw, 0), output_size, d2psi_v_list[iw].get().data());
-    // grads are [dim, walker, orb] in phi_vgl_v
-    //           [walker][orb, dim] in dpsi_v_list
-    for (size_t idim = 0; idim < QMCTraits::DIM; idim++)
-      BLAS::copy(output_size, phi_vgl_v.data_at(idim + 1, iw, 0), 1, &dpsi_v_list[iw].get().data()[0][idim], QMCTraits::DIM);
-  }
-}
-
-template<class T>
-void LCAOrbitalSetT<T>::mw_evaluateVGLImplGEMM(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                           const RefVectorWithLeader<ParticleSet>& P_list,
-                                           int iat,
-                                           OffloadMWVGLArray& phi_vgl_v) const
-{
-  assert(this == &spo_list.getLeader());
-  auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
-  auto& basis_mw   = spo_leader.mw_mem_handle_.getResource().basis_mw;
-  basis_mw.resize(QMCTraits::DIM_VGL, spo_list.size(), BasisSetSize);
-
-  {
-    ScopedTimer local(basis_timer_);
-    myBasisSet->mw_evaluateVGL(P_list, iat, basis_mw);
-  }
-
-  if (Identity)
-  {
-    // output_size can be smaller than BasisSetSize
-    const size_t output_size = phi_vgl_v.size(2);
-    const size_t nw          = phi_vgl_v.size(1);
-
-    for (size_t idim = 0; idim < QMCTraits::DIM_VGL; idim++)
-      for (int iw = 0; iw < nw; iw++)
-        std::copy_n(basis_mw.data_at(idim, iw, 0), output_size, phi_vgl_v.data_at(idim, iw, 0));
-  }
-  else
-  {
-    const size_t requested_orb_size = phi_vgl_v.size(2);
-    assert(requested_orb_size <= this->OrbitalSetSize);
+    // TAKE CARE OF IDENTITY
     {
-      ScopedTimer local(mo_timer_);
-      ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize);
-      // TODO: make class for general blas interface in Platforms
-      // have instance of that class as member of LCAOrbitalSetT, call gemm through that
-      BLAS::gemm('T', 'N',
-                 requested_orb_size,        // MOs
-                 spo_list.size() * QMCTraits::DIM_VGL, // walkers * DIM_VGL
-                 BasisSetSize,              // AOs
-                 1, C_partial_view.data(), BasisSetSize, basis_mw.data(), BasisSetSize, 0, phi_vgl_v.data(),
-                 requested_orb_size);
-    }
-  }
-}
-
-template<class T>
-void LCAOrbitalSetT<T>::mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                     const RefVectorWithLeader<ParticleSet>& P_list,
-                                     int iat,
-                                     const RefVector<ValueVector>& psi_v_list) const
-{
-  assert(this == &spo_list.getLeader());
-  auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
-  auto& phi_v      = spo_leader.mw_mem_handle_.getResource().phi_v;
-  phi_v.resize(spo_list.size(), this->OrbitalSetSize);
-  mw_evaluateValueImplGEMM(spo_list, P_list, iat, phi_v);
-
-  const size_t output_size = phi_v.size(1);
-  const size_t nw          = phi_v.size(0);
-
-  for (int iw = 0; iw < nw; iw++)
-    std::copy_n(phi_v.data_at(iw, 0), output_size, psi_v_list[iw].get().data());
-}
-
-template<class T>
-void LCAOrbitalSetT<T>::mw_evaluateValueImplGEMM(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                             const RefVectorWithLeader<ParticleSet>& P_list,
-                                             int iat,
-                                             OffloadMWVArray& phi_v) const
-{
-  assert(this == &spo_list.getLeader());
-  auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
-  const size_t nw  = spo_list.size();
-  auto& basis_v_mw = spo_leader.mw_mem_handle_.getResource().basis_v_mw;
-  basis_v_mw.resize(nw, BasisSetSize);
-
-  myBasisSet->mw_evaluateValue(P_list, iat, basis_v_mw);
-
-  if (Identity)
-  {
-    std::copy_n(basis_v_mw.data_at(0, 0), this->OrbitalSetSize * nw, phi_v.data_at(0, 0));
-  }
-  else
-  {
-    const size_t requested_orb_size = phi_v.size(1);
-    assert(requested_orb_size <= this->OrbitalSetSize);
-    ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize);
-    BLAS::gemm('T', 'N',
-               requested_orb_size, // MOs
-               spo_list.size(),    // walkers
-               BasisSetSize,       // AOs
-               1, C_partial_view.data(), BasisSetSize, basis_v_mw.data(), BasisSetSize, 0, phi_v.data(),
-               requested_orb_size);
-  }
-}
-
-template<class T>
-void LCAOrbitalSetT<T>::mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                         const RefVectorWithLeader<const VirtualParticleSet>& vp_list,
-                                         const RefVector<ValueVector>& psi_list,
-                                         const std::vector<const T*>& invRow_ptr_list,
-                                         std::vector<std::vector<T>>& ratios_list) const
-{
-  const size_t nw = spo_list.size();
-  for (size_t iw = 0; iw < nw; iw++)
-  {
-    for (size_t iat = 0; iat < vp_list[iw].getTotalNum(); iat++)
+        ScopedTimer local(basis_timer_);
+        myBasisSet->evaluateVGL(P, iat, Temp);
+    }
+
+    if (Identity)
+        evaluate_vgl_impl(Temp, psi, dpsi, d2psi);
+    else {
+        assert(psi.size() <= this->OrbitalSetSize);
+        {
+            ScopedTimer local(mo_timer_);
+            ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize);
+            Product_ABt(Temp, C_partial_view, Tempv);
+        }
+        evaluate_vgl_impl(Tempv, psi, dpsi, d2psi);
+    }
+}
+
+template <class T>
+void
+LCAOrbitalSetT<T>::mw_evaluateVGL(
+    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
+    const RefVector<ValueVector>& psi_v_list,
+    const RefVector<GradVector>& dpsi_v_list,
+    const RefVector<ValueVector>& d2psi_v_list) const
+{
+    assert(this == &spo_list.getLeader());
+    auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+    auto& phi_vgl_v = spo_leader.mw_mem_handle_.getResource().phi_vgl_v;
+
+    phi_vgl_v.resize(QMCTraits::DIM_VGL, spo_list.size(), this->OrbitalSetSize);
+    mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v);
+
+    const size_t nw = phi_vgl_v.size(1);
+
+    // TODO: make this cleaner?
+    for (int iw = 0; iw < nw; iw++) {
+        const size_t output_size = psi_v_list[iw].get().size();
+        std::copy_n(phi_vgl_v.data_at(0, iw, 0), output_size,
+            psi_v_list[iw].get().data());
+        std::copy_n(phi_vgl_v.data_at(4, iw, 0), output_size,
+            d2psi_v_list[iw].get().data());
+        // grads are [dim, walker, orb] in phi_vgl_v
+        //           [walker][orb, dim] in dpsi_v_list
+        for (size_t idim = 0; idim < QMCTraits::DIM; idim++)
+            BLAS::copy(output_size, phi_vgl_v.data_at(idim + 1, iw, 0), 1,
+                &dpsi_v_list[iw].get().data()[0][idim], QMCTraits::DIM);
+    }
+}
+
+template <class T>
+void
+LCAOrbitalSetT<T>::mw_evaluateVGLImplGEMM(
+    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
+    OffloadMWVGLArray& phi_vgl_v) const
+{
+    assert(this == &spo_list.getLeader());
+    auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+    auto& basis_mw = spo_leader.mw_mem_handle_.getResource().basis_mw;
+    basis_mw.resize(QMCTraits::DIM_VGL, spo_list.size(), BasisSetSize);
+
     {
-      spo_list[iw].evaluateValue(vp_list[iw], iat, psi_list[iw]);
-      ratios_list[iw][iat] = simd::dot(psi_list[iw].get().data(), invRow_ptr_list[iw], psi_list[iw].get().size());
+        ScopedTimer local(basis_timer_);
+        myBasisSet->mw_evaluateVGL(P_list, iat, basis_mw);
+    }
+
+    if (Identity) {
+        // output_size can be smaller than BasisSetSize
+        const size_t output_size = phi_vgl_v.size(2);
+        const size_t nw = phi_vgl_v.size(1);
+
+        for (size_t idim = 0; idim < QMCTraits::DIM_VGL; idim++)
+            for (int iw = 0; iw < nw; iw++)
+                std::copy_n(basis_mw.data_at(idim, iw, 0), output_size,
+                    phi_vgl_v.data_at(idim, iw, 0));
+    }
+    else {
+        const size_t requested_orb_size = phi_vgl_v.size(2);
+        assert(requested_orb_size <= this->OrbitalSetSize);
+        {
+            ScopedTimer local(mo_timer_);
+            ValueMatrix C_partial_view(
+                C->data(), requested_orb_size, BasisSetSize);
+            // TODO: make class for general blas interface in Platforms
+            // have instance of that class as member of LCAOrbitalSetT, call
+            // gemm through that
+            BLAS::gemm('T', 'N',
+                requested_orb_size, // MOs
+                spo_list.size() * QMCTraits::DIM_VGL, // walkers * DIM_VGL
+                BasisSetSize, // AOs
+                1, C_partial_view.data(), BasisSetSize, basis_mw.data(),
+                BasisSetSize, 0, phi_vgl_v.data(), requested_orb_size);
+        }
     }
-  }
 }
 
-template<class T>
-void LCAOrbitalSetT<T>::evaluateDetRatios(const VirtualParticleSet& VP,
-                                      ValueVector& psi,
-                                      const ValueVector& psiinv,
-                                      std::vector<T>& ratios)
+template <class T>
+void
+LCAOrbitalSetT<T>::mw_evaluateValue(
+    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
+    const RefVector<ValueVector>& psi_v_list) const
 {
-  Vector<T> vTemp(Temp.data(0), BasisSetSize);
-  Vector<T> invTemp(Temp.data(1), BasisSetSize);
+    assert(this == &spo_list.getLeader());
+    auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+    auto& phi_v = spo_leader.mw_mem_handle_.getResource().phi_v;
+    phi_v.resize(spo_list.size(), this->OrbitalSetSize);
+    mw_evaluateValueImplGEMM(spo_list, P_list, iat, phi_v);
 
-  {
-    ScopedTimer local(mo_timer_);
-    // when only a subset of orbitals is used, extract limited rows of C.
-    Matrix<T> C_occupied(C->data(), psiinv.size(), BasisSetSize);
-    MatrixOperators::product_Atx(C_occupied, psiinv, invTemp);
-  }
+    const size_t output_size = phi_v.size(1);
+    const size_t nw = phi_v.size(0);
+
+    for (int iw = 0; iw < nw; iw++)
+        std::copy_n(
+            phi_v.data_at(iw, 0), output_size, psi_v_list[iw].get().data());
+}
+
+template <class T>
+void
+LCAOrbitalSetT<T>::mw_evaluateValueImplGEMM(
+    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
+    OffloadMWVArray& phi_v) const
+{
+    assert(this == &spo_list.getLeader());
+    auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+    const size_t nw = spo_list.size();
+    auto& basis_v_mw = spo_leader.mw_mem_handle_.getResource().basis_v_mw;
+    basis_v_mw.resize(nw, BasisSetSize);
+
+    myBasisSet->mw_evaluateValue(P_list, iat, basis_v_mw);
+
+    if (Identity) {
+        std::copy_n(basis_v_mw.data_at(0, 0), this->OrbitalSetSize * nw,
+            phi_v.data_at(0, 0));
+    }
+    else {
+        const size_t requested_orb_size = phi_v.size(1);
+        assert(requested_orb_size <= this->OrbitalSetSize);
+        ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize);
+        BLAS::gemm('T', 'N',
+            requested_orb_size, // MOs
+            spo_list.size(), // walkers
+            BasisSetSize, // AOs
+            1, C_partial_view.data(), BasisSetSize, basis_v_mw.data(),
+            BasisSetSize, 0, phi_v.data(), requested_orb_size);
+    }
+}
+
+template <class T>
+void
+LCAOrbitalSetT<T>::mw_evaluateDetRatios(
+    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+    const RefVectorWithLeader<const VirtualParticleSetT<T>>& vp_list,
+    const RefVector<ValueVector>& psi_list,
+    const std::vector<const T*>& invRow_ptr_list,
+    std::vector<std::vector<T>>& ratios_list) const
+{
+    const size_t nw = spo_list.size();
+    for (size_t iw = 0; iw < nw; iw++) {
+        for (size_t iat = 0; iat < vp_list[iw].getTotalNum(); iat++) {
+            spo_list[iw].evaluateValue(vp_list[iw], iat, psi_list[iw]);
+            ratios_list[iw][iat] = simd::dot(psi_list[iw].get().data(),
+                invRow_ptr_list[iw], psi_list[iw].get().size());
+        }
+    }
+}
+
+template <class T>
+void
+LCAOrbitalSetT<T>::evaluateDetRatios(const VirtualParticleSetT<T>& VP,
+    ValueVector& psi, const ValueVector& psiinv, std::vector<T>& ratios)
+{
+    Vector<T> vTemp(Temp.data(0), BasisSetSize);
+    Vector<T> invTemp(Temp.data(1), BasisSetSize);
 
-  for (size_t j = 0; j < VP.getTotalNum(); j++)
-  {
     {
-      ScopedTimer local(basis_timer_);
-      myBasisSet->evaluateV(VP, j, vTemp.data());
-    }
-    ratios[j] = simd::dot(vTemp.data(), invTemp.data(), BasisSetSize);
-  }
-}
-
-template<class T>
-void LCAOrbitalSetT<T>::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                                   const RefVectorWithLeader<ParticleSet>& P_list,
-                                                   int iat,
-                                                   const std::vector<const T*>& invRow_ptr_list,
-                                                   OffloadMWVGLArray& phi_vgl_v,
-                                                   std::vector<T>& ratios,
-                                                   std::vector<GradType>& grads) const
-{
-  assert(this == &spo_list.getLeader());
-  assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL);
-  assert(phi_vgl_v.size(1) == spo_list.size());
-
-  mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v);
-  // Device data of phi_vgl_v must be up-to-date upon return
-  phi_vgl_v.updateTo();
-
-  const size_t nw             = spo_list.size();
-  const size_t norb_requested = phi_vgl_v.size(2);
-  for (int iw = 0; iw < nw; iw++)
-  {
-    ratios[iw] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(0, iw, 0), norb_requested);
-    GradType dphi;
-    for (size_t idim = 0; idim < QMCTraits::DIM; idim++)
-      dphi[idim] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(idim + 1, iw, 0), norb_requested) / ratios[iw];
-    grads[iw] = dphi;
-  }
-}
-
-template<class T>
-void LCAOrbitalSetT<T>::evaluateVGH(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, HessVector& dhpsi)
-{
-  //TAKE CARE OF IDENTITY
-  myBasisSet->evaluateVGH(P, iat, Temph);
-  if (Identity)
-    evaluate_vgh_impl(Temph, psi, dpsi, dhpsi);
-  else
-  {
-    assert(psi.size() <= this->OrbitalSetSize);
-    ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize);
-    Product_ABt(Temph, C_partial_view, Temphv);
-    evaluate_vgh_impl(Temphv, psi, dpsi, dhpsi);
-  }
-}
-
-template<class T>
-void LCAOrbitalSetT<T>::evaluateVGHGH(const ParticleSet& P,
-                                  int iat,
-                                  ValueVector& psi,
-                                  GradVector& dpsi,
-                                  HessVector& dhpsi,
-                                  GGGVector& dghpsi)
-{
-  // APP_ABORT("LCAORbitalSet::evaluate(psi,gpsi,hpsi,ghpsi) not implemented\n");
-
-  //TAKE CARE OF IDENTITY
-  myBasisSet->evaluateVGHGH(P, iat, Tempgh);
-  if (Identity)
-    evaluate_vghgh_impl(Tempgh, psi, dpsi, dhpsi, dghpsi);
-  else
-  {
-    assert(psi.size() <= this->OrbitalSetSize);
-    ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize);
-    Product_ABt(Tempgh, C_partial_view, Tempghv);
-    evaluate_vghgh_impl(Tempghv, psi, dpsi, dhpsi, dghpsi);
-  }
+        ScopedTimer local(mo_timer_);
+        // when only a subset of orbitals is used, extract limited rows of C.
+        Matrix<T> C_occupied(C->data(), psiinv.size(), BasisSetSize);
+        MatrixOperators::product_Atx(C_occupied, psiinv, invTemp);
+    }
+
+    for (size_t j = 0; j < VP.getTotalNum(); j++) {
+        {
+            ScopedTimer local(basis_timer_);
+            myBasisSet->evaluateV(VP, j, vTemp.data());
+        }
+        ratios[j] = simd::dot(vTemp.data(), invTemp.data(), BasisSetSize);
+    }
+}
+
+template <class T>
+void
+LCAOrbitalSetT<T>::mw_evaluateVGLandDetRatioGrads(
+    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
+    const std::vector<const T*>& invRow_ptr_list, OffloadMWVGLArray& phi_vgl_v,
+    std::vector<T>& ratios, std::vector<GradType>& grads) const
+{
+    assert(this == &spo_list.getLeader());
+    assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL);
+    assert(phi_vgl_v.size(1) == spo_list.size());
+
+    mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v);
+    // Device data of phi_vgl_v must be up-to-date upon return
+    phi_vgl_v.updateTo();
+
+    const size_t nw = spo_list.size();
+    const size_t norb_requested = phi_vgl_v.size(2);
+    for (int iw = 0; iw < nw; iw++) {
+        ratios[iw] = simd::dot(
+            invRow_ptr_list[iw], phi_vgl_v.data_at(0, iw, 0), norb_requested);
+        GradType dphi;
+        for (size_t idim = 0; idim < QMCTraits::DIM; idim++)
+            dphi[idim] =
+                simd::dot(invRow_ptr_list[iw],
+                    phi_vgl_v.data_at(idim + 1, iw, 0), norb_requested) /
+                ratios[iw];
+        grads[iw] = dphi;
+    }
+}
+
+template <class T>
+void
+LCAOrbitalSetT<T>::evaluateVGH(const ParticleSetT<T>& P, int iat,
+    ValueVector& psi, GradVector& dpsi, HessVector& dhpsi)
+{
+    // When Identity is set, the basis output is used as-is (no C product).
+    myBasisSet->evaluateVGH(P, iat, Temph);
+    if (Identity)
+        evaluate_vgh_impl(Temph, psi, dpsi, dhpsi);
+    else {
+        assert(psi.size() <= this->OrbitalSetSize);
+        ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize);
+        Product_ABt(Temph, C_partial_view, Temphv);
+        evaluate_vgh_impl(Temphv, psi, dpsi, dhpsi);
+    }
+}
+
+template <class T>
+void
+LCAOrbitalSetT<T>::evaluateVGHGH(const ParticleSetT<T>& P, int iat,
+    ValueVector& psi, GradVector& dpsi, HessVector& dhpsi, GGGVector& dghpsi)
+{
+    // APP_ABORT("LCAORbitalSet::evaluate(psi,gpsi,hpsi,ghpsi) not
+    // implemented\n");
+
+    // When Identity is set, the basis output is used as-is (no C product).
+    myBasisSet->evaluateVGHGH(P, iat, Tempgh);
+    if (Identity)
+        evaluate_vghgh_impl(Tempgh, psi, dpsi, dhpsi, dghpsi);
+    else {
+        assert(psi.size() <= this->OrbitalSetSize);
+        ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize);
+        Product_ABt(Tempgh, C_partial_view, Tempghv);
+        evaluate_vghgh_impl(Tempghv, psi, dpsi, dhpsi, dghpsi);
+    }
 }
 
 /* implement using gemm algorithm */
-template<class T>
-inline void LCAOrbitalSetT<T>::evaluate_vgl_impl(const vgl_type& temp,
-                                             int i,
-                                             ValueMatrix& logdet,
-                                             GradMatrix& dlogdet,
-                                             ValueMatrix& d2logdet) const
-{
-  const size_t output_size = logdet.cols();
-  std::copy_n(temp.data(0), output_size, logdet[i]);
-  const T* restrict gx = temp.data(1);
-  const T* restrict gy = temp.data(2);
-  const T* restrict gz = temp.data(3);
-  for (size_t j = 0; j < output_size; j++)
-  {
-    dlogdet[i][j][0] = gx[j];
-    dlogdet[i][j][1] = gy[j];
-    dlogdet[i][j][2] = gz[j];
-  }
-  std::copy_n(temp.data(4), output_size, d2logdet[i]);
-}
-template<class T>
-void LCAOrbitalSetT<T>::evaluate_vgh_impl(const vgh_type& temp,
-                                             int i,
-                                             ValueMatrix& psi,
-                                             GradMatrix& dpsi,
-                                             HessMatrix& d2psi) const
-{
-  const size_t output_size = psi.cols();
-  std::copy_n(temp.data(0), output_size, psi[i]);
-  const T* restrict gx  = temp.data(1);
-  const T* restrict gy  = temp.data(2);
-  const T* restrict gz  = temp.data(3);
-  const T* restrict hxx = temp.data(4);
-  const T* restrict hxy = temp.data(5);
-  const T* restrict hxz = temp.data(6);
-  const T* restrict hyy = temp.data(7);
-  const T* restrict hyz = temp.data(8);
-  const T* restrict hzz = temp.data(9);
-
-  for (size_t j = 0; j < output_size; j++)
-  {
-    dpsi[i][j][0] = gx[j];
-    dpsi[i][j][1] = gy[j];
-    dpsi[i][j][2] = gz[j];
-
-    d2psi[i][j](0, 0) = hxx[j];
-    d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j];
-    d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j];
-    d2psi[i][j](1, 1)                     = hyy[j];
-    d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j];
-    d2psi[i][j](2, 2)                     = hzz[j];
-  }
-}
-
-template<class T>
-void LCAOrbitalSetT<T>::evaluate_ionderiv_v_impl(const vgl_type& temp, int i, GradMatrix& dpsi) const
-{
-  const size_t output_size     = dpsi.cols();
-  const T* restrict gx = temp.data(1);
-  const T* restrict gy = temp.data(2);
-  const T* restrict gz = temp.data(3);
-
-  for (size_t j = 0; j < output_size; j++)
-  {
-    //As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property that
-    // for an atomic center, the ion gradient is the negative of the elecron gradient.
-    // Hence minus signs for each of these.
-    dpsi[i][j][0] = -gx[j];
-    dpsi[i][j][1] = -gy[j];
-    dpsi[i][j][2] = -gz[j];
-  }
-}
-
-template<class T>
-void LCAOrbitalSetT<T>::evaluate_ionderiv_vgl_impl(const vghgh_type& temp,
-                                                      int i,
-                                                      GradMatrix& dpsi,
-                                                      HessMatrix& dgpsi,
-                                                      GradMatrix& dlpsi) const
-{
-  const size_t output_size         = dpsi.cols();
-  const T* restrict gx     = temp.data(1);
-  const T* restrict gy     = temp.data(2);
-  const T* restrict gz     = temp.data(3);
-  const T* restrict hxx    = temp.data(4);
-  const T* restrict hxy    = temp.data(5);
-  const T* restrict hxz    = temp.data(6);
-  const T* restrict hyy    = temp.data(7);
-  const T* restrict hyz    = temp.data(8);
-  const T* restrict hzz    = temp.data(9);
-  const T* restrict gh_xxx = temp.data(10);
-  const T* restrict gh_xxy = temp.data(11);
-  const T* restrict gh_xxz = temp.data(12);
-  const T* restrict gh_xyy = temp.data(13);
-  const T* restrict gh_xzz = temp.data(15);
-  const T* restrict gh_yyy = temp.data(16);
-  const T* restrict gh_yyz = temp.data(17);
-  const T* restrict gh_yzz = temp.data(18);
-  const T* restrict gh_zzz = temp.data(19);
-
-  for (size_t j = 0; j < output_size; j++)
-  {
-    //As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property that
-    // for an atomic center, the ion gradient is the negative of the elecron gradient.
-    // Hence minus signs for each of these.
-    dpsi[i][j][0] = -gx[j];
-    dpsi[i][j][1] = -gy[j];
-    dpsi[i][j][2] = -gz[j];
-
-    dgpsi[i][j](0, 0) = -hxx[j];
-    dgpsi[i][j](0, 1) = dgpsi[i][j](1, 0) = -hxy[j];
-    dgpsi[i][j](0, 2) = dgpsi[i][j](2, 0) = -hxz[j];
-    dgpsi[i][j](1, 1)                     = -hyy[j];
-    dgpsi[i][j](2, 1) = dgpsi[i][j](1, 2) = -hyz[j];
-    dgpsi[i][j](2, 2)                     = -hzz[j];
-
-    //Since this returns the ion gradient of the laplacian, we have to trace the grad hessian vector.
-    dlpsi[i][j][0] = -(gh_xxx[j] + gh_xyy[j] + gh_xzz[j]);
-    dlpsi[i][j][1] = -(gh_xxy[j] + gh_yyy[j] + gh_yzz[j]);
-    dlpsi[i][j][2] = -(gh_xxz[j] + gh_yyz[j] + gh_zzz[j]);
-  }
-}
-
-template<class T>
-void LCAOrbitalSetT<T>::evaluate_notranspose(const ParticleSet& P,
-                                         int first,
-                                         int last,
-                                         ValueMatrix& logdet,
-                                         GradMatrix& dlogdet,
-                                         ValueMatrix& d2logdet)
-{
-  if (Identity)
-  {
-    for (size_t i = 0, iat = first; iat < last; i++, iat++)
-    {
-      myBasisSet->evaluateVGL(P, iat, Temp);
-      evaluate_vgl_impl(Temp, i, logdet, dlogdet, d2logdet);
-    }
-  }
-  else
-  {
-    assert(logdet.cols() <= this->OrbitalSetSize);
-    ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize);
-    for (size_t i = 0, iat = first; iat < last; i++, iat++)
-    {
-      myBasisSet->evaluateVGL(P, iat, Temp);
-      Product_ABt(Temp, C_partial_view, Tempv);
-      evaluate_vgl_impl(Tempv, i, logdet, dlogdet, d2logdet);
+template <class T>
+inline void
+LCAOrbitalSetT<T>::evaluate_vgl_impl(const vgl_type& temp, int i,
+    ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) const
+{
+    const size_t output_size = logdet.cols();
+    std::copy_n(temp.data(0), output_size, logdet[i]);
+    const T* restrict gx = temp.data(1);
+    const T* restrict gy = temp.data(2);
+    const T* restrict gz = temp.data(3);
+    for (size_t j = 0; j < output_size; j++) {
+        dlogdet[i][j][0] = gx[j];
+        dlogdet[i][j][1] = gy[j];
+        dlogdet[i][j][2] = gz[j];
+    }
+    std::copy_n(temp.data(4), output_size, d2logdet[i]);
+}
+template <class T>
+void
+LCAOrbitalSetT<T>::evaluate_vgh_impl(const vgh_type& temp, int i,
+    ValueMatrix& psi, GradMatrix& dpsi, HessMatrix& d2psi) const
+{
+    const size_t output_size = psi.cols();
+    std::copy_n(temp.data(0), output_size, psi[i]);
+    const T* restrict gx = temp.data(1);
+    const T* restrict gy = temp.data(2);
+    const T* restrict gz = temp.data(3);
+    const T* restrict hxx = temp.data(4);
+    const T* restrict hxy = temp.data(5);
+    const T* restrict hxz = temp.data(6);
+    const T* restrict hyy = temp.data(7);
+    const T* restrict hyz = temp.data(8);
+    const T* restrict hzz = temp.data(9);
+
+    for (size_t j = 0; j < output_size; j++) {
+        dpsi[i][j][0] = gx[j];
+        dpsi[i][j][1] = gy[j];
+        dpsi[i][j][2] = gz[j];
+
+        d2psi[i][j](0, 0) = hxx[j];
+        d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j];
+        d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j];
+        d2psi[i][j](1, 1) = hyy[j];
+        d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j];
+        d2psi[i][j](2, 2) = hzz[j];
     }
-  }
 }
 
-template<class T>
-void LCAOrbitalSetT<T>::evaluate_notranspose(const ParticleSet& P,
-                                         int first,
-                                         int last,
-                                         ValueMatrix& logdet,
-                                         GradMatrix& dlogdet,
-                                         HessMatrix& grad_grad_logdet)
+template <class T>
+void
+LCAOrbitalSetT<T>::evaluate_ionderiv_v_impl(
+    const vgl_type& temp, int i, GradMatrix& dpsi) const
+{
+    const size_t output_size = dpsi.cols();
+    const T* restrict gx = temp.data(1);
+    const T* restrict gy = temp.data(2);
+    const T* restrict gz = temp.data(3);
+
+    for (size_t j = 0; j < output_size; j++) {
+        // As mentioned in SoaLocalizedBasisSet, LCAOs have the nice
+        // property that, for an atomic center, the ion gradient is the
+        // negative of the electron gradient. Hence the minus sign on each
+        // of these components.
+        dpsi[i][j][0] = -gx[j];
+        dpsi[i][j][1] = -gy[j];
+        dpsi[i][j][2] = -gz[j];
+    }
+}
+
+template <class T>
+void
+LCAOrbitalSetT<T>::evaluate_ionderiv_vgl_impl(const vghgh_type& temp, int i,
+    GradMatrix& dpsi, HessMatrix& dgpsi, GradMatrix& dlpsi) const
+{
+    const size_t output_size = dpsi.cols();
+    const T* restrict gx = temp.data(1);
+    const T* restrict gy = temp.data(2);
+    const T* restrict gz = temp.data(3);
+    const T* restrict hxx = temp.data(4);
+    const T* restrict hxy = temp.data(5);
+    const T* restrict hxz = temp.data(6);
+    const T* restrict hyy = temp.data(7);
+    const T* restrict hyz = temp.data(8);
+    const T* restrict hzz = temp.data(9);
+    const T* restrict gh_xxx = temp.data(10);
+    const T* restrict gh_xxy = temp.data(11);
+    const T* restrict gh_xxz = temp.data(12);
+    const T* restrict gh_xyy = temp.data(13);
+    const T* restrict gh_xzz = temp.data(15);
+    const T* restrict gh_yyy = temp.data(16);
+    const T* restrict gh_yyz = temp.data(17);
+    const T* restrict gh_yzz = temp.data(18);
+    const T* restrict gh_zzz = temp.data(19);
+
+    for (size_t j = 0; j < output_size; j++) {
+        // As mentioned in SoaLocalizedBasisSet, LCAOs have the nice
+        // property that, for an atomic center, the ion gradient is the
+        // negative of the electron gradient. Hence the minus sign on each
+        // of these components.
+        dpsi[i][j][0] = -gx[j];
+        dpsi[i][j][1] = -gy[j];
+        dpsi[i][j][2] = -gz[j];
+
+        dgpsi[i][j](0, 0) = -hxx[j];
+        dgpsi[i][j](0, 1) = dgpsi[i][j](1, 0) = -hxy[j];
+        dgpsi[i][j](0, 2) = dgpsi[i][j](2, 0) = -hxz[j];
+        dgpsi[i][j](1, 1) = -hyy[j];
+        dgpsi[i][j](2, 1) = dgpsi[i][j](1, 2) = -hyz[j];
+        dgpsi[i][j](2, 2) = -hzz[j];
+
+        // Since this returns the ion gradient of the laplacian, we have to
+        // trace the grad hessian vector.
+        dlpsi[i][j][0] = -(gh_xxx[j] + gh_xyy[j] + gh_xzz[j]);
+        dlpsi[i][j][1] = -(gh_xxy[j] + gh_yyy[j] + gh_yzz[j]);
+        dlpsi[i][j][2] = -(gh_xxz[j] + gh_yyz[j] + gh_zzz[j]);
+    }
+}
+
+template <class T>
+void
+LCAOrbitalSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
+    int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet)
 {
-  if (Identity)
-  {
-    for (size_t i = 0, iat = first; iat < last; i++, iat++)
-    {
-      myBasisSet->evaluateVGH(P, iat, Temph);
-      evaluate_vgh_impl(Temph, i, logdet, dlogdet, grad_grad_logdet);
-    }
-  }
-  else
-  {
-    assert(logdet.cols() <= this->OrbitalSetSize);
-    ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize);
-    for (size_t i = 0, iat = first; iat < last; i++, iat++)
-    {
-      myBasisSet->evaluateVGH(P, iat, Temph);
-      Product_ABt(Temph, C_partial_view, Temphv);
-      evaluate_vgh_impl(Temphv, i, logdet, dlogdet, grad_grad_logdet);
+    if (Identity) {
+        for (size_t i = 0, iat = first; iat < last; i++, iat++) {
+            myBasisSet->evaluateVGL(P, iat, Temp);
+            evaluate_vgl_impl(Temp, i, logdet, dlogdet, d2logdet);
+        }
+    }
+    else {
+        assert(logdet.cols() <= this->OrbitalSetSize);
+        ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize);
+        for (size_t i = 0, iat = first; iat < last; i++, iat++) {
+            myBasisSet->evaluateVGL(P, iat, Temp);
+            Product_ABt(Temp, C_partial_view, Tempv);
+            evaluate_vgl_impl(Tempv, i, logdet, dlogdet, d2logdet);
+        }
     }
-  }
 }
 
-template<class T>
-void LCAOrbitalSetT<T>::evaluate_notranspose(const ParticleSet& P,
-                                         int first,
-                                         int last,
-                                         ValueMatrix& logdet,
-                                         GradMatrix& dlogdet,
-                                         HessMatrix& grad_grad_logdet,
-                                         GGGMatrix& grad_grad_grad_logdet)
+template <class T>
+void
+LCAOrbitalSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
+    int last, ValueMatrix& logdet, GradMatrix& dlogdet,
+    HessMatrix& grad_grad_logdet)
 {
-  if (Identity)
-  {
-    for (size_t i = 0, iat = first; iat < last; i++, iat++)
-    {
-      myBasisSet->evaluateVGHGH(P, iat, Tempgh);
-      evaluate_vghgh_impl(Tempgh, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet);
-    }
-  }
-  else
-  {
-    assert(logdet.cols() <= this->OrbitalSetSize);
-    ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize);
-    for (size_t i = 0, iat = first; iat < last; i++, iat++)
-    {
-      myBasisSet->evaluateVGHGH(P, iat, this->Tempgh);
-      Product_ABt(this->Tempgh, C_partial_view, this->Tempghv);
-      evaluate_vghgh_impl(this->Tempghv, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet);
+    if (Identity) {
+        for (size_t i = 0, iat = first; iat < last; i++, iat++) {
+            myBasisSet->evaluateVGH(P, iat, Temph);
+            evaluate_vgh_impl(Temph, i, logdet, dlogdet, grad_grad_logdet);
+        }
+    }
+    else {
+        assert(logdet.cols() <= this->OrbitalSetSize);
+        ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize);
+        for (size_t i = 0, iat = first; iat < last; i++, iat++) {
+            myBasisSet->evaluateVGH(P, iat, Temph);
+            Product_ABt(Temph, C_partial_view, Temphv);
+            evaluate_vgh_impl(Temphv, i, logdet, dlogdet, grad_grad_logdet);
+        }
     }
-  }
 }
 
-template<class T>
-void LCAOrbitalSetT<T>::evaluateGradSource(const ParticleSet& P,
-                                       int first,
-                                       int last,
-                                       const ParticleSet& source,
-                                       int iat_src,
-                                       GradMatrix& gradphi)
+template <class T>
+void
+LCAOrbitalSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
+    int last, ValueMatrix& logdet, GradMatrix& dlogdet,
+    HessMatrix& grad_grad_logdet, GGGMatrix& grad_grad_grad_logdet)
 {
-  if (Identity)
-  {
-    for (size_t i = 0, iat = first; iat < last; i++, iat++)
-    {
-      myBasisSet->evaluateGradSourceV(P, iat, source, iat_src, this->Temp);
-      evaluate_ionderiv_v_impl(Temp, i, gradphi);
+    if (Identity) {
+        for (size_t i = 0, iat = first; iat < last; i++, iat++) {
+            myBasisSet->evaluateVGHGH(P, iat, Tempgh);
+            evaluate_vghgh_impl(Tempgh, i, logdet, dlogdet, grad_grad_logdet,
+                grad_grad_grad_logdet);
+        }
     }
-  }
-  else
-  {
-    for (size_t i = 0, iat = first; iat < last; i++, iat++)
-    {
-      myBasisSet->evaluateGradSourceV(P, iat, source, iat_src, this->Temp);
-      Product_ABt(this->Temp, *C, this->Tempv);
-      evaluate_ionderiv_v_impl(this->Tempv, i, gradphi);
-    }
-  }
-}
-
-template<class T>
-void LCAOrbitalSetT<T>::evaluateGradSource(const ParticleSet& P,
-                                       int first,
-                                       int last,
-                                       const ParticleSet& source,
-                                       int iat_src,
-                                       GradMatrix& grad_phi,
-                                       HessMatrix& grad_grad_phi,
-                                       GradMatrix& grad_lapl_phi)
-{
-  if (Identity)
-  {
-    for (size_t i = 0, iat = first; iat < last; i++, iat++)
-    {
-      myBasisSet->evaluateGradSourceVGL(P, iat, source, iat_src, this->Tempgh);
-      evaluate_ionderiv_vgl_impl(this->Tempgh, i, grad_phi, grad_grad_phi, grad_lapl_phi);
+    else {
+        assert(logdet.cols() <= this->OrbitalSetSize);
+        ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize);
+        for (size_t i = 0, iat = first; iat < last; i++, iat++) {
+            myBasisSet->evaluateVGHGH(P, iat, this->Tempgh);
+            Product_ABt(this->Tempgh, C_partial_view, this->Tempghv);
+            evaluate_vghgh_impl(this->Tempghv, i, logdet, dlogdet,
+                grad_grad_logdet, grad_grad_grad_logdet);
+        }
     }
-  }
-  else
-  {
-    for (size_t i = 0, iat = first; iat < last; i++, iat++)
-    {
-      myBasisSet->evaluateGradSourceVGL(P, iat, source, iat_src, this->Tempgh);
-      Product_ABt(this->Tempgh, *C, this->Tempghv);
-      evaluate_ionderiv_vgl_impl(this->Tempghv, i, grad_phi, grad_grad_phi, grad_lapl_phi);
-    }
-  }
-}
-
-template<class T>
-void LCAOrbitalSetT<T>::evaluateGradSourceRow(const ParticleSet& P,
-                                          int iel,
-                                          const ParticleSet& source,
-                                          int iat_src,
-                                          GradVector& gradphi)
-{
-  if (Identity)
-  {
-    myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp);
-    evaluate_ionderiv_v_row_impl(this->Temp, gradphi);
-  }
-  else
-  {
-    myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp);
-    Product_ABt(Temp, *C, this->Tempv);
-    evaluate_ionderiv_v_row_impl(this->Tempv, gradphi);
-  }
-}
-
-template<class T>
-void LCAOrbitalSetT<T>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy)
-{
-  if (!use_stored_copy)
-    *C_copy = *C;
-  //gemm is out-of-place
-  BLAS::gemm('N', 'T', BasisSetSize, this->OrbitalSetSize, this->OrbitalSetSize, RealType(1.0), C_copy->data(), BasisSetSize,
-             rot_mat.data(), this->OrbitalSetSize, RealType(0.0), C->data(), BasisSetSize);
-
-  /* debugging code
-  app_log() << "PRINTING MO COEFFICIENTS AFTER ROTATION " << objectName << std::endl;
-  for (int j = 0; j < OrbitalSetSize; j++)
-    for (int i = 0; i < BasisSetSize; i++)
-    {
-      app_log() << " " << std::right << std::fixed << std::setprecision(16) << std::setw(23) << std::scientific
-                << *(C->data() + j * BasisSetSize + i);
+}
 
-      if ((j * BasisSetSize + i + 1) % 4 == 0)
-        app_log() << std::endl;
+template <class T>
+void
+LCAOrbitalSetT<T>::evaluateGradSource(const ParticleSetT<T>& P, int first,
+    int last, const ParticleSetT<T>& source, int iat_src, GradMatrix& gradphi)
+{
+    // For each particle iat in [first, last): fill Temp through
+    // evaluateGradSourceV for source index iat_src, then hand either Temp
+    // (Identity) or Temp * C^t (Tempv) to evaluate_ionderiv_v_impl at `row`.
+    size_t row = 0;
+    for (size_t iat = first; iat < last; ++iat, ++row) {
+        myBasisSet->evaluateGradSourceV(
+            P, iat, source, iat_src, this->Temp);
+        if (Identity) {
+            evaluate_ionderiv_v_impl(this->Temp, row, gradphi);
+        }
+        else {
+            Product_ABt(this->Temp, *C, this->Tempv);
+            evaluate_ionderiv_v_impl(this->Tempv, row, gradphi);
+        }
+    }
+}
+
+template <class T>
+void
+LCAOrbitalSetT<T>::evaluateGradSource(const ParticleSetT<T>& P, int first,
+    int last, const ParticleSetT<T>& source, int iat_src, GradMatrix& grad_phi,
+    HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi)
+{
+    // For each particle iat in [first, last): fill Tempgh through
+    // evaluateGradSourceVGL for source index iat_src, then pass Tempgh
+    // (Identity) or Tempgh * C^t (Tempghv) to evaluate_ionderiv_vgl_impl.
+    size_t row = 0;
+    for (size_t iat = first; iat < last; ++iat, ++row) {
+        myBasisSet->evaluateGradSourceVGL(
+            P, iat, source, iat_src, this->Tempgh);
+        if (Identity) {
+            evaluate_ionderiv_vgl_impl(
+                this->Tempgh, row, grad_phi, grad_grad_phi, grad_lapl_phi);
+        }
+        else {
+            Product_ABt(this->Tempgh, *C, this->Tempghv);
+            evaluate_ionderiv_vgl_impl(
+                this->Tempghv, row, grad_phi, grad_grad_phi, grad_lapl_phi);
+        }
     }
-  */
+}
+
+template <class T>
+void
+LCAOrbitalSetT<T>::evaluateGradSourceRow(const ParticleSetT<T>& P, int iel,
+    const ParticleSetT<T>& source, int iat_src, GradVector& gradphi)
+{
+    // Temp is filled the same way on both paths; only Identity skips C.
+    myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp);
+    if (Identity) {
+        evaluate_ionderiv_v_row_impl(this->Temp, gradphi);
+    }
+    else {
+        Product_ABt(this->Temp, *C, this->Tempv);
+        evaluate_ionderiv_v_row_impl(this->Tempv, gradphi);
+    }
+}
+
+template <class T>
+void
+LCAOrbitalSetT<T>::applyRotation(
+    const ValueMatrix& rot_mat, bool use_stored_copy)
+{
+    // Overwrite C with the gemm product of C_copy and rot_mat ('N','T' flags).
+    // Unless use_stored_copy is set, C_copy is first refreshed from C.
+    if (!use_stored_copy)
+        *C_copy = *C;
+    // gemm is out-of-place: C_copy and rot_mat are inputs, C is the output
+    BLAS::gemm('N', 'T', BasisSetSize, this->OrbitalSetSize,
+        this->OrbitalSetSize, RealType(1.0), C_copy->data(), BasisSetSize,
+        rot_mat.data(), this->OrbitalSetSize, RealType(0.0), C->data(),
+        BasisSetSize);
+    /* debugging code (disabled)
+    app_log() << "PRINTING MO COEFFICIENTS AFTER ROTATION " << objectName
+              << std::endl;
+    for (int j = 0; j < OrbitalSetSize; j++)
+      for (int i = 0; i < BasisSetSize; i++) {
+        app_log() << " " << std::right << std::fixed << std::setprecision(16)
+                  << std::setw(23) << std::scientific
+                  << *(C->data() + j * BasisSetSize + i);
+        if ((j * BasisSetSize + i + 1) % 4 == 0)
+          app_log() << std::endl;
+      }
+    */
 }
 
 // Class concrete types from ValueType
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h
index 974add33b6..f8bf40d017 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h
@@ -14,7 +14,7 @@
 
 #include "Numerics/DeterminantOperators.h"
 #include "Numerics/MatrixOperators.h"
-#include "QMCWaveFunctions/BasisSetBase.h"
+#include "QMCWaveFunctions/BasisSetBaseT.h"
 #include "QMCWaveFunctions/SPOSetT.h"
 
 #include <memory>
@@ -31,7 +31,7 @@ template <class T>
 class LCAOrbitalSetT : public SPOSetT<T>
 {
 public:
-    using basis_type = SoaBasisSetBase<T>;
+    using basis_type = SoaBasisSetBaseT<T>;
     using vgl_type = typename basis_type::vgl_type;
     using vgh_type = typename basis_type::vgh_type;
     using vghgh_type = typename basis_type::vghgh_type;
@@ -122,63 +122,63 @@ class LCAOrbitalSetT : public SPOSetT<T>
     checkObject() const final;
 
     void
-    evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) final;
+    evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) final;
 
     void
-    evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi,
+    evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
         GradVector& dpsi, ValueVector& d2psi) final;
 
     void
     mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSet>& P_list, int iat,
+        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
         const RefVector<ValueVector>& psi_v_list) const final;
 
     void
     mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSet>& P_list, int iat,
+        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
         const RefVector<ValueVector>& psi_v_list,
         const RefVector<GradVector>& dpsi_v_list,
         const RefVector<ValueVector>& d2psi_v_list) const final;
 
     void
     mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<const VirtualParticleSet>& vp_list,
+        const RefVectorWithLeader<const VirtualParticleSetT<T>>& vp_list,
         const RefVector<ValueVector>& psi_list,
         const std::vector<const T*>& invRow_ptr_list,
         std::vector<std::vector<T>>& ratios_list) const final;
 
     void
-    evaluateDetRatios(const VirtualParticleSet& VP, ValueVector& psi,
+    evaluateDetRatios(const VirtualParticleSetT<T>& VP, ValueVector& psi,
         const ValueVector& psiinv, std::vector<T>& ratios) final;
 
     void
     mw_evaluateVGLandDetRatioGrads(
         const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSet>& P_list, int iat,
+        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
         const std::vector<const T*>& invRow_ptr_list,
         OffloadMWVGLArray& phi_vgl_v, std::vector<T>& ratios,
         std::vector<GradType>& grads) const final;
 
     void
-    evaluateVGH(const ParticleSet& P, int iat, ValueVector& psi,
+    evaluateVGH(const ParticleSetT<T>& P, int iat, ValueVector& psi,
         GradVector& dpsi, HessVector& grad_grad_psi) final;
 
     void
-    evaluateVGHGH(const ParticleSet& P, int iat, ValueVector& psi,
+    evaluateVGHGH(const ParticleSetT<T>& P, int iat, ValueVector& psi,
         GradVector& dpsi, HessVector& grad_grad_psi,
         GGGVector& grad_grad_grad_psi) final;
 
     void
-    evaluate_notranspose(const ParticleSet& P, int first, int last,
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
         ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) final;
 
     void
-    evaluate_notranspose(const ParticleSet& P, int first, int last,
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
         ValueMatrix& logdet, GradMatrix& dlogdet,
         HessMatrix& grad_grad_logdet) final;
 
     void
-    evaluate_notranspose(const ParticleSet& P, int first, int last,
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
         ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet,
         GGGMatrix& grad_grad_grad_logdet) final;
 
@@ -242,8 +242,8 @@ class LCAOrbitalSetT : public SPOSetT<T>
      * orbitals.
      */
     void
-    evaluateGradSource(const ParticleSet& P, int first, int last,
-        const ParticleSet& source, int iat_src, GradMatrix& grad_phi) final;
+    evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
+        const ParticleSetT<T>& source, int iat_src, GradMatrix& grad_phi) final;
 
     /**
      * \brief Calculate ion derivatives of SPO's, their gradients, and their
@@ -262,13 +262,13 @@ class LCAOrbitalSetT : public SPOSetT<T>
      * for all particles and all orbitals.
      */
     void
-    evaluateGradSource(const ParticleSet& P, int first, int last,
-        const ParticleSet& source, int iat_src, GradMatrix& grad_phi,
+    evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
+        const ParticleSetT<T>& source, int iat_src, GradMatrix& grad_phi,
         HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi) final;
 
     void
-    evaluateGradSourceRow(const ParticleSet& P, int iel,
-        const ParticleSet& source, int iat_src, GradVector& grad_phi) final;
+    evaluateGradSourceRow(const ParticleSetT<T>& P, int iel,
+        const ParticleSetT<T>& source, int iat_src, GradVector& grad_phi) final;
 
     void
     createResource(ResourceCollection& collection) const final;
@@ -362,13 +362,13 @@ class LCAOrbitalSetT : public SPOSetT<T>
 
     void
     mw_evaluateVGLImplGEMM(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSet>& P_list, int iat,
+        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
         OffloadMWVGLArray& phi_vgl_v) const;
 
     /// packed walker GEMM implementation
     void
     mw_evaluateValueImplGEMM(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSet>& P_list, int iat,
+        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
         OffloadMWVArray& phi_v) const;
 
     struct LCAOMultiWalkerMem;
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp
index f713646d82..87b4e719d0 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp
@@ -1,6 +1,6 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2018 Jeongnim Kim and QMCPACK developers.
 //
@@ -9,62 +9,64 @@
 // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
 //////////////////////////////////////////////////////////////////////////////////////
 
-
 #include "LCAOrbitalSetWithCorrectionT.h"
 
 namespace qmcplusplus
 {
-template<typename T>
-LCAOrbitalSetWithCorrectionT<T>::LCAOrbitalSetWithCorrectionT(const std::string& my_name,
-                                                              ParticleSet& ions,
-                                                              ParticleSet& els,
-                                                              std::unique_ptr<basis_type>&& bs)
-    : SPOSetT<T>(my_name), lcao(my_name + "_modified", std::move(bs)), cusp(ions, els)
-{}
+template <typename T>
+LCAOrbitalSetWithCorrectionT<T>::LCAOrbitalSetWithCorrectionT(
+    const std::string& my_name, ParticleSetT<T>& ions, ParticleSetT<T>& els,
+    std::unique_ptr<basis_type>&& bs) :
+    SPOSetT<T>(my_name),
+    lcao(my_name + "_modified", std::move(bs)),
+    cusp(ions, els)
+{
+}
 
-template<typename T>
-void LCAOrbitalSetWithCorrectionT<T>::setOrbitalSetSize(int norbs)
+template <typename T>
+void
+LCAOrbitalSetWithCorrectionT<T>::setOrbitalSetSize(int norbs)
 {
-  assert(lcao.getOrbitalSetSize() == norbs && "norbs doesn't agree with lcao!");
-  this->OrbitalSetSize = norbs;
-  cusp.setOrbitalSetSize(norbs);
+    assert(
+        lcao.getOrbitalSetSize() == norbs && "norbs doesn't agree with lcao!");
+    this->OrbitalSetSize = norbs;
+    cusp.setOrbitalSetSize(norbs);
 }
 
-template<typename T>
-std::unique_ptr<SPOSetT<T>> LCAOrbitalSetWithCorrectionT<T>::makeClone() const
+template <typename T>
+std::unique_ptr<SPOSetT<T>>
+LCAOrbitalSetWithCorrectionT<T>::makeClone() const
 {
-  return std::make_unique<LCAOrbitalSetWithCorrectionT<T>>(*this);
+    return std::make_unique<LCAOrbitalSetWithCorrectionT<T>>(*this);
 }
 
-template<typename T>
-void LCAOrbitalSetWithCorrectionT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+template <typename T>
+void
+LCAOrbitalSetWithCorrectionT<T>::evaluateValue(
+    const ParticleSetT<T>& P, int iat, ValueVector& psi)
 {
-  lcao.evaluateValue(P, iat, psi);
-  cusp.addV(P, iat, psi);
+    lcao.evaluateValue(P, iat, psi);
+    cusp.addV(P, iat, psi);
 }
 
-template<typename T>
-void LCAOrbitalSetWithCorrectionT<T>::evaluateVGL(const ParticleSet& P,
-                                                  int iat,
-                                                  ValueVector& psi,
-                                                  GradVector& dpsi,
-                                                  ValueVector& d2psi)
+template <typename T>
+void
+LCAOrbitalSetWithCorrectionT<T>::evaluateVGL(const ParticleSetT<T>& P, int iat,
+    ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
 {
-  lcao.evaluateVGL(P, iat, psi, dpsi, d2psi);
-  cusp.add_vector_vgl(P, iat, psi, dpsi, d2psi);
+    lcao.evaluateVGL(P, iat, psi, dpsi, d2psi);
+    cusp.add_vector_vgl(P, iat, psi, dpsi, d2psi);
 }
 
-template<typename T>
-void LCAOrbitalSetWithCorrectionT<T>::evaluate_notranspose(const ParticleSet& P,
-                                                           int first,
-                                                           int last,
-                                                           ValueMatrix& logdet,
-                                                           GradMatrix& dlogdet,
-                                                           ValueMatrix& d2logdet)
+template <typename T>
+void
+LCAOrbitalSetWithCorrectionT<T>::evaluate_notranspose(const ParticleSetT<T>& P,
+    int first, int last, ValueMatrix& logdet, GradMatrix& dlogdet,
+    ValueMatrix& d2logdet)
 {
-  lcao.evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet);
-  for (size_t i = 0, iat = first; iat < last; i++, iat++)
-    cusp.add_vgl(P, iat, i, logdet, dlogdet, d2logdet);
+    lcao.evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet);
+    for (size_t i = 0, iat = first; iat < last; i++, iat++)
+        cusp.add_vgl(P, iat, i, logdet, dlogdet, d2logdet);
 }
 
 template class LCAOrbitalSetWithCorrectionT<double>;
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h
index 30c3f188e6..8b0003d18f 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h
@@ -1,6 +1,6 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
@@ -9,68 +9,72 @@
 // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
 //////////////////////////////////////////////////////////////////////////////////////
 
-
 #ifndef QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_WITH_CORRECTIONT_H
 #define QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_WITH_CORRECTIONT_H
 
-#include "QMCWaveFunctions/SPOSetT.h"
-#include "QMCWaveFunctions/BasisSetBase.h"
 #include "LCAOrbitalSetT.h"
+#include "QMCWaveFunctions/BasisSetBaseT.h"
+#include "QMCWaveFunctions/SPOSetT.h"
 #include "SoaCuspCorrectionT.h"
 
-
 namespace qmcplusplus
 {
 /** class to add cusp correction to LCAOrbitalSet.
-   *
-   */
+ *
+ */
 
-template<typename T>
+template <typename T>
 class LCAOrbitalSetWithCorrectionT : public SPOSetT<T>
 {
 public:
-  using basis_type  = typename LCAOrbitalSetT<T>::basis_type;
-  using ValueVector = typename SPOSetT<T>::ValueVector;
-  using GradVector  = typename SPOSetT<T>::GradVector;
-  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
-  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
-  /** constructor
+    using basis_type = typename LCAOrbitalSetT<T>::basis_type;
+    using ValueVector = typename SPOSetT<T>::ValueVector;
+    using GradVector = typename SPOSetT<T>::GradVector;
+    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+    using GradMatrix = typename SPOSetT<T>::GradMatrix;
+    /** constructor
      * @param ions
      * @param els
      * @param bs pointer to the BasisSet
      * @param rl report level
      */
-  LCAOrbitalSetWithCorrectionT(const std::string& my_name,
-                               ParticleSet& ions,
-                               ParticleSet& els,
-                               std::unique_ptr<basis_type>&& bs);
+    LCAOrbitalSetWithCorrectionT(const std::string& my_name,
+        ParticleSetT<T>& ions, ParticleSetT<T>& els,
+        std::unique_ptr<basis_type>&& bs);
 
-  LCAOrbitalSetWithCorrectionT(const LCAOrbitalSetWithCorrectionT& in) = default;
+    LCAOrbitalSetWithCorrectionT(
+        const LCAOrbitalSetWithCorrectionT& in) = default;
 
-  std::string getClassName() const final { return "LCAOrbitalSetWithCorrectionT"; }
+    std::string
+    getClassName() const final
+    {
+        return "LCAOrbitalSetWithCorrectionT";
+    }
 
-  std::unique_ptr<SPOSetT<T>> makeClone() const final;
+    std::unique_ptr<SPOSetT<T>>
+    makeClone() const final;
 
-  void setOrbitalSetSize(int norbs) final;
+    void
+    setOrbitalSetSize(int norbs) final;
 
-  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) final;
+    void
+    evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) final;
 
-  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final;
+    void
+    evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
+        GradVector& dpsi, ValueVector& d2psi) final;
 
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            ValueMatrix& logdet,
-                            GradMatrix& dlogdet,
-                            ValueMatrix& d2logdet) final;
+    void
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+        ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) final;
 
-  template <typename>
-  friend class LCAOrbitalBuilderT;
+    template <typename>
+    friend class LCAOrbitalBuilderT;
 
 private:
-  LCAOrbitalSetT<T> lcao;
+    LCAOrbitalSetT<T> lcao;
 
-  SoaCuspCorrectionT<T> cusp;
+    SoaCuspCorrectionT<T> cusp;
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCWaveFunctions/LCAO/MultiFunctorAdapter.h b/src/QMCWaveFunctions/LCAO/MultiFunctorAdapter.h
index 110491c006..ee9ecde7fe 100644
--- a/src/QMCWaveFunctions/LCAO/MultiFunctorAdapter.h
+++ b/src/QMCWaveFunctions/LCAO/MultiFunctorAdapter.h
@@ -21,6 +21,7 @@
 #include "hdf/hdf_archive.h"
 #include "LCAO/MultiQuinticSpline1D.h"
 #include "LCAO/SoaAtomicBasisSet.h"
+#include "LCAO/SoaAtomicBasisSetT.h"
 
 namespace qmcplusplus
 {
@@ -145,5 +146,60 @@ class RadialOrbitalSetBuilder<SoaAtomicBasisSet<MultiFunctorAdapter<FN>, SH>> :
     m_orbitals.setRmax(0); //set Rmax
   }
 };
+
+template<typename FN, typename SH, typename ORBT>
+class RadialOrbitalSetBuilder<SoaAtomicBasisSetT<MultiFunctorAdapter<FN>, SH, ORBT>> : public MPIObjectBase
+{
+public:
+  using RadialOrbital_t = MultiFunctorAdapter<FN>;
+  using single_type     = typename RadialOrbital_t::single_type;
+  using COT             = SoaAtomicBasisSetT<RadialOrbital_t, SH, ORBT>;
+
+  ///flag forwarded to every radial functor this builder constructs
+  bool Normalized;
+  ///basis set that receives the quantum numbers and radial functors
+  COT& m_orbitals;
+
+  ///attach to a communicator and a target basis set; Normalized starts as true
+  RadialOrbitalSetBuilder(Communicate* comm, COT& aos) : MPIObjectBase(comm), Normalized(true), m_orbitals(aos) {}
+
+  ///interface used by AOBasisBuilder; the three grid hooks do nothing and return true
+  bool addGrid(xmlNodePtr cur, const std::string& rad_type) { return true; }
+  bool addGridH5(hdf_archive& hin) { return true; }
+  bool openNumericalBasisH5(xmlNodePtr cur) { return true; }
+  bool put(xmlNodePtr cur)
+  {
+    const std::string normalized_attr(lowerCase(getXMLAttributeValue(cur, "normalized")));
+    if (normalized_attr == "no")
+      Normalized = false;
+    return true;
+  }
+
+  bool addRadialOrbital(xmlNodePtr cur, const std::string& rad_type, const QuantumNumberType& nlms)
+  {
+    auto functor = std::make_unique<single_type>(nlms[q_l], Normalized);
+    functor->putBasisGroup(cur);
+    //register the quantum numbers first, then move the functor into the radial set
+    m_orbitals.RnlID.push_back(nlms);
+    m_orbitals.MultiRnl.Rnl.push_back(std::move(functor));
+    return true;
+  }
+
+  bool addRadialOrbitalH5(hdf_archive& hin, const std::string& rad_type, const QuantumNumberType& nlms)
+  {
+    auto functor = std::make_unique<single_type>(nlms[q_l], Normalized);
+    functor->putBasisGroupH5(hin, *myComm);
+    //register the quantum numbers first, then move the functor into the radial set
+    m_orbitals.RnlID.push_back(nlms);
+    m_orbitals.MultiRnl.Rnl.push_back(std::move(functor));
+    //always reports success
+    return true;
+  }
+
+  void finalize()
+  {
+    m_orbitals.setRmax(0); //a non-positive argument makes setRmax use MultiRnl.rmax()
+  }
+};
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCWaveFunctions/LCAO/RadialOrbitalSetBuilder.h b/src/QMCWaveFunctions/LCAO/RadialOrbitalSetBuilder.h
index 71e36230bd..4d03b3d652 100644
--- a/src/QMCWaveFunctions/LCAO/RadialOrbitalSetBuilder.h
+++ b/src/QMCWaveFunctions/LCAO/RadialOrbitalSetBuilder.h
@@ -166,7 +166,7 @@ bool RadialOrbitalSetBuilder<COT>::addGrid(xmlNodePtr cur, const std::string& ra
     hin.pop();
   }
   else
-    input_grid = OneDimGridFactory::createGrid(cur);
+    input_grid = OneDimGridFactory<RealType>::createGrid(cur);
 
   //set zero to use std::max
   m_rcut_safe = 0;
diff --git a/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h b/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h
new file mode 100644
index 0000000000..1f1bc53d5e
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h
@@ -0,0 +1,775 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by:
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+/** @file SoaAtomicBasisSetT.h
+ */
+#ifndef QMCPLUSPLUS_SOA_SPHERICALORBITAL_BASISSETT_H
+#define QMCPLUSPLUS_SOA_SPHERICALORBITAL_BASISSETT_H
+
+#include "CPU/math.hpp"
+#include "OptimizableObject.h"
+
+namespace qmcplusplus
+{
+/// Primary template (used when T is not std::complex): the phase is always 1.
+template <typename T>
+struct CorrectPhaseFunctor
+{
+    /// supertwist bound by reference; this version never reads it
+    const TinyVector<double, 3>& superTwist;
+
+    /// ignores the translation argument and returns 1.0 converted to T
+    template <typename PosType>
+    T
+    operator()(PosType /*Tv*/) const { return 1.0; }
+};
+
+/// Specialization for complex orbitals: phase built from superTwist . Tv.
+template <typename T>
+struct CorrectPhaseFunctor<std::complex<T>>
+{
+    const TinyVector<double, 3>& superTwist;
+    /// returns {c, s} from sincos(-angle); presumably exp(-i*angle) - confirm
+    template <typename PosType>
+    std::complex<T> operator()(PosType Tv) const
+    {
+        T sine, cosine;
+        const T angle = superTwist[0] * Tv[0] + superTwist[1] * Tv[1] +
+            superTwist[2] * Tv[2];
+        qmcplusplus::sincos(-angle, &sine, &cosine);
+        return {cosine, sine};
+    }
+};
+
+/* A basis set for a center type
+ *
+ * @tparam ROT : radial function type, e.g., NGFunctor<RealType>
+ * @tparam SH : spherical or Cartesian harmonics for the (l,m) expansion
+ *
+ * \f$ \phi_{n,l,m}({\bf r})=R_{n,l}(r) Y_{l,m}(\theta) \f$
+ */
+template <typename ROT, typename SH, typename ORBT>
+struct SoaAtomicBasisSetT
+{
+    using RadialOrbital_t = ROT;
+    using RealType = typename ROT::RealType;
+    using GridType = typename ROT::GridType;
+    using ValueType = ORBT;
+
+    /// size of the basis set
+    int BasisSetSize;
+    /// Number of Cell images for the evaluation of the orbital with PBC. If No
+    /// PBC, should be 0;
+    TinyVector<int, 3> PBCImages;
+    /// Coordinates of SuperTwist
+    TinyVector<double, 3> SuperTwist;
+    /// Phase Factor array
+    std::vector<ValueType> periodic_image_phase_factors;
+    /// maximum radius of this center
+    RealType Rmax;
+    /// spherical harmonics
+    SH Ylm;
+    /// radial orbitals
+    ROT MultiRnl;
+    /// index of the corresponding real Spherical Harmonic with quantum numbers
+    /// \f$ (l,m) \f$
+    aligned_vector<int> LM;
+    /**index of the corresponding radial orbital with quantum numbers \f$ (n,l)
+     * \f$ */
+    aligned_vector<int> NL;
+    /// container for the quantum-numbers
+    std::vector<QuantumNumberType> RnlID;
+    /// temporary storage
+    VectorSoaContainer<RealType, 4> tempS;
+
+    /// build Ylm from lmax/addsignforM; BasisSetSize and Rmax start at 0
+    explicit SoaAtomicBasisSetT(int lmax, bool addsignforM = false) :
+        BasisSetSize(0), Rmax(0), Ylm(lmax, addsignforM)
+    {
+    }
+
+    void
+    checkInVariables(opt_variables_type& active)
+    {
+        // No-op for now; disabled forwarding kept for reference:
+        //   for each nl < Rnl.size(): Rnl[nl]->checkInVariables(active);
+    }
+
+    void
+    checkOutVariables(const opt_variables_type& active)
+    {
+        // No-op for now; disabled forwarding kept for reference:
+        //   for each nl < Rnl.size(): Rnl[nl]->checkOutVariables(active);
+    }
+
+    void
+    resetParameters(const opt_variables_type& active)
+    {
+        // No-op for now; disabled forwarding kept for reference:
+        //   for each nl < Rnl.size(): Rnl[nl]->resetParameters(active);
+    }
+
+    /** return the number of basis functions, i.e. the stored BasisSetSize
+     */
+    inline int
+    getBasisSetSize() const
+    {
+        // assigned from LM.size() in setBasisSetSize(); noted as =NL.size()
+        return BasisSetSize;
+    }
+
+    /** Store the periodic-image counts, supertwist and per-image phase factors
+     * used to evaluate the orbitals. NOTE(review): non-PBC was documented as
+     * PBCImages=(1,1,1), SuperTwist(0,0,0), PhaseFactor=1, while the PBCImages
+     * member comment says 0 without PBC - confirm. */
+    void
+    setPBCParams(const TinyVector<int, 3>& pbc_images,
+        const TinyVector<double, 3> supertwist,
+        const std::vector<ValueType>& PeriodicImagePhaseFactors)
+    {
+        this->PBCImages = pbc_images;
+        this->periodic_image_phase_factors = PeriodicImagePhaseFactors;
+        this->SuperTwist = supertwist;
+    }
+
+    /** Refresh BasisSetSize and the scratch buffer tempS.
+     * The argument is ignored: BasisSetSize becomes LM.size() and tempS is
+     * resized to max(Ylm.size(), RnlID.size()). Rmax is handled by setRmax().
+     * @todo allow overriding Rmax with a value much smaller than the maximum
+     * grid
+     */
+    inline void
+    setBasisSetSize(int /*n*/)
+    {
+        const auto scratch_len = std::max(Ylm.size(), RnlID.size());
+        BasisSetSize = LM.size();
+        tempS.resize(scratch_len);
+    }
+
+    /** Set Rmax to rmax when rmax > 0, otherwise to MultiRnl.rmax(). */
+    template <typename RealType> // NOTE: hides the struct-level RealType alias
+    inline void
+    setRmax(RealType rmax)
+    {
+        Rmax = (rmax > 0) ? rmax : MultiRnl.rmax();
+    }
+
+    /// set the current offset -- currently a no-op: c and offset are ignored
+    inline void
+    setCenter(int c, int offset)
+    {
+    }
+
+    /// Cusp-correction helper: s_orbitals[i] = (RnlID[NL[i]][1] == 0), index 1
+    /// presumably being l; s_orbitals must already hold BasisSetSize entries.
+    void
+    queryOrbitalsForSType(std::vector<bool>& s_orbitals) const
+    {
+        for (int ib = 0; ib < BasisSetSize; ++ib)
+            s_orbitals[ib] = RnlID[NL[ib]][1] == 0;
+    }
+
+    /** Sum value, gradient and Laplacian of every basis function over the
+     *  periodic images of dr into vgl rows 0-4, starting at column offset. */
+    template <typename LAT, typename PosType, typename VGL>
+    inline void
+    evaluateVGL(const LAT& lattice, const RealType r, const PosType& dr,
+        const size_t offset, VGL& vgl, PosType Tv)
+    { // r is not read; each image distance is recomputed below as r_new
+        int TransX, TransY, TransZ;
+
+        PosType dr_new;
+        RealType r_new;
+        // RealType psi_new, dpsi_x_new, dpsi_y_new, dpsi_z_new,d2psi_new;
+
+        const ValueType correctphase =
+            CorrectPhaseFunctor<ValueType>{SuperTwist}(Tv);
+
+        constexpr RealType cone(1);
+        constexpr RealType ctwo(2);
+
+        // scratch rows filled by MultiRnl.evaluate; one can assert the alignment
+        RealType* restrict phi = tempS.data(0);
+        RealType* restrict dphi = tempS.data(1);
+        RealType* restrict d2phi = tempS.data(2);
+
+        // vgl rows V,Gx,Gy,Gz,L, each paired with the matching Ylm component
+        auto* restrict psi = vgl.data(0) + offset;
+        const RealType* restrict ylm_v = Ylm[0]; // value
+        auto* restrict dpsi_x = vgl.data(1) + offset;
+        const RealType* restrict ylm_x = Ylm[1]; // gradX
+        auto* restrict dpsi_y = vgl.data(2) + offset;
+        const RealType* restrict ylm_y = Ylm[2]; // gradY
+        auto* restrict dpsi_z = vgl.data(3) + offset;
+        const RealType* restrict ylm_z = Ylm[3]; // gradZ
+        auto* restrict d2psi = vgl.data(4) + offset;
+        const RealType* restrict ylm_l = Ylm[4]; // lap
+
+        for (size_t ib = 0; ib < BasisSetSize; ++ib) { // clear the accumulators
+            psi[ib] = 0;
+            dpsi_x[ib] = 0;
+            dpsi_y[ib] = 0;
+            dpsi_z[ib] = 0;
+            d2psi[ib] = 0;
+        }
+        // iter indexes periodic_image_phase_factors. It starts at -1 and is
+        // incremented before the (r_new >= Rmax) test so skipped images count.
+        int iter = -1;
+        for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X
+        {
+            // Maps i = 0,1,2,3,4,... to cell offsets 0,1,-1,2,-2,...
+            TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
+            for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y
+            {
+                // Maps j = 0,1,2,3,4,... to cell offsets 0,1,-1,2,-2,...
+                TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
+                for (int k = 0; k <= PBCImages[2];
+                     k++) // loop Translation over Z
+                {
+                    // Maps k = 0,1,2,3,4,... to cell offsets 0,1,-1,2,-2,...
+                    TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
+
+                    dr_new[0] = dr[0] +
+                        (TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) +
+                            TransZ * lattice.R(2, 0));
+                    dr_new[1] = dr[1] +
+                        (TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) +
+                            TransZ * lattice.R(2, 1));
+                    dr_new[2] = dr[2] +
+                        (TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) +
+                            TransZ * lattice.R(2, 2));
+
+                    r_new = std::sqrt(dot(dr_new, dr_new));
+
+                    iter++;
+                    if (r_new >= Rmax)
+                        continue;
+
+                    // SIGN Change!! x,y,z are the components of -dr_new
+                    const RealType x = -dr_new[0], y = -dr_new[1],
+                                   z = -dr_new[2];
+                    Ylm.evaluateVGL(x, y, z);
+
+                    MultiRnl.evaluate(r_new, phi, dphi, d2phi);
+
+                    const RealType rinv = cone / r_new;
+
+                    /// Phase for PBC: the stored phase factor of this image
+                    /// times correctphase, the correction due to the Distance
+                    /// table (computed from Tv and SuperTwist above).
+                    const ValueType Phase =
+                        periodic_image_phase_factors[iter] * correctphase;
+
+                    for (size_t ib = 0; ib < BasisSetSize; ++ib) {
+                        const int nl(NL[ib]);
+                        const int lm(LM[ib]);
+                        const RealType drnloverr = rinv * dphi[nl];
+                        const RealType ang = ylm_v[lm];
+                        const RealType gr_x = drnloverr * x;
+                        const RealType gr_y = drnloverr * y;
+                        const RealType gr_z = drnloverr * z;
+                        const RealType ang_x = ylm_x[lm];
+                        const RealType ang_y = ylm_y[lm];
+                        const RealType ang_z = ylm_z[lm];
+                        const RealType vr = phi[nl];
+                        // Product rule on vr * ang (radial times angular part).
+                        psi[ib] += ang * vr * Phase;
+                        dpsi_x[ib] += (ang * gr_x + vr * ang_x) * Phase;
+                        dpsi_y[ib] += (ang * gr_y + vr * ang_y) * Phase;
+                        dpsi_z[ib] += (ang * gr_z + vr * ang_z) * Phase;
+                        d2psi[ib] += (ang * (ctwo * drnloverr + d2phi[nl]) +
+                                         ctwo *
+                                             (gr_x * ang_x + gr_y * ang_y +
+                                                 gr_z * ang_z) +
+                                         vr * ylm_l[lm]) *
+                            Phase;
+                    }
+                }
+            }
+        }
+    }
+
+    template <typename LAT, typename PosType, typename VGH>
+    inline void
+    evaluateVGH(const LAT& lattice, const RealType r, const PosType& dr,
+        const size_t offset, VGH& vgh)
+    { // accumulates value, gradient and Hessian into vgh rows 0-9; r is not read
+        int TransX, TransY, TransZ;
+
+        PosType dr_new;
+        RealType r_new;
+
+        constexpr RealType cone(1);
+
+        // scratch rows filled by MultiRnl.evaluate; one can assert the alignment
+        RealType* restrict phi = tempS.data(0);
+        RealType* restrict dphi = tempS.data(1);
+        RealType* restrict d2phi = tempS.data(2);
+
+        // V,Gx,Gy,Gz (rows 0-3), then Hessian xx,xy,xz,yy,yz,zz (rows 4-9)
+        auto* restrict psi = vgh.data(0) + offset;
+        const RealType* restrict ylm_v = Ylm[0]; // value
+        auto* restrict dpsi_x = vgh.data(1) + offset;
+        const RealType* restrict ylm_x = Ylm[1]; // gradX
+        auto* restrict dpsi_y = vgh.data(2) + offset;
+        const RealType* restrict ylm_y = Ylm[2]; // gradY
+        auto* restrict dpsi_z = vgh.data(3) + offset;
+        const RealType* restrict ylm_z = Ylm[3]; // gradZ
+
+        auto* restrict dhpsi_xx = vgh.data(4) + offset;
+        const RealType* restrict ylm_xx = Ylm[4];
+        auto* restrict dhpsi_xy = vgh.data(5) + offset;
+        const RealType* restrict ylm_xy = Ylm[5];
+        auto* restrict dhpsi_xz = vgh.data(6) + offset;
+        const RealType* restrict ylm_xz = Ylm[6];
+        auto* restrict dhpsi_yy = vgh.data(7) + offset;
+        const RealType* restrict ylm_yy = Ylm[7];
+        auto* restrict dhpsi_yz = vgh.data(8) + offset;
+        const RealType* restrict ylm_yz = Ylm[8];
+        auto* restrict dhpsi_zz = vgh.data(9) + offset;
+        const RealType* restrict ylm_zz = Ylm[9];
+
+        for (size_t ib = 0; ib < BasisSetSize; ++ib) { // clear the accumulators
+            psi[ib] = 0;
+            dpsi_x[ib] = 0;
+            dpsi_y[ib] = 0;
+            dpsi_z[ib] = 0;
+            dhpsi_xx[ib] = 0;
+            dhpsi_xy[ib] = 0;
+            dhpsi_xz[ib] = 0;
+            dhpsi_yy[ib] = 0;
+            dhpsi_yz[ib] = 0;
+            dhpsi_zz[ib] = 0;
+            //      d2psi[ib]  = 0;
+        }
+        // NOTE(review): unlike evaluateVGL, no per-image phase factor is applied.
+        for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X
+        {
+            // Maps i = 0,1,2,3,4,... to cell offsets 0,1,-1,2,-2,...
+            TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
+            for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y
+            {
+                // Maps j = 0,1,2,3,4,... to cell offsets 0,1,-1,2,-2,...
+                TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
+                for (int k = 0; k <= PBCImages[2];
+                     k++) // loop Translation over Z
+                {
+                    // Maps k = 0,1,2,3,4,... to cell offsets 0,1,-1,2,-2,...
+                    TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
+                    dr_new[0] = dr[0] + TransX * lattice.R(0, 0) +
+                        TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0);
+                    dr_new[1] = dr[1] + TransX * lattice.R(0, 1) +
+                        TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1);
+                    dr_new[2] = dr[2] + TransX * lattice.R(0, 2) +
+                        TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2);
+                    r_new = std::sqrt(dot(dr_new, dr_new));
+
+                    // const size_t ib_max=NL.size();
+                    if (r_new >= Rmax)
+                        continue;
+
+                    // SIGN Change!! x,y,z are the components of -dr_new
+                    const RealType x = -dr_new[0], y = -dr_new[1],
+                                   z = -dr_new[2];
+                    Ylm.evaluateVGH(x, y, z);
+
+                    MultiRnl.evaluate(r_new, phi, dphi, d2phi);
+
+                    const RealType rinv = cone / r_new;
+
+                    for (size_t ib = 0; ib < BasisSetSize; ++ib) {
+                        const int nl(NL[ib]);
+                        const int lm(LM[ib]);
+                        const RealType drnloverr = rinv * dphi[nl];
+                        const RealType ang = ylm_v[lm];
+                        const RealType gr_x = drnloverr * x;
+                        const RealType gr_y = drnloverr * y;
+                        const RealType gr_z = drnloverr * z;
+
+                        // The non-strictly diagonal term in \partial_i
+                        // \partial_j R_{nl} is
+                        //  \frac{x_i x_j}{r^2}\left(\frac{\partial^2
+                        //  R_{nl}}{\partial r^2} - \frac{1}{r}\frac{\partial
+                        //  R_{nl}}{\partial r}\right). To save recomputation,
+                        //  everything except the x_i*x_j factor is evaluated
+                        //  once and stored in gr2_tmp.  The full term is
+                        //  obtained by x_i*x_j*gr2_tmp.
+                        const RealType gr2_tmp =
+                            rinv * rinv * (d2phi[nl] - drnloverr);
+                        const RealType gr_xx = x * x * gr2_tmp + drnloverr;
+                        const RealType gr_xy = x * y * gr2_tmp;
+                        const RealType gr_xz = x * z * gr2_tmp;
+                        const RealType gr_yy = y * y * gr2_tmp + drnloverr;
+                        const RealType gr_yz = y * z * gr2_tmp;
+                        const RealType gr_zz = z * z * gr2_tmp + drnloverr;
+
+                        const RealType ang_x = ylm_x[lm];
+                        const RealType ang_y = ylm_y[lm];
+                        const RealType ang_z = ylm_z[lm];
+                        const RealType ang_xx = ylm_xx[lm];
+                        const RealType ang_xy = ylm_xy[lm];
+                        const RealType ang_xz = ylm_xz[lm];
+                        const RealType ang_yy = ylm_yy[lm];
+                        const RealType ang_yz = ylm_yz[lm];
+                        const RealType ang_zz = ylm_zz[lm];
+
+                        const RealType vr = phi[nl];
+
+                        psi[ib] += ang * vr;
+                        dpsi_x[ib] += ang * gr_x + vr * ang_x;
+                        dpsi_y[ib] += ang * gr_y + vr * ang_y;
+                        dpsi_z[ib] += ang * gr_z + vr * ang_z;
+
+                        // \partial_i \partial_j (R*Y) =
+                        //     Y \partial_i \partial_j R
+                        //   + R \partial_i \partial_j Y
+                        //   + (\partial_i R)(\partial_j Y)
+                        //   + (\partial_j R)(\partial_i Y)
+                        // (R is the radial part vr, Y the angular part ang)
+                        dhpsi_xx[ib] +=
+                            gr_xx * ang + ang_xx * vr + 2.0 * gr_x * ang_x;
+                        dhpsi_xy[ib] += gr_xy * ang + ang_xy * vr +
+                            gr_x * ang_y + gr_y * ang_x;
+                        dhpsi_xz[ib] += gr_xz * ang + ang_xz * vr +
+                            gr_x * ang_z + gr_z * ang_x;
+                        dhpsi_yy[ib] +=
+                            gr_yy * ang + ang_yy * vr + 2.0 * gr_y * ang_y;
+                        dhpsi_yz[ib] += gr_yz * ang + ang_yz * vr +
+                            gr_y * ang_z + gr_z * ang_y;
+                        dhpsi_zz[ib] +=
+                            gr_zz * ang + ang_zz * vr + 2.0 * gr_z * ang_z;
+                    }
+                }
+            }
+        }
+    }
+
+    template <typename LAT, typename PosType, typename VGHGH>
+    inline void
+    evaluateVGHGH(const LAT& lattice, const RealType r, const PosType& dr,
+        const size_t offset, VGHGH& vghgh)
+    { // accumulates derivatives up to third order into vghgh rows 0-19; r is not read
+        int TransX, TransY, TransZ;
+
+        PosType dr_new;
+        RealType r_new;
+
+        constexpr RealType cone(1);
+
+        // scratch rows filled by MultiRnl.evaluate; one can assert the alignment
+        RealType* restrict phi = tempS.data(0);
+        RealType* restrict dphi = tempS.data(1);
+        RealType* restrict d2phi = tempS.data(2);
+        RealType* restrict d3phi = tempS.data(3);
+
+        // V,Gx,Gy,Gz (rows 0-3), Hessian (rows 4-9), third derivatives (10-19)
+        auto* restrict psi = vghgh.data(0) + offset;
+        const RealType* restrict ylm_v = Ylm[0]; // value
+        auto* restrict dpsi_x = vghgh.data(1) + offset;
+        const RealType* restrict ylm_x = Ylm[1]; // gradX
+        auto* restrict dpsi_y = vghgh.data(2) + offset;
+        const RealType* restrict ylm_y = Ylm[2]; // gradY
+        auto* restrict dpsi_z = vghgh.data(3) + offset;
+        const RealType* restrict ylm_z = Ylm[3]; // gradZ
+
+        auto* restrict dhpsi_xx = vghgh.data(4) + offset;
+        const RealType* restrict ylm_xx = Ylm[4];
+        auto* restrict dhpsi_xy = vghgh.data(5) + offset;
+        const RealType* restrict ylm_xy = Ylm[5];
+        auto* restrict dhpsi_xz = vghgh.data(6) + offset;
+        const RealType* restrict ylm_xz = Ylm[6];
+        auto* restrict dhpsi_yy = vghgh.data(7) + offset;
+        const RealType* restrict ylm_yy = Ylm[7];
+        auto* restrict dhpsi_yz = vghgh.data(8) + offset;
+        const RealType* restrict ylm_yz = Ylm[8];
+        auto* restrict dhpsi_zz = vghgh.data(9) + offset;
+        const RealType* restrict ylm_zz = Ylm[9];
+
+        auto* restrict dghpsi_xxx = vghgh.data(10) + offset;
+        const RealType* restrict ylm_xxx = Ylm[10];
+        auto* restrict dghpsi_xxy = vghgh.data(11) + offset;
+        const RealType* restrict ylm_xxy = Ylm[11];
+        auto* restrict dghpsi_xxz = vghgh.data(12) + offset;
+        const RealType* restrict ylm_xxz = Ylm[12];
+        auto* restrict dghpsi_xyy = vghgh.data(13) + offset;
+        const RealType* restrict ylm_xyy = Ylm[13];
+        auto* restrict dghpsi_xyz = vghgh.data(14) + offset;
+        const RealType* restrict ylm_xyz = Ylm[14];
+        auto* restrict dghpsi_xzz = vghgh.data(15) + offset;
+        const RealType* restrict ylm_xzz = Ylm[15];
+        auto* restrict dghpsi_yyy = vghgh.data(16) + offset;
+        const RealType* restrict ylm_yyy = Ylm[16];
+        auto* restrict dghpsi_yyz = vghgh.data(17) + offset;
+        const RealType* restrict ylm_yyz = Ylm[17];
+        auto* restrict dghpsi_yzz = vghgh.data(18) + offset;
+        const RealType* restrict ylm_yzz = Ylm[18];
+        auto* restrict dghpsi_zzz = vghgh.data(19) + offset;
+        const RealType* restrict ylm_zzz = Ylm[19];
+
+        for (size_t ib = 0; ib < BasisSetSize; ++ib) { // clear the accumulators
+            psi[ib] = 0;
+
+            dpsi_x[ib] = 0;
+            dpsi_y[ib] = 0;
+            dpsi_z[ib] = 0;
+
+            dhpsi_xx[ib] = 0;
+            dhpsi_xy[ib] = 0;
+            dhpsi_xz[ib] = 0;
+            dhpsi_yy[ib] = 0;
+            dhpsi_yz[ib] = 0;
+            dhpsi_zz[ib] = 0;
+
+            dghpsi_xxx[ib] = 0;
+            dghpsi_xxy[ib] = 0;
+            dghpsi_xxz[ib] = 0;
+            dghpsi_xyy[ib] = 0;
+            dghpsi_xyz[ib] = 0;
+            dghpsi_xzz[ib] = 0;
+            dghpsi_yyy[ib] = 0;
+            dghpsi_yyz[ib] = 0;
+            dghpsi_yzz[ib] = 0;
+            dghpsi_zzz[ib] = 0;
+        }
+        // NOTE(review): unlike evaluateVGL, no per-image phase factor is applied.
+        for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X
+        {
+            // Maps i = 0,1,2,3,4,... to cell offsets 0,1,-1,2,-2,...
+            TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
+            for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y
+            {
+                // Maps j = 0,1,2,3,4,... to cell offsets 0,1,-1,2,-2,...
+                TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
+                for (int k = 0; k <= PBCImages[2];
+                     k++) // loop Translation over Z
+                {
+                    // Maps k = 0,1,2,3,4,... to cell offsets 0,1,-1,2,-2,...
+                    TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
+                    dr_new[0] = dr[0] + TransX * lattice.R(0, 0) +
+                        TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0);
+                    dr_new[1] = dr[1] + TransX * lattice.R(0, 1) +
+                        TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1);
+                    dr_new[2] = dr[2] + TransX * lattice.R(0, 2) +
+                        TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2);
+                    r_new = std::sqrt(dot(dr_new, dr_new));
+
+                    // const size_t ib_max=NL.size();
+                    if (r_new >= Rmax)
+                        continue;
+
+                    // SIGN Change!! x,y,z are the components of -dr_new
+                    const RealType x = -dr_new[0], y = -dr_new[1],
+                                   z = -dr_new[2];
+                    Ylm.evaluateVGHGH(x, y, z);
+
+                    MultiRnl.evaluate(r_new, phi, dphi, d2phi, d3phi);
+
+                    const RealType rinv = cone / r_new;
+                    const RealType xu = x * rinv, yu = y * rinv, zu = z * rinv;
+                    for (size_t ib = 0; ib < BasisSetSize; ++ib) {
+                        const int nl(NL[ib]);
+                        const int lm(LM[ib]);
+                        const RealType drnloverr = rinv * dphi[nl];
+                        const RealType ang = ylm_v[lm];
+                        const RealType gr_x = drnloverr * x;
+                        const RealType gr_y = drnloverr * y;
+                        const RealType gr_z = drnloverr * z;
+
+                        // The non-strictly diagonal term in \partial_i
+                        // \partial_j R_{nl} is
+                        //  \frac{x_i x_j}{r^2}\left(\frac{\partial^2
+                        //  R_{nl}}{\partial r^2} - \frac{1}{r}\frac{\partial
+                        //  R_{nl}}{\partial r}\right). Here gr2_tmp holds only
+                        //  one of the two 1/r factors; the other is carried by
+                        //  the unit components xu,yu,zu, so the full term is
+                        //  x_i*u_j*gr2_tmp.  This is p(r) in the notes.
+                        const RealType gr2_tmp = rinv * (d2phi[nl] - drnloverr);
+
+                        const RealType gr_xx = x * xu * gr2_tmp + drnloverr;
+                        const RealType gr_xy = x * yu * gr2_tmp;
+                        const RealType gr_xz = x * zu * gr2_tmp;
+                        const RealType gr_yy = y * yu * gr2_tmp + drnloverr;
+                        const RealType gr_yz = y * zu * gr2_tmp;
+                        const RealType gr_zz = z * zu * gr2_tmp + drnloverr;
+
+                        // This is q(r) in the notes.
+                        const RealType gr3_tmp = d3phi[nl] - 3.0 * gr2_tmp;
+
+                        const RealType gr_xxx =
+                            xu * xu * xu * gr3_tmp + gr2_tmp * (3. * xu);
+                        const RealType gr_xxy =
+                            xu * xu * yu * gr3_tmp + gr2_tmp * yu;
+                        const RealType gr_xxz =
+                            xu * xu * zu * gr3_tmp + gr2_tmp * zu;
+                        const RealType gr_xyy =
+                            xu * yu * yu * gr3_tmp + gr2_tmp * xu;
+                        const RealType gr_xyz = xu * yu * zu * gr3_tmp;
+                        const RealType gr_xzz =
+                            xu * zu * zu * gr3_tmp + gr2_tmp * xu;
+                        const RealType gr_yyy =
+                            yu * yu * yu * gr3_tmp + gr2_tmp * (3. * yu);
+                        const RealType gr_yyz =
+                            yu * yu * zu * gr3_tmp + gr2_tmp * zu;
+                        const RealType gr_yzz =
+                            yu * zu * zu * gr3_tmp + gr2_tmp * yu;
+                        const RealType gr_zzz =
+                            zu * zu * zu * gr3_tmp + gr2_tmp * (3. * zu);
+
+                        // Angular derivatives up to third
+                        const RealType ang_x = ylm_x[lm];
+                        const RealType ang_y = ylm_y[lm];
+                        const RealType ang_z = ylm_z[lm];
+
+                        const RealType ang_xx = ylm_xx[lm];
+                        const RealType ang_xy = ylm_xy[lm];
+                        const RealType ang_xz = ylm_xz[lm];
+                        const RealType ang_yy = ylm_yy[lm];
+                        const RealType ang_yz = ylm_yz[lm];
+                        const RealType ang_zz = ylm_zz[lm];
+
+                        const RealType ang_xxx = ylm_xxx[lm];
+                        const RealType ang_xxy = ylm_xxy[lm];
+                        const RealType ang_xxz = ylm_xxz[lm];
+                        const RealType ang_xyy = ylm_xyy[lm];
+                        const RealType ang_xyz = ylm_xyz[lm];
+                        const RealType ang_xzz = ylm_xzz[lm];
+                        const RealType ang_yyy = ylm_yyy[lm];
+                        const RealType ang_yyz = ylm_yyz[lm];
+                        const RealType ang_yzz = ylm_yzz[lm];
+                        const RealType ang_zzz = ylm_zzz[lm];
+
+                        const RealType vr = phi[nl];
+
+                        psi[ib] += ang * vr;
+                        dpsi_x[ib] += ang * gr_x + vr * ang_x;
+                        dpsi_y[ib] += ang * gr_y + vr * ang_y;
+                        dpsi_z[ib] += ang * gr_z + vr * ang_z;
+
+                        // \partial_i \partial_j (R*Y) =
+                        //     Y \partial_i \partial_j R
+                        //   + R \partial_i \partial_j Y
+                        //   + (\partial_i R)(\partial_j Y)
+                        //   + (\partial_j R)(\partial_i Y)
+                        // (R is the radial part vr, Y the angular part ang)
+                        dhpsi_xx[ib] +=
+                            gr_xx * ang + ang_xx * vr + 2.0 * gr_x * ang_x;
+                        dhpsi_xy[ib] += gr_xy * ang + ang_xy * vr +
+                            gr_x * ang_y + gr_y * ang_x;
+                        dhpsi_xz[ib] += gr_xz * ang + ang_xz * vr +
+                            gr_x * ang_z + gr_z * ang_x;
+                        dhpsi_yy[ib] +=
+                            gr_yy * ang + ang_yy * vr + 2.0 * gr_y * ang_y;
+                        dhpsi_yz[ib] += gr_yz * ang + ang_yz * vr +
+                            gr_y * ang_z + gr_z * ang_y;
+                        dhpsi_zz[ib] +=
+                            gr_zz * ang + ang_zz * vr + 2.0 * gr_z * ang_z;
+
+                        dghpsi_xxx[ib] += gr_xxx * ang + vr * ang_xxx +
+                            3.0 * gr_xx * ang_x + 3.0 * gr_x * ang_xx;
+                        dghpsi_xxy[ib] += gr_xxy * ang + vr * ang_xxy +
+                            gr_xx * ang_y + ang_xx * gr_y +
+                            2.0 * gr_xy * ang_x + 2.0 * ang_xy * gr_x;
+                        dghpsi_xxz[ib] += gr_xxz * ang + vr * ang_xxz +
+                            gr_xx * ang_z + ang_xx * gr_z +
+                            2.0 * gr_xz * ang_x + 2.0 * ang_xz * gr_x;
+                        dghpsi_xyy[ib] += gr_xyy * ang + vr * ang_xyy +
+                            gr_yy * ang_x + ang_yy * gr_x +
+                            2.0 * gr_xy * ang_y + 2.0 * ang_xy * gr_y;
+                        dghpsi_xyz[ib] += gr_xyz * ang + vr * ang_xyz +
+                            gr_xy * ang_z + ang_xy * gr_z + gr_yz * ang_x +
+                            ang_yz * gr_x + gr_xz * ang_y + ang_xz * gr_y;
+                        dghpsi_xzz[ib] += gr_xzz * ang + vr * ang_xzz +
+                            gr_zz * ang_x + ang_zz * gr_x +
+                            2.0 * gr_xz * ang_z + 2.0 * ang_xz * gr_z;
+                        dghpsi_yyy[ib] += gr_yyy * ang + vr * ang_yyy +
+                            3.0 * gr_yy * ang_y + 3.0 * gr_y * ang_yy;
+                        dghpsi_yyz[ib] += gr_yyz * ang + vr * ang_yyz +
+                            gr_yy * ang_z + ang_yy * gr_z +
+                            2.0 * gr_yz * ang_y + 2.0 * ang_yz * gr_y;
+                        dghpsi_yzz[ib] += gr_yzz * ang + vr * ang_yzz +
+                            gr_zz * ang_y + ang_zz * gr_y +
+                            2.0 * gr_yz * ang_z + 2.0 * ang_yz * gr_z;
+                        dghpsi_zzz[ib] += gr_zzz * ang + vr * ang_zzz +
+                            3.0 * gr_zz * ang_z + 3.0 * gr_z * ang_zz;
+                    }
+                }
+            }
+        }
+    }
+
+    /** Sum the value of every basis function over the periodic images of dr
+     *  into psi[0..BasisSetSize), scaling each image by its phase. */
+    template <typename LAT, typename PosType, typename VT>
+    inline void
+    evaluateV(const LAT& lattice, const RealType r, const PosType& dr,
+        VT* restrict psi, PosType Tv)
+    { // r is not read; each image distance is recomputed below as r_new
+        int TransX, TransY, TransZ;
+
+        PosType dr_new;
+        RealType r_new;
+
+        const ValueType correctphase =
+            CorrectPhaseFunctor<ValueType>{SuperTwist}(Tv);
+
+        RealType* restrict ylm_v = tempS.data(0); // scratch for Ylm.evaluateV
+        RealType* restrict phi_r = tempS.data(1); // scratch for MultiRnl.evaluate
+
+        for (size_t ib = 0; ib < BasisSetSize; ++ib)
+            psi[ib] = 0;
+        // iter indexes periodic_image_phase_factors. It starts at -1 and is
+        // incremented before the (r_new >= Rmax) test so skipped images count.
+        int iter = -1;
+        for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X
+        {
+            // Maps i = 0,1,2,3,4,... to cell offsets 0,1,-1,2,-2,...
+            TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
+            for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y
+            {
+                // Maps j = 0,1,2,3,4,... to cell offsets 0,1,-1,2,-2,...
+                TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
+                for (int k = 0; k <= PBCImages[2];
+                     k++) // loop Translation over Z
+                {
+                    // Maps k = 0,1,2,3,4,... to cell offsets 0,1,-1,2,-2,...
+                    TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
+
+                    dr_new[0] = dr[0] +
+                        (TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) +
+                            TransZ * lattice.R(2, 0));
+                    dr_new[1] = dr[1] +
+                        (TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) +
+                            TransZ * lattice.R(2, 1));
+                    dr_new[2] = dr[2] +
+                        (TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) +
+                            TransZ * lattice.R(2, 2));
+
+                    r_new = std::sqrt(dot(dr_new, dr_new));
+                    iter++;
+                    if (r_new >= Rmax)
+                        continue;
+
+                    Ylm.evaluateV(-dr_new[0], -dr_new[1], -dr_new[2], ylm_v);
+                    MultiRnl.evaluate(r_new, phi_r);
+                    /// Phase for PBC: the stored phase factor of this image
+                    /// times correctphase, the correction due to the Distance
+                    /// table (computed from Tv and SuperTwist above).
+                    const ValueType Phase =
+                        periodic_image_phase_factors[iter] * correctphase;
+                    for (size_t ib = 0; ib < BasisSetSize; ++ib)
+                        psi[ib] += ylm_v[LM[ib]] * phi_r[NL[ib]] * Phase;
+                }
+            }
+        }
+    }
+};
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp
index 57a1312447..85c17ef568 100644
--- a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp
+++ b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp
@@ -1,6 +1,6 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2021 QMCPACK developers.
 //
@@ -9,160 +9,173 @@
 // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
 //////////////////////////////////////////////////////////////////////////////////////
 
-
 /** @file SoaCuspCorrectionT.cpp
  */
 #include "SoaCuspCorrectionT.h"
+
 #include "SoaCuspCorrectionBasisSet.h"
+#include "Particle/DistanceTableT.h"
 
 namespace qmcplusplus
 {
-template<class T>
-SoaCuspCorrectionT<T>::SoaCuspCorrectionT(ParticleSet& ions, ParticleSet& els) : myTableIndex(els.addTable(ions))
+template <class T>
+SoaCuspCorrectionT<T>::SoaCuspCorrectionT(
+    ParticleSetT<T>& ions, ParticleSetT<T>& els) :
+    myTableIndex(els.addTable(ions))
 {
-  NumCenters = ions.getTotalNum();
-  NumTargets = els.getTotalNum();
-  LOBasisSet.resize(NumCenters);
+    NumCenters = ions.getTotalNum();
+    NumTargets = els.getTotalNum();
+    LOBasisSet.resize(NumCenters);
 }
 
-template<class T>
-SoaCuspCorrectionT<T>::SoaCuspCorrectionT(const SoaCuspCorrectionT<T>& a) = default;
+template <class T>
+SoaCuspCorrectionT<T>::SoaCuspCorrectionT(
+    const SoaCuspCorrectionT<T>& a) = default;
 
-template<class T>
-void SoaCuspCorrectionT<T>::setOrbitalSetSize(int norbs)
+template <class T>
+void
+SoaCuspCorrectionT<T>::setOrbitalSetSize(int norbs)
 {
-  MaxOrbSize = norbs;
-  myVGL.resize(5, MaxOrbSize);
+    MaxOrbSize = norbs;
+    myVGL.resize(5, MaxOrbSize);
 }
 
-template<class T>
-inline void SoaCuspCorrectionT<T>::evaluateVGL(const ParticleSet& P, int iat, VGLVector& vgl)
+template <class T>
+inline void
+SoaCuspCorrectionT<T>::evaluateVGL(
+    const ParticleSetT<T>& P, int iat, VGLVector& vgl)
 {
-  assert(MaxOrbSize >= vgl.size());
-  myVGL = 0.0;
-
-  const auto& d_table = P.getDistTableAB(myTableIndex);
-  const auto& dist    = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
-  const auto& displ   = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
-  for (int c = 0; c < NumCenters; c++)
-    if (LOBasisSet[c])
-      LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], myVGL[2], myVGL[3], myVGL[4]);
-
-  {
-    const auto v_in  = myVGL[0];
-    const auto gx_in = myVGL[1];
-    const auto gy_in = myVGL[2];
-    const auto gz_in = myVGL[3];
-    const auto l_in  = myVGL[4];
-    auto v_out       = vgl.data(0);
-    auto gx_out      = vgl.data(1);
-    auto gy_out      = vgl.data(2);
-    auto gz_out      = vgl.data(3);
-    auto l_out       = vgl.data(4);
-    for (size_t i = 0; i < vgl.size(); ++i)
+    assert(MaxOrbSize >= vgl.size());
+    myVGL = 0.0;
+
+    const auto& d_table = P.getDistTableAB(myTableIndex);
+    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
+                                                    d_table.getDistRow(iat);
+    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
+                                                     d_table.getDisplRow(iat);
+    for (int c = 0; c < NumCenters; c++)
+        if (LOBasisSet[c])
+            LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1],
+                myVGL[2], myVGL[3], myVGL[4]);
+
     {
-      v_out[i] += v_in[i];
-      gx_out[i] += gx_in[i];
-      gy_out[i] += gy_in[i];
-      gz_out[i] += gz_in[i];
-      l_out[i] += l_in[i];
+        const auto v_in = myVGL[0];
+        const auto gx_in = myVGL[1];
+        const auto gy_in = myVGL[2];
+        const auto gz_in = myVGL[3];
+        const auto l_in = myVGL[4];
+        auto v_out = vgl.data(0);
+        auto gx_out = vgl.data(1);
+        auto gy_out = vgl.data(2);
+        auto gz_out = vgl.data(3);
+        auto l_out = vgl.data(4);
+        for (size_t i = 0; i < vgl.size(); ++i) {
+            v_out[i] += v_in[i];
+            gx_out[i] += gx_in[i];
+            gy_out[i] += gy_in[i];
+            gz_out[i] += gz_in[i];
+            l_out[i] += l_in[i];
+        }
     }
-  }
 }
 
-template<class T>
-void SoaCuspCorrectionT<T>::evaluate_vgl(const ParticleSet& P,
-                                         int iat,
-                                         ValueVector& psi,
-                                         GradVector& dpsi,
-                                         ValueVector& d2psi)
+template <class T>
+void
+SoaCuspCorrectionT<T>::evaluate_vgl(const ParticleSetT<T>& P, int iat,
+    ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
 {
-  assert(MaxOrbSize >= psi.size());
-  myVGL = 0.0;
-
-  const auto& d_table = P.getDistTableAB(myTableIndex);
-  const auto& dist    = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
-  const auto& displ   = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
-  for (int c = 0; c < NumCenters; c++)
-    if (LOBasisSet[c])
-      LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], myVGL[2], myVGL[3], myVGL[4]);
-
-  const auto v_in  = myVGL[0];
-  const auto gx_in = myVGL[1];
-  const auto gy_in = myVGL[2];
-  const auto gz_in = myVGL[3];
-  const auto l_in  = myVGL[4];
-  for (size_t i = 0; i < psi.size(); ++i)
-  {
-    psi[i] += v_in[i];
-    dpsi[i][0] += gx_in[i];
-    dpsi[i][1] += gy_in[i];
-    dpsi[i][2] += gz_in[i];
-    d2psi[i] += l_in[i];
-  }
+    assert(MaxOrbSize >= psi.size());
+    myVGL = 0.0;
+
+    const auto& d_table = P.getDistTableAB(myTableIndex);
+    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
+                                                    d_table.getDistRow(iat);
+    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
+                                                     d_table.getDisplRow(iat);
+    for (int c = 0; c < NumCenters; c++)
+        if (LOBasisSet[c])
+            LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1],
+                myVGL[2], myVGL[3], myVGL[4]);
+
+    const auto v_in = myVGL[0];
+    const auto gx_in = myVGL[1];
+    const auto gy_in = myVGL[2];
+    const auto gz_in = myVGL[3];
+    const auto l_in = myVGL[4];
+    for (size_t i = 0; i < psi.size(); ++i) {
+        psi[i] += v_in[i];
+        dpsi[i][0] += gx_in[i];
+        dpsi[i][1] += gy_in[i];
+        dpsi[i][2] += gz_in[i];
+        d2psi[i] += l_in[i];
+    }
 }
 
-template<class T>
-void SoaCuspCorrectionT<T>::evaluate_vgl(const ParticleSet& P,
-                                         int iat,
-                                         int idx,
-                                         ValueMatrix& psi,
-                                         GradMatrix& dpsi,
-                                         ValueMatrix& d2psi)
+template <class T>
+void
+SoaCuspCorrectionT<T>::evaluate_vgl(const ParticleSetT<T>& P, int iat, int idx,
+    ValueMatrix& psi, GradMatrix& dpsi, ValueMatrix& d2psi)
 {
-  assert(MaxOrbSize >= psi.cols());
-  myVGL = 0.0;
-
-  const auto& d_table = P.getDistTableAB(myTableIndex);
-  const auto& dist    = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
-  const auto& displ   = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
-  for (int c = 0; c < NumCenters; c++)
-    if (LOBasisSet[c])
-      LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], myVGL[2], myVGL[3], myVGL[4]);
-
-  const auto v_in  = myVGL[0];
-  const auto gx_in = myVGL[1];
-  const auto gy_in = myVGL[2];
-  const auto gz_in = myVGL[3];
-  const auto l_in  = myVGL[4];
-  for (size_t i = 0; i < psi.cols(); ++i)
-  {
-    psi[idx][i] += v_in[i];
-    dpsi[idx][i][0] += gx_in[i];
-    dpsi[idx][i][1] += gy_in[i];
-    dpsi[idx][i][2] += gz_in[i];
-    d2psi[idx][i] += l_in[i];
-  }
+    assert(MaxOrbSize >= psi.cols());
+    myVGL = 0.0;
+
+    const auto& d_table = P.getDistTableAB(myTableIndex);
+    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
+                                                    d_table.getDistRow(iat);
+    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
+                                                     d_table.getDisplRow(iat);
+    for (int c = 0; c < NumCenters; c++)
+        if (LOBasisSet[c])
+            LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1],
+                myVGL[2], myVGL[3], myVGL[4]);
+
+    const auto v_in = myVGL[0];
+    const auto gx_in = myVGL[1];
+    const auto gy_in = myVGL[2];
+    const auto gz_in = myVGL[3];
+    const auto l_in = myVGL[4];
+    for (size_t i = 0; i < psi.cols(); ++i) {
+        psi[idx][i] += v_in[i];
+        dpsi[idx][i][0] += gx_in[i];
+        dpsi[idx][i][1] += gy_in[i];
+        dpsi[idx][i][2] += gz_in[i];
+        d2psi[idx][i] += l_in[i];
+    }
 }
 
-template<class T>
-void SoaCuspCorrectionT<T>::evaluateV(const ParticleSet& P, int iat, ValueVector& psi)
+template <class T>
+void
+SoaCuspCorrectionT<T>::evaluateV(
+    const ParticleSetT<T>& P, int iat, ValueVector& psi)
 {
-  assert(MaxOrbSize >= psi.size());
-  T* tmp_vals = myVGL[0];
+    assert(MaxOrbSize >= psi.size());
+    T* tmp_vals = myVGL[0];
 
-  std::fill_n(tmp_vals, myVGL.size(), 0.0);
+    std::fill_n(tmp_vals, myVGL.size(), 0.0);
 
-  const auto& d_table = P.getDistTableAB(myTableIndex);
-  const auto& dist    = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
+    const auto& d_table = P.getDistTableAB(myTableIndex);
+    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
+                                                    d_table.getDistRow(iat);
 
-  //THIS IS SERIAL, only way to avoid this is to use myVGL
-  for (int c = 0; c < NumCenters; c++)
-    if (LOBasisSet[c])
-      LOBasisSet[c]->evaluate(dist[c], tmp_vals);
+    // THIS IS SERIAL, only way to avoid this is to use myVGL
+    for (int c = 0; c < NumCenters; c++)
+        if (LOBasisSet[c])
+            LOBasisSet[c]->evaluate(dist[c], tmp_vals);
 
-  { //collect
-    const auto v_in = myVGL[0];
-    for (size_t i = 0; i < psi.size(); ++i)
-      psi[i] += v_in[i];
-  }
+    { // collect
+        const auto v_in = myVGL[0];
+        for (size_t i = 0; i < psi.size(); ++i)
+            psi[i] += v_in[i];
+    }
 }
 
-template<class T>
-void SoaCuspCorrectionT<T>::add(int icenter, std::unique_ptr<COT> aos)
+template <class T>
+void
+SoaCuspCorrectionT<T>::add(int icenter, std::unique_ptr<COT> aos)
 {
-  assert(MaxOrbSize == aos->getNumOrbs() && "All the centers should support the same number of orbitals!");
-  LOBasisSet[icenter].reset(aos.release());
+    assert(MaxOrbSize == aos->getNumOrbs() &&
+        "All the centers should support the same number of orbitals!");
+    LOBasisSet[icenter].reset(aos.release());
 }
 
 template class SoaCuspCorrectionT<double>;
diff --git a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h
index dca3912f90..0edf61af87 100644
--- a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h
+++ b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h
@@ -1,6 +1,6 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2021 QMCPACK developers.
 //
@@ -9,18 +9,16 @@
 // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
 //////////////////////////////////////////////////////////////////////////////////////
 
-
 /** @file SoaCuspCorrectionT.h
  */
 #ifndef QMCPLUSPLUS_SOA_CUSPCORRECTIONT_H
 #define QMCPLUSPLUS_SOA_CUSPCORRECTIONT_H
 
-#include "Configuration.h"
 #include "QMCWaveFunctions/SPOSetT.h"
 
 namespace qmcplusplus
 {
-template<typename T>
+template <typename T>
 class CuspCorrectionAtomicBasis;
 
 /** A localized basis set derived from BasisSetBase<typename COT::ValueType>
@@ -30,88 +28,110 @@ class CuspCorrectionAtomicBasis;
  * The template parameter COT denotes Centered-Orbital-Type which provides
  * a set of localized orbitals associated with a center.
  */
-template<class T>
+template <class T>
 class SoaCuspCorrectionT
 {
-  using RealType    = typename SPOSetT<T>::RealType;
-  using VGLVector   = VectorSoaContainer<T, 5>;
-  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
-  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
-  using GradVector  = typename SPOSetT<T>::GradVector;
-  using ValueVector = typename SPOSetT<T>::ValueVector;
-  using PosType     = typename SPOSetT<T>::PosType;
-
-  ///number of centers, e.g., ions
-  size_t NumCenters;
-  ///number of quantum particles
-  size_t NumTargets;
-  ///number of quantum particles
-  const int myTableIndex;
-  /** Maximal number of supported MOs
-   * this is not the AO basis because cusp correction is applied on the MO directly.
-   */
-  int MaxOrbSize = 0;
-
-  ///COMPLEX WON'T WORK
-  using COT = CuspCorrectionAtomicBasis<RealType>;
-
-  /** container of the unique pointers to the Atomic Orbitals
-   *
-   * size of LOBasisSet = number of centers (atoms)
-   * should use unique_ptr once COT is fixed for better performance
-   */
-  std::vector<std::shared_ptr<const COT>> LOBasisSet;
-
-  Matrix<RealType> myVGL;
+    using RealType = typename SPOSetT<T>::RealType;
+    using VGLVector = VectorSoaContainer<T, 5>;
+    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+    using GradMatrix = typename SPOSetT<T>::GradMatrix;
+    using GradVector = typename SPOSetT<T>::GradVector;
+    using ValueVector = typename SPOSetT<T>::ValueVector;
+    using PosType = typename SPOSetT<T>::PosType;
+
+    /// number of centers, e.g., ions
+    size_t NumCenters;
+    /// number of quantum particles
+    size_t NumTargets;
+    /// index of the distance table returned by els.addTable(ions)
+    const int myTableIndex;
+    /** Maximal number of supported MOs
+     * this is not the AO basis because cusp correction is applied on the MO
+     * directly.
+     */
+    int MaxOrbSize = 0;
+
+    /// COMPLEX WON'T WORK
+    using COT = CuspCorrectionAtomicBasis<RealType>;
+
+    /** container of the shared pointers to the Atomic Orbitals
+     *
+     * size of LOBasisSet = number of centers (atoms)
+     * should use unique_ptr once COT is fixed for better performance
+     */
+    std::vector<std::shared_ptr<const COT>> LOBasisSet;
+
+    Matrix<RealType> myVGL;
 
 public:
-  /** constructor
-   * @param ions ionic system
-   * @param els electronic system
-   */
-  SoaCuspCorrectionT(ParticleSet& ions, ParticleSet& els);
-
-  /** copy constructor */
-  SoaCuspCorrectionT(const SoaCuspCorrectionT& a);
-
-  /** set the number of orbitals this cusp correction may serve. call this before adding any correction centers.
-   */
-  void setOrbitalSetSize(int norbs);
-
-  /** compute VGL
-   * @param P quantum particleset
-   * @param iat active particle
-   * @param vgl Matrix(5,BasisSetSize)
-   * @param trialMove if true, use getTempDists()/getTempDispls()
-   */
-  void evaluateVGL(const ParticleSet& P, int iat, VGLVector& vgl);
-
-  void evaluate_vgl(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
-
-  void evaluate_vgl(const ParticleSet& P, int iat, int idx, ValueMatrix& psi, GradMatrix& dpsi, ValueMatrix& d2psi);
-
-  /** compute values for the iat-paricle move
-   *
-   * Always uses getTempDists() and getTempDispls()
-   */
-  void evaluateV(const ParticleSet& P, int iat, ValueVector& psi);
-
-  /** add a new set of Centered Atomic Orbitals
-   * @param icenter the index of the center
-   * @param aos a set of Centered Atomic Orbitals
-   */
-  void add(int icenter, std::unique_ptr<COT> aos);
-
-  void addVGL(const ParticleSet& P, int iat, VGLVector& vgl) { evaluateVGL(P, iat, vgl); }
-  void addV(const ParticleSet& P, int iat, ValueVector& psi) { evaluateV(P, iat, psi); }
-  void add_vgl(const ParticleSet& P, int iat, int idx, ValueMatrix& vals, GradMatrix& dpsi, ValueMatrix& d2psi)
-  {
-    evaluate_vgl(P, iat, idx, vals, dpsi, d2psi);
-  }
-  void add_vector_vgl(const ParticleSet& P, int iat, ValueVector& vals, GradVector& dpsi, ValueVector& d2psi)
-  {
-    evaluate_vgl(P, iat, vals, dpsi, d2psi);
-  }
+    /** constructor
+     * @param ions ionic system
+     * @param els electronic system
+     */
+    SoaCuspCorrectionT(ParticleSetT<T>& ions, ParticleSetT<T>& els);
+
+    /** copy constructor */
+    SoaCuspCorrectionT(const SoaCuspCorrectionT& a);
+
+    /** set the number of orbitals this cusp correction may serve. call this
+     * before adding any correction centers.
+     */
+    void
+    setOrbitalSetSize(int norbs);
+
+    /** compute VGL
+     * @param P quantum particleset
+     * @param iat active particle
+     * @param vgl Matrix(5,BasisSetSize)
+     * @note temp dists/displs are used when iat is the active particle
+     */
+    void
+    evaluateVGL(const ParticleSetT<T>& P, int iat, VGLVector& vgl);
+
+    void
+    evaluate_vgl(const ParticleSetT<T>& P, int iat, ValueVector& psi,
+        GradVector& dpsi, ValueVector& d2psi);
+
+    void
+    evaluate_vgl(const ParticleSetT<T>& P, int iat, int idx, ValueMatrix& psi,
+        GradMatrix& dpsi, ValueMatrix& d2psi);
+
+    /** compute values for the iat-particle move
+     *
+     * Uses getTempDists() if iat is the active particle, else getDistRow(iat)
+     */
+    void
+    evaluateV(const ParticleSetT<T>& P, int iat, ValueVector& psi);
+
+    /** add a new set of Centered Atomic Orbitals
+     * @param icenter the index of the center
+     * @param aos a set of Centered Atomic Orbitals
+     */
+    void
+    add(int icenter, std::unique_ptr<COT> aos);
+
+    void
+    addVGL(const ParticleSetT<T>& P, int iat, VGLVector& vgl)
+    {
+        evaluateVGL(P, iat, vgl);
+    }
+    void
+    addV(const ParticleSetT<T>& P, int iat, ValueVector& psi)
+    {
+        evaluateV(P, iat, psi);
+    }
+    void
+    add_vgl(const ParticleSetT<T>& P, int iat, int idx, ValueMatrix& vals,
+        GradMatrix& dpsi, ValueMatrix& d2psi)
+    {
+        evaluate_vgl(P, iat, idx, vals, dpsi, d2psi);
+    }
+    void
+    add_vector_vgl(const ParticleSetT<T>& P, int iat, ValueVector& vals,
+        GradVector& dpsi, ValueVector& d2psi)
+    {
+        evaluate_vgl(P, iat, vals, dpsi, d2psi);
+    }
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp
new file mode 100644
index 0000000000..7b62735768
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp
@@ -0,0 +1,469 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by:
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "SoaLocalizedBasisSetT.h"
+
+#include "MultiFunctorAdapter.h"
+#include "MultiQuinticSpline1D.h"
+#include "Numerics/SoaCartesianTensor.h"
+#include "Numerics/SoaSphericalTensor.h"
+#include "Particle/DistanceTableT.h"
+#include "SoaAtomicBasisSetT.h"
+
+#include <memory>
+
+namespace qmcplusplus
+{
+template <class COT, typename ORBT>
+SoaLocalizedBasisSetT<COT, ORBT>::SoaLocalizedBasisSetT(
+    ParticleSetT<ORBT>& ions, ParticleSetT<ORBT>& els) :
+    ions_(ions),
+    myTableIndex(els.addTable(ions,
+        DTModes::NEED_FULL_TABLE_ANYTIME |
+            DTModes::NEED_VP_FULL_TABLE_ON_HOST)),
+    SuperTwist(0.0)
+{
+    NumCenters = ions.getTotalNum();
+    NumTargets = els.getTotalNum();
+    LOBasisSet.resize(ions.getSpeciesSet().getTotalNum());
+    BasisOffset.resize(NumCenters + 1);
+    BasisSetSize = 0;
+}
+
+template <class COT, typename ORBT>
+SoaLocalizedBasisSetT<COT, ORBT>::SoaLocalizedBasisSetT(
+    const SoaLocalizedBasisSetT& a) :
+    SoaBasisSetBaseT<ORBT>(a),
+    NumCenters(a.NumCenters),
+    NumTargets(a.NumTargets),
+    ions_(a.ions_),
+    myTableIndex(a.myTableIndex),
+    SuperTwist(a.SuperTwist),
+    BasisOffset(a.BasisOffset)
+{
+    LOBasisSet.reserve(a.LOBasisSet.size());
+    for (auto& elem : a.LOBasisSet)
+        LOBasisSet.push_back(std::make_unique<COT>(*elem));
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::setPBCParams(
+    const TinyVector<int, 3>& PBCImages, const TinyVector<double, 3> Sup_Twist,
+    const std::vector<ORBT>& phase_factor)
+{
+    for (auto& basis : LOBasisSet) // forward the PBC data to every basis
+        basis->setPBCParams(PBCImages, Sup_Twist, phase_factor);
+
+    SuperTwist = Sup_Twist; // also keep the twist on this object
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::setBasisSetSize(int nbs)
+{
+    const auto& IonID(ions_.GroupID);
+    if (BasisSetSize > 0 && nbs == BasisSetSize)
+        return;
+
+    if (auto& mapping = ions_.get_map_storage_to_input(); mapping.empty()) {
+        // evaluate the total basis dimension and offset for each center
+        BasisOffset[0] = 0;
+        for (int c = 0; c < NumCenters; c++)
+            BasisOffset[c + 1] =
+                BasisOffset[c] + LOBasisSet[IonID[c]]->getBasisSetSize();
+        BasisSetSize = BasisOffset[NumCenters];
+    }
+    else {
+        // when particles are reordered due to grouping, AOs need to restore the
+        // input order to match MOs.
+        std::vector<int> map_input_to_storage(mapping.size());
+        for (int c = 0; c < NumCenters; c++)
+            map_input_to_storage[mapping[c]] = c;
+
+        std::vector<size_t> basis_offset_input_order(BasisOffset.size(), 0);
+        for (int c = 0; c < NumCenters; c++)
+            basis_offset_input_order[c + 1] = basis_offset_input_order[c] +
+                LOBasisSet[IonID[map_input_to_storage[c]]]->getBasisSetSize();
+
+        for (int c = 0; c < NumCenters; c++)
+            BasisOffset[c] = basis_offset_input_order[mapping[c]];
+
+        BasisSetSize = basis_offset_input_order[NumCenters];
+    }
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::queryOrbitalsForSType(
+    const std::vector<bool>& corrCenter, std::vector<bool>& is_s_orbital) const
+{
+    const auto& IonID(ions_.GroupID);
+    for (int c = 0; c < NumCenters; c++) {
+        int idx = BasisOffset[c];
+        int bss = LOBasisSet[IonID[c]]->BasisSetSize;
+        std::vector<bool> local_is_s_orbital(bss);
+        LOBasisSet[IonID[c]]->queryOrbitalsForSType(local_is_s_orbital);
+        for (int k = 0; k < bss; k++) {
+            if (corrCenter[c]) {
+                is_s_orbital[idx++] = local_is_s_orbital[k];
+            }
+            else {
+                is_s_orbital[idx++] = false;
+            }
+        }
+    }
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::evaluateVGL(
+    const ParticleSetT<ORBT>& P, int iat, vgl_type& vgl)
+{
+    const auto& IonID(ions_.GroupID);
+    const auto& coordR = P.activeR(iat);
+    const auto& d_table = P.getDistTableAB(myTableIndex);
+    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
+                                                    d_table.getDistRow(iat);
+    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
+                                                     d_table.getDisplRow(iat);
+
+    PosType Tv;
+    for (int c = 0; c < NumCenters; c++) {
+        Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0];
+        Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1];
+        Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2];
+        LOBasisSet[IonID[c]]->evaluateVGL(
+            P.getLattice(), dist[c], displ[c], BasisOffset[c], vgl, Tv);
+    }
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::mw_evaluateVGL(
+    const RefVectorWithLeader<ParticleSetT<ORBT>>& P_list, int iat,
+    OffloadMWVGLArray& vgl_v)
+{
+    for (size_t iw = 0; iw < P_list.size(); iw++) {
+        const auto& IonID(ions_.GroupID);
+        const auto& coordR = P_list[iw].activeR(iat);
+        const auto& d_table = P_list[iw].getDistTableAB(myTableIndex);
+        const auto& dist = (P_list[iw].getActivePtcl() == iat) ?
+            d_table.getTempDists() :
+            d_table.getDistRow(iat);
+        const auto& displ = (P_list[iw].getActivePtcl() == iat) ?
+            d_table.getTempDispls() :
+            d_table.getDisplRow(iat);
+
+        PosType Tv;
+
+        // number of walkers * BasisSetSize
+        auto stride = vgl_v.size(1) * BasisSetSize;
+        assert(BasisSetSize == vgl_v.size(2));
+        vgl_type vgl_iw(vgl_v.data_at(0, iw, 0), BasisSetSize, stride);
+
+        for (int c = 0; c < NumCenters; c++) {
+            Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0];
+            Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1];
+            Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2];
+            LOBasisSet[IonID[c]]->evaluateVGL(P_list[iw].getLattice(), dist[c],
+                displ[c], BasisOffset[c], vgl_iw, Tv);
+        }
+    }
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::evaluateVGH(
+    const ParticleSetT<ORBT>& P, int iat, vgh_type& vgh)
+{
+    const auto& IonID(ions_.GroupID);
+    const auto& d_table = P.getDistTableAB(myTableIndex);
+    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
+                                                    d_table.getDistRow(iat);
+    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
+                                                     d_table.getDisplRow(iat);
+    for (int c = 0; c < NumCenters; c++) {
+        LOBasisSet[IonID[c]]->evaluateVGH(
+            P.getLattice(), dist[c], displ[c], BasisOffset[c], vgh);
+    }
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::evaluateVGHGH(
+    const ParticleSetT<ORBT>& P, int iat, vghgh_type& vghgh)
+{
+    // APP_ABORT("SoaLocalizedBasisSetT::evaluateVGH() not implemented\n");
+
+    const auto& IonID(ions_.GroupID);
+    const auto& d_table = P.getDistTableAB(myTableIndex);
+    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
+                                                    d_table.getDistRow(iat);
+    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
+                                                     d_table.getDisplRow(iat);
+    for (int c = 0; c < NumCenters; c++) {
+        LOBasisSet[IonID[c]]->evaluateVGHGH(
+            P.getLattice(), dist[c], displ[c], BasisOffset[c], vghgh);
+    }
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::evaluateV(
+    const ParticleSetT<ORBT>& P, int iat, ORBT* restrict vals)
+{
+    const auto& IonID(ions_.GroupID);
+    const auto& coordR = P.activeR(iat);
+    const auto& d_table = P.getDistTableAB(myTableIndex);
+    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
+                                                    d_table.getDistRow(iat);
+    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
+                                                     d_table.getDisplRow(iat);
+
+    PosType Tv;
+    for (int c = 0; c < NumCenters; c++) {
+        Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0];
+        Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1];
+        Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2];
+        LOBasisSet[IonID[c]]->evaluateV(
+            P.getLattice(), dist[c], displ[c], vals + BasisOffset[c], Tv);
+    }
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::mw_evaluateValue(
+    const RefVectorWithLeader<ParticleSetT<ORBT>>& P_list, int iat,
+    OffloadMWVArray& v)
+{
+    for (size_t iw = 0; iw < P_list.size(); iw++)
+        evaluateV(P_list[iw], iat, v.data_at(iw, 0));
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::evaluateGradSourceV(
+    const ParticleSetT<ORBT>& P, int iat, const ParticleSetT<ORBT>& ions,
+    int jion, vgl_type& vgl)
+{
+    // Evaluate center jion only; first clear rows 1-3 (gx, gy, gz) of vgl.
+    auto* restrict gx = vgl.data(1);
+    auto* restrict gy = vgl.data(2);
+    auto* restrict gz = vgl.data(3);
+
+    for (int ib = 0; ib < BasisSetSize; ib++) {
+        gx[ib] = 0;
+        gy[ib] = 0;
+        gz[ib] = 0;
+    }
+
+    const auto& IonID(ions_.GroupID); // NOTE(review): `ions` arg unused; confirm
+    const auto& d_table = P.getDistTableAB(myTableIndex);
+    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
+                                                    d_table.getDistRow(iat);
+    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
+                                                     d_table.getDisplRow(iat);
+
+    PosType Tv;
+    Tv[0] = Tv[1] = Tv[2] = 0; // zero vector handed to evaluateVGL below
+    // Since LCAO's are written only in terms of (r-R), ionic derivatives only
+    // exist for the atomic center that we wish to take derivatives of.
+    // Moreover, we can obtain an ion derivative by multiplying an electron
+    // derivative by -1.0.  Handling this sign is left to LCAOrbitalSet.  For
+    // now, just note this is the electron VGL function.
+    LOBasisSet[IonID[jion]]->evaluateVGL(
+        P.getLattice(), dist[jion], displ[jion], BasisOffset[jion], vgl, Tv);
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::evaluateGradSourceVGL(
+    const ParticleSetT<ORBT>& P, int iat, const ParticleSetT<ORBT>& ions,
+    int jion, vghgh_type& vghgh)
+{
+    // Zero rows 1-19 of the temporary array vghgh; row 0 is left untouched.
+    auto* restrict gx = vghgh.data(1);
+    auto* restrict gy = vghgh.data(2);
+    auto* restrict gz = vghgh.data(3);
+
+    auto* restrict hxx = vghgh.data(4);
+    auto* restrict hxy = vghgh.data(5);
+    auto* restrict hxz = vghgh.data(6);
+    auto* restrict hyy = vghgh.data(7);
+    auto* restrict hyz = vghgh.data(8);
+    auto* restrict hzz = vghgh.data(9);
+
+    auto* restrict gxxx = vghgh.data(10);
+    auto* restrict gxxy = vghgh.data(11);
+    auto* restrict gxxz = vghgh.data(12);
+    auto* restrict gxyy = vghgh.data(13);
+    auto* restrict gxyz = vghgh.data(14);
+    auto* restrict gxzz = vghgh.data(15);
+    auto* restrict gyyy = vghgh.data(16);
+    auto* restrict gyyz = vghgh.data(17);
+    auto* restrict gyzz = vghgh.data(18);
+    auto* restrict gzzz = vghgh.data(19);
+
+    for (int ib = 0; ib < BasisSetSize; ib++) {
+        gx[ib] = 0;
+        gy[ib] = 0;
+        gz[ib] = 0;
+
+        hxx[ib] = 0;
+        hxy[ib] = 0;
+        hxz[ib] = 0;
+        hyy[ib] = 0;
+        hyz[ib] = 0;
+        hzz[ib] = 0;
+
+        gxxx[ib] = 0;
+        gxxy[ib] = 0;
+        gxxz[ib] = 0;
+        gxyy[ib] = 0;
+        gxyz[ib] = 0;
+        gxzz[ib] = 0;
+        gyyy[ib] = 0;
+        gyyz[ib] = 0;
+        gyzz[ib] = 0;
+        gzzz[ib] = 0;
+    }
+
+    // Since jion is indexed on the source ions not the ions_ the distinction
+    // between ions_ and ions is extremely important.
+    const auto& IonID(ions.GroupID);
+    const auto& d_table = P.getDistTableAB(myTableIndex);
+    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
+                                                    d_table.getDistRow(iat);
+    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
+                                                     d_table.getDisplRow(iat);
+
+    // Since LCAO's are written only in terms of (r-R), ionic derivatives only
+    // exist for the atomic center that we wish to take derivatives of.
+    // Moreover, we can obtain an ion derivative by multiplying an electron
+    // derivative by -1.0.  Handling this sign is left to LCAOrbitalSet.  For
+    // now, just note this is the electron VGL function.
+
+    LOBasisSet[IonID[jion]]->evaluateVGHGH(
+        P.getLattice(), dist[jion], displ[jion], BasisOffset[jion], vghgh);
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::add(int icenter, std::unique_ptr<COT> aos)
+{
+    LOBasisSet[icenter] = std::move(aos); // takes ownership of aos; icenter is not range-checked here
+}
+
+// Explicit instantiations: 3 radial functors x 2 angular tensors x 4 value types (24 in total).
+// TODO: this should be redone with template template parameters
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaCartesianTensor<double>,
+        double>,
+    double>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaCartesianTensor<double>,
+        std::complex<double>>,
+    std::complex<double>>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaCartesianTensor<float>,
+        float>,
+    float>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaCartesianTensor<float>,
+        std::complex<float>>,
+    std::complex<float>>;
+
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaSphericalTensor<double>,
+        double>,
+    double>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaSphericalTensor<double>,
+        std::complex<double>>,
+    std::complex<double>>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaSphericalTensor<float>,
+        float>,
+    float>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaSphericalTensor<float>,
+        std::complex<float>>,
+    std::complex<float>>;
+
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaCartesianTensor<double>, double>,
+    double>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaCartesianTensor<double>, std::complex<double>>,
+    std::complex<double>>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaCartesianTensor<float>, float>,
+    float>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaCartesianTensor<float>, std::complex<float>>,
+    std::complex<float>>;
+
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaSphericalTensor<double>, double>,
+    double>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaSphericalTensor<double>, std::complex<double>>,
+    std::complex<double>>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaSphericalTensor<float>, float>,
+    float>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaSphericalTensor<float>, std::complex<float>>,
+    std::complex<float>>;
+
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaCartesianTensor<double>, double>,
+    double>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaCartesianTensor<double>, std::complex<double>>,
+    std::complex<double>>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
+        SoaCartesianTensor<float>, float>,
+    float>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
+        SoaCartesianTensor<float>, std::complex<float>>,
+    std::complex<float>>;
+
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaSphericalTensor<double>, double>,
+    double>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaSphericalTensor<double>, std::complex<double>>,
+    std::complex<double>>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
+        SoaSphericalTensor<float>, float>,
+    float>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
+        SoaSphericalTensor<float>, std::complex<float>>,
+    std::complex<float>>;
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h
new file mode 100644
index 0000000000..6f2e412413
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h
@@ -0,0 +1,190 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by:
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+/** @file SoaLocalizedBasisSetT.h
+ * @brief A derived class from BasisSetBase
+ *
+ * This is intended as a replacement for MolecularWaveFunctionComponent and
+ * any other localized basis set.
+ */
+#ifndef QMCPLUSPLUS_SOA_LOCALIZEDBASISSETT_H
+#define QMCPLUSPLUS_SOA_LOCALIZEDBASISSETT_H
+
+#include "OMPTarget/OffloadAlignedAllocators.hpp"
+#include "QMCWaveFunctions/BasisSetBaseT.h"
+
+#include <memory>
+
+namespace qmcplusplus
+{
+/** A localized basis set derived from SoaBasisSetBaseT<ORBT>
+ *
+ * This class performs the evaluation of the basis functions and their
+ * derivatives for each of the N-particles in a configuration.
+ * The template parameter COT denotes Centered-Orbital-Type which provides
+ * a set of localized orbitals associated with a center.
+ * The template parameter ORBT denotes the orbital value return type
+ */
+template <class COT, typename ORBT>
+class SoaLocalizedBasisSetT : public SoaBasisSetBaseT<ORBT>
+{
+public:
+    using RealType = typename COT::RealType;
+    using BaseType = SoaBasisSetBaseT<ORBT>;
+    using ValueType = ORBT;
+
+    using vgl_type = typename BaseType::vgl_type;
+    using vgh_type = typename BaseType::vgh_type;
+    using vghgh_type = typename BaseType::vghgh_type;
+    using PosType = typename ParticleSetT<ORBT>::PosType;
+    using OffloadMWVGLArray = typename BaseType::OffloadMWVGLArray;
+    using OffloadMWVArray = typename BaseType::OffloadMWVArray;
+
+    using BaseType::BasisSetSize;
+
+    /// number of centers, e.g., ions
+    size_t NumCenters;
+    /// number of quantum particles
+    size_t NumTargets;
+    /// ion particle set
+    const ParticleSetT<ORBT>& ions_;
+    /// index of the distance table, as passed to P.getDistTableAB()
+    const int myTableIndex;
+    /// Global Coordinate of Supertwist read from HDF5
+    PosType SuperTwist;
+
+    /** container to store the offsets of the basis functions for each center
+     * Due to potential reordering of ions, offsets can be in any order.
+     */
+    std::vector<size_t> BasisOffset;
+
+    /** container of the unique pointers to the Atomic Orbitals
+     *
+     * size of LOBasisSet = number of unique centers
+     */
+    std::vector<std::unique_ptr<COT>> LOBasisSet;
+
+    /** constructor
+     * @param ions ionic system
+     * @param els electronic system
+     */
+    SoaLocalizedBasisSetT(ParticleSetT<ORBT>& ions, ParticleSetT<ORBT>& els);
+
+    /** copy constructor */
+    SoaLocalizedBasisSetT(const SoaLocalizedBasisSetT& a);
+
+    /** makeClone */
+    BaseType*
+    makeClone() const override
+    {
+        return new SoaLocalizedBasisSetT<COT, ORBT>(*this);
+    }
+
+    /** set Number of periodic Images to evaluate the orbitals.
+        Set to 0 for non-PBC, and set manually in the input.
+        Passes the pre-computed phase factor for evaluation of complex
+       wavefunction. If WF is real Phase_factor is real and equals 1 if gamma or
+       -1 if non-Gamma.
+    */
+    void
+    setPBCParams(const TinyVector<int, 3>& PBCImages,
+        const TinyVector<double, 3> Sup_Twist,
+        const std::vector<ORBT>& phase_factor);
+
+    /** set BasisSetSize and allocate mVGL container
+     */
+    void
+    setBasisSetSize(int nbs) override;
+
+    /**  Determine which orbitals are S-type.  Used by cusp correction.
+     */
+    void
+    queryOrbitalsForSType(const std::vector<bool>& corrCenter,
+        std::vector<bool>& is_s_orbital) const override;
+
+    /** compute VGL
+     * @param P quantum particleset
+     * @param iat active particle
+     * @param vgl Matrix(5,BasisSetSize)
+     * @note there is no trialMove flag; presumably P.getActivePtcl() selects temp data - TODO confirm
+     */
+    void
+    evaluateVGL(const ParticleSetT<ORBT>& P, int iat, vgl_type& vgl) override;
+
+    /** compute V using packed array with all walkers
+     * @param P_list list of quantum particleset (one for each walker)
+     * @param iat active particle
+     * @param v   Array(n_walkers, BasisSetSize)
+     */
+    void
+    mw_evaluateValue(const RefVectorWithLeader<ParticleSetT<ORBT>>& P_list,
+        int iat, OffloadMWVArray& v) override;
+
+    /** compute VGL using packed array with all walkers
+     * @param P_list list of quantum particleset (one for each walker)
+     * @param iat active particle
+     * @param vgl   Array(n_walkers, 5, BasisSetSize)
+     */
+    void
+    mw_evaluateVGL(const RefVectorWithLeader<ParticleSetT<ORBT>>& P_list,
+        int iat, OffloadMWVGLArray& vgl) override;
+
+    /** compute VGH
+     * @param P quantum particleset
+     * @param iat active particle
+     * @param vgh Matrix(10,BasisSetSize)
+     * @note there is no trialMove flag; presumably P.getActivePtcl() selects temp data - TODO confirm
+     */
+    void
+    evaluateVGH(const ParticleSetT<ORBT>& P, int iat, vgh_type& vgh) override;
+
+    /** compute VGHGH
+     * @param P quantum particleset
+     * @param iat active particle
+     * @param vghgh Matrix(20,BasisSetSize)
+     * @note there is no trialMove flag; presumably P.getActivePtcl() selects temp data - TODO confirm
+     */
+    void
+    evaluateVGHGH(
+        const ParticleSetT<ORBT>& P, int iat, vghgh_type& vghgh) override;
+
+    /** compute values for the iat-particle move
+     *
+     * Always uses getTempDists() and getTempDispls()
+     * Tv is a translation vector; In PBC, in order to reduce the number
+     * of images that need to be summed over when generating the AO the
+     * nearest image displacement, dr, is used. Tv corresponds to the
+     * translation that takes the 'general displacement' (displacement
+     * between ion position and electron position) to the nearest image
+     * displacement. We need to keep track of Tv because it must be added
+     * as a phase factor, i.e., exp(i*k*Tv).
+     */
+    void
+    evaluateV(
+        const ParticleSetT<ORBT>& P, int iat, ORBT* restrict vals) override;
+
+    void
+    evaluateGradSourceV(const ParticleSetT<ORBT>& P, int iat,
+        const ParticleSetT<ORBT>& ions, int jion, vgl_type& vgl) override;
+
+    void
+    evaluateGradSourceVGL(const ParticleSetT<ORBT>& P, int iat,
+        const ParticleSetT<ORBT>& ions, int jion, vghgh_type& vghgh) override;
+
+    /** add a new set of Centered Atomic Orbitals
+     * @param icenter the index of the center
+     * @param aos a set of Centered Atomic Orbitals
+     */
+    void
+    add(int icenter, std::unique_ptr<COT> aos);
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/OptimizableObjectT.h b/src/QMCWaveFunctions/OptimizableObjectT.h
new file mode 100644
index 0000000000..111d812ae4
--- /dev/null
+++ b/src/QMCWaveFunctions/OptimizableObjectT.h
@@ -0,0 +1,151 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2022 QMCPACK developers.
+//
+// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//
+// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_OPTIMIZABLEOBJECTT_H
+#define QMCPLUSPLUS_OPTIMIZABLEOBJECTT_H
+
+#include "VariableSetT.h"
+#include "type_traits/template_types.hpp"
+
+/**@file OptimizableObjectT.h
+ *@brief Declaration of OptimizableObjectT and UniqueOptObjRefsT
+ */
+namespace qmcplusplus
+{
+template <typename T>
+using OptVariablesType = optimize::VariableSetT<T>;
+
+template <typename T>
+class OptimizableObjectT // NOTE(review): has virtual members but no virtual destructor; confirm it is never deleted via a base pointer
+{
+public:
+    OptimizableObjectT(const std::string& name) : name_(name)
+    {
+    }
+
+    const std::string&
+    getName() const
+    {
+        return name_;
+    }
+    bool
+    isOptimized() const
+    {
+        return is_optimized_;
+    }
+
+private:
+    /** Name of the optimizable object
+     */
+    const std::string name_;
+    /** If true, this object is actively modified during WFOpt
+     */
+    bool is_optimized_ = false;
+
+public:
+    /** check in variational parameters to the global list of parameters used by
+     * the optimizer.
+     * @param active a super set of optimizable variables
+     *
+     * Existing checkInVariables in WFC/SPO/.. is inclusive (calls its members'):
+     * class A: public SPOSet {}
+     * class B: public WFC
+     * {
+     *   A objA;
+     *   checkInVariables() { objA.checkInVariables(); }
+     * };
+     *
+     * With OptimizableObject,
+     * class A: public OptimizableObject {}
+     * class B: public OptimizableObject
+     * {
+     *   A objA;
+     *   // should not call objA.checkInVariablesExclusive() if objA has been extracted
+     *   checkInVariablesExclusive() { }
+     * };
+     * A vector of OptimizableObject, will be created by calling
+     * extractOptimizableObjects(). All the checkInVariablesExclusive() will be
+     * called through this vector and thus checkInVariablesExclusive
+     * implementation should only handle non-OptimizableObject members.
+     */
+    virtual void
+    checkInVariablesExclusive(OptVariablesType<T>& active) = 0;
+
+    /** reset the parameters during optimizations. Exclusive, see
+     * checkInVariablesExclusive
+     */
+    virtual void
+    resetParametersExclusive(const OptVariablesType<T>& active) = 0;
+
+    /** print the state, e.g., optimizables */
+    virtual void
+    reportStatus(std::ostream& os)
+    {
+    }
+
+    void
+    setOptimization(bool state)
+    {
+        is_optimized_ = state;
+    }
+
+    /** Write the variational parameters for this object to the VP HDF file
+     *
+     * The hout parameter should come from VariableSet::writeToHDF
+     *
+     * Objects can use this function to store additional information to the
+     * file.
+     *
+     * By default the parameters are saved in VariableSet::writeToHDF, and
+     * objects do not need to implement this function (yet).
+     *
+     */
+    virtual void
+    writeVariationalParameters(hdf_archive& hout){};
+
+    /** Read the variational parameters for this object from the VP HDF file
+     *
+     * The hin parameter should come from VariableSet::readFromHDF
+     *
+     * By default the parameters are read in VariableSet::readFromHDF, and
+     * objects do not need to implement this function (yet).
+     */
+    virtual void
+    readVariationalParameters(hdf_archive& hin){};
+};
+
+template <typename T>
+class UniqueOptObjRefsT : public RefVector<OptimizableObjectT<T>> // push_back() keeps at most one reference per object name
+{
+public:
+    OptimizableObjectT<T>&
+    operator[](size_t i) const
+    {
+        return RefVector<OptimizableObjectT<T>>::operator[](i);
+    }
+
+    void
+    push_back(OptimizableObjectT<T>& obj)
+    {
+        if (obj.getName().empty()) // the name is the uniqueness key, so unnamed objects are rejected
+            throw std::logic_error("BUG!! Only named OptimizableObject object "
+                                   "can be added to UniqueOptObjRefs!");
+        auto result = std::find_if( // linear search for an already stored object with the same name
+            this->begin(), this->end(), [&](OptimizableObjectT<T>& element) {
+                return element.getName() == obj.getName();
+            });
+        if (result == this->end()) // otherwise obj is silently skipped as a duplicate
+            RefVector<OptimizableObjectT<T>>::push_back(obj);
+    }
+};
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/PlaneWave/PWBasisT.h b/src/QMCWaveFunctions/PlaneWave/PWBasisT.h
index a3acaf7aad..4a092961d9 100644
--- a/src/QMCWaveFunctions/PlaneWave/PWBasisT.h
+++ b/src/QMCWaveFunctions/PlaneWave/PWBasisT.h
@@ -1,35 +1,37 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of
+//                    Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of
+//                    Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
-
 /** @file PWBasis.h
  * @brief Declaration of Plane-wave basis set
  */
 #ifndef QMCPLUSPLUS_PLANEWAVEBASIST_BLAS_H
 #define QMCPLUSPLUS_PLANEWAVEBASIST_BLAS_H
 
+#include "CPU/e2iphi.h"
 #include "Configuration.h"
-#include "Particle/ParticleSet.h"
 #include "Message/Communicate.h"
-#include "type_traits/complex_help.hpp"
-#include "CPU/e2iphi.h"
+#include "Particle/ParticleSetT.h"
 #include "hdf/hdf_archive.h"
+#include "type_traits/complex_help.hpp"
 
 /** If defined, use recursive method to build the basis set for each position
  *
  * performance improvement is questionable: load vs sin/cos
  */
-//#define PWBASIS_USE_RECURSIVE
+// #define PWBASIS_USE_RECURSIVE
 
 namespace qmcplusplus
 {
@@ -38,129 +40,145 @@ namespace qmcplusplus
  * Rewrite of PlaneWaveBasis to utilize blas II or III
  * Support more general input tags
  */
-template<typename T>
+template <typename T>
 class PWBasisT : public QMCTraits
 {
 public:
-  using RealType = typename RealAlias_impl<T>::value_type;
-  using ComplexType = T;
-  using PosType = TinyVector<RealType, DIM>;
-  using IndexType = QMCTraits::IndexType;
-  using ParticleLayout = ParticleSet::ParticleLayout;
-  using GIndex_t       = TinyVector<IndexType, 3>;
+    using RealType = typename RealAlias_impl<T>::value_type;
+    using ComplexType = T;
+    using PosType = TinyVector<RealType, DIM>;
+    using IndexType = QMCTraits::IndexType;
+    using ParticleLayout = typename ParticleSetT<T>::ParticleLayout;
+    using GIndex_t = TinyVector<IndexType, 3>;
 
 private:
-  ///max of maxg[i]
-  int maxmaxg;
-  //Need to store the maximum translation in each dimension to use recursive PW generation.
-  GIndex_t maxg;
-  //The PlaneWave data - keep all of these strictly private to prevent inconsistencies.
-  RealType ecut;
-  ///twist angle in reduced
-  PosType twist;
-  ///twist angle in cartesian
-  PosType twist_cart; //Twist angle in reduced and Cartesian.
+    /// max of maxg[i]
+    int maxmaxg;
+    // Need to store the maximum translation in each dimension to use recursive
+    // PW generation.
+    GIndex_t maxg;
+    // The PlaneWave data - keep all of these strictly private to prevent
+    // inconsistencies.
+    RealType ecut;
+    /// twist angle in reduced
+    PosType twist;
+    /// twist angle in cartesian
+    PosType twist_cart; // Twist angle in reduced and Cartesian.
 
-  ///gvecs in reduced coordiates
-  std::vector<GIndex_t> gvecs;
-  ///Reduced coordinates with offset gvecs_shifted[][idim]=gvecs[][idim]+maxg[idim]
-  std::vector<GIndex_t> gvecs_shifted;
+    /// gvecs in reduced coordinates
+    std::vector<GIndex_t> gvecs;
+    /// Reduced coordinates with offset
+    /// gvecs_shifted[][idim]=gvecs[][idim]+maxg[idim]
+    std::vector<GIndex_t> gvecs_shifted;
 
-  std::vector<RealType> minusModKplusG2;
-  std::vector<PosType> kplusgvecs_cart; //Cartesian.
+    std::vector<RealType> minusModKplusG2;
+    std::vector<PosType> kplusgvecs_cart; // Cartesian.
 
-  Matrix<ComplexType> C;
-  //Real wavefunctions here. Now the basis states are cos(Gr) or sin(Gr), not exp(iGr)
-  //We need a way of switching between them for G -> -G, otherwise the
-  //determinant will have multiple rows that are equal (to within a constant factor)
-  //of others, giving a zero determinant. For this, we build a vector (negative) which
-  //stores whether a vector is "+" or "-" (with some criterion, to be defined). We
-  //the switch from cos() to sin() based on the value of this input.
-  std::vector<int> negative;
+    Matrix<ComplexType> C;
+    // Real wavefunctions here. Now the basis states are cos(Gr) or sin(Gr), not
+    // exp(iGr). We need a way of switching between them for G -> -G, otherwise
+    // the determinant will have multiple rows that are equal (to within a
+    // constant factor) of others, giving a zero determinant. For this, we build
+    // a vector (negative) which stores whether a vector is "+" or "-" (with
+    // some criterion, to be defined). We then switch from cos() to sin() based
+    // on the value of this input.
+    std::vector<int> negative;
 
 public:
-  //enumeration for the value, laplacian, gradients and size
-  enum
-  {
-    PW_VALUE,
-    PW_LAP,
-    PW_GRADX,
-    PW_GRADY,
-    PW_GRADZ,
-    PW_MAXINDEX
-  };
+    // enumeration for the value, laplacian, gradients and size
+    enum
+    {
+        PW_VALUE,
+        PW_LAP,
+        PW_GRADX,
+        PW_GRADY,
+        PW_GRADZ,
+        PW_MAXINDEX
+    };
 
-  Matrix<ComplexType> Z;
+    Matrix<ComplexType> Z;
 
-  Vector<ComplexType> Zv;
-  /* inputmap is used for a memory efficient way of
-   *
-   * importing the basis-set and coefficients when the desired energy cutoff may be
-   * lower than that represented by all data in the wavefunction input file.
-   * The steps taken are:
-   *  - Read all basis data.
-   *  - Create map. inputmap[i] = j; j is correct PW index, i is input coef index.
-   *    For basis elements outside cutoff, inputmap[i] = gvecs.size();
-   *  - Coefficients are in same order as PWs in inputfile => simply file into
-   *    storage matrix using the map as the input. All excess coefficients are
-   *    put into [gvecs.size()] and not used. i.e. coefs need to be allocated 1 higher.
-   * Such an approach is not needed for Gamma-point only calculations because the
-   * basis is spherically ordered. However, when a twist-angle is used, the "sphere"
-   * of allowed planewaves is shifted.
-   */
+    Vector<ComplexType> Zv;
+    /* inputmap is used for a memory efficient way of
+     *
+     * importing the basis-set and coefficients when the desired energy cutoff
+     * may be lower than that represented by all data in the wavefunction input
+     * file. The steps taken are:
+     *  - Read all basis data.
+     *  - Create map. inputmap[i] = j; j is correct PW index, i is input coef
+     *    index. For basis elements outside cutoff, inputmap[i] = gvecs.size();
+     *  - Coefficients are in same order as PWs in inputfile => simply file into
+     *    storage matrix using the map as the input. All excess coefficients are
+     *    put into [gvecs.size()] and not used. i.e. coefs need to be allocated 1 higher.
+     * Such an approach is not needed for Gamma-point only calculations because
+     * the basis is spherically ordered. However, when a twist-angle is used,
+     * the "sphere" of allowed planewaves is shifted.
+     */
 
-  Vector<RealType> phi;
+    Vector<RealType> phi;
 
-  std::vector<int> inputmap;
+    std::vector<int> inputmap;
 
-  ///total number of basis functions
-  int NumPlaneWaves;
+    /// total number of basis functions
+    int NumPlaneWaves;
 
-  ///local copy of Lattice
-  ParticleLayout Lattice;
+    /// local copy of Lattice
+    ParticleLayout Lattice;
 
-  ///default constructor
-  PWBasisT() : maxmaxg(0), NumPlaneWaves(0) {}
+    /// default constructor
+    PWBasisT() : maxmaxg(0), NumPlaneWaves(0)
+    {
+    }
 
-  ///constructor
-  PWBasisT(const PosType& twistangle) : maxmaxg(0), twist(twistangle), NumPlaneWaves(0) {}
+    /// constructor
+    PWBasisT(const PosType& twistangle) :
+        maxmaxg(0),
+        twist(twistangle),
+        NumPlaneWaves(0)
+    {
+    }
 
-  ~PWBasisT() {}
+    ~PWBasisT()
+    {
+    }
 
-  ///set the twist angle
-  void setTwistAngle(const PosType& tang);
+    /// set the twist angle
+    void
+    setTwistAngle(const PosType& tang);
 
-  ///reset
-  void reset();
+    /// reset
+    void
+    reset();
 
-  /** Read basisset from hdf5 file. Apply ecut.
-   * @param h5basisgroup h5 node where basis is located
-   * @param ecutoff cutoff energy
-   * @param lat CrystalLattice
-   * @param resizeContainer if true, resize internal storage.
-   * @return the number of plane waves
-   */
-  int readbasis(hdf_archive& h5basisgroup,
-                RealType ecutoff,
-                const ParticleLayout& lat,
-                const std::string& pwname     = "planewaves",
-                const std::string& pwmultname = "multipliers",
-                bool resizeContainer          = true);
+    /** Read basisset from hdf5 file. Apply ecut.
+     * @param h5basisgroup h5 node where basis is located
+     * @param ecutoff cutoff energy
+     * @param lat CrystalLattice
+     * @param resizeContainer if true, resize internal storage.
+     * @return the number of plane waves
+     */
+    int
+    readbasis(hdf_archive& h5basisgroup, RealType ecutoff,
+        const ParticleLayout& lat, const std::string& pwname = "planewaves",
+        const std::string& pwmultname = "multipliers",
+        bool resizeContainer = true);
 
-  /** Remove basis elements if kinetic energy > ecut.
-   *
-   * Keep and indexmap so we know how to match coefficients on read.
-   */
-  void trimforecut();
+    /** Remove basis elements if kinetic energy > ecut.
+     *
+     * Keep an indexmap so we know how to match coefficients on read.
+     */
+    void
+    trimforecut();
 
 #if defined(PWBASIS_USE_RECURSIVE)
-  /** Fill the recursion coefficients matrix.
-   *
-   * @todo Generalize to non-orthorohmbic cells
-   */
-  inline void BuildRecursionCoefs(const PosType& pos)
-  {
-    PosType tau_red(Lattice.toUnit(pos));
+    /** Fill the recursion coefficients matrix.
+     *
+     * @todo Generalize to non-orthorhombic cells
+     */
+    inline void
+    BuildRecursionCoefs(const PosType& pos)
+    {
+        PosType tau_red(Lattice.toUnit(pos));
 //      RealType phi=TWOPI*tau_red[0];
 //      RealType nphi=maxg0*phi;
 //      ComplexType ct0(std::cos(phi),std::sin(phi));
@@ -182,162 +200,170 @@ class PWBasisT : public QMCTraits
 //      C2[0]=t;
 //      for(int n=1; n<=2*maxg2; n++) C2[n] = (t *= ct0);
 #pragma ivdep
-    for (int idim = 0; idim < 3; idim++)
+        for (int idim = 0; idim < 3; idim++) {
+            int ng = maxg[idim];
+            RealType phi = TWOPI * tau_red[idim];
+            RealType nphi = ng * phi;
+            ComplexType Ctemp(std::cos(phi), std::sin(phi));
+            ComplexType t(std::cos(nphi), -std::sin(nphi));
+            ComplexType* restrict cp_ptr = C[idim];
+            *cp_ptr++ = t;
+            for (int n = 1; n <= 2 * ng; n++) {
+                *cp_ptr++ = (t *= Ctemp);
+            }
+        }
+        // Base version
+        // #pragma ivdep
+        //       for(int idim=0; idim<3; idim++){
+        //         RealType phi=TWOPI*tau_red[idim];
+        //         ComplexType Ctemp(std::cos(phi),std::sin(phi));
+        //         int ng=maxg[idim];
+        //         ComplexType* restrict cp_ptr=C[idim]+ng;
+        //         ComplexType* restrict cn_ptr=C[idim]+ng-1;
+        //         *cp_ptr=1.0;
+        //         for(int n=1; n<=ng; n++,cn_ptr--){
+        //           ComplexType t(Ctemp*(*cp_ptr++));
+        //           *cp_ptr = t;
+        //           *cn_ptr = conj(t);
+        //         }
+        //       }
+        // Not valid for general supercell
+        //       // Cartesian of twist for 1,1,1 (reduced coordinates)
+        //       PosType G111(1.0,1.0,1.0);
+        //       G111 = Lattice.k_cart(G111);
+        //
+        //       //Precompute a small number of complex factors (PWs along
+        //       //b1,b2,b3 lines)
+        //       //using a fast recursion algorithm
+        // #pragma ivdep
+        //       for(int idim=0; idim<3; idim++){
+        //         //start the recursion with the 111 vector.
+        //         RealType phi = pos[idim] * G111[idim];
+        //         register ComplexType Ctemp(std::cos(phi), std::sin(phi));
+        //         int ng=maxg[idim];
+        //         ComplexType* restrict cp_ptr=C[idim]+ng;
+        //         ComplexType* restrict cn_ptr=C[idim]+ng-1;
+        //         *cp_ptr=1.0;
+        //         for(int n=1; n<=ng; n++,cn_ptr--){
+        //           ComplexType t(Ctemp*(*cp_ptr++));
+        //           *cp_ptr = t;
+        //           *cn_ptr = conj(t);
+        //         }
+        //       }
+    }
+
+    inline void
+    evaluate(const PosType& pos)
     {
-      int ng        = maxg[idim];
-      RealType phi  = TWOPI * tau_red[idim];
-      RealType nphi = ng * phi;
-      ComplexType Ctemp(std::cos(phi), std::sin(phi));
-      ComplexType t(std::cos(nphi), -std::sin(nphi));
-      ComplexType* restrict cp_ptr = C[idim];
-      *cp_ptr++                    = t;
-      for (int n = 1; n <= 2 * ng; n++)
-      {
-        *cp_ptr++ = (t *= Ctemp);
-      }
+        BuildRecursionCoefs(pos);
+        RealType twistdotr = dot(twist_cart, pos);
+        ComplexType pw0(std::cos(twistdotr), std::sin(twistdotr));
+        // Evaluate the planewaves for particle iat.
+        for (int ig = 0; ig < NumPlaneWaves; ig++) {
+            // PW is initialized as exp(i*twist.r) so that the final basis
+            // evaluations are for (twist+G).r
+            ComplexType pw(pw0); // std::cos(twistdotr),std::sin(twistdotr));
+            for (int idim = 0; idim < 3; idim++)
+                pw *= C(idim, gvecs_shifted[ig][idim]);
+            // pw *= C0[gvecs_shifted[ig][0]];
+            // pw *= C1[gvecs_shifted[ig][1]];
+            // pw *= C2[gvecs_shifted[ig][2]];
+            Zv[ig] = pw;
+        }
     }
-    //Base version
-    //#pragma ivdep
-    //      for(int idim=0; idim<3; idim++){
-    //        RealType phi=TWOPI*tau_red[idim];
-    //        ComplexType Ctemp(std::cos(phi),std::sin(phi));
-    //        int ng=maxg[idim];
-    //        ComplexType* restrict cp_ptr=C[idim]+ng;
-    //        ComplexType* restrict cn_ptr=C[idim]+ng-1;
-    //        *cp_ptr=1.0;
-    //        for(int n=1; n<=ng; n++,cn_ptr--){
-    //          ComplexType t(Ctemp*(*cp_ptr++));
-    //          *cp_ptr = t;
-    //          *cn_ptr = conj(t);
-    //        }
-    //      }
-    //Not valid for general supercell
+    /** Evaluate all planewaves and derivatives for the iat-th particle
+     *
+     * The basis functions are evaluated for particles iat: first <= iat < last
+     * Evaluate the plane-waves at current particle coordinates using a fast
+     * recursion algorithm. Order of Y,dY and d2Y is kept correct.
+     * These can be "dotted" with coefficients later to complete orbital
+     * evaluations.
+     */
+    inline void
+    evaluateAll(const ParticleSetT<T>& P, int iat)
+    {
+        const PosType& r(P.activeR(iat));
+        BuildRecursionCoefs(r);
+        RealType twistdotr = dot(twist_cart, r);
+        ComplexType pw0(std::cos(twistdotr), std::sin(twistdotr));
+        // Evaluate the planewaves and derivatives.
+        ComplexType* restrict zptr = Z.data();
+        for (int ig = 0; ig < NumPlaneWaves; ig++, zptr += 5) {
+            // PW is initialized as exp(i*twist.r) so that the final basis
+            // evaluations are for (twist+G).r
+            ComplexType pw(pw0);
+            // THE INDEX ORDER OF C DOESN'T LOOK TOO GOOD: this could be fixed
+            for (int idim = 0; idim < 3; idim++)
+                pw *= C(idim, gvecs_shifted[ig][idim]);
+            // pw *= C0[gvecs_shifted[ig][0]];
+            // pw *= C1[gvecs_shifted[ig][1]];
+            // pw *= C2[gvecs_shifted[ig][2]];
+            zptr[0] = pw;
+            zptr[1] = minusModKplusG2[ig] * pw;
+            zptr[2] =
+                kplusgvecs_cart[ig][0] * ComplexType(-pw.imag(), pw.real());
+            zptr[3] =
+                kplusgvecs_cart[ig][1] * ComplexType(-pw.imag(), pw.real());
+            zptr[4] =
+                kplusgvecs_cart[ig][2] * ComplexType(-pw.imag(), pw.real());
+        }
+    }
+#else
+    inline void
+    evaluate(const PosType& pos)
+    {
+        // Evaluate the planewaves for particle iat.
+        for (int ig = 0; ig < NumPlaneWaves; ig++)
+            phi[ig] = dot(kplusgvecs_cart[ig], pos);
+        eval_e2iphi(NumPlaneWaves, phi.data(), Zv.data());
+    }
+    inline void
+    evaluateAll(const ParticleSetT<T>& P, int iat)
+    {
+        const PosType& r(P.activeR(iat));
+        evaluate(r);
+        ComplexType* restrict zptr = Z.data();
+        for (int ig = 0; ig < NumPlaneWaves; ig++, zptr += 5) {
+            // PW is initialized as exp(i*twist.r) so that the final basis
+            // evaluations are for (twist+G).r
+            ComplexType& pw = Zv[ig];
+            zptr[0] = pw;
+            zptr[1] = minusModKplusG2[ig] * pw;
+            zptr[2] =
+                kplusgvecs_cart[ig][0] * ComplexType(-pw.imag(), pw.real());
+            zptr[3] =
+                kplusgvecs_cart[ig][1] * ComplexType(-pw.imag(), pw.real());
+            zptr[4] =
+                kplusgvecs_cart[ig][2] * ComplexType(-pw.imag(), pw.real());
+        }
+    }
+#endif
+    //    /** Fill the recursion coefficients matrix.
+    //     *
+    //     * @todo Generalize to non-orthorhombic cells
+    //     */
+    //    void BuildRecursionCoefsByAdd(const PosType& pos)
+    //    {
     //      // Cartesian of twist for 1,1,1 (reduced coordinates)
     //      PosType G111(1.0,1.0,1.0);
     //      G111 = Lattice.k_cart(G111);
-    //
-    //      //Precompute a small number of complex factors (PWs along b1,b2,b3 lines)
-    //      //using a fast recursion algorithm
-    //#pragma ivdep
-    //      for(int idim=0; idim<3; idim++){
+    //      //PosType redP=P.Lattice.toUnit(P.R[iat]);
+    //      //Precompute a small number of complex factors (PWs along b1,b2,b3 lines)
+    //      for(int idim=0; idim<3; idim++){
     //        //start the recursion with the 111 vector.
     //        RealType phi = pos[idim] * G111[idim];
-    //        register ComplexType Ctemp(std::cos(phi), std::sin(phi));
-    //        int ng=maxg[idim];
-    //        ComplexType* restrict cp_ptr=C[idim]+ng;
-    //        ComplexType* restrict cn_ptr=C[idim]+ng-1;
-    //        *cp_ptr=1.0;
+    //        int ng(maxg[idim]);
+    //        RealType* restrict cp_ptr=logC[idim]+ng;
+    //        RealType* restrict cn_ptr=logC[idim]+ng-1;
+    //        *cp_ptr=0.0;
+    //        //add INTEL vectorization
     //        for(int n=1; n<=ng; n++,cn_ptr--){
-    //          ComplexType t(Ctemp*(*cp_ptr++));
+    //          RealType t(phi+*cp_ptr++);
     //          *cp_ptr = t;
-    //          *cn_ptr = conj(t);
+    //          *cn_ptr = -t;
     //        }
     //      }
-  }
-
-  inline void evaluate(const PosType& pos)
-  {
-    BuildRecursionCoefs(pos);
-    RealType twistdotr = dot(twist_cart, pos);
-    ComplexType pw0(std::cos(twistdotr), std::sin(twistdotr));
-    //Evaluate the planewaves for particle iat.
-    for (int ig = 0; ig < NumPlaneWaves; ig++)
-    {
-      //PW is initialized as exp(i*twist.r) so that the final basis evaluations are for (twist+G).r
-      ComplexType pw(pw0); //std::cos(twistdotr),std::sin(twistdotr));
-      for (int idim = 0; idim < 3; idim++)
-        pw *= C(idim, gvecs_shifted[ig][idim]);
-      //pw *= C0[gvecs_shifted[ig][0]];
-      //pw *= C1[gvecs_shifted[ig][1]];
-      //pw *= C2[gvecs_shifted[ig][2]];
-      Zv[ig] = pw;
-    }
-  }
-  /** Evaluate all planewaves and derivatives for the iat-th particle
-   *
-   * The basis functions are evaluated for particles iat: first <= iat < last
-   * Evaluate the plane-waves at current particle coordinates using a fast
-   * recursion algorithm. Order of Y,dY and d2Y is kept correct.
-   * These can be "dotted" with coefficients later to complete orbital evaluations.
-   */
-  inline void evaluateAll(const ParticleSet& P, int iat)
-  {
-    const PosType& r(P.activeR(iat));
-    BuildRecursionCoefs(r);
-    RealType twistdotr = dot(twist_cart, r);
-    ComplexType pw0(std::cos(twistdotr), std::sin(twistdotr));
-    //Evaluate the planewaves and derivatives.
-    ComplexType* restrict zptr = Z.data();
-    for (int ig = 0; ig < NumPlaneWaves; ig++, zptr += 5)
-    {
-      //PW is initialized as exp(i*twist.r) so that the final basis evaluations
-      //are for (twist+G).r
-      ComplexType pw(pw0);
-      // THE INDEX ORDER OF C DOESN'T LOOK TOO GOOD: this could be fixed
-      for (int idim = 0; idim < 3; idim++)
-        pw *= C(idim, gvecs_shifted[ig][idim]);
-      //pw *= C0[gvecs_shifted[ig][0]];
-      //pw *= C1[gvecs_shifted[ig][1]];
-      //pw *= C2[gvecs_shifted[ig][2]];
-      zptr[0] = pw;
-      zptr[1] = minusModKplusG2[ig] * pw;
-      zptr[2] = kplusgvecs_cart[ig][0] * ComplexType(-pw.imag(), pw.real());
-      zptr[3] = kplusgvecs_cart[ig][1] * ComplexType(-pw.imag(), pw.real());
-      zptr[4] = kplusgvecs_cart[ig][2] * ComplexType(-pw.imag(), pw.real());
-    }
-  }
-#else
-  inline void evaluate(const PosType& pos)
-  {
-    //Evaluate the planewaves for particle iat.
-    for (int ig = 0; ig < NumPlaneWaves; ig++)
-      phi[ig] = dot(kplusgvecs_cart[ig], pos);
-    eval_e2iphi(NumPlaneWaves, phi.data(), Zv.data());
-  }
-  inline void evaluateAll(const ParticleSet& P, int iat)
-  {
-    const PosType& r(P.activeR(iat));
-    evaluate(r);
-    ComplexType* restrict zptr = Z.data();
-    for (int ig = 0; ig < NumPlaneWaves; ig++, zptr += 5)
-    {
-      //PW is initialized as exp(i*twist.r) so that the final basis evaluations
-      //are for (twist+G).r
-      ComplexType& pw = Zv[ig];
-      zptr[0]         = pw;
-      zptr[1]         = minusModKplusG2[ig] * pw;
-      zptr[2]         = kplusgvecs_cart[ig][0] * ComplexType(-pw.imag(), pw.real());
-      zptr[3]         = kplusgvecs_cart[ig][1] * ComplexType(-pw.imag(), pw.real());
-      zptr[4]         = kplusgvecs_cart[ig][2] * ComplexType(-pw.imag(), pw.real());
-    }
-  }
-#endif
-  //    /** Fill the recursion coefficients matrix.
-  //     *
-  //     * @todo Generalize to non-orthorohmbic cells
-  //     */
-  //    void BuildRecursionCoefsByAdd(const PosType& pos)
-  //    {
-  //      // Cartesian of twist for 1,1,1 (reduced coordinates)
-  //      PosType G111(1.0,1.0,1.0);
-  //      G111 = Lattice.k_cart(G111);
-  //      //PosType redP=P.Lattice.toUnit(P.R[iat]);
-  //      //Precompute a small number of complex factors (PWs along b1,b2,b3 lines)
-  //      for(int idim=0; idim<3; idim++){
-  //        //start the recursion with the 111 vector.
-  //        RealType phi = pos[idim] * G111[idim];
-  //        int ng(maxg[idim]);
-  //        RealType* restrict cp_ptr=logC[idim]+ng;
-  //        RealType* restrict cn_ptr=logC[idim]+ng-1;
-  //        *cp_ptr=0.0;
-  //        //add INTEL vectorization
-  //        for(int n=1; n<=ng; n++,cn_ptr--){
-  //          RealType t(phi+*cp_ptr++);
-  //          *cp_ptr = t;
-  //          *cn_ptr = -t;
-  //        }
-  //      }
-  //    }
+    //    }
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp
index a3b1e135ec..6d82f8fdac 100644
--- a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp
+++ b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp
@@ -1,145 +1,156 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
-//                    Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of
+//                      Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of
+//                      Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Mark Dewing, markdewing@gmail.com, University of Illinois
+//                      at Urbana-Champaign
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
+#include "PWOrbitalSetT.h"
 
 #include "Message/Communicate.h"
-#include "PWOrbitalSetT.h"
 #include "Numerics/MatrixOperators.h"
 
 namespace qmcplusplus
 {
-template<class T>
+template <class T>
 PWOrbitalSetT<T>::~PWOrbitalSetT()
 {
-  if (OwnBasisSet && myBasisSet)
-    delete myBasisSet;
-  if (!IsCloned && this->C != nullptr)
-    delete this->C;
+    if (OwnBasisSet && myBasisSet)
+        delete myBasisSet;
+    if (!IsCloned && this->C != nullptr)
+        delete this->C;
 }
 
-template<class T>
-std::unique_ptr<SPOSetT<T>> PWOrbitalSetT<T>::makeClone() const
+template <class T>
+std::unique_ptr<SPOSetT<T>>
+PWOrbitalSetT<T>::makeClone() const
 {
-  auto myclone        = std::make_unique<PWOrbitalSetT<T>>(*this);
-  myclone->myBasisSet = new PWBasisT<T>(*myBasisSet);
-  myclone->IsCloned   = true;
-  return myclone;
+    auto myclone = std::make_unique<PWOrbitalSetT<T>>(*this);
+    myclone->myBasisSet = new PWBasisT<T>(*myBasisSet);
+    myclone->IsCloned = true;
+    return myclone;
 }
 
-template<class T>
-void PWOrbitalSetT<T>::setOrbitalSetSize(int norbs) {}
-
-template<class T>
-void PWOrbitalSetT<T>::resize(PWBasisPtr bset, int nbands, bool cleanup)
+template <class T>
+void
+PWOrbitalSetT<T>::setOrbitalSetSize(int norbs)
 {
-  myBasisSet     = bset;
-  this->OrbitalSetSize = nbands;
-  OwnBasisSet    = cleanup;
-  BasisSetSize   = myBasisSet->NumPlaneWaves;
-  this->C              = new ValueMatrix(this->OrbitalSetSize, BasisSetSize);
-  this->Temp.resize(this->OrbitalSetSize, PW_MAXINDEX);
-  app_log() << "  PWOrbitalSetT<T>::resize OrbitalSetSize =" << this->OrbitalSetSize << " BasisSetSize = " << BasisSetSize
-            << std::endl;
 }
 
-template<class T>
-void PWOrbitalSetT<T>::addVector(const std::vector<ComplexType>& coefs, int jorb)
+template <class T>
+void
+PWOrbitalSetT<T>::resize(PWBasisPtr bset, int nbands, bool cleanup)
 {
-  int ng = myBasisSet->inputmap.size();
-  if (ng != coefs.size())
-  {
-    app_error() << "  Input G map does not match the basis size of wave functions " << std::endl;
-    OHMMS::Controller->abort();
-  }
-  //drop G points for the given TwistAngle
-  const std::vector<int>& inputmap(myBasisSet->inputmap);
-  for (int ig = 0; ig < ng; ig++)
-  {
-    if (inputmap[ig] > -1)
-      (*(this->C))[jorb][inputmap[ig]] = coefs[ig];
-  }
+    myBasisSet = bset;
+    this->OrbitalSetSize = nbands;
+    OwnBasisSet = cleanup;
+    BasisSetSize = myBasisSet->NumPlaneWaves;
+    this->C = new ValueMatrix(this->OrbitalSetSize, BasisSetSize);
+    this->Temp.resize(this->OrbitalSetSize, PW_MAXINDEX);
+    app_log() << "  PWOrbitalSetT<T>::resize OrbitalSetSize ="
+              << this->OrbitalSetSize << " BasisSetSize = " << BasisSetSize
+              << std::endl;
 }
 
-template<class T>
-void PWOrbitalSetT<T>::addVector(const std::vector<RealType>& coefs, int jorb)
+template <class T>
+void
+PWOrbitalSetT<T>::addVector(const std::vector<ComplexType>& coefs, int jorb)
 {
-  int ng = myBasisSet->inputmap.size();
-  if (ng != coefs.size())
-  {
-    app_error() << "  Input G map does not match the basis size of wave functions " << std::endl;
-    OHMMS::Controller->abort();
-  }
-  //drop G points for the given TwistAngle
-  const std::vector<int>& inputmap(myBasisSet->inputmap);
-  for (int ig = 0; ig < ng; ig++)
-  {
-    if (inputmap[ig] > -1)
-      (*(this->C))[jorb][inputmap[ig]] = coefs[ig];
-  }
+    int ng = myBasisSet->inputmap.size();
+    if (ng != coefs.size()) {
+        app_error()
+            << "  Input G map does not match the basis size of wave functions "
+            << std::endl;
+        OHMMS::Controller->abort();
+    }
+    // drop G points for the given TwistAngle
+    const std::vector<int>& inputmap(myBasisSet->inputmap);
+    for (int ig = 0; ig < ng; ig++) {
+        if (inputmap[ig] > -1)
+            (*(this->C))[jorb][inputmap[ig]] = coefs[ig];
+    }
 }
 
-template<class T>
-void PWOrbitalSetT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+template <class T>
+void
+PWOrbitalSetT<T>::addVector(const std::vector<RealType>& coefs, int jorb)
 {
-  //Evaluate every orbital for particle iat.
-  //Evaluate the basis-set at these coordinates:
-  //myBasisSet->evaluate(P,iat);
-  myBasisSet->evaluate(P.activeR(iat));
-  MatrixOperators::product<T>(*(this->C), myBasisSet->Zv, psi);
+    int ng = myBasisSet->inputmap.size();
+    if (ng != coefs.size()) {
+        app_error()
+            << "  Input G map does not match the basis size of wave functions "
+            << std::endl;
+        OHMMS::Controller->abort();
+    }
+    // drop G points for the given TwistAngle
+    const std::vector<int>& inputmap(myBasisSet->inputmap);
+    for (int ig = 0; ig < ng; ig++) {
+        if (inputmap[ig] > -1)
+            (*(this->C))[jorb][inputmap[ig]] = coefs[ig];
+    }
 }
 
-template<class T>
-void PWOrbitalSetT<T>::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+template <class T>
+void
+PWOrbitalSetT<T>::evaluateValue(
+    const ParticleSetT<T>& P, int iat, ValueVector& psi)
 {
-  //Evaluate the orbitals and derivatives for particle iat only.
-  myBasisSet->evaluateAll(P, iat);
-  MatrixOperators::product<T>(*(this->C), myBasisSet->Z, this->Temp);
-  const T* restrict tptr = this->Temp.data();
-  for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX)
-  {
-    psi[j]   = tptr[PW_VALUE];
-    d2psi[j] = tptr[PW_LAP];
-    dpsi[j]  = GradType(tptr[PW_GRADX], tptr[PW_GRADY], tptr[PW_GRADZ]);
-  }
+    // Evaluate every orbital for particle iat.
+    // Evaluate the basis-set at these coordinates:
+    // myBasisSet->evaluate(P,iat);
+    myBasisSet->evaluate(P.activeR(iat));
+    MatrixOperators::product<T>(*(this->C), myBasisSet->Zv, psi);
 }
 
-template<class T>
-void PWOrbitalSetT<T>::evaluate_notranspose(const ParticleSet& P,
-                                        int first,
-                                        int last,
-                                        ValueMatrix& logdet,
-                                        GradMatrix& dlogdet,
-                                        ValueMatrix& d2logdet)
+template <class T>
+void
+PWOrbitalSetT<T>::evaluateVGL(const ParticleSetT<T>& P, int iat,
+    ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
 {
-  for (int iat = first, i = 0; iat < last; iat++, i++)
-  {
+    // Evaluate the orbitals and derivatives for particle iat only.
     myBasisSet->evaluateAll(P, iat);
     MatrixOperators::product<T>(*(this->C), myBasisSet->Z, this->Temp);
     const T* restrict tptr = this->Temp.data();
-    for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX)
-    {
-      logdet(i, j)   = tptr[PW_VALUE];
-      d2logdet(i, j) = tptr[PW_LAP];
-      dlogdet(i, j)  = GradType(tptr[PW_GRADX], tptr[PW_GRADY], tptr[PW_GRADZ]);
+    for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX) {
+        psi[j] = tptr[PW_VALUE];
+        d2psi[j] = tptr[PW_LAP];
+        dpsi[j] = GradType(tptr[PW_GRADX], tptr[PW_GRADY], tptr[PW_GRADZ]);
+    }
+}
+
+template <class T>
+void
+PWOrbitalSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
+    int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet)
+{
+    for (int iat = first, i = 0; iat < last; iat++, i++) {
+        myBasisSet->evaluateAll(P, iat);
+        MatrixOperators::product<T>(*(this->C), myBasisSet->Z, this->Temp);
+        const T* restrict tptr = this->Temp.data();
+        for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX) {
+            logdet(i, j) = tptr[PW_VALUE];
+            d2logdet(i, j) = tptr[PW_LAP];
+            dlogdet(i, j) =
+                GradType(tptr[PW_GRADX], tptr[PW_GRADY], tptr[PW_GRADZ]);
+        }
     }
-  }
 }
 
 // Class concrete types from T
-// NOTE: This class only gets compiled if QMC_COMPLEX is defined, thus it is inherently complex
-// template class PWOrbitalSetT<double>;
-// template class PWOrbitalSetT<float>;
+// NOTE: This class only gets compiled if QMC_COMPLEX is defined, thus it is
+// inherently complex.
+// template class PWOrbitalSetT<double>; template class PWOrbitalSetT<float>;
 template class PWOrbitalSetT<std::complex<double>>;
 template class PWOrbitalSetT<std::complex<float>>;
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h
index 25c3e0d5c1..d4e13de966 100644
--- a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h
+++ b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h
@@ -1,128 +1,146 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
-//                    Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of
+//                      Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of
+//                      Illinois at Urbana-Champaign
+//                    Mark Dewing, markdewing@gmail.com, University of Illinois
+//                      at Urbana-Champaign
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
-
 /** @file PWOrbitalSetT.h
  * @brief Definition of member functions of Plane-wave basis set
  */
 #ifndef QMCPLUSPLUS_PLANEWAVE_ORBITALSET_BLAS_H
 #define QMCPLUSPLUS_PLANEWAVE_ORBITALSET_BLAS_H
 
+#include "CPU/BLAS.hpp"
 #include "QMCWaveFunctions/PlaneWave/PWBasisT.h"
-#include "type_traits/complex_help.hpp"
 #include "QMCWaveFunctions/SPOSetT.h"
-#include "CPU/BLAS.hpp"
+#include "type_traits/complex_help.hpp"
 
 namespace qmcplusplus
 {
 
-template<class T>
+template <class T>
 class PWOrbitalSetT : public SPOSetT<T>
 {
 public:
-  using RealType    = typename SPOSetT<T>::RealType;
-  using ComplexType = T;
-  using PosType     = typename SPOSetT<T>::PosType;
-  using ValueVector = typename SPOSetT<T>::ValueVector;
-  using GradVector  = typename SPOSetT<T>::GradVector;
-  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
-  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
-  using GradType    = typename SPOSetT<T>::GradType;
-  using IndexType   = typename SPOSetT<T>::IndexType;
-
-  using BasisSet_t = PWBasisT<T>;
-  using PWBasisPtr = PWBasisT<T>*;
-
-  /** inherit the enum of BasisSet_t */
-  enum
-  {
-    PW_VALUE    = BasisSet_t::PW_VALUE,
-    PW_LAP      = BasisSet_t::PW_LAP,
-    PW_GRADX    = BasisSet_t::PW_GRADX,
-    PW_GRADY    = BasisSet_t::PW_GRADY,
-    PW_GRADZ    = BasisSet_t::PW_GRADZ,
-    PW_MAXINDEX = BasisSet_t::PW_MAXINDEX
-  };
-
-
-  /** default constructor
-  */
-  PWOrbitalSetT<T>(const std::string& my_name)
-      : SPOSetT<T>(my_name), OwnBasisSet(false), myBasisSet(nullptr), BasisSetSize(0), C(nullptr), IsCloned(false)
-  {}
-
-  std::string getClassName() const override { return "PWOrbitalSetT"; }
-
-
-  /** delete BasisSet only it owns this
-   *
-   * Builder takes care of who owns what
-   */
-  ~PWOrbitalSetT<T>() override;
-
-  std::unique_ptr<SPOSetT<T>> makeClone() const override;
-  /** resize  the orbital base
-   * @param bset PWBasis
-   * @param nbands number of bands
-   * @param cleaup if true, owns PWBasis. Will clean up.
-   */
-  void resize(PWBasisPtr bset, int nbands, bool cleanup = false);
-
-  /** Builder class takes care of the assertion
-  */
-  void addVector(const std::vector<ComplexType>& coefs, int jorb);
-  void addVector(const std::vector<RealType>& coefs, int jorb);
-
-  void setOrbitalSetSize(int norbs) override;
-
-  inline T evaluate(int ib, const PosType& pos)
-  {
-    myBasisSet->evaluate(pos);
-    return BLAS::dot(BasisSetSize, (*C)[ib], myBasisSet->Zv.data());
-  }
-
-  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override;
-
-  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override;
-
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            ValueMatrix& logdet,
-                            GradMatrix& dlogdet,
-                            ValueMatrix& d2logdet) override;
-
-  /** boolean
-   *
-   * If true, this has to delete the BasisSet
-   */
-  bool OwnBasisSet;
-  ///TwistAngle of this PWOrbitalSetT
-  PosType TwistAngle;
-  ///My basis set
-  PWBasisPtr myBasisSet;
-  ///number of basis
-  IndexType BasisSetSize;
-  /** pointer to matrix containing the coefficients
-   *
-   * makeClone makes a shallow copy and flag IsCloned
-   */
-  ValueMatrix* C;
-  ///if true, do not clean up
-  bool IsCloned;
-
-  /** temporary array to perform gemm operation */
-  Matrix<T> Temp;
+    using RealType = typename SPOSetT<T>::RealType;
+    using ComplexType = T;
+    using PosType = typename SPOSetT<T>::PosType;
+    using ValueVector = typename SPOSetT<T>::ValueVector;
+    using GradVector = typename SPOSetT<T>::GradVector;
+    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+    using GradMatrix = typename SPOSetT<T>::GradMatrix;
+    using GradType = typename SPOSetT<T>::GradType;
+    using IndexType = typename SPOSetT<T>::IndexType;
+
+    using BasisSet_t = PWBasisT<T>;
+    using PWBasisPtr = PWBasisT<T>*;
+
+    /** inherit the enum of BasisSet_t */
+    enum
+    {
+        PW_VALUE = BasisSet_t::PW_VALUE,
+        PW_LAP = BasisSet_t::PW_LAP,
+        PW_GRADX = BasisSet_t::PW_GRADX,
+        PW_GRADY = BasisSet_t::PW_GRADY,
+        PW_GRADZ = BasisSet_t::PW_GRADZ,
+        PW_MAXINDEX = BasisSet_t::PW_MAXINDEX
+    };
+
+    /** constructor: takes the name passed on to SPOSetT<T>
+     */
+    PWOrbitalSetT<T>(const std::string& my_name) :
+        SPOSetT<T>(my_name),
+        OwnBasisSet(false),
+        myBasisSet(nullptr),
+        BasisSetSize(0),
+        C(nullptr),
+        IsCloned(false)
+    {
+    }
+
+    std::string
+    getClassName() const override
+    {
+        return "PWOrbitalSetT";
+    }
+
+    /** delete the BasisSet only if this object owns it
+     *
+     * Builder takes care of who owns what
+     */
+    ~PWOrbitalSetT<T>() override;
+
+    std::unique_ptr<SPOSetT<T>>
+    makeClone() const override;
+    /** resize  the orbital base
+     * @param bset PWBasis
+     * @param nbands number of bands
+     * @param cleanup if true, owns PWBasis. Will clean up.
+     */
+    void
+    resize(PWBasisPtr bset, int nbands, bool cleanup = false);
+
+    /** Builder class takes care of the assertion
+     */
+    void
+    addVector(const std::vector<ComplexType>& coefs, int jorb);
+    void
+    addVector(const std::vector<RealType>& coefs, int jorb);
+
+    void
+    setOrbitalSetSize(int norbs) override;
+
+    inline T
+    evaluate(int ib, const PosType& pos)
+    {
+        myBasisSet->evaluate(pos);
+        return BLAS::dot(BasisSetSize, (*C)[ib], myBasisSet->Zv.data());
+    }
+
+    void
+    evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) override;
+
+    void
+    evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
+        GradVector& dpsi, ValueVector& d2psi) override;
+
+    void
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+        ValueMatrix& logdet, GradMatrix& dlogdet,
+        ValueMatrix& d2logdet) override;
+
+    /** boolean
+     *
+     * If true, this has to delete the BasisSet
+     */
+    bool OwnBasisSet;
+    /// TwistAngle of this PWOrbitalSetT
+    PosType TwistAngle;
+    /// My basis set
+    PWBasisPtr myBasisSet;
+    /// number of basis
+    IndexType BasisSetSize;
+    /** pointer to matrix containing the coefficients
+     *
+     * makeClone makes a shallow copy and sets the IsCloned flag
+     */
+    ValueMatrix* C;
+    /// if true, do not clean up
+    bool IsCloned;
+
+    /** temporary array to perform gemm operation */
+    Matrix<T> Temp;
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCWaveFunctions/RotatedSPOsT.cpp b/src/QMCWaveFunctions/RotatedSPOsT.cpp
index f76150ec2a..128bca9798 100644
--- a/src/QMCWaveFunctions/RotatedSPOsT.cpp
+++ b/src/QMCWaveFunctions/RotatedSPOsT.cpp
@@ -21,1022 +21,1006 @@
 
 namespace qmcplusplus
 {
-template<typename T>
-RotatedSPOsT<T>::RotatedSPOsT(const std::string& my_name, std::unique_ptr<SPOSetT<T>>&& spos)
-    : SPOSetT<T>(my_name), OptimizableObject(my_name), Phi(std::move(spos)), nel_major_(0), params_supplied(false)
+template <typename T>
+RotatedSPOsT<T>::RotatedSPOsT(
+    const std::string& my_name, std::unique_ptr<SPOSetT<T>>&& spos) :
+    SPOSetT<T>(my_name),
+    OptimizableObjectT<T>(my_name),
+    Phi(std::move(spos)),
+    nel_major_(0),
+    params_supplied(false)
 {
-  this->OrbitalSetSize = Phi->getOrbitalSetSize();
+    this->OrbitalSetSize = Phi->getOrbitalSetSize();
 }
 
-template<typename T>
+template <typename T>
 RotatedSPOsT<T>::~RotatedSPOsT()
-{}
+{
+}
 
-template<typename T>
-void RotatedSPOsT<T>::setRotationParameters(const std::vector<RealType>& param_list)
+template <typename T>
+void
+RotatedSPOsT<T>::setRotationParameters(const std::vector<RealType>& param_list)
 {
-  params          = param_list;
-  params_supplied = true;
+    params = param_list;
+    params_supplied = true;
 }
 
-template<typename T>
-void RotatedSPOsT<T>::createRotationIndices(int nel, int nmo, RotationIndices& rot_indices)
+template <typename T>
+void
+RotatedSPOsT<T>::createRotationIndices(
+    int nel, int nmo, RotationIndices& rot_indices)
 {
-  for (int i = 0; i < nel; i++)
-    for (int j = nel; j < nmo; j++)
-      rot_indices.emplace_back(i, j);
+    for (int i = 0; i < nel; i++)
+        for (int j = nel; j < nmo; j++)
+            rot_indices.emplace_back(i, j);
 }
 
-template<typename T>
-void RotatedSPOsT<T>::createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices)
+template <typename T>
+void
+RotatedSPOsT<T>::createRotationIndicesFull(
+    int nel, int nmo, RotationIndices& rot_indices)
 {
-  rot_indices.reserve(nmo * (nmo - 1) / 2);
-
-  // start with core-active rotations - put them at the beginning of the list
-  // so it matches the other list of rotation indices
-  for (int i = 0; i < nel; i++)
-    for (int j = nel; j < nmo; j++)
-      rot_indices.emplace_back(i, j);
-
-  // Add core-core rotations - put them at the end of the list
-  for (int i = 0; i < nel; i++)
-    for (int j = i + 1; j < nel; j++)
-      rot_indices.emplace_back(i, j);
-
-  // Add active-active rotations - put them at the end of the list
-  for (int i = nel; i < nmo; i++)
-    for (int j = i + 1; j < nmo; j++)
-      rot_indices.emplace_back(i, j);
+    rot_indices.reserve(nmo * (nmo - 1) / 2);
+
+    // start with core-active rotations - put them at the beginning of the list
+    // so it matches the other list of rotation indices
+    for (int i = 0; i < nel; i++)
+        for (int j = nel; j < nmo; j++)
+            rot_indices.emplace_back(i, j);
+
+    // Add core-core rotations - put them at the end of the list
+    for (int i = 0; i < nel; i++)
+        for (int j = i + 1; j < nel; j++)
+            rot_indices.emplace_back(i, j);
+
+    // Add active-active rotations - put them at the end of the list
+    for (int i = nel; i < nmo; i++)
+        for (int j = i + 1; j < nmo; j++)
+            rot_indices.emplace_back(i, j);
 }
 
-template<typename T>
-void RotatedSPOsT<T>::constructAntiSymmetricMatrix(const RotationIndices& rot_indices,
-                                                   const std::vector<RealType>& param,
-                                                   ValueMatrix& rot_mat)
+template <typename T>
+void
+RotatedSPOsT<T>::constructAntiSymmetricMatrix(
+    const RotationIndices& rot_indices, const std::vector<RealType>& param,
+    ValueMatrix& rot_mat)
 {
-  assert(rot_indices.size() == param.size());
-  // Assumes rot_mat is of the correct size
+    assert(rot_indices.size() == param.size());
+    // Assumes rot_mat is of the correct size
 
-  rot_mat = 0.0;
+    rot_mat = 0.0;
 
-  for (int i = 0; i < rot_indices.size(); i++)
-  {
-    const int p      = rot_indices[i].first;
-    const int q      = rot_indices[i].second;
-    const RealType x = param[i];
+    for (int i = 0; i < rot_indices.size(); i++) {
+        const int p = rot_indices[i].first;
+        const int q = rot_indices[i].second;
+        const RealType x = param[i];
 
-    rot_mat[q][p] = x;
-    rot_mat[p][q] = -x;
-  }
+        rot_mat[q][p] = x;
+        rot_mat[p][q] = -x;
+    }
 }
 
-template<typename T>
-void RotatedSPOsT<T>::extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices,
-                                                           const ValueMatrix& rot_mat,
-                                                           std::vector<RealType>& param)
+template <typename T>
+void
+RotatedSPOsT<T>::extractParamsFromAntiSymmetricMatrix(
+    const RotationIndices& rot_indices, const ValueMatrix& rot_mat,
+    std::vector<RealType>& param)
 {
-  assert(rot_indices.size() == param.size());
-  // Assumes rot_mat is of the correct size
-
-  for (int i = 0; i < rot_indices.size(); i++)
-  {
-    const int p = rot_indices[i].first;
-    const int q = rot_indices[i].second;
-    param[i]    = rot_mat[q][p];
-  }
+    assert(rot_indices.size() == param.size());
+    // Assumes rot_mat is of the correct size
+
+    for (int i = 0; i < rot_indices.size(); i++) {
+        const int p = rot_indices[i].first;
+        const int q = rot_indices[i].second;
+        param[i] = rot_mat[q][p];
+    }
 }
 
-template<typename T>
-void RotatedSPOsT<T>::resetParametersExclusive(const opt_variables_type& active)
+template <typename T>
+void
+RotatedSPOsT<T>::resetParametersExclusive(const OptVariablesType<T>& active)
 {
-  std::vector<RealType> delta_param(m_act_rot_inds.size());
-
-  size_t psize = m_act_rot_inds.size();
-
-  if (use_global_rot_)
-  {
-    psize = m_full_rot_inds.size();
-    assert(psize >= m_act_rot_inds.size());
-  }
-
-  std::vector<RealType> old_param(psize);
-  std::vector<RealType> new_param(psize);
-
-  for (int i = 0; i < m_act_rot_inds.size(); i++)
-  {
-    int loc         = this->myVars.where(i);
-    delta_param[i]  = active[loc] - this->myVars[i];
-    this->myVars[i] = active[loc];
-  }
-
-  if (use_global_rot_)
-  {
-    for (int i = 0; i < m_full_rot_inds.size(); i++)
-      old_param[i] = myVarsFull[i];
-
-    applyDeltaRotation(delta_param, old_param, new_param);
-
-    // Save the the params
-    for (int i = 0; i < m_full_rot_inds.size(); i++)
-      myVarsFull[i] = new_param[i];
-  }
-  else
-  {
-    apply_rotation(delta_param, false);
-
-    // Save the parameters in the history list
-    history_params_.push_back(delta_param);
-  }
+    std::vector<RealType> delta_param(m_act_rot_inds.size());
+
+    size_t psize = m_act_rot_inds.size();
+
+    if (use_global_rot_) {
+        psize = m_full_rot_inds.size();
+        assert(psize >= m_act_rot_inds.size());
+    }
+
+    std::vector<RealType> old_param(psize);
+    std::vector<RealType> new_param(psize);
+
+    for (int i = 0; i < m_act_rot_inds.size(); i++) {
+        int loc = this->myVars.where(i);
+        delta_param[i] = active[loc] - this->myVars[i];
+        this->myVars[i] = active[loc];
+    }
+
+    if (use_global_rot_) {
+        for (int i = 0; i < m_full_rot_inds.size(); i++)
+            old_param[i] = myVarsFull[i];
+
+        applyDeltaRotation(delta_param, old_param, new_param);
+
+        // Save the params
+        for (int i = 0; i < m_full_rot_inds.size(); i++)
+            myVarsFull[i] = new_param[i];
+    }
+    else {
+        apply_rotation(delta_param, false);
+
+        // Save the parameters in the history list
+        history_params_.push_back(delta_param);
+    }
 }
 
-template<typename T>
-void RotatedSPOsT<T>::writeVariationalParameters(hdf_archive& hout)
+template <typename T>
+void
+RotatedSPOsT<T>::writeVariationalParameters(hdf_archive& hout)
 {
-  hout.push("RotatedSPOsT");
-  if (use_global_rot_)
-  {
-    hout.push("rotation_global");
-    std::string rot_global_name = std::string("rotation_global_") + SPOSetT<T>::getName();
-
-    int nparam_full = myVarsFull.size();
-    std::vector<RealType> full_params(nparam_full);
-    for (int i = 0; i < nparam_full; i++)
-      full_params[i] = myVarsFull[i];
-
-    hout.write(full_params, rot_global_name);
-    hout.pop();
-  }
-  else
-  {
-    hout.push("rotation_history");
-    size_t rows = history_params_.size();
-    size_t cols = 0;
-    if (rows > 0)
-      cols = history_params_[0].size();
-
-    Matrix<RealType> tmp(rows, cols);
-    for (size_t i = 0; i < rows; i++)
-      for (size_t j = 0; j < cols; j++)
-        tmp(i, j) = history_params_[i][j];
-
-    std::string rot_hist_name = std::string("rotation_history_") + SPOSetT<T>::getName();
-    hout.write(tmp, rot_hist_name);
-    hout.pop();
-  }
+    hout.push("RotatedSPOsT");
+    if (use_global_rot_) {
+        hout.push("rotation_global");
+        std::string rot_global_name =
+            std::string("rotation_global_") + SPOSetT<T>::getName();
+
+        int nparam_full = myVarsFull.size();
+        std::vector<RealType> full_params(nparam_full);
+        for (int i = 0; i < nparam_full; i++)
+            full_params[i] = myVarsFull[i];
+
+        hout.write(full_params, rot_global_name);
+        hout.pop();
+    }
+    else {
+        hout.push("rotation_history");
+        size_t rows = history_params_.size();
+        size_t cols = 0;
+        if (rows > 0)
+            cols = history_params_[0].size();
+
+        Matrix<RealType> tmp(rows, cols);
+        for (size_t i = 0; i < rows; i++)
+            for (size_t j = 0; j < cols; j++)
+                tmp(i, j) = history_params_[i][j];
+
+        std::string rot_hist_name =
+            std::string("rotation_history_") + SPOSetT<T>::getName();
+        hout.write(tmp, rot_hist_name);
+        hout.pop();
+    }
 
-  // Save myVars in order to restore object state exactly
-  //  The values aren't meaningful, but they need to match those saved in
-  //  VariableSet
-  hout.push("rotation_params");
-  std::string rot_params_name = std::string("rotation_params_") + SPOSetT<T>::getName();
+    // Save myVars in order to restore object state exactly
+    //  The values aren't meaningful, but they need to match those saved in
+    //  VariableSet
+    hout.push("rotation_params");
+    std::string rot_params_name =
+        std::string("rotation_params_") + SPOSetT<T>::getName();
 
-  int nparam = this->myVars.size();
-  std::vector<RealType> params(nparam);
-  for (int i = 0; i < nparam; i++)
-    params[i] = this->myVars[i];
+    int nparam = this->myVars.size();
+    std::vector<RealType> params(nparam);
+    for (int i = 0; i < nparam; i++)
+        params[i] = this->myVars[i];
 
-  hout.write(params, rot_params_name);
-  hout.pop();
+    hout.write(params, rot_params_name);
+    hout.pop();
 
-  hout.pop();
+    hout.pop();
 }
 
-template<typename T>
-void RotatedSPOsT<T>::readVariationalParameters(hdf_archive& hin)
+template <typename T>
+void
+RotatedSPOsT<T>::readVariationalParameters(hdf_archive& hin)
 {
-  hin.push("RotatedSPOsT", false);
+    hin.push("RotatedSPOsT", false);
+
+    bool grp_hist_exists = hin.is_group("rotation_history");
+    bool grp_global_exists = hin.is_group("rotation_global");
+    if (!grp_hist_exists && !grp_global_exists)
+        app_warning() << "Rotation parameters not found in VP file";
+
+    if (grp_global_exists) {
+        hin.push("rotation_global", false);
+        std::string rot_global_name =
+            std::string("rotation_global_") + SPOSetT<T>::getName();
+
+        std::vector<int> sizes(1);
+        if (!hin.getShape<RealType>(rot_global_name, sizes))
+            throw std::runtime_error(
+                "Failed to read rotation_global in VP file");
+
+        int nparam_full_actual = sizes[0];
+        int nparam_full = myVarsFull.size();
+
+        if (nparam_full != nparam_full_actual) {
+            std::ostringstream tmp_err;
+            tmp_err << "Expected number of full rotation parameters ("
+                    << nparam_full << ") does not match number in file ("
+                    << nparam_full_actual << ")";
+            throw std::runtime_error(tmp_err.str());
+        }
+        std::vector<RealType> full_params(nparam_full);
+        hin.read(full_params, rot_global_name);
+        for (int i = 0; i < nparam_full; i++)
+            myVarsFull[i] = full_params[i];
 
-  bool grp_hist_exists   = hin.is_group("rotation_history");
-  bool grp_global_exists = hin.is_group("rotation_global");
-  if (!grp_hist_exists && !grp_global_exists)
-    app_warning() << "Rotation parameters not found in VP file";
+        hin.pop();
 
-  if (grp_global_exists)
-  {
-    hin.push("rotation_global", false);
-    std::string rot_global_name = std::string("rotation_global_") + SPOSetT<T>::getName();
+        applyFullRotation(full_params, true);
+    }
+    else if (grp_hist_exists) {
+        hin.push("rotation_history", false);
+        std::string rot_hist_name =
+            std::string("rotation_history_") + SPOSetT<T>::getName();
+        std::vector<int> sizes(2);
+        if (!hin.getShape<RealType>(rot_hist_name, sizes))
+            throw std::runtime_error(
+                "Failed to read rotation history in VP file");
+
+        int rows = sizes[0];
+        int cols = sizes[1];
+        history_params_.resize(rows);
+        Matrix<RealType> tmp(rows, cols);
+        hin.read(tmp, rot_hist_name);
+        for (size_t i = 0; i < rows; i++) {
+            history_params_[i].resize(cols);
+            for (size_t j = 0; j < cols; j++)
+                history_params_[i][j] = tmp(i, j);
+        }
 
-    std::vector<int> sizes(1);
-    if (!hin.getShape<RealType>(rot_global_name, sizes))
-      throw std::runtime_error("Failed to read rotation_global in VP file");
-
-    int nparam_full_actual = sizes[0];
-    int nparam_full        = myVarsFull.size();
-
-    if (nparam_full != nparam_full_actual)
-    {
-      std::ostringstream tmp_err;
-      tmp_err << "Expected number of full rotation parameters (" << nparam_full << ") does not match number in file ("
-              << nparam_full_actual << ")";
-      throw std::runtime_error(tmp_err.str());
+        hin.pop();
+
+        applyRotationHistory();
     }
-    std::vector<RealType> full_params(nparam_full);
-    hin.read(full_params, rot_global_name);
-    for (int i = 0; i < nparam_full; i++)
-      myVarsFull[i] = full_params[i];
 
-    hin.pop();
+    hin.push("rotation_params", false);
+    std::string rot_param_name =
+        std::string("rotation_params_") + SPOSetT<T>::getName();
 
-    applyFullRotation(full_params, true);
-  }
-  else if (grp_hist_exists)
-  {
-    hin.push("rotation_history", false);
-    std::string rot_hist_name = std::string("rotation_history_") + SPOSetT<T>::getName();
-    std::vector<int> sizes(2);
-    if (!hin.getShape<RealType>(rot_hist_name, sizes))
-      throw std::runtime_error("Failed to read rotation history in VP file");
-
-    int rows = sizes[0];
-    int cols = sizes[1];
-    history_params_.resize(rows);
-    Matrix<RealType> tmp(rows, cols);
-    hin.read(tmp, rot_hist_name);
-    for (size_t i = 0; i < rows; i++)
-    {
-      history_params_[i].resize(cols);
-      for (size_t j = 0; j < cols; j++)
-        history_params_[i][j] = tmp(i, j);
+    std::vector<int> sizes(1);
+    if (!hin.getShape<RealType>(rot_param_name, sizes))
+        throw std::runtime_error("Failed to read rotation_params in VP file");
+
+    int nparam_actual = sizes[0];
+    int nparam = this->myVars.size();
+    if (nparam != nparam_actual) {
+        std::ostringstream tmp_err;
+        tmp_err << "Expected number of rotation parameters (" << nparam
+                << ") does not match number in file (" << nparam_actual << ")";
+        throw std::runtime_error(tmp_err.str());
     }
 
+    std::vector<RealType> params(nparam);
+    hin.read(params, rot_param_name);
+    for (int i = 0; i < nparam; i++)
+        this->myVars[i] = params[i];
+
     hin.pop();
 
-    applyRotationHistory();
-  }
+    hin.pop();
+}
 
-  hin.push("rotation_params", false);
-  std::string rot_param_name = std::string("rotation_params_") + SPOSetT<T>::getName();
+template <typename T>
+void
+RotatedSPOsT<T>::buildOptVariables(const size_t nel)
+{
+#if !defined(QMC_COMPLEX)
+    /* Only rebuild optimized variables if more after-rotation orbitals are
+     * needed. Consider ROHF: there is only one set of SPOs for both spin up
+     * and down, with Nup > Ndown. nel_major_ will be set to Nup.
+     *
+     * Use the size of myVars as a flag to avoid building the rotation
+     * parameters again when a clone is made (the DiracDeterminant constructor
+     * calls buildOptVariables)
+     */
+    if (nel > nel_major_ && this->myVars.size() == 0) {
+        nel_major_ = nel;
 
-  std::vector<int> sizes(1);
-  if (!hin.getShape<RealType>(rot_param_name, sizes))
-    throw std::runtime_error("Failed to read rotation_params in VP file");
+        const size_t nmo = Phi->getOrbitalSetSize();
 
-  int nparam_actual = sizes[0];
-  int nparam        = this->myVars.size();
-  if (nparam != nparam_actual)
-  {
-    std::ostringstream tmp_err;
-    tmp_err << "Expected number of rotation parameters (" << nparam << ") does not match number in file ("
-            << nparam_actual << ")";
-    throw std::runtime_error(tmp_err.str());
-  }
+        // create active rotation parameter indices
+        RotationIndices created_m_act_rot_inds;
 
-  std::vector<RealType> params(nparam);
-  hin.read(params, rot_param_name);
-  for (int i = 0; i < nparam; i++)
-    this->myVars[i] = params[i];
+        RotationIndices created_full_rot_inds;
+        if (use_global_rot_)
+            createRotationIndicesFull(nel, nmo, created_full_rot_inds);
 
-  hin.pop();
+        createRotationIndices(nel, nmo, created_m_act_rot_inds);
 
-  hin.pop();
+        buildOptVariables(created_m_act_rot_inds, created_full_rot_inds);
+    }
+#endif
 }
 
-template<typename T>
-void RotatedSPOsT<T>::buildOptVariables(const size_t nel)
+template <typename T>
+void
+RotatedSPOsT<T>::buildOptVariables(
+    const RotationIndices& rotations, const RotationIndices& full_rotations)
 {
 #if !defined(QMC_COMPLEX)
-  /* Only rebuild optimized variables if more after-rotation orbitals are
-	 * needed Consider ROHF, there is only one set of SPO for both spin up and
-	 * down Nup > Ndown. nel_major_ will be set Nup.
-	 *
-	 * Use the size of myVars as a flag to avoid building the rotation
-	 * parameters again when a clone is made (the DiracDeterminant constructor
-	 * calls buildOptVariables)
-	 */
-  if (nel > nel_major_ && this->myVars.size() == 0)
-  {
-    nel_major_ = nel;
-
     const size_t nmo = Phi->getOrbitalSetSize();
 
-    // create active rotation parameter indices
-    RotationIndices created_m_act_rot_inds;
+    // create active rotations
+    m_act_rot_inds = rotations;
 
-    RotationIndices created_full_rot_inds;
     if (use_global_rot_)
-      createRotationIndicesFull(nel, nmo, created_full_rot_inds);
+        m_full_rot_inds = full_rotations;
 
-    createRotationIndices(nel, nmo, created_m_act_rot_inds);
+    if (use_global_rot_)
+        app_log() << "Orbital rotation using global rotation" << std::endl;
+    else
+        app_log() << "Orbital rotation using history" << std::endl;
 
-    buildOptVariables(created_m_act_rot_inds, created_full_rot_inds);
-  }
-#endif
-}
+    // This will add the orbital rotation parameters to myVars
+    // and will also read in initial parameter values supplied in input file
+    int p, q;
+    int nparams_active = m_act_rot_inds.size();
 
-template<typename T>
-void RotatedSPOsT<T>::buildOptVariables(const RotationIndices& rotations, const RotationIndices& full_rotations)
-{
-#if !defined(QMC_COMPLEX)
-  const size_t nmo = Phi->getOrbitalSetSize();
-
-  // create active rotations
-  m_act_rot_inds = rotations;
-
-  if (use_global_rot_)
-    m_full_rot_inds = full_rotations;
-
-  if (use_global_rot_)
-    app_log() << "Orbital rotation using global rotation" << std::endl;
-  else
-    app_log() << "Orbital rotation using history" << std::endl;
-
-  // This will add the orbital rotation parameters to myVars
-  // and will also read in initial parameter values supplied in input file
-  int p, q;
-  int nparams_active = m_act_rot_inds.size();
-
-  app_log() << "nparams_active: " << nparams_active << " params2.size(): " << params.size() << std::endl;
-  if (params_supplied)
-    if (nparams_active != params.size())
-      throw std::runtime_error("The number of supplied orbital rotation parameters does not "
-                               "match number prdouced by the slater "
-                               "expansion. \n");
-
-  this->myVars.clear();
-  for (int i = 0; i < nparams_active; i++)
-  {
-    p = m_act_rot_inds[i].first;
-    q = m_act_rot_inds[i].second;
-    std::stringstream sstr;
-    sstr << SPOSetT<T>::getName() << "_orb_rot_" << (p < 10 ? "0" : "") << (p < 100 ? "0" : "") << (p < 1000 ? "0" : "")
-         << p << "_" << (q < 10 ? "0" : "") << (q < 100 ? "0" : "") << (q < 1000 ? "0" : "") << q;
-
-    // If the user input parameters, use those. Otherwise, initialize the
-    // parameters to zero
+    app_log() << "nparams_active: " << nparams_active
+              << " params2.size(): " << params.size() << std::endl;
     if (params_supplied)
-    {
-      this->myVars.insert(sstr.str(), params[i]);
+        if (nparams_active != params.size())
+            throw std::runtime_error(
+                "The number of supplied orbital rotation parameters does not "
+                "match number prdouced by the slater "
+                "expansion. \n");
+
+    this->myVars.clear();
+    for (int i = 0; i < nparams_active; i++) {
+        p = m_act_rot_inds[i].first;
+        q = m_act_rot_inds[i].second;
+        std::stringstream sstr;
+        sstr << SPOSetT<T>::getName() << "_orb_rot_" << (p < 10 ? "0" : "")
+             << (p < 100 ? "0" : "") << (p < 1000 ? "0" : "") << p << "_"
+             << (q < 10 ? "0" : "") << (q < 100 ? "0" : "")
+             << (q < 1000 ? "0" : "") << q;
+
+        // If the user input parameters, use those. Otherwise, initialize the
+        // parameters to zero
+        if (params_supplied) {
+            this->myVars.insert(sstr.str(), params[i]);
+        }
+        else {
+            this->myVars.insert(sstr.str(), 0.0);
+        }
     }
-    else
-    {
-      this->myVars.insert(sstr.str(), 0.0);
+
+    if (use_global_rot_) {
+        myVarsFull.clear();
+        for (int i = 0; i < m_full_rot_inds.size(); i++) {
+            p = m_full_rot_inds[i].first;
+            q = m_full_rot_inds[i].second;
+            std::stringstream sstr;
+            sstr << SPOSetT<T>::getName() << "_orb_rot_" << (p < 10 ? "0" : "")
+                 << (p < 100 ? "0" : "") << (p < 1000 ? "0" : "") << p << "_"
+                 << (q < 10 ? "0" : "") << (q < 100 ? "0" : "")
+                 << (q < 1000 ? "0" : "") << q;
+
+            if (params_supplied && i < m_act_rot_inds.size())
+                myVarsFull.insert(sstr.str(), params[i]);
+            else
+                myVarsFull.insert(sstr.str(), 0.0);
+        }
+    }
+
+    // Printing the parameters
+    if (true) {
+        app_log() << std::string(16, ' ') << "Parameter name"
+                  << std::string(15, ' ') << "Value\n";
+        this->myVars.print(app_log());
     }
-  }
-
-  if (use_global_rot_)
-  {
-    myVarsFull.clear();
-    for (int i = 0; i < m_full_rot_inds.size(); i++)
-    {
-      p = m_full_rot_inds[i].first;
-      q = m_full_rot_inds[i].second;
-      std::stringstream sstr;
-      sstr << SPOSetT<T>::getName() << "_orb_rot_" << (p < 10 ? "0" : "") << (p < 100 ? "0" : "")
-           << (p < 1000 ? "0" : "") << p << "_" << (q < 10 ? "0" : "") << (q < 100 ? "0" : "") << (q < 1000 ? "0" : "")
-           << q;
-
-      if (params_supplied && i < m_act_rot_inds.size())
-        myVarsFull.insert(sstr.str(), params[i]);
-      else
-        myVarsFull.insert(sstr.str(), 0.0);
+
+    if (params_supplied) {
+        std::vector<RealType> param(m_act_rot_inds.size());
+        for (int i = 0; i < m_act_rot_inds.size(); i++)
+            param[i] = this->myVars[i];
+        apply_rotation(param, false);
     }
-  }
-
-  // Printing the parameters
-  if (true)
-  {
-    app_log() << std::string(16, ' ') << "Parameter name" << std::string(15, ' ') << "Value\n";
-    this->myVars.print(app_log());
-  }
-
-  if (params_supplied)
-  {
-    std::vector<RealType> param(m_act_rot_inds.size());
-    for (int i = 0; i < m_act_rot_inds.size(); i++)
-      param[i] = this->myVars[i];
-    apply_rotation(param, false);
-  }
 #endif
 }
 
-template<typename T>
-void RotatedSPOsT<T>::apply_rotation(const std::vector<RealType>& param, bool use_stored_copy)
+template <typename T>
+void
+RotatedSPOsT<T>::apply_rotation(
+    const std::vector<RealType>& param, bool use_stored_copy)
 {
-  assert(param.size() == m_act_rot_inds.size());
+    assert(param.size() == m_act_rot_inds.size());
 
-  const size_t nmo = Phi->getOrbitalSetSize();
-  ValueMatrix rot_mat(nmo, nmo);
+    const size_t nmo = Phi->getOrbitalSetSize();
+    ValueMatrix rot_mat(nmo, nmo);
 
-  constructAntiSymmetricMatrix(m_act_rot_inds, param, rot_mat);
+    constructAntiSymmetricMatrix(m_act_rot_inds, param, rot_mat);
 
-  /*
-	  rot_mat is now an anti-hermitian matrix. Now we convert
-	  it into a unitary matrix via rot_mat = exp(-rot_mat).
-	  Finally, apply unitary matrix to orbs.
-	*/
-  exponentiate_antisym_matrix(rot_mat);
-  Phi->applyRotation(rot_mat, use_stored_copy);
+    /*
+        rot_mat is now an anti-hermitian matrix. Now we convert
+        it into a unitary matrix via rot_mat = exp(-rot_mat).
+        Finally, apply unitary matrix to orbs.
+      */
+    exponentiate_antisym_matrix(rot_mat);
+    Phi->applyRotation(rot_mat, use_stored_copy);
 }
 
-template<typename T>
-void RotatedSPOsT<T>::applyDeltaRotation(const std::vector<RealType>& delta_param,
-                                         const std::vector<RealType>& old_param,
-                                         std::vector<RealType>& new_param)
+template <typename T>
+void
+RotatedSPOsT<T>::applyDeltaRotation(const std::vector<RealType>& delta_param,
+    const std::vector<RealType>& old_param, std::vector<RealType>& new_param)
 {
-  const size_t nmo = Phi->getOrbitalSetSize();
-  ValueMatrix new_rot_mat(nmo, nmo);
-  constructDeltaRotation(delta_param, old_param, m_act_rot_inds, m_full_rot_inds, new_param, new_rot_mat);
+    const size_t nmo = Phi->getOrbitalSetSize();
+    ValueMatrix new_rot_mat(nmo, nmo);
+    constructDeltaRotation(delta_param, old_param, m_act_rot_inds,
+        m_full_rot_inds, new_param, new_rot_mat);
 
-  Phi->applyRotation(new_rot_mat, true);
+    Phi->applyRotation(new_rot_mat, true);
 }
 
-template<typename T>
-void RotatedSPOsT<T>::constructDeltaRotation(const std::vector<RealType>& delta_param,
-                                             const std::vector<RealType>& old_param,
-                                             const RotationIndices& act_rot_inds,
-                                             const RotationIndices& full_rot_inds,
-                                             std::vector<RealType>& new_param,
-                                             ValueMatrix& new_rot_mat)
+template <typename T>
+void
+RotatedSPOsT<T>::constructDeltaRotation(
+    const std::vector<RealType>& delta_param,
+    const std::vector<RealType>& old_param, const RotationIndices& act_rot_inds,
+    const RotationIndices& full_rot_inds, std::vector<RealType>& new_param,
+    ValueMatrix& new_rot_mat)
 {
-  assert(delta_param.size() == act_rot_inds.size());
-  assert(old_param.size() == full_rot_inds.size());
-  assert(new_param.size() == full_rot_inds.size());
+    assert(delta_param.size() == act_rot_inds.size());
+    assert(old_param.size() == full_rot_inds.size());
+    assert(new_param.size() == full_rot_inds.size());
 
-  const size_t nmo = new_rot_mat.rows();
-  assert(new_rot_mat.rows() == new_rot_mat.cols());
+    const size_t nmo = new_rot_mat.rows();
+    assert(new_rot_mat.rows() == new_rot_mat.cols());
 
-  ValueMatrix old_rot_mat(nmo, nmo);
+    ValueMatrix old_rot_mat(nmo, nmo);
 
-  constructAntiSymmetricMatrix(full_rot_inds, old_param, old_rot_mat);
-  exponentiate_antisym_matrix(old_rot_mat);
+    constructAntiSymmetricMatrix(full_rot_inds, old_param, old_rot_mat);
+    exponentiate_antisym_matrix(old_rot_mat);
 
-  ValueMatrix delta_rot_mat(nmo, nmo);
+    ValueMatrix delta_rot_mat(nmo, nmo);
 
-  constructAntiSymmetricMatrix(act_rot_inds, delta_param, delta_rot_mat);
-  exponentiate_antisym_matrix(delta_rot_mat);
+    constructAntiSymmetricMatrix(act_rot_inds, delta_param, delta_rot_mat);
+    exponentiate_antisym_matrix(delta_rot_mat);
 
-  // Apply delta rotation to old rotation.
-  BLAS::gemm('N', 'N', nmo, nmo, nmo, 1.0, delta_rot_mat.data(), nmo, old_rot_mat.data(), nmo, 0.0, new_rot_mat.data(),
-             nmo);
+    // Apply delta rotation to old rotation.
+    BLAS::gemm('N', 'N', nmo, nmo, nmo, 1.0, delta_rot_mat.data(), nmo,
+        old_rot_mat.data(), nmo, 0.0, new_rot_mat.data(), nmo);
 
-  ValueMatrix log_rot_mat(nmo, nmo);
-  log_antisym_matrix(new_rot_mat, log_rot_mat);
-  extractParamsFromAntiSymmetricMatrix(full_rot_inds, log_rot_mat, new_param);
+    ValueMatrix log_rot_mat(nmo, nmo);
+    log_antisym_matrix(new_rot_mat, log_rot_mat);
+    extractParamsFromAntiSymmetricMatrix(full_rot_inds, log_rot_mat, new_param);
 }
 
-template<typename T>
-void RotatedSPOsT<T>::applyFullRotation(const std::vector<RealType>& full_param, bool use_stored_copy)
+template <typename T>
+void
+RotatedSPOsT<T>::applyFullRotation(
+    const std::vector<RealType>& full_param, bool use_stored_copy)
 {
-  assert(full_param.size() == m_full_rot_inds.size());
-
-  const size_t nmo = Phi->getOrbitalSetSize();
-  ValueMatrix rot_mat(nmo, nmo);
-  rot_mat = T(0);
+    assert(full_param.size() == m_full_rot_inds.size());
 
-  constructAntiSymmetricMatrix(m_full_rot_inds, full_param, rot_mat);
-
-  /*
-	  rot_mat is now an anti-hermitian matrix. Now we convert
-	  it into a unitary matrix via rot_mat = exp(-rot_mat).
-	  Finally, apply unitary matrix to orbs.
-	*/
-  exponentiate_antisym_matrix(rot_mat);
-  Phi->applyRotation(rot_mat, use_stored_copy);
+    const size_t nmo = Phi->getOrbitalSetSize();
+    ValueMatrix rot_mat(nmo, nmo);
+    rot_mat = T(0);
+
+    constructAntiSymmetricMatrix(m_full_rot_inds, full_param, rot_mat);
+
+    /*
+        rot_mat is now an anti-hermitian matrix. Now we convert
+        it into a unitary matrix via rot_mat = exp(-rot_mat).
+        Finally, apply unitary matrix to orbs.
+      */
+    exponentiate_antisym_matrix(rot_mat);
+    Phi->applyRotation(rot_mat, use_stored_copy);
 }
 
-template<typename T>
-void RotatedSPOsT<T>::applyRotationHistory()
+template <typename T>
+void
+RotatedSPOsT<T>::applyRotationHistory()
 {
-  for (auto delta_param : history_params_)
-  {
-    apply_rotation(delta_param, false);
-  }
+    for (auto delta_param : history_params_) {
+        apply_rotation(delta_param, false);
+    }
 }
 
 // compute exponential of a real, antisymmetric matrix by diagonalizing and
 // exponentiating eigenvalues
-template<typename T>
-void RotatedSPOsT<T>::exponentiate_antisym_matrix(ValueMatrix& mat)
+template <typename T>
+void
+RotatedSPOsT<T>::exponentiate_antisym_matrix(ValueMatrix& mat)
 {
-  const int n = mat.rows();
-  std::vector<std::complex<RealType>> mat_h(n * n, 0);
-  std::vector<RealType> eval(n, 0);
-  std::vector<std::complex<RealType>> work(2 * n, 0);
-  std::vector<RealType> rwork(3 * n, 0);
-  std::vector<std::complex<RealType>> mat_d(n * n, 0);
-  std::vector<std::complex<RealType>> mat_t(n * n, 0);
-  // exponentiating e^X = e^iY (Y hermitian)
-  // i(-iX) = X, so -iX is hermitian
-  // diagonalize -iX = UDU^T, exponentiate e^iD, and return U e^iD U^T
-  // construct hermitian analogue of mat by multiplying by -i
-  for (int i = 0; i < n; ++i)
-  {
-    for (int j = i; j < n; ++j)
-    {
-      mat_h[i + n * j] = std::complex<RealType>(0, -1.0 * mat[j][i]);
-      mat_h[j + n * i] = std::complex<RealType>(0, 1.0 * mat[j][i]);
+    const int n = mat.rows();
+    std::vector<std::complex<RealType>> mat_h(n * n, 0);
+    std::vector<RealType> eval(n, 0);
+    std::vector<std::complex<RealType>> work(2 * n, 0);
+    std::vector<RealType> rwork(3 * n, 0);
+    std::vector<std::complex<RealType>> mat_d(n * n, 0);
+    std::vector<std::complex<RealType>> mat_t(n * n, 0);
+    // exponentiating e^X = e^iY (Y hermitian)
+    // i(-iX) = X, so -iX is hermitian
+    // diagonalize -iX = UDU^H, exponentiate e^iD, and return U e^iD U^H
+    // construct hermitian analogue of mat by multiplying by -i
+    for (int i = 0; i < n; ++i) {
+        for (int j = i; j < n; ++j) {
+            mat_h[i + n * j] = std::complex<RealType>(0, -1.0 * mat[j][i]);
+            mat_h[j + n * i] = std::complex<RealType>(0, 1.0 * mat[j][i]);
+        }
     }
-  }
-  // diagonalize the matrix
-  char JOBZ('V');
-  char UPLO('U');
-  int N(n);
-  int LDA(n);
-  int LWORK(2 * n);
-  int info = 0;
-  LAPACK::heev(JOBZ, UPLO, N, &mat_h.at(0), LDA, &eval.at(0), &work.at(0), LWORK, &rwork.at(0), info);
-  if (info != 0)
-  {
-    std::ostringstream msg;
-    msg << "heev failed with info = " << info << " in RotatedSPOsT::exponentiate_antisym_matrix";
-    throw std::runtime_error(msg.str());
-  }
-  // iterate through diagonal matrix, exponentiate terms
-  for (int i = 0; i < n; ++i)
-  {
-    for (int j = 0; j < n; ++j)
-    {
-      mat_d[i + j * n] = (i == j) ? std::exp(std::complex<RealType>(0.0, eval[i])) : std::complex<RealType>(0.0, 0.0);
+    // diagonalize the matrix
+    char JOBZ('V');
+    char UPLO('U');
+    int N(n);
+    int LDA(n);
+    int LWORK(2 * n);
+    int info = 0;
+    LAPACK::heev(JOBZ, UPLO, N, &mat_h.at(0), LDA, &eval.at(0), &work.at(0),
+        LWORK, &rwork.at(0), info);
+    if (info != 0) {
+        std::ostringstream msg;
+        msg << "heev failed with info = " << info
+            << " in RotatedSPOsT::exponentiate_antisym_matrix";
+        throw std::runtime_error(msg.str());
     }
-  }
-  // perform matrix multiplication
-  // assume row major
-  BLAS::gemm('N', 'C', n, n, n, std::complex<RealType>(1.0, 0), &mat_d.at(0), n, &mat_h.at(0), n,
-             std::complex<RealType>(0.0, 0.0), &mat_t.at(0), n);
-  BLAS::gemm('N', 'N', n, n, n, std::complex<RealType>(1.0, 0), &mat_h.at(0), n, &mat_t.at(0), n,
-             std::complex<RealType>(0.0, 0.0), &mat_d.at(0), n);
-  for (int i = 0; i < n; ++i)
-    for (int j = 0; j < n; ++j)
-    {
-      if (mat_d[i + n * j].imag() > 1e-12)
-      {
-        app_log() << "warning: large imaginary value in orbital "
-                     "rotation matrix: (i,j) = ("
-                  << i << "," << j << "), im = " << mat_d[i + n * j].imag() << std::endl;
-      }
-      mat[j][i] = mat_d[i + n * j].real();
+    // iterate through diagonal matrix, exponentiate terms
+    for (int i = 0; i < n; ++i) {
+        for (int j = 0; j < n; ++j) {
+            mat_d[i + j * n] = (i == j) ?
+                std::exp(std::complex<RealType>(0.0, eval[i])) :
+                std::complex<RealType>(0.0, 0.0);
+        }
     }
+    // perform matrix multiplication
+    // assume row major
+    BLAS::gemm('N', 'C', n, n, n, std::complex<RealType>(1.0, 0), &mat_d.at(0),
+        n, &mat_h.at(0), n, std::complex<RealType>(0.0, 0.0), &mat_t.at(0), n);
+    BLAS::gemm('N', 'N', n, n, n, std::complex<RealType>(1.0, 0), &mat_h.at(0),
+        n, &mat_t.at(0), n, std::complex<RealType>(0.0, 0.0), &mat_d.at(0), n);
+    for (int i = 0; i < n; ++i)
+        for (int j = 0; j < n; ++j) {
+            const auto& z = mat_d[i + n * j]; // warn on either sign of im
+            if (z.imag() > 1e-12 || z.imag() < -1e-12) {
+                app_log() << "warning: large imaginary value in orbital "
+                             "rotation matrix: (i,j) = ("
+                          << i << "," << j << "), im = " << z.imag() << std::endl;
+            }
+            mat[j][i] = z.real();
+        }
 }
 
-template<typename T>
-void RotatedSPOsT<T>::log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output)
+template <typename T>
+void
+RotatedSPOsT<T>::log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output)
 {
-  const int n = mat.rows();
-  std::vector<RealType> mat_h(n * n, 0);
-  std::vector<RealType> eval_r(n, 0);
-  std::vector<RealType> eval_i(n, 0);
-  std::vector<RealType> mat_l(n * n, 0);
-  std::vector<RealType> work(4 * n, 0);
-
-  std::vector<std::complex<RealType>> mat_cd(n * n, 0);
-  std::vector<std::complex<RealType>> mat_cl(n * n, 0);
-  std::vector<std::complex<RealType>> mat_ch(n * n, 0);
-
-  for (int i = 0; i < n; ++i)
-    for (int j = 0; j < n; ++j)
-      mat_h[i + n * j] = mat[i][j];
-
-  // diagonalize the matrix
-  char JOBL('V');
-  char JOBR('N');
-  int N(n);
-  int LDA(n);
-  int LWORK(4 * n);
-  int info = 0;
-  LAPACK::geev(&JOBL, &JOBR, &N, &mat_h.at(0), &LDA, &eval_r.at(0), &eval_i.at(0), &mat_l.at(0), &LDA, nullptr, &LDA,
-               &work.at(0), &LWORK, &info);
-  if (info != 0)
-  {
-    std::ostringstream msg;
-    msg << "heev failed with info = " << info << " in RotatedSPOsT::log_antisym_matrix";
-    throw std::runtime_error(msg.str());
-  }
-
-  // iterate through diagonal matrix, take log
-  for (int i = 0; i < n; ++i)
-  {
-    for (int j = 0; j < n; ++j)
-    {
-      auto tmp = (i == j) ? std::log(std::complex<RealType>(eval_r[i], eval_i[i])) : std::complex<RealType>(0.0, 0.0);
-      mat_cd[i + j * n] = tmp;
-
-      if (eval_i[j] > 0.0)
-      {
-        mat_cl[i + j * n]       = std::complex<RealType>(mat_l[i + j * n], mat_l[i + (j + 1) * n]);
-        mat_cl[i + (j + 1) * n] = std::complex<RealType>(mat_l[i + j * n], -mat_l[i + (j + 1) * n]);
-      }
-      else if (!(eval_i[j] < 0.0))
-      {
-        mat_cl[i + j * n] = std::complex<RealType>(mat_l[i + j * n], 0.0);
-      }
+    const int n = mat.rows();
+    std::vector<RealType> mat_h(n * n, 0);
+    std::vector<RealType> eval_r(n, 0);
+    std::vector<RealType> eval_i(n, 0);
+    std::vector<RealType> mat_l(n * n, 0);
+    std::vector<RealType> work(4 * n, 0);
+
+    std::vector<std::complex<RealType>> mat_cd(n * n, 0);
+    std::vector<std::complex<RealType>> mat_cl(n * n, 0);
+    std::vector<std::complex<RealType>> mat_ch(n * n, 0);
+
+    for (int i = 0; i < n; ++i)
+        for (int j = 0; j < n; ++j)
+            mat_h[i + n * j] = mat[i][j];
+
+    // diagonalize the matrix
+    char JOBL('V');
+    char JOBR('N');
+    int N(n);
+    int LDA(n);
+    int LWORK(4 * n);
+    int info = 0;
+    LAPACK::geev(&JOBL, &JOBR, &N, &mat_h.at(0), &LDA, &eval_r.at(0),
+        &eval_i.at(0), &mat_l.at(0), &LDA, nullptr, &LDA, &work.at(0), &LWORK,
+        &info);
+    if (info != 0) { // non-zero info from the LAPACK::geev call above
+        std::ostringstream msg;
+        msg << "geev failed with info = " << info
+            << " in RotatedSPOsT::log_antisym_matrix";
+        throw std::runtime_error(msg.str());
     }
-  }
-
-  RealType one(1.0);
-  RealType zero(0.0);
-  BLAS::gemm('N', 'N', n, n, n, one, &mat_cl.at(0), n, &mat_cd.at(0), n, zero, &mat_ch.at(0), n);
-  BLAS::gemm('N', 'C', n, n, n, one, &mat_ch.at(0), n, &mat_cl.at(0), n, zero, &mat_cd.at(0), n);
-
-  for (int i = 0; i < n; ++i)
-    for (int j = 0; j < n; ++j)
-    {
-      if (mat_cd[i + n * j].imag() > 1e-12)
-      {
-        app_log() << "warning: large imaginary value in antisymmetric "
-                     "matrix: (i,j) = ("
-                  << i << "," << j << "), im = " << mat_cd[i + n * j].imag() << std::endl;
-      }
-      output[i][j] = mat_cd[i + n * j].real();
+
+    // iterate through diagonal matrix, take log
+    for (int i = 0; i < n; ++i) {
+        for (int j = 0; j < n; ++j) {
+            auto tmp = (i == j) ?
+                std::log(std::complex<RealType>(eval_r[i], eval_i[i])) :
+                std::complex<RealType>(0.0, 0.0);
+            mat_cd[i + j * n] = tmp;
+
+            if (eval_i[j] > 0.0) {
+                mat_cl[i + j * n] = std::complex<RealType>(
+                    mat_l[i + j * n], mat_l[i + (j + 1) * n]);
+                mat_cl[i + (j + 1) * n] = std::complex<RealType>(
+                    mat_l[i + j * n], -mat_l[i + (j + 1) * n]);
+            }
+            else if (!(eval_i[j] < 0.0)) {
+                mat_cl[i + j * n] =
+                    std::complex<RealType>(mat_l[i + j * n], 0.0);
+            }
+        }
     }
+
+    RealType one(1.0);
+    RealType zero(0.0);
+    BLAS::gemm('N', 'N', n, n, n, one, &mat_cl.at(0), n, &mat_cd.at(0), n, zero,
+        &mat_ch.at(0), n);
+    BLAS::gemm('N', 'C', n, n, n, one, &mat_ch.at(0), n, &mat_cl.at(0), n, zero,
+        &mat_cd.at(0), n);
+
+    for (int i = 0; i < n; ++i)
+        for (int j = 0; j < n; ++j) {
+            const auto& z = mat_cd[i + n * j];
+            // Warn on residual imaginary parts of either sign.
+            if (z.imag() > 1e-12 || z.imag() < -1e-12) {
+                app_log() << "warning: large imaginary value in antisymmetric "
+                             "matrix: (i,j) = ("
+                          << i << "," << j << "), im = " << z.imag() << std::endl;
+            }
+            output[i][j] = z.real();
+        }
 }
 
-template<typename T>
-void RotatedSPOsT<T>::evaluateDerivRatios(const VirtualParticleSet& VP,
-                                          const opt_variables_type& optvars,
-                                          ValueVector& psi,
-                                          const ValueVector& psiinv,
-                                          std::vector<T>& ratios,
-                                          Matrix<T>& dratios,
-                                          int FirstIndex,
-                                          int LastIndex)
+template <typename T>
+void
+RotatedSPOsT<T>::evaluateDerivRatios(const VirtualParticleSetT<T>& VP,
+    const OptVariablesType<T>& optvars, ValueVector& psi,
+    const ValueVector& psiinv, std::vector<T>& ratios, Matrix<T>& dratios,
+    int FirstIndex, int LastIndex)
 {
-  Phi->evaluateDetRatios(VP, psi, psiinv, ratios);
+    Phi->evaluateDetRatios(VP, psi, psiinv, ratios);
+
+    const size_t nel = LastIndex - FirstIndex;
+    const size_t nmo = Phi->getOrbitalSetSize();
+
+    psiM_inv.resize(nel, nel);
+    psiM_all.resize(nel, nmo);
+    dpsiM_all.resize(nel, nmo);
+    d2psiM_all.resize(nel, nmo);
+
+    psiM_inv = 0;
+    psiM_all = 0;
+    dpsiM_all = 0;
+    d2psiM_all = 0;
+
+    const ParticleSetT<T>& P = VP.getRefPS();
+    int iel = VP.refPtcl;
+
+    Phi->evaluate_notranspose(
+        P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
+
+    for (int i = 0; i < nel; i++)
+        for (int j = 0; j < nel; j++)
+            psiM_inv(i, j) = psiM_all(i, j);
+
+    Invert(psiM_inv.data(), nel, nel);
 
-  const size_t nel = LastIndex - FirstIndex;
-  const size_t nmo = Phi->getOrbitalSetSize();
+    const T* const A(psiM_all.data());
+    const T* const Ainv(psiM_inv.data());
+    ValueMatrix T_orig;
+    T_orig.resize(nel, nmo);
+
+    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0),
+        T_orig.data(), nmo);
+
+    ValueMatrix T_mat;
+    T_mat.resize(nel, nmo);
 
-  psiM_inv.resize(nel, nel);
-  psiM_all.resize(nel, nmo);
-  dpsiM_all.resize(nel, nmo);
-  d2psiM_all.resize(nel, nmo);
+    ValueVector tmp_psi;
+    tmp_psi.resize(nmo);
 
-  psiM_inv   = 0;
-  psiM_all   = 0;
-  dpsiM_all  = 0;
-  d2psiM_all = 0;
+    for (int iat = 0; iat < VP.getTotalNum(); iat++) {
+        Phi->evaluateValue(VP, iat, tmp_psi);
 
-  const ParticleSet& P = VP.getRefPS();
-  int iel              = VP.refPtcl;
+        for (int j = 0; j < nmo; j++)
+            psiM_all(iel - FirstIndex, j) = tmp_psi[j];
 
-  Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
+        for (int i = 0; i < nel; i++)
+            for (int j = 0; j < nel; j++)
+                psiM_inv(i, j) = psiM_all(i, j);
 
-  for (int i = 0; i < nel; i++)
-    for (int j = 0; j < nel; j++)
-      psiM_inv(i, j) = psiM_all(i, j);
+        Invert(psiM_inv.data(), nel, nel);
 
-  Invert(psiM_inv.data(), nel, nel);
+        const T* const A(psiM_all.data());
+        const T* const Ainv(psiM_inv.data());
 
-  const T* const A(psiM_all.data());
-  const T* const Ainv(psiM_inv.data());
-  ValueMatrix T_orig;
-  T_orig.resize(nel, nmo);
+        // The matrix A is rectangular.  Ainv is the inverse of the square part
+        // of the matrix. The multiply of Ainv and the square part of A is just
+        // the identity. This multiply could be reduced to Ainv and the
+        // non-square part of A.
+        BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0),
+            T_mat.data(), nmo);
+
+        for (int i = 0; i < m_act_rot_inds.size(); i++) {
+            int kk = this->myVars.where(i);
+            if (kk >= 0) {
+                const int p = m_act_rot_inds.at(i).first;
+                const int q = m_act_rot_inds.at(i).second;
+                dratios(iat, kk) = T_mat(p, q) -
+                    T_orig(p, q); // dratio size is (nknot, num_vars)
+            }
+        }
+    }
+}
 
-  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_orig.data(), nmo);
+template <typename T>
+void
+RotatedSPOsT<T>::evaluateDerivativesWF(ParticleSetT<T>& P,
+    const OptVariablesType<T>& optvars, Vector<T>& dlogpsi, int FirstIndex,
+    int LastIndex)
+{
+    const size_t nel = LastIndex - FirstIndex;
+    const size_t nmo = Phi->getOrbitalSetSize();
 
-  ValueMatrix T_mat;
-  T_mat.resize(nel, nmo);
+    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1
 
-  ValueVector tmp_psi;
-  tmp_psi.resize(nmo);
+    psiM_inv.resize(nel, nel);
+    psiM_all.resize(nel, nmo);
+    dpsiM_all.resize(nel, nmo);
+    d2psiM_all.resize(nel, nmo);
 
-  for (int iat = 0; iat < VP.getTotalNum(); iat++)
-  {
-    Phi->evaluateValue(VP, iat, tmp_psi);
+    psiM_inv = 0;
+    psiM_all = 0;
+    dpsiM_all = 0;
+    d2psiM_all = 0;
 
-    for (int j = 0; j < nmo; j++)
-      psiM_all(iel - FirstIndex, j) = tmp_psi[j];
+    Phi->evaluate_notranspose(
+        P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
 
     for (int i = 0; i < nel; i++)
-      for (int j = 0; j < nel; j++)
-        psiM_inv(i, j) = psiM_all(i, j);
+        for (int j = 0; j < nel; j++)
+            psiM_inv(i, j) = psiM_all(i, j);
 
     Invert(psiM_inv.data(), nel, nel);
 
+    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2
     const T* const A(psiM_all.data());
     const T* const Ainv(psiM_inv.data());
-
-    // The matrix A is rectangular.  Ainv is the inverse of the square part
-    // of the matrix. The multiply of Ainv and the square part of A is just
-    // the identity. This multiply could be reduced to Ainv and the
-    // non-square part of A.
-    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_mat.data(), nmo);
-
-    for (int i = 0; i < m_act_rot_inds.size(); i++)
-    {
-      int kk = this->myVars.where(i);
-      if (kk >= 0)
-      {
-        const int p      = m_act_rot_inds.at(i).first;
-        const int q      = m_act_rot_inds.at(i).second;
-        dratios(iat, kk) = T_mat(p, q) - T_orig(p, q); // dratio size is (nknot, num_vars)
-      }
+    ValueMatrix T_mat;
+    T_mat.resize(nel, nmo);
+
+    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0),
+        T_mat.data(), nmo);
+
+    for (int i = 0; i < m_act_rot_inds.size(); i++) {
+        int kk = this->myVars.where(i);
+        if (kk >= 0) {
+            const int p = m_act_rot_inds.at(i).first;
+            const int q = m_act_rot_inds.at(i).second;
+            dlogpsi[kk] = T_mat(p, q);
+        }
     }
-  }
 }
 
-template<typename T>
-void RotatedSPOsT<T>::evaluateDerivativesWF(ParticleSet& P,
-                                            const opt_variables_type& optvars,
-                                            Vector<T>& dlogpsi,
-                                            int FirstIndex,
-                                            int LastIndex)
+template <typename T>
+void
+RotatedSPOsT<T>::evaluateDerivatives(ParticleSetT<T>& P,
+    const OptVariablesType<T>& optvars, Vector<T>& dlogpsi,
+    Vector<T>& dhpsioverpsi, const int& FirstIndex, const int& LastIndex)
 {
-  const size_t nel = LastIndex - FirstIndex;
-  const size_t nmo = Phi->getOrbitalSetSize();
-
-  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1
+    const size_t nel = LastIndex - FirstIndex;
+    const size_t nmo = Phi->getOrbitalSetSize();
 
-  psiM_inv.resize(nel, nel);
-  psiM_all.resize(nel, nmo);
-  dpsiM_all.resize(nel, nmo);
-  d2psiM_all.resize(nel, nmo);
+    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1
+    myG_temp.resize(nel);
+    myG_J.resize(nel);
+    myL_temp.resize(nel);
+    myL_J.resize(nel);
 
-  psiM_inv   = 0;
-  psiM_all   = 0;
-  dpsiM_all  = 0;
-  d2psiM_all = 0;
+    myG_temp = 0;
+    myG_J = 0;
+    myL_temp = 0;
+    myL_J = 0;
 
-  Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
+    Bbar.resize(nel, nmo);
+    psiM_inv.resize(nel, nel);
+    psiM_all.resize(nel, nmo);
+    dpsiM_all.resize(nel, nmo);
+    d2psiM_all.resize(nel, nmo);
 
-  for (int i = 0; i < nel; i++)
-    for (int j = 0; j < nel; j++)
-      psiM_inv(i, j) = psiM_all(i, j);
+    Bbar = 0;
+    psiM_inv = 0;
+    psiM_all = 0;
+    dpsiM_all = 0;
+    d2psiM_all = 0;
 
-  Invert(psiM_inv.data(), nel, nel);
+    Phi->evaluate_notranspose(
+        P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
 
-  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2
-  const T* const A(psiM_all.data());
-  const T* const Ainv(psiM_inv.data());
-  ValueMatrix T_mat;
-  T_mat.resize(nel, nmo);
+    for (int i = 0; i < nel; i++)
+        for (int j = 0; j < nel; j++)
+            psiM_inv(i, j) = psiM_all(i, j);
 
-  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_mat.data(), nmo);
+    Invert(psiM_inv.data(), nel, nel);
 
-  for (int i = 0; i < m_act_rot_inds.size(); i++)
-  {
-    int kk = this->myVars.where(i);
-    if (kk >= 0)
-    {
-      const int p = m_act_rot_inds.at(i).first;
-      const int q = m_act_rot_inds.at(i).second;
-      dlogpsi[kk] = T_mat(p, q);
+    // current value of Gradient and Laplacian
+    //  gradient components
+    for (int a = 0; a < nel; a++)
+        for (int i = 0; i < nel; i++)
+            for (int k = 0; k < 3; k++)
+                myG_temp[a][k] += psiM_inv(i, a) * dpsiM_all(a, i)[k];
+    // laplacian components
+    for (int a = 0; a < nel; a++) {
+        for (int i = 0; i < nel; i++)
+            myL_temp[a] += psiM_inv(i, a) * d2psiM_all(a, i);
     }
-  }
-}
 
-template<typename T>
-void RotatedSPOsT<T>::evaluateDerivatives(ParticleSet& P,
-                                          const opt_variables_type& optvars,
-                                          Vector<T>& dlogpsi,
-                                          Vector<T>& dhpsioverpsi,
-                                          const int& FirstIndex,
-                                          const int& LastIndex)
-{
-  const size_t nel = LastIndex - FirstIndex;
-  const size_t nmo = Phi->getOrbitalSetSize();
-
-  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1
-  myG_temp.resize(nel);
-  myG_J.resize(nel);
-  myL_temp.resize(nel);
-  myL_J.resize(nel);
-
-  myG_temp = 0;
-  myG_J    = 0;
-  myL_temp = 0;
-  myL_J    = 0;
-
-  Bbar.resize(nel, nmo);
-  psiM_inv.resize(nel, nel);
-  psiM_all.resize(nel, nmo);
-  dpsiM_all.resize(nel, nmo);
-  d2psiM_all.resize(nel, nmo);
-
-  Bbar       = 0;
-  psiM_inv   = 0;
-  psiM_all   = 0;
-  dpsiM_all  = 0;
-  d2psiM_all = 0;
-
-  Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
-
-  for (int i = 0; i < nel; i++)
-    for (int j = 0; j < nel; j++)
-      psiM_inv(i, j) = psiM_all(i, j);
-
-  Invert(psiM_inv.data(), nel, nel);
-
-  // current value of Gradient and Laplacian
-  //  gradient components
-  for (int a = 0; a < nel; a++)
-    for (int i = 0; i < nel; i++)
-      for (int k = 0; k < 3; k++)
-        myG_temp[a][k] += psiM_inv(i, a) * dpsiM_all(a, i)[k];
-  // laplacian components
-  for (int a = 0; a < nel; a++)
-  {
+    // myG_J will be used to represent \frac{\nabla\psi_{J}}{\psi_{J}} and
+    // myL_J will be used to represent \frac{\nabla^2\psi_{J}}{\psi_{J}}.
+    // IMPORTANT NOTE: The value of P.L holds \nabla^2 ln[\psi], but we need
+    // \frac{\nabla^2 \psi}{\psi} (hence the added dot(P.G, P.G) term), and
+    // this is what myL_J will hold.
+    for (int a = 0, iat = FirstIndex; a < nel; a++, iat++) {
+        myG_J[a] = (P.G[iat] - myG_temp[a]);
+        myL_J[a] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[a]);
+    }
+    // possibly replace with BLAS calls
     for (int i = 0; i < nel; i++)
-      myL_temp[a] += psiM_inv(i, a) * d2psiM_all(a, i);
-  }
-
-  // calculation of myG_J which will be used to represent
-  // \frac{\nabla\psi_{J}}{\psi_{J}} calculation of myL_J will be used to
-  // represent \frac{\nabla^2\psi_{J}}{\psi_{J}} IMPORTANT NOTE:  The value of
-  // P.L holds \nabla^2 ln[\psi] but we need  \frac{\nabla^2 \psi}{\psi} and
-  // this is what myL_J will hold
-  for (int a = 0, iat = FirstIndex; a < nel; a++, iat++)
-  {
-    myG_J[a] = (P.G[iat] - myG_temp[a]);
-    myL_J[a] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[a]);
-  }
-  // possibly replace wit BLAS calls
-  for (int i = 0; i < nel; i++)
-    for (int j = 0; j < nmo; j++)
-      Bbar(i, j) = d2psiM_all(i, j) + 2 * dot(myG_J[i], dpsiM_all(i, j)) + myL_J[i] * psiM_all(i, j);
-
-  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2
-  const T* const A(psiM_all.data());
-  const T* const Ainv(psiM_inv.data());
-  const T* const B(Bbar.data());
-  ValueMatrix T_mat;
-  ValueMatrix Y1;
-  ValueMatrix Y2;
-  ValueMatrix Y3;
-  ValueMatrix Y4;
-  T_mat.resize(nel, nmo);
-  Y1.resize(nel, nel);
-  Y2.resize(nel, nmo);
-  Y3.resize(nel, nmo);
-  Y4.resize(nel, nmo);
-
-  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_mat.data(), nmo);
-  BLAS::gemm('N', 'N', nel, nel, nel, T(1.0), B, nmo, Ainv, nel, T(0.0), Y1.data(), nel);
-  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), T_mat.data(), nmo, Y1.data(), nel, T(0.0), Y2.data(), nmo);
-  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), B, nmo, Ainv, nel, T(0.0), Y3.data(), nmo);
-
-  // possibly replace with BLAS call
-  Y4 = Y3 - Y2;
-
-  for (int i = 0; i < m_act_rot_inds.size(); i++)
-  {
-    int kk = this->myVars.where(i);
-    if (kk >= 0)
-    {
-      const int p = m_act_rot_inds.at(i).first;
-      const int q = m_act_rot_inds.at(i).second;
-      dlogpsi[kk] += T_mat(p, q);
-      dhpsioverpsi[kk] += T(-0.5) * Y4(p, q);
+        for (int j = 0; j < nmo; j++)
+            Bbar(i, j) = d2psiM_all(i, j) +
+                2.0 * dot(myG_J[i], dpsiM_all(i, j)) +
+                myL_J[i] * psiM_all(i, j);
+
+    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2
+    const T* const A(psiM_all.data());
+    const T* const Ainv(psiM_inv.data());
+    const T* const B(Bbar.data());
+    ValueMatrix T_mat;
+    ValueMatrix Y1;
+    ValueMatrix Y2;
+    ValueMatrix Y3;
+    ValueMatrix Y4;
+    T_mat.resize(nel, nmo);
+    Y1.resize(nel, nel);
+    Y2.resize(nel, nmo);
+    Y3.resize(nel, nmo);
+    Y4.resize(nel, nmo);
+
+    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0),
+        T_mat.data(), nmo);
+    BLAS::gemm('N', 'N', nel, nel, nel, T(1.0), B, nmo, Ainv, nel, T(0.0),
+        Y1.data(), nel);
+    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), T_mat.data(), nmo, Y1.data(),
+        nel, T(0.0), Y2.data(), nmo);
+    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), B, nmo, Ainv, nel, T(0.0),
+        Y3.data(), nmo);
+
+    // possibly replace with BLAS call
+    Y4 = Y3 - Y2;
+
+    for (int i = 0; i < m_act_rot_inds.size(); i++) {
+        int kk = this->myVars.where(i);
+        if (kk >= 0) {
+            const int p = m_act_rot_inds.at(i).first;
+            const int q = m_act_rot_inds.at(i).second;
+            dlogpsi[kk] += T_mat(p, q);
+            dhpsioverpsi[kk] += T(-0.5) * Y4(p, q);
+        }
     }
-  }
 }
 
-template<typename T>
-void RotatedSPOsT<T>::evaluateDerivatives(ParticleSet& P,
-                                          const opt_variables_type& optvars,
-                                          Vector<T>& dlogpsi,
-                                          Vector<T>& dhpsioverpsi,
-                                          const T& psiCurrent,
-                                          const std::vector<T>& Coeff,
-                                          const std::vector<size_t>& C2node_up,
-                                          const std::vector<size_t>& C2node_dn,
-                                          const ValueVector& detValues_up,
-                                          const ValueVector& detValues_dn,
-                                          const GradMatrix& grads_up,
-                                          const GradMatrix& grads_dn,
-                                          const ValueMatrix& lapls_up,
-                                          const ValueMatrix& lapls_dn,
-                                          const ValueMatrix& M_up,
-                                          const ValueMatrix& M_dn,
-                                          const ValueMatrix& Minv_up,
-                                          const ValueMatrix& Minv_dn,
-                                          const GradMatrix& B_grad,
-                                          const ValueMatrix& B_lapl,
-                                          const std::vector<int>& detData_up,
-                                          const size_t N1,
-                                          const size_t N2,
-                                          const size_t NP1,
-                                          const size_t NP2,
-                                          const std::vector<std::vector<int>>& lookup_tbl)
+template <typename T>
+void
+RotatedSPOsT<T>::evaluateDerivatives(ParticleSetT<T>& P,
+    const OptVariablesType<T>& optvars, Vector<T>& dlogpsi,
+    Vector<T>& dhpsioverpsi, const T& psiCurrent, const std::vector<T>& Coeff,
+    const std::vector<size_t>& C2node_up, const std::vector<size_t>& C2node_dn,
+    const ValueVector& detValues_up, const ValueVector& detValues_dn,
+    const GradMatrix& grads_up, const GradMatrix& grads_dn,
+    const ValueMatrix& lapls_up, const ValueMatrix& lapls_dn,
+    const ValueMatrix& M_up, const ValueMatrix& M_dn,
+    const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn,
+    const GradMatrix& B_grad, const ValueMatrix& B_lapl,
+    const std::vector<int>& detData_up, const size_t N1, const size_t N2,
+    const size_t NP1, const size_t NP2,
+    const std::vector<std::vector<int>>& lookup_tbl)
 {
-  bool recalculate(false);
-  for (int k = 0; k < this->myVars.size(); ++k)
-  {
-    int kk = this->myVars.where(k);
-    if (kk < 0)
-      continue;
-    if (optvars.recompute(kk))
-      recalculate = true;
-  }
-  if (recalculate)
-  {
-    ParticleSet::ParticleGradient myG_temp, myG_J;
-    ParticleSet::ParticleLaplacian myL_temp, myL_J;
-    const int NP = P.getTotalNum();
-    myG_temp.resize(NP);
-    myG_temp = 0.0;
-    myL_temp.resize(NP);
-    myL_temp = 0.0;
-    myG_J.resize(NP);
-    myG_J = 0.0;
-    myL_J.resize(NP);
-    myL_J            = 0.0;
-    const size_t nmo = Phi->getOrbitalSetSize();
-    const size_t nel = P.last(0) - P.first(0);
-
-    const T* restrict C_p = Coeff.data();
-    for (int i = 0; i < Coeff.size(); i++)
-    {
-      const size_t upC = C2node_up[i];
-      const size_t dnC = C2node_dn[i];
-      const T tmp1     = C_p[i] * detValues_dn[dnC];
-      const T tmp2     = C_p[i] * detValues_up[upC];
-      for (size_t k = 0, j = N1; k < NP1; k++, j++)
-      {
-        myG_temp[j] += tmp1 * grads_up(upC, k);
-        myL_temp[j] += tmp1 * lapls_up(upC, k);
-      }
-      for (size_t k = 0, j = N2; k < NP2; k++, j++)
-      {
-        myG_temp[j] += tmp2 * grads_dn(dnC, k);
-        myL_temp[j] += tmp2 * lapls_dn(dnC, k);
-      }
+    bool recalculate(false);
+    for (int k = 0; k < this->myVars.size(); ++k) {
+        int kk = this->myVars.where(k);
+        if (kk < 0)
+            continue;
+        if (optvars.recompute(kk))
+            recalculate = true;
     }
+    if (recalculate) {
+        typename ParticleSetT<T>::ParticleGradient myG_temp, myG_J;
+        typename ParticleSetT<T>::ParticleLaplacian myL_temp, myL_J;
+        const int NP = P.getTotalNum();
+        myG_temp.resize(NP);
+        myG_temp = 0.0;
+        myL_temp.resize(NP);
+        myL_temp = 0.0;
+        myG_J.resize(NP);
+        myG_J = 0.0;
+        myL_J.resize(NP);
+        myL_J = 0.0;
+        const size_t nmo = Phi->getOrbitalSetSize();
+        const size_t nel = P.last(0) - P.first(0);
+
+        const T* restrict C_p = Coeff.data();
+        for (int i = 0; i < Coeff.size(); i++) {
+            const size_t upC = C2node_up[i];
+            const size_t dnC = C2node_dn[i];
+            const T tmp1 = C_p[i] * detValues_dn[dnC];
+            const T tmp2 = C_p[i] * detValues_up[upC];
+            for (size_t k = 0, j = N1; k < NP1; k++, j++) {
+                myG_temp[j] += tmp1 * grads_up(upC, k);
+                myL_temp[j] += tmp1 * lapls_up(upC, k);
+            }
+            for (size_t k = 0, j = N2; k < NP2; k++, j++) {
+                myG_temp[j] += tmp2 * grads_dn(dnC, k);
+                myL_temp[j] += tmp2 * lapls_dn(dnC, k);
+            }
+        }
 
-    myG_temp *= (1 / psiCurrent);
-    myL_temp *= (1 / psiCurrent);
+        myG_temp *= (T(1) / psiCurrent); // T(1): stays valid for complex T
+        myL_temp *= (T(1) / psiCurrent);
+
+        // myG_J will be used to represent \frac{\nabla\psi_{J}}{\psi_{J}} and
+        // myL_J will be used to represent \frac{\nabla^2\psi_{J}}{\psi_{J}}.
+        // IMPORTANT NOTE: The value of P.L holds \nabla^2 ln[\psi], but we
+        // need \frac{\nabla^2 \psi}{\psi} (hence the added dot(P.G, P.G)
+        // term), and this is what myL_J will hold.
+        for (int iat = 0; iat < (myL_temp.size()); iat++) {
+            myG_J[iat] = (P.G[iat] - myG_temp[iat]);
+            myL_J[iat] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[iat]);
+        }
 
-    // calculation of myG_J which will be used to represent
-    // \frac{\nabla\psi_{J}}{\psi_{J}} calculation of myL_J will be used to
-    // represent \frac{\nabla^2\psi_{J}}{\psi_{J}} IMPORTANT NOTE:  The
-    // value of P.L holds \nabla^2 ln[\psi] but we need  \frac{\nabla^2
-    // \psi}{\psi} and this is what myL_J will hold
-    for (int iat = 0; iat < (myL_temp.size()); iat++)
-    {
-      myG_J[iat] = (P.G[iat] - myG_temp[iat]);
-      myL_J[iat] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[iat]);
+        table_method_eval(dlogpsi, dhpsioverpsi, myL_J, myG_J, nel, nmo,
+            psiCurrent, Coeff, C2node_up, C2node_dn, detValues_up, detValues_dn,
+            grads_up, grads_dn, lapls_up, lapls_dn, M_up, M_dn, Minv_up,
+            Minv_dn, B_grad, B_lapl, detData_up, N1, N2, NP1, NP2, lookup_tbl);
     }
-
-    table_method_eval(dlogpsi, dhpsioverpsi, myL_J, myG_J, nel, nmo, psiCurrent, Coeff, C2node_up, C2node_dn,
-                      detValues_up, detValues_dn, grads_up, grads_dn, lapls_up, lapls_dn, M_up, M_dn, Minv_up, Minv_dn,
-                      B_grad, B_lapl, detData_up, N1, N2, NP1, NP2, lookup_tbl);
-  }
 }
 
-template<typename T>
-void RotatedSPOsT<T>::evaluateDerivativesWF(ParticleSet& P,
-                                            const opt_variables_type& optvars,
-                                            Vector<ValueType>& dlogpsi,
-                                            const ValueType& psiCurrent,
-                                            const std::vector<ValueType>& Coeff,
-                                            const std::vector<size_t>& C2node_up,
-                                            const std::vector<size_t>& C2node_dn,
-                                            const ValueVector& detValues_up,
-                                            const ValueVector& detValues_dn,
-                                            const ValueMatrix& M_up,
-                                            const ValueMatrix& M_dn,
-                                            const ValueMatrix& Minv_up,
-                                            const ValueMatrix& Minv_dn,
-                                            const std::vector<int>& detData_up,
-                                            const std::vector<std::vector<int>>& lookup_tbl)
+template <typename T>
+void
+RotatedSPOsT<T>::evaluateDerivativesWF(ParticleSetT<T>& P,
+    const OptVariablesType<T>& optvars, Vector<ValueType>& dlogpsi,
+    const ValueType& psiCurrent, const std::vector<ValueType>& Coeff,
+    const std::vector<size_t>& C2node_up, const std::vector<size_t>& C2node_dn,
+    const ValueVector& detValues_up, const ValueVector& detValues_dn,
+    const ValueMatrix& M_up, const ValueMatrix& M_dn,
+    const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn,
+    const std::vector<int>& detData_up,
+    const std::vector<std::vector<int>>& lookup_tbl)
 {
-  bool recalculate(false);
-  for (int k = 0; k < this->myVars.size(); ++k)
-  {
-    int kk = this->myVars.where(k);
-    if (kk < 0)
-      continue;
-    if (optvars.recompute(kk))
-      recalculate = true;
-  }
-  if (recalculate)
-  {
-    const size_t nmo = Phi->getOrbitalSetSize();
-    const size_t nel = P.last(0) - P.first(0);
+    bool recalculate(false);
+    for (int k = 0; k < this->myVars.size(); ++k) {
+        int kk = this->myVars.where(k);
+        if (kk < 0)
+            continue;
+        if (optvars.recompute(kk))
+            recalculate = true;
+    }
+    if (recalculate) {
+        const size_t nmo = Phi->getOrbitalSetSize();
+        const size_t nel = P.last(0) - P.first(0);
 
-    table_method_evalWF(dlogpsi, nel, nmo, psiCurrent, Coeff, C2node_up, C2node_dn, detValues_up, detValues_dn, M_up,
-                        M_dn, Minv_up, Minv_dn, detData_up, lookup_tbl);
-  }
+        table_method_evalWF(dlogpsi, nel, nmo, psiCurrent, Coeff, C2node_up,
+            C2node_dn, detValues_up, detValues_dn, M_up, M_dn, Minv_up, Minv_dn,
+            detData_up, lookup_tbl);
+    }
 }
 
-template<typename T>
-void RotatedSPOsT<T>::table_method_eval(Vector<T>& dlogpsi,
-                                        Vector<T>& dhpsioverpsi,
-                                        const ParticleSet::ParticleLaplacian& myL_J,
-                                        const ParticleSet::ParticleGradient& myG_J,
-                                        const size_t nel,
-                                        const size_t nmo,
-                                        const T& psiCurrent,
-                                        const std::vector<T>& Coeff,
-                                        const std::vector<size_t>& C2node_up,
-                                        const std::vector<size_t>& C2node_dn,
-                                        const ValueVector& detValues_up,
-                                        const ValueVector& detValues_dn,
-                                        const GradMatrix& grads_up,
-                                        const GradMatrix& grads_dn,
-                                        const ValueMatrix& lapls_up,
-                                        const ValueMatrix& lapls_dn,
-                                        const ValueMatrix& M_up,
-                                        const ValueMatrix& M_dn,
-                                        const ValueMatrix& Minv_up,
-                                        const ValueMatrix& Minv_dn,
-                                        const GradMatrix& B_grad,
-                                        const ValueMatrix& B_lapl,
-                                        const std::vector<int>& detData_up,
-                                        const size_t N1,
-                                        const size_t N2,
-                                        const size_t NP1,
-                                        const size_t NP2,
-                                        const std::vector<std::vector<int>>& lookup_tbl)
+template <typename T>
+void
+RotatedSPOsT<T>::table_method_eval(Vector<T>& dlogpsi, Vector<T>& dhpsioverpsi,
+    const typename ParticleSetT<T>::ParticleLaplacian& myL_J,
+    const typename ParticleSetT<T>::ParticleGradient& myG_J, const size_t nel,
+    const size_t nmo, const T& psiCurrent, const std::vector<T>& Coeff,
+    const std::vector<size_t>& C2node_up, const std::vector<size_t>& C2node_dn,
+    const ValueVector& detValues_up, const ValueVector& detValues_dn,
+    const GradMatrix& grads_up, const GradMatrix& grads_dn,
+    const ValueMatrix& lapls_up, const ValueMatrix& lapls_dn,
+    const ValueMatrix& M_up, const ValueMatrix& M_dn,
+    const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn,
+    const GradMatrix& B_grad, const ValueMatrix& B_lapl,
+    const std::vector<int>& detData_up, const size_t N1, const size_t N2,
+    const size_t NP1, const size_t NP2,
+    const std::vector<std::vector<int>>& lookup_tbl)
 /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 GUIDE TO THE MATICES BEING BUILT
 ----------------------------------------------
@@ -1045,8 +1029,8 @@ determiant the table method is employed to calculate the contributions to the
 parameter derivatives (dhpsioverpsi/dlogpsi)
 
   loop through unquie determinants
-	loop through parameters
-	  evaluate contributaion to dlogpsi and dhpsioverpsi
+    loop through parameters
+      evaluate contribution to dlogpsi and dhpsioverpsi
 \noindent
 
   BLAS GUIDE  for matrix multiplication of  [  alpha * A.B + beta * C = C ]
@@ -1062,16 +1046,16 @@ This notation is inspired by http://dx.doi.org/10.1063/1.4948778
 \newline
 \hfill\break
 $
-	A_{i,j}=\phi_j(r_{i}) \\
-	T = A^{-1} \widetilde{A} \\
-	B_{i,j} =\nabla^2 \phi_{j}(r_i) + \frac{\nabla_{i}J}{J} \cdot \nabla
+    A_{i,j}=\phi_j(r_{i}) \\
+    T = A^{-1} \widetilde{A} \\
+    B_{i,j} =\nabla^2 \phi_{j}(r_i) + \frac{\nabla_{i}J}{J} \cdot \nabla
 \phi_{j}(r_{i})  + \frac{\nabla^2_i J}{J} \phi_{j}(r_{i}) \\
-	\hat{O_{I}} = \hat{O}D_{I} \\
-	D_{I}=det(A_{I}) \newline
-	\psi_{MS} = \sum_{I=0} C_{I} D_{I\uparrow}D_{I\downarrow} \\
-	\Psi_{total} = \psi_{J}\psi_{MS} \\
-	\alpha_{I} = P^{T}_{I}TQ_{I} \\
-	M_{I} = P^{T}_{I} \widetilde{M} Q_{I} = P^{T}_{I} (A^{-1}\widetilde{B} -
+    \hat{O_{I}} = \hat{O}D_{I} \\
+    D_{I}=det(A_{I}) \newline
+    \psi_{MS} = \sum_{I=0} C_{I} D_{I\uparrow}D_{I\downarrow} \\
+    \Psi_{total} = \psi_{J}\psi_{MS} \\
+    \alpha_{I} = P^{T}_{I}TQ_{I} \\
+    M_{I} = P^{T}_{I} \widetilde{M} Q_{I} = P^{T}_{I} (A^{-1}\widetilde{B} -
 A^{-1} B A^{-1}\widetilde{A} )Q_{I} \\
 $
 \newline
@@ -1090,10 +1074,10 @@ Tr[\alpha_{I}^{-1}M_{I}]*det(\alpha_{I}) \\
 Below is a translation of the shorthand I use to represent matrices independent
 of ``excitation matrix". \newline \hfill\break
 $
-	Y_{1} =  A^{-1}B   \\
-	Y_{2} = A^{-1}BA^{-1}\widetilde{A} \\
-	Y_{3} = A^{-1}\widetilde{B} \\
-	Y_{4} = \widetilde{M} = (A^{-1}\widetilde{B} - A^{-1} B A^{-1}\widetilde{A}
+    Y_{1} =  A^{-1}B   \\
+    Y_{2} = A^{-1}BA^{-1}\widetilde{A} \\
+    Y_{3} = A^{-1}\widetilde{B} \\
+    Y_{4} = \widetilde{M} = (A^{-1}\widetilde{B} - A^{-1} B A^{-1}\widetilde{A}
 )\\
 $
 \newline
@@ -1106,14 +1090,14 @@ reference matrix is always $A_{0}$ and is always the Hartree Fock Matrix.
 \newline
 \hfill\break
 $
-	Y_{5} = TQ \\
-	Y_{6} = (P^{T}TQ)^{-1} = \alpha_{I}^{-1}\\
-	Y_{7} = \alpha_{I}^{-1} P^{T} \\
-	Y_{11} = \widetilde{M}Q \\
-	Y_{23} = P^{T}\widetilde{M}Q \\
-	Y_{24} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q \\
-	Y_{25} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q\alpha_{I}^{-1} \\
-	Y_{26} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q\alpha_{I}^{-1}P^{T}\\
+    Y_{5} = TQ \\
+    Y_{6} = (P^{T}TQ)^{-1} = \alpha_{I}^{-1}\\
+    Y_{7} = \alpha_{I}^{-1} P^{T} \\
+    Y_{11} = \widetilde{M}Q \\
+    Y_{23} = P^{T}\widetilde{M}Q \\
+    Y_{24} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q \\
+    Y_{25} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q\alpha_{I}^{-1} \\
+    Y_{26} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q\alpha_{I}^{-1}P^{T}\\
 $
 \newline
 So far you will notice that I have not included up or down arrows to specify
@@ -1125,14 +1109,14 @@ derivatives. Of course the down spin expression can be retrieved by swapping the
 up and down arrows. I have dubbed any expression with lowercase p prefix as a
 "precursor" to an expression actually used... \newline \hfill\break
 $
-	\dot{C_{I}} = C_{I}*det(A_{I\downarrow})\\
-	\ddot{C_{I}} = C_{I}*\hat{O}det(A_{I\downarrow}) \\
-	pK1 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) Tr[\alpha_{I}^{-1}M_{I}]
+    \dot{C_{I}} = C_{I}*det(A_{I\downarrow})\\
+    \ddot{C_{I}} = C_{I}*\hat{O}det(A_{I\downarrow}) \\
+    pK1 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) Tr[\alpha_{I}^{-1}M_{I}]
 (Q\alpha_{I}^{-1}P^{T}) \\
-	pK2 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}) \\
-	pK3 = \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}) \\
-	pK4 = \sum_{I=1} \dot{C_{I}} det(A_{I}) (Q\alpha_{I}^{-1}P^{T}) \\
-	pK5 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1} M_{I}
+    pK2 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}) \\
+    pK3 = \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}) \\
+    pK4 = \sum_{I=1} \dot{C_{I}} det(A_{I}) (Q\alpha_{I}^{-1}P^{T}) \\
+    pK5 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1} M_{I}
 \alpha_{I}^{-1}P^{T}) \\
 $
 \newline
@@ -1140,34 +1124,34 @@ Now these p matrices will be used to make various expressions via BLAS commands.
 \newline
 \hfill\break
 $
-	K1T = const0^{-1}*pK1.T =const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+    K1T = const0^{-1}*pK1.T =const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
 Tr[\alpha_{I}^{-1}M_{I}] (Q\alpha_{I}^{-1}P^{T}T) \\
-	TK1T = T.K1T = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+    TK1T = T.K1T = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
 Tr[\alpha_{I}^{-1}M_{I}] (TQ\alpha_{I}^{-1}P^{T}T)\\ \\
-	K2AiB = const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+    K2AiB = const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
 (Q\alpha_{I}^{-1}P^{T}A^{-1}\widetilde{B})\\
-	TK2AiB = T.K2AiB = const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+    TK2AiB = T.K2AiB = const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
 (TQ\alpha_{I}^{-1}P^{T}A^{-1}\widetilde{B})\\
-	K2XA =  const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+    K2XA =  const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
 (Q\alpha_{I}^{-1}P^{T}X\widetilde{A})\\
-	TK2XA = T.K2XA = const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+    TK2XA = T.K2XA = const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
 (TQ\alpha_{I}^{-1}P^{T}X\widetilde{A})\\ \\
-	K2T = \frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+    K2T = \frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
 (Q\alpha_{I}^{-1}P^{T}T) \\
-	TK2T = T.K2T =\frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}}
+    TK2T = T.K2T =\frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}}
 det(\alpha_{I}) (TQ\alpha_{I}^{-1}P^{T}T) \\
-	MK2T = \frac{const0}{const1} Y_{4}.K2T= const0^{-1}  \sum_{I=1} \dot{C_{I}}
+    MK2T = \frac{const0}{const1} Y_{4}.K2T= const0^{-1}  \sum_{I=1} \dot{C_{I}}
 det(\alpha_{I}) (\widetilde{M}Q\alpha_{I}^{-1}P^{T}T)\\ \\
-	K3T = const0^{-1}  \sum_{I=1} \ddot{C_{I}} det(\alpha_{I})
+    K3T = const0^{-1}  \sum_{I=1} \ddot{C_{I}} det(\alpha_{I})
 (Q\alpha_{I}^{-1}P^{T}T) \\
-	TK3T = T.K3T  = const0^{-1}  \sum_{I=1} \ddot{C_{I}} det(\alpha_{I})
+    TK3T = T.K3T  = const0^{-1}  \sum_{I=1} \ddot{C_{I}} det(\alpha_{I})
 (TQ\alpha_{I}^{-1}P^{T}T)\\ \\
-	K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (Q\alpha_{I}^{-1}P^{T}T) \\
-	TK4T = T.K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (TQ\alpha_{I}^{-1}P^{T}T)
+    K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (Q\alpha_{I}^{-1}P^{T}T) \\
+    TK4T = T.K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (TQ\alpha_{I}^{-1}P^{T}T)
 \\ \\
-	K5T =  const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}
+    K5T =  const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}
 M_{I} \alpha_{I}^{-1}P^{T} T)  \\
-	TK5T = T.K5T  = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (T Q\alpha_{I}^{-1}
+    TK5T = T.K5T  = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (T Q\alpha_{I}^{-1}
 M_{I} \alpha_{I}^{-1}P^{T} T)  \\
 $
 \newline
@@ -1187,500 +1171,521 @@ to each element will be called B_bar
 $
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
 {
-  ValueMatrix Table;
-  ValueMatrix Bbar;
-  ValueMatrix Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y11, Y23, Y24, Y25, Y26;
-  ValueMatrix pK1, K1T, TK1T, pK2, K2AiB, TK2AiB, K2XA, TK2XA, K2T, TK2T, MK2T, pK3, K3T, TK3T, pK5, K5T, TK5T;
-
-  Table.resize(nel, nmo);
-
-  Bbar.resize(nel, nmo);
-
-  Y1.resize(nel, nel);
-  Y2.resize(nel, nmo);
-  Y3.resize(nel, nmo);
-  Y4.resize(nel, nmo);
-
-  pK1.resize(nmo, nel);
-  K1T.resize(nmo, nmo);
-  TK1T.resize(nel, nmo);
-
-  pK2.resize(nmo, nel);
-  K2AiB.resize(nmo, nmo);
-  TK2AiB.resize(nel, nmo);
-  K2XA.resize(nmo, nmo);
-  TK2XA.resize(nel, nmo);
-  K2T.resize(nmo, nmo);
-  TK2T.resize(nel, nmo);
-  MK2T.resize(nel, nmo);
-
-  pK3.resize(nmo, nel);
-  K3T.resize(nmo, nmo);
-  TK3T.resize(nel, nmo);
-
-  pK5.resize(nmo, nel);
-  K5T.resize(nmo, nmo);
-  TK5T.resize(nel, nmo);
-
-  const int parameters_size(m_act_rot_inds.size());
-  const int parameter_start_index(0);
-
-  const size_t num_unique_up_dets(detValues_up.size());
-  const size_t num_unique_dn_dets(detValues_dn.size());
-
-  const T* restrict cptr = Coeff.data();
-  const size_t nc        = Coeff.size();
-  const size_t* restrict upC(C2node_up.data());
-  const size_t* restrict dnC(C2node_dn.data());
-  // B_grad holds the gradient operator
-  // B_lapl holds the laplacian operator
-  // B_bar will hold our special O operator
-
-  const int offset1(N1);
-  const int offset2(N2);
-  const int NPother(NP2);
-
-  T* T_(Table.data());
-
-  // possibly replace wit BLAS calls
-  for (int i = 0; i < nel; i++)
-    for (int j = 0; j < nmo; j++)
-      Bbar(i, j) = B_lapl(i, j) + 2 * dot(myG_J[i + offset1], B_grad(i, j)) + myL_J[i + offset1] * M_up(i, j);
-
-  const T* restrict B(Bbar.data());
-  const T* restrict A(M_up.data());
-  const T* restrict Ainv(Minv_up.data());
-  // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF
-  // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR
-  // THIS CASE
-  //  The T matrix should be calculated and stored for use
-  //  T = A^{-1} \widetilde A
-  // REMINDER: that the ValueMatrix "matrix" stores data in a row major order
-  // and that BLAS commands assume column major
-  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, RealType(0.0), T_, nmo);
-
-  BLAS::gemm('N', 'N', nel, nel, nel, T(1.0), B, nmo, Ainv, nel, RealType(0.0), Y1.data(), nel);
-  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), T_, nmo, Y1.data(), nel, RealType(0.0), Y2.data(), nmo);
-  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), B, nmo, Ainv, nel, RealType(0.0), Y3.data(), nmo);
-
-  // possibly replace with BLAS call
-  Y4 = Y3 - Y2;
-
-  // Need to create the constants: (Oi, const0, const1, const2)to take
-  // advantage of minimal BLAS commands; Oi is the special operator applied to
-  // the slater matrix "A subscript i" from the total CI expansion \hat{O_{i}}
-  //= \hat{O}D_{i} with D_{i}=det(A_{i}) and Multi-Slater component defined as
-  //\sum_{i=0} C_{i} D_{i\uparrow}D_{i\downarrow}
-  std::vector<RealType> Oi(num_unique_dn_dets);
-
-  for (int index = 0; index < num_unique_dn_dets; index++)
-    for (int iat = 0; iat < NPother; iat++)
-      Oi[index] += lapls_dn(index, iat) + 2 * dot(grads_dn(index, iat), myG_J[offset2 + iat]) +
-          myL_J[offset2 + iat] * detValues_dn[index];
-
-  // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1}
-  // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow}) const1 =
-  // C_{0}*\hat{O} det(A_{0\downarrow})+\sum_{i=1}
-  // C_{i}*\hat{O}det(A_{i\downarrow})* det(\alpha_{i\uparrow}) const2 =
-  // \sum_{i=1} C_{i}*det(A_{i\downarrow})*
-  // Tr[\alpha_{i}^{-1}M_{i}]*det(\alpha_{i})
-  RealType const0(0.0), const1(0.0), const2(0.0);
-  for (size_t i = 0; i < nc; ++i)
-  {
-    const RealType c  = cptr[i];
-    const size_t up   = upC[i];
-    const size_t down = dnC[i];
-
-    const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]);
-    const1 += c * Oi[down] * (detValues_up[up] / detValues_up[0]);
-  }
-
-  std::fill(pK1.begin(), pK1.end(), 0.0);
-  std::fill(pK2.begin(), pK2.end(), 0.0);
-  std::fill(pK3.begin(), pK3.end(), 0.0);
-  std::fill(pK5.begin(), pK5.end(), 0.0);
-
-  // Now we are going to loop through all unique determinants.
-  // The few lines above are for the reference matrix contribution.
-  // Although I start the loop below from index 0, the loop only performs
-  // actions when the index is >= 1 the detData object contains all the
-  // information about the P^T and Q matrices (projection matrices) needed in
-  // the table method
-  const int* restrict data_it = detData_up.data();
-  for (int index = 0, datum = 0; index < num_unique_up_dets; index++)
-  {
-    const int k = data_it[datum];
-
-    if (k == 0)
-    {
-      datum += 3 * k + 1;
+    ValueMatrix Table;
+    ValueMatrix Bbar;
+    ValueMatrix Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y11, Y23, Y24, Y25, Y26;
+    ValueMatrix pK1, K1T, TK1T, pK2, K2AiB, TK2AiB, K2XA, TK2XA, K2T, TK2T,
+        MK2T, pK3, K3T, TK3T, pK5, K5T, TK5T;
+
+    Table.resize(nel, nmo);
+
+    Bbar.resize(nel, nmo);
+
+    Y1.resize(nel, nel);
+    Y2.resize(nel, nmo);
+    Y3.resize(nel, nmo);
+    Y4.resize(nel, nmo);
+
+    pK1.resize(nmo, nel);
+    K1T.resize(nmo, nmo);
+    TK1T.resize(nel, nmo);
+
+    pK2.resize(nmo, nel);
+    K2AiB.resize(nmo, nmo);
+    TK2AiB.resize(nel, nmo);
+    K2XA.resize(nmo, nmo);
+    TK2XA.resize(nel, nmo);
+    K2T.resize(nmo, nmo);
+    TK2T.resize(nel, nmo);
+    MK2T.resize(nel, nmo);
+
+    pK3.resize(nmo, nel);
+    K3T.resize(nmo, nmo);
+    TK3T.resize(nel, nmo);
+
+    pK5.resize(nmo, nel);
+    K5T.resize(nmo, nmo);
+    TK5T.resize(nel, nmo);
+
+    const int parameters_size(m_act_rot_inds.size());
+    const int parameter_start_index(0);
+
+    const size_t num_unique_up_dets(detValues_up.size());
+    const size_t num_unique_dn_dets(detValues_dn.size());
+
+    const T* restrict cptr = Coeff.data();
+    const size_t nc = Coeff.size();
+    const size_t* restrict upC(C2node_up.data());
+    const size_t* restrict dnC(C2node_dn.data());
+    // B_grad holds the gradient operator
+    // B_lapl holds the laplacian operator
+    // B_bar will hold our special O operator
+
+    const int offset1(N1);
+    const int offset2(N2);
+    const int NPother(NP2);
+
+    T* T_(Table.data());
+
+    // possibly replace with BLAS calls
+    for (int i = 0; i < nel; i++)
+        for (int j = 0; j < nmo; j++)
+            Bbar(i, j) = B_lapl(i, j) +
+                2.0 * dot(myG_J[i + offset1], B_grad(i, j)) +
+                myL_J[i + offset1] * M_up(i, j);
+
+    const T* restrict B(Bbar.data());
+    const T* restrict A(M_up.data());
+    const T* restrict Ainv(Minv_up.data());
+    // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF
+    // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR
+    // THIS CASE
+    //  The T matrix should be calculated and stored for use
+    //  T = A^{-1} \widetilde A
+    // REMINDER: that the ValueMatrix "matrix" stores data in a row major order
+    // and that BLAS commands assume column major
+    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel,
+        RealType(0.0), T_, nmo);
+
+    BLAS::gemm('N', 'N', nel, nel, nel, T(1.0), B, nmo, Ainv, nel,
+        RealType(0.0), Y1.data(), nel);
+    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), T_, nmo, Y1.data(), nel,
+        RealType(0.0), Y2.data(), nmo);
+    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), B, nmo, Ainv, nel,
+        RealType(0.0), Y3.data(), nmo);
+
+    // possibly replace with BLAS call
+    Y4 = Y3 - Y2;
+
+    // Need to create the constants (Oi, const0, const1, const2) to take
+    // advantage of minimal BLAS commands. Oi is the special operator applied
+    // to the Slater matrix "A subscript i" from the total CI expansion:
+    // \hat{O_{i}} = \hat{O}D_{i} with D_{i}=det(A_{i}), and the multi-Slater
+    // component is defined as \sum_{i=0} C_{i} D_{i\uparrow}D_{i\downarrow}.
+    std::vector<RealType> Oi(num_unique_dn_dets);
+
+    for (int index = 0; index < num_unique_dn_dets; index++)
+        for (int iat = 0; iat < NPother; iat++)
+            Oi[index] += lapls_dn(index, iat) +
+                2.0 * dot(grads_dn(index, iat), myG_J[offset2 + iat]) +
+                myL_J[offset2 + iat] * detValues_dn[index];
+
+    // const0 = C_{0}*det(A_{0\downarrow})
+    //   + \sum_{i=1} C_{i}*det(A_{i\downarrow})*det(\alpha_{i\uparrow})
+    // const1 = C_{0}*\hat{O}det(A_{0\downarrow})
+    //   + \sum_{i=1} C_{i}*\hat{O}det(A_{i\downarrow})*det(\alpha_{i\uparrow})
+    // const2 = \sum_{i=1} C_{i}*det(A_{i\downarrow})*
+    //   Tr[\alpha_{i}^{-1}M_{i}]*det(\alpha_{i})
+    RealType const0(0.0), const1(0.0), const2(0.0);
+    for (size_t i = 0; i < nc; ++i) {
+        const RealType c = cptr[i];
+        const size_t up = upC[i];
+        const size_t down = dnC[i];
+
+        const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]);
+        const1 += c * Oi[down] * (detValues_up[up] / detValues_up[0]);
     }
 
-    else
-    {
-      // Number of rows and cols of P^T
-      const int prows = k;
-      const int pcols = nel;
-      // Number of rows and cols of Q
-      const int qrows = nmo;
-      const int qcols = k;
-
-      Y5.resize(nel, k);
-      Y6.resize(k, k);
-
-      // Any matrix multiplication of P^T or Q is simply a projection
-      // Explicit matrix multiplication can be avoided; instead column or
-      // row copying can be done BlAS::copy(size of col/row being copied,
-      //            Matrix pointer + place to begin copying,
-      //            storage spacing (number of elements btw next row/col
-      //            element), Pointer to resultant matrix + place to begin
-      //            pasting, storage spacing of resultant matrix)
-      // For example the next 4 lines is the matrix multiplication of T*Q
-      // = Y5
-      std::fill(Y5.begin(), Y5.end(), 0.0);
-      for (int i = 0; i < k; i++)
-      {
-        BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo, Y5.data() + i, k);
-      }
-
-      std::fill(Y6.begin(), Y6.end(), 0.0);
-      for (int i = 0; i < k; i++)
-      {
-        BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, (Y6.data() + i * k), 1);
-      }
-
-      Vector<T> WS;
-      Vector<IndexType> Piv;
-      WS.resize(k);
-      Piv.resize(k);
-      std::complex<RealType> logdet = 0.0;
-      InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet);
-
-      Y11.resize(nel, k);
-      Y23.resize(k, k);
-      Y24.resize(k, k);
-      Y25.resize(k, k);
-      Y26.resize(k, nel);
-
-      std::fill(Y11.begin(), Y11.end(), 0.0);
-      for (int i = 0; i < k; i++)
-      {
-        BLAS::copy(nel, Y4.data() + (data_it[datum + 1 + k + i]), nmo, Y11.data() + i, k);
-      }
-
-      std::fill(Y23.begin(), Y23.end(), 0.0);
-      for (int i = 0; i < k; i++)
-      {
-        BLAS::copy(k, Y11.data() + (data_it[datum + 1 + i]) * k, 1, (Y23.data() + i * k), 1);
-      }
-
-      BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y23.data(), k, Y6.data(), k, RealType(0.0), Y24.data(), k);
-      BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y6.data(), k, Y24.data(), k, RealType(0.0), Y25.data(), k);
-
-      Y26.resize(k, nel);
-
-      std::fill(Y26.begin(), Y26.end(), 0.0);
-      for (int i = 0; i < k; i++)
-      {
-        BLAS::copy(k, Y25.data() + i, k, Y26.data() + (data_it[datum + 1 + i]), nel);
-      }
-
-      Y7.resize(k, nel);
-
-      std::fill(Y7.begin(), Y7.end(), 0.0);
-      for (int i = 0; i < k; i++)
-      {
-        BLAS::copy(k, Y6.data() + i, k, Y7.data() + (data_it[datum + 1 + i]), nel);
-      }
-
-      // c_Tr_AlphaI_MI is a constant contributing to constant const2
-      // c_Tr_AlphaI_MI = Tr[\alpha_{I}^{-1}(P^{T}\widetilde{M} Q)]
-      RealType c_Tr_AlphaI_MI = 0.0;
-      for (int i = 0; i < k; i++)
-      {
-        c_Tr_AlphaI_MI += Y24(i, i);
-      }
-
-      for (int p = 0; p < lookup_tbl[index].size(); p++)
-      {
-        // el_p is the element position that contains information about
-        // the CI coefficient, and det up/dn values associated with the
-        // current unique determinant
-        const int el_p(lookup_tbl[index][p]);
-        const RealType c  = cptr[el_p];
-        const size_t up   = upC[el_p];
-        const size_t down = dnC[el_p];
-
-        const RealType alpha_1(c * detValues_dn[down] * detValues_up[up] / detValues_up[0] * c_Tr_AlphaI_MI);
-        const RealType alpha_2(c * detValues_dn[down] * detValues_up[up] / detValues_up[0]);
-        const RealType alpha_3(c * Oi[down] * detValues_up[up] / detValues_up[0]);
-
-        const2 += alpha_1;
-
-        for (int i = 0; i < k; i++)
-        {
-          BLAS::axpy(nel, alpha_1, Y7.data() + i * nel, 1, pK1.data() + (data_it[datum + 1 + k + i]) * nel, 1);
-          BLAS::axpy(nel, alpha_2, Y7.data() + i * nel, 1, pK2.data() + (data_it[datum + 1 + k + i]) * nel, 1);
-          BLAS::axpy(nel, alpha_3, Y7.data() + i * nel, 1, pK3.data() + (data_it[datum + 1 + k + i]) * nel, 1);
-          BLAS::axpy(nel, alpha_2, Y26.data() + i * nel, 1, pK5.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+    std::fill(pK1.begin(), pK1.end(), 0.0);
+    std::fill(pK2.begin(), pK2.end(), 0.0);
+    std::fill(pK3.begin(), pK3.end(), 0.0);
+    std::fill(pK5.begin(), pK5.end(), 0.0);
+
+    // Now we are going to loop through all unique determinants.
+    // The few lines above are for the reference matrix contribution.
+    // Although I start the loop below from index 0, the loop only performs
+    // actions when the index is >= 1. The detData object contains all the
+    // information about the P^T and Q matrices (projection matrices) needed in
+    // the table method
+    const int* restrict data_it = detData_up.data();
+    for (int index = 0, datum = 0; index < num_unique_up_dets; index++) {
+        const int k = data_it[datum];
+
+        if (k == 0) {
+            datum += 3 * k + 1;
+        }
+
+        else {
+            // Number of rows and cols of P^T
+            const int prows = k;
+            const int pcols = nel;
+            // Number of rows and cols of Q
+            const int qrows = nmo;
+            const int qcols = k;
+
+            Y5.resize(nel, k);
+            Y6.resize(k, k);
+
+            // Any matrix multiplication by P^T or Q is simply a projection.
+            // Explicit matrix multiplication can be avoided; instead column or
+            // row copying can be done with BLAS::copy(size of col/row copied,
+            //            matrix pointer + place to begin copying,
+            //            storage spacing (number of elements between next
+            //            row/col element), pointer to resultant matrix +
+            //            place to begin pasting, spacing of resultant matrix).
+            // For example, the fill and copy loop below form the matrix
+            // product T*Q = Y5.
+            std::fill(Y5.begin(), Y5.end(), 0.0);
+            for (int i = 0; i < k; i++) {
+                BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo,
+                    Y5.data() + i, k);
+            }
+
+            std::fill(Y6.begin(), Y6.end(), 0.0);
+            for (int i = 0; i < k; i++) {
+                BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1,
+                    (Y6.data() + i * k), 1);
+            }
+
+            Vector<T> WS;
+            Vector<IndexType> Piv;
+            WS.resize(k);
+            Piv.resize(k);
+            std::complex<RealType> logdet = 0.0;
+            InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet);
+
+            Y11.resize(nel, k);
+            Y23.resize(k, k);
+            Y24.resize(k, k);
+            Y25.resize(k, k);
+            Y26.resize(k, nel);
+
+            std::fill(Y11.begin(), Y11.end(), 0.0);
+            for (int i = 0; i < k; i++) {
+                BLAS::copy(nel, Y4.data() + (data_it[datum + 1 + k + i]), nmo,
+                    Y11.data() + i, k);
+            }
+
+            std::fill(Y23.begin(), Y23.end(), 0.0);
+            for (int i = 0; i < k; i++) {
+                BLAS::copy(k, Y11.data() + (data_it[datum + 1 + i]) * k, 1,
+                    (Y23.data() + i * k), 1);
+            }
+
+            BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y23.data(), k,
+                Y6.data(), k, RealType(0.0), Y24.data(), k);
+            BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y6.data(), k,
+                Y24.data(), k, RealType(0.0), Y25.data(), k);
+
+            Y26.resize(k, nel);
+
+            std::fill(Y26.begin(), Y26.end(), 0.0);
+            for (int i = 0; i < k; i++) {
+                BLAS::copy(k, Y25.data() + i, k,
+                    Y26.data() + (data_it[datum + 1 + i]), nel);
+            }
+
+            Y7.resize(k, nel);
+
+            std::fill(Y7.begin(), Y7.end(), 0.0);
+            for (int i = 0; i < k; i++) {
+                BLAS::copy(k, Y6.data() + i, k,
+                    Y7.data() + (data_it[datum + 1 + i]), nel);
+            }
+
+            // c_Tr_AlphaI_MI is a constant contributing to constant const2
+            // c_Tr_AlphaI_MI = Tr[\alpha_{I}^{-1}(P^{T}\widetilde{M} Q)]
+            RealType c_Tr_AlphaI_MI = 0.0;
+            for (int i = 0; i < k; i++) {
+                c_Tr_AlphaI_MI += Y24(i, i);
+            }
+
+            for (int p = 0; p < lookup_tbl[index].size(); p++) {
+                // el_p is the element position that contains information about
+                // the CI coefficient, and det up/dn values associated with the
+                // current unique determinant
+                const int el_p(lookup_tbl[index][p]);
+                const RealType c = cptr[el_p];
+                const size_t up = upC[el_p];
+                const size_t down = dnC[el_p];
+
+                const RealType alpha_1(c * detValues_dn[down] *
+                    detValues_up[up] / detValues_up[0] * c_Tr_AlphaI_MI);
+                const RealType alpha_2(c * detValues_dn[down] *
+                    detValues_up[up] / detValues_up[0]);
+                const RealType alpha_3(
+                    c * Oi[down] * detValues_up[up] / detValues_up[0]);
+
+                const2 += alpha_1;
+
+                for (int i = 0; i < k; i++) {
+                    BLAS::axpy(nel, alpha_1, Y7.data() + i * nel, 1,
+                        pK1.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+                    BLAS::axpy(nel, alpha_2, Y7.data() + i * nel, 1,
+                        pK2.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+                    BLAS::axpy(nel, alpha_3, Y7.data() + i * nel, 1,
+                        pK3.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+                    BLAS::axpy(nel, alpha_2, Y26.data() + i * nel, 1,
+                        pK5.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+                }
+            }
+            datum += 3 * k + 1;
         }
-      }
-      datum += 3 * k + 1;
     }
-  }
-
-  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK1.data(), nel, RealType(0.0), K1T.data(), nmo);
-  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K1T.data(), nmo, T_, nmo, RealType(0.0), TK1T.data(), nmo);
-
-  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y3.data(), nmo, pK2.data(), nel, RealType(0.0), K2AiB.data(), nmo);
-  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2AiB.data(), nmo, T_, nmo, RealType(0.0), TK2AiB.data(), nmo);
-  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y2.data(), nmo, pK2.data(), nel, RealType(0.0), K2XA.data(), nmo);
-  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2XA.data(), nmo, T_, nmo, RealType(0.0), TK2XA.data(), nmo);
-
-  BLAS::gemm('N', 'N', nmo, nmo, nel, const1 / (const0 * const0), T_, nmo, pK2.data(), nel, RealType(0.0), K2T.data(),
-             nmo);
-  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2T.data(), nmo, T_, nmo, RealType(0.0), TK2T.data(), nmo);
-  BLAS::gemm('N', 'N', nmo, nel, nmo, const0 / const1, K2T.data(), nmo, Y4.data(), nmo, RealType(0.0), MK2T.data(),
-             nmo);
-
-  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK3.data(), nel, RealType(0.0), K3T.data(), nmo);
-  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K3T.data(), nmo, T_, nmo, RealType(0.0), TK3T.data(), nmo);
-
-  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK5.data(), nel, RealType(0.0), K5T.data(), nmo);
-  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K5T.data(), nmo, T_, nmo, RealType(0.0), TK5T.data(), nmo);
-
-  for (int mu = 0, k = parameter_start_index; k < (parameter_start_index + parameters_size); k++, mu++)
-  {
-    int kk = this->myVars.where(k);
-    if (kk >= 0)
-    {
-      const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second);
-      if (i <= nel - 1 && j > nel - 1)
-      {
-        dhpsioverpsi[kk] +=
-            T(-0.5 * Y4(i, j) -
-              0.5 *
-                  (-K5T(i, j) + K5T(j, i) + TK5T(i, j) + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) - K2XA(i, j) +
-                   K2XA(j, i) + TK2XA(i, j) - MK2T(i, j) + K1T(i, j) - K1T(j, i) - TK1T(i, j) -
-                   const2 / const1 * K2T(i, j) + const2 / const1 * K2T(j, i) + const2 / const1 * TK2T(i, j) +
-                   K3T(i, j) - K3T(j, i) - TK3T(i, j) - K2T(i, j) + K2T(j, i) + TK2T(i, j)));
-      }
-      else if (i <= nel - 1 && j <= nel - 1)
-      {
-        dhpsioverpsi[kk] +=
-            T(-0.5 * (Y4(i, j) - Y4(j, i)) -
-              0.5 *
-                  (-K5T(i, j) + K5T(j, i) + TK5T(i, j) - TK5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) +
-                   TK2AiB(j, i) - K2XA(i, j) + K2XA(j, i) + TK2XA(i, j) - TK2XA(j, i) - MK2T(i, j) + MK2T(j, i) +
-                   K1T(i, j) - K1T(j, i) - TK1T(i, j) + TK1T(j, i) - const2 / const1 * K2T(i, j) +
-                   const2 / const1 * K2T(j, i) + const2 / const1 * TK2T(i, j) - const2 / const1 * TK2T(j, i) +
-                   K3T(i, j) - K3T(j, i) - TK3T(i, j) + TK3T(j, i) - K2T(i, j) + K2T(j, i) + TK2T(i, j) - TK2T(j, i)));
-      }
-      else
-      {
-        dhpsioverpsi[kk] += T(-0.5 *
-                              (-K5T(i, j) + K5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - K2XA(i, j) + K2XA(j, i)
-
-                               + K1T(i, j) - K1T(j, i) - const2 / const1 * K2T(i, j) + const2 / const1 * K2T(j, i) +
-                               K3T(i, j) - K3T(j, i) - K2T(i, j) + K2T(j, i)));
-      }
+
+    BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK1.data(), nel,
+        RealType(0.0), K1T.data(), nmo);
+    BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K1T.data(), nmo, T_, nmo,
+        RealType(0.0), TK1T.data(), nmo);
+
+    BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y3.data(), nmo,
+        pK2.data(), nel, RealType(0.0), K2AiB.data(), nmo);
+    BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2AiB.data(), nmo, T_,
+        nmo, RealType(0.0), TK2AiB.data(), nmo);
+    BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y2.data(), nmo,
+        pK2.data(), nel, RealType(0.0), K2XA.data(), nmo);
+    BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2XA.data(), nmo, T_,
+        nmo, RealType(0.0), TK2XA.data(), nmo);
+
+    BLAS::gemm('N', 'N', nmo, nmo, nel, const1 / (const0 * const0), T_, nmo,
+        pK2.data(), nel, RealType(0.0), K2T.data(), nmo);
+    BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2T.data(), nmo, T_, nmo,
+        RealType(0.0), TK2T.data(), nmo);
+    BLAS::gemm('N', 'N', nmo, nel, nmo, const0 / const1, K2T.data(), nmo,
+        Y4.data(), nmo, RealType(0.0), MK2T.data(), nmo);
+
+    BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK3.data(), nel,
+        RealType(0.0), K3T.data(), nmo);
+    BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K3T.data(), nmo, T_, nmo,
+        RealType(0.0), TK3T.data(), nmo);
+
+    BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK5.data(), nel,
+        RealType(0.0), K5T.data(), nmo);
+    BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K5T.data(), nmo, T_, nmo,
+        RealType(0.0), TK5T.data(), nmo);
+
+    for (int mu = 0, k = parameter_start_index;
+         k < (parameter_start_index + parameters_size); k++, mu++) {
+        int kk = this->myVars.where(k);
+        if (kk >= 0) {
+            const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second);
+            if (i <= nel - 1 && j > nel - 1) {
+                dhpsioverpsi[kk] += T(-0.5 * Y4(i, j) -
+                    0.5 *
+                        (-K5T(i, j) + K5T(j, i) + TK5T(i, j) + K2AiB(i, j) -
+                            K2AiB(j, i) - TK2AiB(i, j) - K2XA(i, j) +
+                            K2XA(j, i) + TK2XA(i, j) - MK2T(i, j) + K1T(i, j) -
+                            K1T(j, i) - TK1T(i, j) -
+                            const2 / const1 * K2T(i, j) +
+                            const2 / const1 * K2T(j, i) +
+                            const2 / const1 * TK2T(i, j) + K3T(i, j) -
+                            K3T(j, i) - TK3T(i, j) - K2T(i, j) + K2T(j, i) +
+                            TK2T(i, j)));
+            }
+            else if (i <= nel - 1 && j <= nel - 1) {
+                dhpsioverpsi[kk] += T(-0.5 * (Y4(i, j) - Y4(j, i)) -
+                    0.5 *
+                        (-K5T(i, j) + K5T(j, i) + TK5T(i, j) - TK5T(j, i) +
+                            K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) +
+                            TK2AiB(j, i) - K2XA(i, j) + K2XA(j, i) +
+                            TK2XA(i, j) - TK2XA(j, i) - MK2T(i, j) +
+                            MK2T(j, i) + K1T(i, j) - K1T(j, i) - TK1T(i, j) +
+                            TK1T(j, i) - const2 / const1 * K2T(i, j) +
+                            const2 / const1 * K2T(j, i) +
+                            const2 / const1 * TK2T(i, j) -
+                            const2 / const1 * TK2T(j, i) + K3T(i, j) -
+                            K3T(j, i) - TK3T(i, j) + TK3T(j, i) - K2T(i, j) +
+                            K2T(j, i) + TK2T(i, j) - TK2T(j, i)));
+            }
+            else {
+                dhpsioverpsi[kk] += T(-0.5 *
+                    (-K5T(i, j) + K5T(j, i) + K2AiB(i, j) - K2AiB(j, i) -
+                        K2XA(i, j) + K2XA(j, i)
+
+                        + K1T(i, j) - K1T(j, i) - const2 / const1 * K2T(i, j) +
+                        const2 / const1 * K2T(j, i) + K3T(i, j) - K3T(j, i) -
+                        K2T(i, j) + K2T(j, i)));
+            }
+        }
     }
-  }
 }
 
-template<typename T>
-void RotatedSPOsT<T>::table_method_evalWF(Vector<T>& dlogpsi,
-                                          const size_t nel,
-                                          const size_t nmo,
-                                          const T& psiCurrent,
-                                          const std::vector<T>& Coeff,
-                                          const std::vector<size_t>& C2node_up,
-                                          const std::vector<size_t>& C2node_dn,
-                                          const ValueVector& detValues_up,
-                                          const ValueVector& detValues_dn,
-                                          const ValueMatrix& M_up,
-                                          const ValueMatrix& M_dn,
-                                          const ValueMatrix& Minv_up,
-                                          const ValueMatrix& Minv_dn,
-                                          const std::vector<int>& detData_up,
-                                          const std::vector<std::vector<int>>& lookup_tbl)
+template <typename T>
+void
+RotatedSPOsT<T>::table_method_evalWF(Vector<T>& dlogpsi, const size_t nel,
+    const size_t nmo, const T& psiCurrent, const std::vector<T>& Coeff,
+    const std::vector<size_t>& C2node_up, const std::vector<size_t>& C2node_dn,
+    const ValueVector& detValues_up, const ValueVector& detValues_dn,
+    const ValueMatrix& M_up, const ValueMatrix& M_dn,
+    const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn,
+    const std::vector<int>& detData_up,
+    const std::vector<std::vector<int>>& lookup_tbl)
 {
-  ValueMatrix Table;
-  ValueMatrix Y5, Y6, Y7;
-  ValueMatrix pK4, K4T, TK4T;
-
-  Table.resize(nel, nmo);
-
-  Bbar.resize(nel, nmo);
-
-  pK4.resize(nmo, nel);
-  K4T.resize(nmo, nmo);
-  TK4T.resize(nel, nmo);
-
-  const int parameters_size(m_act_rot_inds.size());
-  const int parameter_start_index(0);
-
-  const size_t num_unique_up_dets(detValues_up.size());
-  const size_t num_unique_dn_dets(detValues_dn.size());
-
-  const T* restrict cptr = Coeff.data();
-  const size_t nc        = Coeff.size();
-  const size_t* restrict upC(C2node_up.data());
-  const size_t* restrict dnC(C2node_dn.data());
-
-  T* T_(Table.data());
-
-  const T* restrict A(M_up.data());
-  const T* restrict Ainv(Minv_up.data());
-  // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF
-  // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR
-  // THIS CASE
-  //  The T matrix should be calculated and stored for use
-  //  T = A^{-1} \widetilde A
-  // REMINDER: that the ValueMatrix "matrix" stores data in a row major order
-  // and that BLAS commands assume column major
-  BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), A, nmo, Ainv, nel, RealType(0.0), T_, nmo);
-
-  // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1}
-  // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow})
-  RealType const0(0.0), const1(0.0), const2(0.0);
-  for (size_t i = 0; i < nc; ++i)
-  {
-    const RealType c  = cptr[i];
-    const size_t up   = upC[i];
-    const size_t down = dnC[i];
-
-    const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]);
-  }
-
-  std::fill(pK4.begin(), pK4.end(), 0.0);
-
-  // Now we are going to loop through all unique determinants.
-  // The few lines above are for the reference matrix contribution.
-  // Although I start the loop below from index 0, the loop only performs
-  // actions when the index is >= 1 the detData object contains all the
-  // information about the P^T and Q matrices (projection matrices) needed in
-  // the table method
-  const int* restrict data_it = detData_up.data();
-  for (int index = 0, datum = 0; index < num_unique_up_dets; index++)
-  {
-    const int k = data_it[datum];
-
-    if (k == 0)
-    {
-      datum += 3 * k + 1;
+    ValueMatrix Table;
+    ValueMatrix Y5, Y6, Y7;
+    ValueMatrix pK4, K4T, TK4T;
+
+    Table.resize(nel, nmo);
+
+    Bbar.resize(nel, nmo);
+
+    pK4.resize(nmo, nel);
+    K4T.resize(nmo, nmo);
+    TK4T.resize(nel, nmo);
+
+    const int parameters_size(m_act_rot_inds.size());
+    const int parameter_start_index(0);
+
+    const size_t num_unique_up_dets(detValues_up.size());
+    const size_t num_unique_dn_dets(detValues_dn.size());
+
+    const T* restrict cptr = Coeff.data();
+    const size_t nc = Coeff.size();
+    const size_t* restrict upC(C2node_up.data());
+    const size_t* restrict dnC(C2node_dn.data());
+
+    T* T_(Table.data());
+
+    const T* restrict A(M_up.data());
+    const T* restrict Ainv(Minv_up.data());
+    // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF
+    // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR
+    // THIS CASE
+    //  The T matrix should be calculated and stored for use
+    //  T = A^{-1} \widetilde A
+    // REMINDER: the ValueMatrix "matrix" stores data in row-major order,
+    // whereas BLAS commands assume column-major order
+    BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), A, nmo, Ainv, nel,
+        RealType(0.0), T_, nmo);
+
+    // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1}
+    // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow})
+    RealType const0(0.0), const1(0.0), const2(0.0);
+    for (size_t i = 0; i < nc; ++i) {
+        const RealType c = cptr[i];
+        const size_t up = upC[i];
+        const size_t down = dnC[i];
+
+        const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]);
     }
 
-    else
-    {
-      // Number of rows and cols of P^T
-      const int prows = k;
-      const int pcols = nel;
-      // Number of rows and cols of Q
-      const int qrows = nmo;
-      const int qcols = k;
-
-      Y5.resize(nel, k);
-      Y6.resize(k, k);
-
-      // Any matrix multiplication of P^T or Q is simply a projection
-      // Explicit matrix multiplication can be avoided; instead column or
-      // row copying can be done BlAS::copy(size of col/row being copied,
-      //            Matrix pointer + place to begin copying,
-      //            storage spacing (number of elements btw next row/col
-      //            element), Pointer to resultant matrix + place to begin
-      //            pasting, storage spacing of resultant matrix)
-      // For example the next 4 lines is the matrix multiplication of T*Q
-      // = Y5
-      std::fill(Y5.begin(), Y5.end(), 0.0);
-      for (int i = 0; i < k; i++)
-      {
-        BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo, Y5.data() + i, k);
-      }
-
-      std::fill(Y6.begin(), Y6.end(), 0.0);
-      for (int i = 0; i < k; i++)
-      {
-        BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, (Y6.data() + i * k), 1);
-      }
-
-      Vector<T> WS;
-      Vector<IndexType> Piv;
-      WS.resize(k);
-      Piv.resize(k);
-      std::complex<RealType> logdet = 0.0;
-      InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet);
-
-      Y7.resize(k, nel);
-
-      std::fill(Y7.begin(), Y7.end(), 0.0);
-      for (int i = 0; i < k; i++)
-      {
-        BLAS::copy(k, Y6.data() + i, k, Y7.data() + (data_it[datum + 1 + i]), nel);
-      }
-
-      for (int p = 0; p < lookup_tbl[index].size(); p++)
-      {
-        // el_p is the element position that contains information about
-        // the CI coefficient, and det up/dn values associated with the
-        // current unique determinant
-        const int el_p(lookup_tbl[index][p]);
-        const RealType c  = cptr[el_p];
-        const size_t up   = upC[el_p];
-        const size_t down = dnC[el_p];
-
-        const RealType alpha_4(c * detValues_dn[down] * detValues_up[up] * (1 / psiCurrent));
-
-        for (int i = 0; i < k; i++)
-        {
-          BLAS::axpy(nel, alpha_4, Y7.data() + i * nel, 1, pK4.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+    std::fill(pK4.begin(), pK4.end(), 0.0);
+
+    // Now we are going to loop through all unique determinants.
+    // The few lines above are for the reference matrix contribution.
+    // Although the loop below starts from index 0, it only performs
+    // actions when the index is >= 1. The detData object contains all the
+    // information about the P^T and Q matrices (projection matrices) needed in
+    // the table method.
+    const int* restrict data_it = detData_up.data();
+    for (int index = 0, datum = 0; index < num_unique_up_dets; index++) {
+        const int k = data_it[datum];
+
+        if (k == 0) {
+            datum += 3 * k + 1;
+        }
+
+        else {
+            // Number of rows and cols of P^T
+            const int prows = k;
+            const int pcols = nel;
+            // Number of rows and cols of Q
+            const int qrows = nmo;
+            const int qcols = k;
+
+            Y5.resize(nel, k);
+            Y6.resize(k, k);
+
+            // Any matrix multiplication of P^T or Q is simply a projection.
+            // Explicit matrix multiplication can be avoided; instead column or
+            // row copying can be done: BLAS::copy(size of col/row being copied,
+            //            Matrix pointer + place to begin copying,
+            //            storage spacing (number of elements btw next row/col
+            //            element), Pointer to resultant matrix + place to begin
+            //            pasting, storage spacing of resultant matrix)
+            // For example, the copy loop below is the matrix multiplication
+            // T*Q = Y5
+            std::fill(Y5.begin(), Y5.end(), 0.0);
+            for (int i = 0; i < k; i++) {
+                BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo,
+                    Y5.data() + i, k);
+            }
+
+            std::fill(Y6.begin(), Y6.end(), 0.0);
+            for (int i = 0; i < k; i++) {
+                BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1,
+                    (Y6.data() + i * k), 1);
+            }
+
+            Vector<T> WS;
+            Vector<IndexType> Piv;
+            WS.resize(k);
+            Piv.resize(k);
+            std::complex<RealType> logdet = 0.0;
+            InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet);
+
+            Y7.resize(k, nel);
+
+            std::fill(Y7.begin(), Y7.end(), 0.0);
+            for (int i = 0; i < k; i++) {
+                BLAS::copy(k, Y6.data() + i, k,
+                    Y7.data() + (data_it[datum + 1 + i]), nel);
+            }
+
+            for (int p = 0; p < lookup_tbl[index].size(); p++) {
+                // el_p is the element position that contains information about
+                // the CI coefficient, and det up/dn values associated with the
+                // current unique determinant
+                const int el_p(lookup_tbl[index][p]);
+                const RealType c = cptr[el_p];
+                const size_t up = upC[el_p];
+                const size_t down = dnC[el_p];
+
+                const RealType alpha_4(c * detValues_dn[down] *
+                    detValues_up[up] * (1 / psiCurrent));
+
+                for (int i = 0; i < k; i++) {
+                    BLAS::axpy(nel, alpha_4, Y7.data() + i * nel, 1,
+                        pK4.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+                }
+            }
+            datum += 3 * k + 1;
         }
-      }
-      datum += 3 * k + 1;
     }
-  }
-
-  BLAS::gemm('N', 'N', nmo, nmo, nel, RealType(1.0), T_, nmo, pK4.data(), nel, RealType(0.0), K4T.data(), nmo);
-  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K4T.data(), nmo, T_, nmo, RealType(0.0), TK4T.data(), nmo);
-
-  for (int mu = 0, k = parameter_start_index; k < (parameter_start_index + parameters_size); k++, mu++)
-  {
-    int kk = this->myVars.where(k);
-    if (kk >= 0)
-    {
-      const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second);
-      if (i <= nel - 1 && j > nel - 1)
-      {
-        dlogpsi[kk] +=
-            T(detValues_up[0] * (Table(i, j)) * const0 * (1 / psiCurrent) + (K4T(i, j) - K4T(j, i) - TK4T(i, j)));
-      }
-      else if (i <= nel - 1 && j <= nel - 1)
-      {
-        dlogpsi[kk] += T(detValues_up[0] * (Table(i, j) - Table(j, i)) * const0 * (1 / psiCurrent) +
-                         (K4T(i, j) - TK4T(i, j) - K4T(j, i) + TK4T(j, i)));
-      }
-      else
-      {
-        dlogpsi[kk] += T((K4T(i, j) - K4T(j, i)));
-      }
+
+    BLAS::gemm('N', 'N', nmo, nmo, nel, RealType(1.0), T_, nmo, pK4.data(), nel,
+        RealType(0.0), K4T.data(), nmo);
+    BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K4T.data(), nmo, T_, nmo,
+        RealType(0.0), TK4T.data(), nmo);
+
+    for (int mu = 0, k = parameter_start_index;
+         k < (parameter_start_index + parameters_size); k++, mu++) {
+        int kk = this->myVars.where(k);
+        if (kk >= 0) {
+            const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second);
+            if (i <= nel - 1 && j > nel - 1) {
+                dlogpsi[kk] += T(detValues_up[0] * (Table(i, j)) * const0 *
+                        (1 / psiCurrent) +
+                    (K4T(i, j) - K4T(j, i) - TK4T(i, j)));
+            }
+            else if (i <= nel - 1 && j <= nel - 1) {
+                dlogpsi[kk] += T(detValues_up[0] * (Table(i, j) - Table(j, i)) *
+                        const0 * (1 / psiCurrent) +
+                    (K4T(i, j) - TK4T(i, j) - K4T(j, i) + TK4T(j, i)));
+            }
+            else {
+                dlogpsi[kk] += T((K4T(i, j) - K4T(j, i)));
+            }
+        }
     }
-  }
 }
 
-template<typename T>
-std::unique_ptr<SPOSetT<T>> RotatedSPOsT<T>::makeClone() const
+template <typename T>
+std::unique_ptr<SPOSetT<T>>
+RotatedSPOsT<T>::makeClone() const
 {
-  auto myclone = std::make_unique<RotatedSPOsT>(SPOSetT<T>::getName(), std::unique_ptr<SPOSetT<T>>(Phi->makeClone()));
-
-  myclone->params          = this->params;
-  myclone->params_supplied = this->params_supplied;
-  myclone->m_act_rot_inds  = this->m_act_rot_inds;
-  myclone->m_full_rot_inds = this->m_full_rot_inds;
-  myclone->myVars          = this->myVars;
-  myclone->myVarsFull      = this->myVarsFull;
-  myclone->history_params_ = this->history_params_;
-  myclone->use_global_rot_ = this->use_global_rot_;
-  return myclone;
+    auto myclone = std::make_unique<RotatedSPOsT>(
+        SPOSetT<T>::getName(), std::unique_ptr<SPOSetT<T>>(Phi->makeClone()));
+
+    myclone->params = this->params;
+    myclone->params_supplied = this->params_supplied;
+    myclone->m_act_rot_inds = this->m_act_rot_inds;
+    myclone->m_full_rot_inds = this->m_full_rot_inds;
+    myclone->myVars = this->myVars;
+    myclone->myVarsFull = this->myVarsFull;
+    myclone->history_params_ = this->history_params_;
+    myclone->use_global_rot_ = this->use_global_rot_;
+    return myclone;
 }
 
 // Class concrete types from ValueType
diff --git a/src/QMCWaveFunctions/RotatedSPOsT.h b/src/QMCWaveFunctions/RotatedSPOsT.h
index 77daf7fd92..971d2528b3 100644
--- a/src/QMCWaveFunctions/RotatedSPOsT.h
+++ b/src/QMCWaveFunctions/RotatedSPOsT.h
@@ -19,401 +19,410 @@
 
 namespace qmcplusplus
 {
-template<typename T>
+template <typename T>
 class RotatedSPOsT;
 namespace testing
 {
-opt_variables_type& getMyVarsFull(RotatedSPOsT<double>& rot);
-opt_variables_type& getMyVarsFull(RotatedSPOsT<float>& rot);
-std::vector<std::vector<double>>& getHistoryParams(RotatedSPOsT<double>& rot);
-std::vector<std::vector<float>>& getHistoryParams(RotatedSPOsT<float>& rot);
+OptVariablesType<double>&
+getMyVarsFull(RotatedSPOsT<double>& rot);
+OptVariablesType<float>&
+getMyVarsFull(RotatedSPOsT<float>& rot);
+std::vector<std::vector<double>>&
+getHistoryParams(RotatedSPOsT<double>& rot);
+std::vector<std::vector<float>>&
+getHistoryParams(RotatedSPOsT<float>& rot);
 } // namespace testing
 
-template<class T>
-class RotatedSPOsT : public SPOSetT<T>, public OptimizableObject
+template <class T>
+class RotatedSPOsT : public SPOSetT<T>, public OptimizableObjectT<T>
 {
 public:
-  using IndexType    = typename SPOSetT<T>::IndexType;
-  using RealType     = typename SPOSetT<T>::RealType;
-  using ValueType    = typename SPOSetT<T>::ValueType;
-  using FullRealType = typename SPOSetT<T>::FullRealType;
-  using ValueVector  = typename SPOSetT<T>::ValueVector;
-  using ValueMatrix  = typename SPOSetT<T>::ValueMatrix;
-  using GradVector   = typename SPOSetT<T>::GradVector;
-  using GradMatrix   = typename SPOSetT<T>::GradMatrix;
-  using HessVector   = typename SPOSetT<T>::HessVector;
-  using HessMatrix   = typename SPOSetT<T>::HessMatrix;
-  using GGGVector    = typename SPOSetT<T>::GGGVector;
-  using GGGMatrix    = typename SPOSetT<T>::GGGMatrix;
-
-  // constructor
-  RotatedSPOsT(const std::string& my_name, std::unique_ptr<SPOSetT<T>>&& spos);
-  // destructor
-  ~RotatedSPOsT() override;
-
-  std::string getClassName() const override { return "RotatedSPOsT"; }
-  bool isOptimizable() const override { return true; }
-  bool isOMPoffload() const override { return Phi->isOMPoffload(); }
-  bool hasIonDerivs() const override { return Phi->hasIonDerivs(); }
-
-  // Vector of rotation matrix indices
-  using RotationIndices = std::vector<std::pair<int, int>>;
-
-  // Active orbital rotation parameter indices
-  RotationIndices m_act_rot_inds;
-
-  // Full set of rotation values for global rotation
-  RotationIndices m_full_rot_inds;
-
-  // Construct a list of the matrix indices for non-zero rotation parameters.
-  // (The structure for a sparse representation of the matrix)
-  // Only core->active rotations are created.
-  static void createRotationIndices(int nel, int nmo, RotationIndices& rot_indices);
-
-  // Construct a list for all the matrix indices, including core->active,
-  // core->core and active->active
-  static void createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices);
-
-  // Fill in antisymmetric matrix from the list of rotation parameter indices
-  // and a list of parameter values.
-  // This function assumes rot_mat is properly sized upon input and is set to
-  // zero.
-  static void constructAntiSymmetricMatrix(const RotationIndices& rot_indices,
-                                           const std::vector<RealType>& param,
-                                           ValueMatrix& rot_mat);
-
-  // Extract the list of rotation parameters from the entries in an
-  // antisymmetric matrix This function expects rot_indices and param are the
-  // same length.
-  static void extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices,
-                                                   const ValueMatrix& rot_mat,
-                                                   std::vector<RealType>& param);
-
-  // function to perform orbital rotations
-  void apply_rotation(const std::vector<RealType>& param, bool use_stored_copy);
-
-  // For global rotation, inputs are the old parameters and the delta
-  // parameters. The corresponding rotation matrices are constructed,
-  // multiplied together, and the new parameters extracted. The new rotation
-  // is applied to the underlying SPO coefficients
-  void applyDeltaRotation(const std::vector<RealType>& delta_param,
-                          const std::vector<RealType>& old_param,
-                          std::vector<RealType>& new_param);
-
-  // Perform the construction of matrices and extraction of parameters for a
-  // delta rotation. Split out and made static for testing.
-  static void constructDeltaRotation(const std::vector<RealType>& delta_param,
-                                     const std::vector<RealType>& old_param,
-                                     const RotationIndices& act_rot_inds,
-                                     const RotationIndices& full_rot_inds,
-                                     std::vector<RealType>& new_param,
-                                     ValueMatrix& new_rot_mat);
-
-  // When initializing the rotation from VP files
-  // This function applies the rotation history
-  void applyRotationHistory();
-
-  // This function applies the global rotation (similar to apply_rotation, but
-  // for the full set of rotation parameters)
-  void applyFullRotation(const std::vector<RealType>& full_param, bool use_stored_copy);
-
-  // Compute matrix exponential of an antisymmetric matrix (result is rotation
-  // matrix)
-  static void exponentiate_antisym_matrix(ValueMatrix& mat);
-
-  // Compute matrix log of rotation matrix to produce antisymmetric matrix
-  static void log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output);
-
-  // A particular SPOSet used for Orbitals
-  std::unique_ptr<SPOSetT<T>> Phi;
-
-  /// Set the rotation parameters (usually from input file)
-  void setRotationParameters(const std::vector<RealType>& param_list);
-
-  /// the number of electrons of the majority spin
-  size_t nel_major_;
-
-  std::unique_ptr<SPOSetT<T>> makeClone() const override;
-
-  // myG_temp (myL_temp) is the Gradient (Laplacian) value of of the
-  // Determinant part of the wfn myG_J is the Gradient of the all other parts
-  // of the wavefunction (typically just the Jastrow).
-  //       It represents \frac{\nabla\psi_{J}}{\psi_{J}}
-  // myL_J will be used to represent \frac{\nabla^2\psi_{J}}{\psi_{J}} . The
-  // Laplacian portion IMPORTANT NOTE:  The value of P.L holds \nabla^2
-  // ln[\psi] but we need  \frac{\nabla^2 \psi}{\psi} and this is what myL_J
-  // will hold
-  ParticleSet::ParticleGradient myG_temp, myG_J;
-  ParticleSet::ParticleLaplacian myL_temp, myL_J;
-
-  ValueMatrix Bbar;
-  ValueMatrix psiM_inv;
-  ValueMatrix psiM_all;
-  GradMatrix dpsiM_all;
-  ValueMatrix d2psiM_all;
-
-  // Single Slater creation
-  void buildOptVariables(size_t nel);
-
-  // For the MSD case rotations must be created in MultiSlaterDetTableMethod
-  // class
-  void buildOptVariables(const RotationIndices& rotations, const RotationIndices& full_rotations);
-
-  void evaluateDerivatives(ParticleSet& P,
-                           const opt_variables_type& optvars,
-                           Vector<T>& dlogpsi,
-                           Vector<T>& dhpsioverpsi,
-                           const int& FirstIndex,
-                           const int& LastIndex) override;
-
-  void evaluateDerivativesWF(ParticleSet& P,
-                             const opt_variables_type& optvars,
-                             Vector<T>& dlogpsi,
-                             int FirstIndex,
-                             int LastIndex) override;
-
-  void evaluateDerivatives(ParticleSet& P,
-                           const opt_variables_type& optvars,
-                           Vector<T>& dlogpsi,
-                           Vector<T>& dhpsioverpsi,
-                           const T& psiCurrent,
-                           const std::vector<T>& Coeff,
-                           const std::vector<size_t>& C2node_up,
-                           const std::vector<size_t>& C2node_dn,
-                           const ValueVector& detValues_up,
-                           const ValueVector& detValues_dn,
-                           const GradMatrix& grads_up,
-                           const GradMatrix& grads_dn,
-                           const ValueMatrix& lapls_up,
-                           const ValueMatrix& lapls_dn,
-                           const ValueMatrix& M_up,
-                           const ValueMatrix& M_dn,
-                           const ValueMatrix& Minv_up,
-                           const ValueMatrix& Minv_dn,
-                           const GradMatrix& B_grad,
-                           const ValueMatrix& B_lapl,
-                           const std::vector<int>& detData_up,
-                           const size_t N1,
-                           const size_t N2,
-                           const size_t NP1,
-                           const size_t NP2,
-                           const std::vector<std::vector<int>>& lookup_tbl) override;
-
-  void evaluateDerivativesWF(ParticleSet& P,
-                             const opt_variables_type& optvars,
-                             Vector<ValueType>& dlogpsi,
-                             const ValueType& psiCurrent,
-                             const std::vector<ValueType>& Coeff,
-                             const std::vector<size_t>& C2node_up,
-                             const std::vector<size_t>& C2node_dn,
-                             const ValueVector& detValues_up,
-                             const ValueVector& detValues_dn,
-                             const ValueMatrix& M_up,
-                             const ValueMatrix& M_dn,
-                             const ValueMatrix& Minv_up,
-                             const ValueMatrix& Minv_dn,
-                             const std::vector<int>& detData_up,
-                             const std::vector<std::vector<int>>& lookup_tbl) override;
-
-  // helper function to evaluatederivative; evaluate orbital rotation
-  // parameter derivative using table method
-  void table_method_eval(Vector<T>& dlogpsi,
-                         Vector<T>& dhpsioverpsi,
-                         const ParticleSet::ParticleLaplacian& myL_J,
-                         const ParticleSet::ParticleGradient& myG_J,
-                         const size_t nel,
-                         const size_t nmo,
-                         const T& psiCurrent,
-                         const std::vector<T>& Coeff,
-                         const std::vector<size_t>& C2node_up,
-                         const std::vector<size_t>& C2node_dn,
-                         const ValueVector& detValues_up,
-                         const ValueVector& detValues_dn,
-                         const GradMatrix& grads_up,
-                         const GradMatrix& grads_dn,
-                         const ValueMatrix& lapls_up,
-                         const ValueMatrix& lapls_dn,
-                         const ValueMatrix& M_up,
-                         const ValueMatrix& M_dn,
-                         const ValueMatrix& Minv_up,
-                         const ValueMatrix& Minv_dn,
-                         const GradMatrix& B_grad,
-                         const ValueMatrix& B_lapl,
-                         const std::vector<int>& detData_up,
-                         const size_t N1,
-                         const size_t N2,
-                         const size_t NP1,
-                         const size_t NP2,
-                         const std::vector<std::vector<int>>& lookup_tbl);
-
-  void table_method_evalWF(Vector<T>& dlogpsi,
-                           const size_t nel,
-                           const size_t nmo,
-                           const T& psiCurrent,
-                           const std::vector<T>& Coeff,
-                           const std::vector<size_t>& C2node_up,
-                           const std::vector<size_t>& C2node_dn,
-                           const ValueVector& detValues_up,
-                           const ValueVector& detValues_dn,
-                           const ValueMatrix& M_up,
-                           const ValueMatrix& M_dn,
-                           const ValueMatrix& Minv_up,
-                           const ValueMatrix& Minv_dn,
-                           const std::vector<int>& detData_up,
-                           const std::vector<std::vector<int>>& lookup_tbl);
-
-  void extractOptimizableObjectRefs(UniqueOptObjRefs& opt_obj_refs) override { opt_obj_refs.push_back(*this); }
-
-  void checkInVariablesExclusive(opt_variables_type& active) override
-  {
-    if (this->myVars.size())
-      active.insertFrom(this->myVars);
-  }
-
-  void checkOutVariables(const opt_variables_type& active) override { this->myVars.getIndex(active); }
-
-  /// reset
-  void resetParametersExclusive(const opt_variables_type& active) override;
-
-  void writeVariationalParameters(hdf_archive& hout) override;
-
-  void readVariationalParameters(hdf_archive& hin) override;
-
-  //*********************************************************************************
-  // the following functions simply call Phi's corresponding functions
-  void setOrbitalSetSize(int norbs) override { Phi->setOrbitalSetSize(norbs); }
-
-  void checkObject() const override { Phi->checkObject(); }
-
-  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override
-  {
-    assert(psi.size() <= this->OrbitalSetSize);
-    Phi->evaluateValue(P, iat, psi);
-  }
-
-  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override
-  {
-    assert(psi.size() <= this->OrbitalSetSize);
-    Phi->evaluateVGL(P, iat, psi, dpsi, d2psi);
-  }
-
-  void evaluateDetRatios(const VirtualParticleSet& VP,
-                         ValueVector& psi,
-                         const ValueVector& psiinv,
-                         std::vector<T>& ratios) override
-  {
-    Phi->evaluateDetRatios(VP, psi, psiinv, ratios);
-  }
-
-  void evaluateDerivRatios(const VirtualParticleSet& VP,
-                           const opt_variables_type& optvars,
-                           ValueVector& psi,
-                           const ValueVector& psiinv,
-                           std::vector<T>& ratios,
-                           Matrix<T>& dratios,
-                           int FirstIndex,
-                           int LastIndex) override;
-
-  void evaluateVGH(const ParticleSet& P,
-                   int iat,
-                   ValueVector& psi,
-                   GradVector& dpsi,
-                   HessVector& grad_grad_psi) override
-  {
-    assert(psi.size() <= this->OrbitalSetSize);
-    Phi->evaluateVGH(P, iat, psi, dpsi, grad_grad_psi);
-  }
-
-  void evaluateVGHGH(const ParticleSet& P,
-                     int iat,
-                     ValueVector& psi,
-                     GradVector& dpsi,
-                     HessVector& grad_grad_psi,
-                     GGGVector& grad_grad_grad_psi) override
-  {
-    Phi->evaluateVGHGH(P, iat, psi, dpsi, grad_grad_psi, grad_grad_grad_psi);
-  }
-
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            ValueMatrix& logdet,
-                            GradMatrix& dlogdet,
-                            ValueMatrix& d2logdet) override
-  {
-    Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet);
-  }
-
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            ValueMatrix& logdet,
-                            GradMatrix& dlogdet,
-                            HessMatrix& grad_grad_logdet) override
-  {
-    Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, grad_grad_logdet);
-  }
-
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            ValueMatrix& logdet,
-                            GradMatrix& dlogdet,
-                            HessMatrix& grad_grad_logdet,
-                            GGGMatrix& grad_grad_grad_logdet) override
-  {
-    Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet);
-  }
-
-  void evaluateGradSource(const ParticleSet& P,
-                          int first,
-                          int last,
-                          const ParticleSet& source,
-                          int iat_src,
-                          GradMatrix& grad_phi) override
-  {
-    Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi);
-  }
-
-  void evaluateGradSource(const ParticleSet& P,
-                          int first,
-                          int last,
-                          const ParticleSet& source,
-                          int iat_src,
-                          GradMatrix& grad_phi,
-                          HessMatrix& grad_grad_phi,
-                          GradMatrix& grad_lapl_phi) override
-  {
-    Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi, grad_grad_phi, grad_lapl_phi);
-  }
-
-  //  void evaluateThirdDeriv(const ParticleSet& P, int first, int last,
-  //  GGGMatrix& grad_grad_grad_logdet) {Phi->evaluateThridDeriv(P, first,
-  //  last, grad_grad_grad_logdet); }
-
-  /// Use history list (false) or global rotation (true)
-  void set_use_global_rotation(bool use_global_rotation) { use_global_rot_ = use_global_rotation; }
+    using IndexType = typename SPOSetT<T>::IndexType;
+    using RealType = typename SPOSetT<T>::RealType;
+    using ValueType = typename SPOSetT<T>::ValueType;
+    using FullRealType = typename SPOSetT<T>::FullRealType;
+    using ValueVector = typename SPOSetT<T>::ValueVector;
+    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+    using GradVector = typename SPOSetT<T>::GradVector;
+    using GradMatrix = typename SPOSetT<T>::GradMatrix;
+    using HessVector = typename SPOSetT<T>::HessVector;
+    using HessMatrix = typename SPOSetT<T>::HessMatrix;
+    using GGGVector = typename SPOSetT<T>::GGGVector;
+    using GGGMatrix = typename SPOSetT<T>::GGGMatrix;
+
+    // constructor
+    RotatedSPOsT(
+        const std::string& my_name, std::unique_ptr<SPOSetT<T>>&& spos);
+    // destructor
+    ~RotatedSPOsT() override;
+
+    std::string
+    getClassName() const override
+    {
+        return "RotatedSPOsT";
+    }
+    bool
+    isOptimizable() const override
+    {
+        return true;
+    }
+    bool
+    isOMPoffload() const override
+    {
+        return Phi->isOMPoffload();
+    }
+    bool
+    hasIonDerivs() const override
+    {
+        return Phi->hasIonDerivs();
+    }
+
+    // Vector of rotation matrix indices
+    using RotationIndices = std::vector<std::pair<int, int>>;
+
+    // Active orbital rotation parameter indices
+    RotationIndices m_act_rot_inds;
+
+    // Full set of rotation matrix indices for global rotation
+    RotationIndices m_full_rot_inds;
+
+    // Construct a list of the matrix indices for non-zero rotation parameters.
+    // (The structure for a sparse representation of the matrix)
+    // Only core->active rotations are created.
+    static void
+    createRotationIndices(int nel, int nmo, RotationIndices& rot_indices);
+
+    // Construct a list for all the matrix indices, including core->active,
+    // core->core and active->active
+    static void
+    createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices);
+
+    // Fill in antisymmetric matrix from the list of rotation parameter indices
+    // and a list of parameter values.
+    // This function assumes rot_mat is properly sized upon input and is set to
+    // zero.
+    static void
+    constructAntiSymmetricMatrix(const RotationIndices& rot_indices,
+        const std::vector<RealType>& param, ValueMatrix& rot_mat);
+
+    // Extract the list of rotation parameters from the entries in an
+    // antisymmetric matrix. This function expects rot_indices and param to be
+    // the same length.
+    static void
+    extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices,
+        const ValueMatrix& rot_mat, std::vector<RealType>& param);
+
+    // function to perform orbital rotations
+    void
+    apply_rotation(const std::vector<RealType>& param, bool use_stored_copy);
+
+    // For global rotation, inputs are the old parameters and the delta
+    // parameters. The corresponding rotation matrices are constructed,
+    // multiplied together, and the new parameters extracted. The new rotation
+    // is applied to the underlying SPO coefficients
+    void
+    applyDeltaRotation(const std::vector<RealType>& delta_param,
+        const std::vector<RealType>& old_param,
+        std::vector<RealType>& new_param);
+
+    // Perform the construction of matrices and extraction of parameters for a
+    // delta rotation. Split out and made static for testing.
+    static void
+    constructDeltaRotation(const std::vector<RealType>& delta_param,
+        const std::vector<RealType>& old_param,
+        const RotationIndices& act_rot_inds,
+        const RotationIndices& full_rot_inds, std::vector<RealType>& new_param,
+        ValueMatrix& new_rot_mat);
+
+    // When initializing the rotation from VP files,
+    // this function applies the rotation history.
+    void
+    applyRotationHistory();
+
+    // This function applies the global rotation (similar to apply_rotation, but
+    // for the full set of rotation parameters)
+    void
+    applyFullRotation(
+        const std::vector<RealType>& full_param, bool use_stored_copy);
+
+    // Compute matrix exponential of an antisymmetric matrix (result is rotation
+    // matrix)
+    static void
+    exponentiate_antisym_matrix(ValueMatrix& mat);
+
+    // Compute matrix log of rotation matrix to produce antisymmetric matrix
+    static void
+    log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output);
+
+    // A particular SPOSet used for Orbitals
+    std::unique_ptr<SPOSetT<T>> Phi;
+
+    /// Set the rotation parameters (usually from input file)
+    void
+    setRotationParameters(const std::vector<RealType>& param_list);
+
+    /// the number of electrons of the majority spin
+    size_t nel_major_;
+
+    std::unique_ptr<SPOSetT<T>>
+    makeClone() const override;
+
+    // myG_temp (myL_temp) is the Gradient (Laplacian) value of the
+    // Determinant part of the wfn. myG_J is the Gradient of all the other
+    // parts of the wavefunction (typically just the Jastrow).
+    //       It represents \frac{\nabla\psi_{J}}{\psi_{J}}
+    // myL_J will be used to represent \frac{\nabla^2\psi_{J}}{\psi_{J}}, the
+    // Laplacian portion. IMPORTANT NOTE: The value of P.L holds \nabla^2
+    // ln[\psi] but we need \frac{\nabla^2 \psi}{\psi}, and this is what myL_J
+    // will hold.
+    typename ParticleSetT<T>::ParticleGradient myG_temp, myG_J;
+    typename ParticleSetT<T>::ParticleLaplacian myL_temp, myL_J;
+
+    ValueMatrix Bbar;
+    ValueMatrix psiM_inv;
+    ValueMatrix psiM_all;
+    GradMatrix dpsiM_all;
+    ValueMatrix d2psiM_all;
+
+    // Single Slater creation
+    void
+    buildOptVariables(size_t nel);
+
+    // For the MSD case rotations must be created in MultiSlaterDetTableMethod
+    // class
+    void
+    buildOptVariables(const RotationIndices& rotations,
+        const RotationIndices& full_rotations);
+
+    void
+    evaluateDerivatives(ParticleSetT<T>& P, const OptVariablesType<T>& optvars,
+        Vector<T>& dlogpsi, Vector<T>& dhpsioverpsi, const int& FirstIndex,
+        const int& LastIndex) override;
+
+    void
+    evaluateDerivativesWF(ParticleSetT<T>& P,
+        const OptVariablesType<T>& optvars, Vector<T>& dlogpsi, int FirstIndex,
+        int LastIndex) override;
+
+    void
+    evaluateDerivatives(ParticleSetT<T>& P, const OptVariablesType<T>& optvars,
+        Vector<T>& dlogpsi, Vector<T>& dhpsioverpsi, const T& psiCurrent,
+        const std::vector<T>& Coeff, const std::vector<size_t>& C2node_up,
+        const std::vector<size_t>& C2node_dn, const ValueVector& detValues_up,
+        const ValueVector& detValues_dn, const GradMatrix& grads_up,
+        const GradMatrix& grads_dn, const ValueMatrix& lapls_up,
+        const ValueMatrix& lapls_dn, const ValueMatrix& M_up,
+        const ValueMatrix& M_dn, const ValueMatrix& Minv_up,
+        const ValueMatrix& Minv_dn, const GradMatrix& B_grad,
+        const ValueMatrix& B_lapl, const std::vector<int>& detData_up,
+        const size_t N1, const size_t N2, const size_t NP1, const size_t NP2,
+        const std::vector<std::vector<int>>& lookup_tbl) override;
+
+    void
+    evaluateDerivativesWF(ParticleSetT<T>& P,
+        const OptVariablesType<T>& optvars, Vector<ValueType>& dlogpsi,
+        const ValueType& psiCurrent, const std::vector<ValueType>& Coeff,
+        const std::vector<size_t>& C2node_up,
+        const std::vector<size_t>& C2node_dn, const ValueVector& detValues_up,
+        const ValueVector& detValues_dn, const ValueMatrix& M_up,
+        const ValueMatrix& M_dn, const ValueMatrix& Minv_up,
+        const ValueMatrix& Minv_dn, const std::vector<int>& detData_up,
+        const std::vector<std::vector<int>>& lookup_tbl) override;
+
+    // Helper function for evaluateDerivatives: evaluates the orbital rotation
+    // parameter derivatives using the table method.
+    void
+    table_method_eval(Vector<T>& dlogpsi, Vector<T>& dhpsioverpsi,
+        const typename ParticleSetT<T>::ParticleLaplacian& myL_J,
+        const typename ParticleSetT<T>::ParticleGradient& myG_J,
+        const size_t nel, const size_t nmo, const T& psiCurrent,
+        const std::vector<T>& Coeff, const std::vector<size_t>& C2node_up,
+        const std::vector<size_t>& C2node_dn, const ValueVector& detValues_up,
+        const ValueVector& detValues_dn, const GradMatrix& grads_up,
+        const GradMatrix& grads_dn, const ValueMatrix& lapls_up,
+        const ValueMatrix& lapls_dn, const ValueMatrix& M_up,
+        const ValueMatrix& M_dn, const ValueMatrix& Minv_up,
+        const ValueMatrix& Minv_dn, const GradMatrix& B_grad,
+        const ValueMatrix& B_lapl, const std::vector<int>& detData_up,
+        const size_t N1, const size_t N2, const size_t NP1, const size_t NP2,
+        const std::vector<std::vector<int>>& lookup_tbl);
+
+    void
+    table_method_evalWF(Vector<T>& dlogpsi, const size_t nel, const size_t nmo,
+        const T& psiCurrent, const std::vector<T>& Coeff,
+        const std::vector<size_t>& C2node_up,
+        const std::vector<size_t>& C2node_dn, const ValueVector& detValues_up,
+        const ValueVector& detValues_dn, const ValueMatrix& M_up,
+        const ValueMatrix& M_dn, const ValueMatrix& Minv_up,
+        const ValueMatrix& Minv_dn, const std::vector<int>& detData_up,
+        const std::vector<std::vector<int>>& lookup_tbl);
+
+    void
+    extractOptimizableObjectRefs(UniqueOptObjRefsT<T>& opt_obj_refs) override
+    {
+        opt_obj_refs.push_back(*this);
+    }
+
+    void
+    checkInVariablesExclusive(OptVariablesType<T>& active) override
+    {
+        if (this->myVars.size())
+            active.insertFrom(this->myVars);
+    }
+
+    void
+    checkOutVariables(const OptVariablesType<T>& active) override
+    {
+        this->myVars.getIndex(active);
+    }
+
+    /// reset
+    void
+    resetParametersExclusive(const OptVariablesType<T>& active) override;
+
+    void
+    writeVariationalParameters(hdf_archive& hout) override;
+
+    void
+    readVariationalParameters(hdf_archive& hin) override;
+
+    //*********************************************************************************
+    // the following functions simply call Phi's corresponding functions
+    void
+    setOrbitalSetSize(int norbs) override
+    {
+        Phi->setOrbitalSetSize(norbs);
+    }
+
+    void
+    checkObject() const override
+    {
+        Phi->checkObject();
+    }
+
+    void
+    evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) override
+    {
+        assert(psi.size() <= this->OrbitalSetSize);
+        Phi->evaluateValue(P, iat, psi);
+    }
+
+    void
+    evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
+        GradVector& dpsi, ValueVector& d2psi) override
+    {
+        assert(psi.size() <= this->OrbitalSetSize);
+        Phi->evaluateVGL(P, iat, psi, dpsi, d2psi);
+    }
+
+    void
+    evaluateDetRatios(const VirtualParticleSetT<T>& VP, ValueVector& psi,
+        const ValueVector& psiinv, std::vector<T>& ratios) override
+    {
+        Phi->evaluateDetRatios(VP, psi, psiinv, ratios);
+    }
+
+    void
+    evaluateDerivRatios(const VirtualParticleSetT<T>& VP,
+        const OptVariablesType<T>& optvars, ValueVector& psi,
+        const ValueVector& psiinv, std::vector<T>& ratios, Matrix<T>& dratios,
+        int FirstIndex, int LastIndex) override;
+
+    void
+    evaluateVGH(const ParticleSetT<T>& P, int iat, ValueVector& psi,
+        GradVector& dpsi, HessVector& grad_grad_psi) override
+    {
+        assert(psi.size() <= this->OrbitalSetSize);
+        Phi->evaluateVGH(P, iat, psi, dpsi, grad_grad_psi);
+    }
+
+    void
+    evaluateVGHGH(const ParticleSetT<T>& P, int iat, ValueVector& psi,
+        GradVector& dpsi, HessVector& grad_grad_psi,
+        GGGVector& grad_grad_grad_psi) override
+    {
+        Phi->evaluateVGHGH(
+            P, iat, psi, dpsi, grad_grad_psi, grad_grad_grad_psi);
+    }
+
+    void
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+        ValueMatrix& logdet, GradMatrix& dlogdet,
+        ValueMatrix& d2logdet) override
+    {
+        Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet);
+    }
+
+    void
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+        ValueMatrix& logdet, GradMatrix& dlogdet,
+        HessMatrix& grad_grad_logdet) override
+    {
+        Phi->evaluate_notranspose(
+            P, first, last, logdet, dlogdet, grad_grad_logdet);
+    }
+
+    void
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+        ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet,
+        GGGMatrix& grad_grad_grad_logdet) override
+    {
+        Phi->evaluate_notranspose(P, first, last, logdet, dlogdet,
+            grad_grad_logdet, grad_grad_grad_logdet);
+    }
+
+    void
+    evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
+        const ParticleSetT<T>& source, int iat_src,
+        GradMatrix& grad_phi) override
+    {
+        Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi);
+    }
+
+    void
+    evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
+        const ParticleSetT<T>& source, int iat_src, GradMatrix& grad_phi,
+        HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi) override
+    {
+        Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi,
+            grad_grad_phi, grad_lapl_phi);
+    }
+
+    //  void evaluateThirdDeriv(const ParticleSet& P, int first, int last,
+    //  GGGMatrix& grad_grad_grad_logdet) {Phi->evaluateThridDeriv(P, first,
+    //  last, grad_grad_grad_logdet); }
+
+    /// Use history list (false) or global rotation (true)
+    void
+    set_use_global_rotation(bool use_global_rotation)
+    {
+        use_global_rot_ = use_global_rotation;
+    }
 
 private:
-  /// true if SPO parameters (orbital rotation parameters) have been supplied
-  /// by input
-  bool params_supplied;
-  /// list of supplied orbital rotation parameters
-  std::vector<RealType> params;
-
-  /// Full set of rotation matrix parameters for use in global rotation method
-  opt_variables_type myVarsFull;
-
-  /// List of previously applied parameters
-  std::vector<std::vector<RealType>> history_params_;
-
-  /// Use global rotation or history list
-  bool use_global_rot_ = true;
-
-  friend opt_variables_type& testing::getMyVarsFull(RotatedSPOsT<double>& rot);
-  friend opt_variables_type& testing::getMyVarsFull(RotatedSPOsT<float>& rot);
-  friend std::vector<std::vector<double>>& testing::getHistoryParams(RotatedSPOsT<double>& rot);
-  friend std::vector<std::vector<float>>& testing::getHistoryParams(RotatedSPOsT<float>& rot);
+    /// true if SPO parameters (orbital rotation parameters) have been supplied
+    /// by input
+    bool params_supplied;
+    /// list of supplied orbital rotation parameters
+    std::vector<RealType> params;
+
+    /// Full set of rotation matrix parameters for use in global rotation method
+    OptVariablesType<T> myVarsFull;
+
+    /// List of previously applied parameters
+    std::vector<std::vector<RealType>> history_params_;
+
+    /// Use global rotation or history list
+    bool use_global_rot_ = true;
+
+    friend OptVariablesType<double>&
+    testing::getMyVarsFull(RotatedSPOsT<double>& rot);
+    friend OptVariablesType<float>&
+    testing::getMyVarsFull(RotatedSPOsT<float>& rot);
+    friend std::vector<std::vector<double>>&
+    testing::getHistoryParams(RotatedSPOsT<double>& rot);
+    friend std::vector<std::vector<float>>&
+    testing::getHistoryParams(RotatedSPOsT<float>& rot);
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp
index b98952f779..12148277a0 100644
--- a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp
+++ b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp
@@ -26,11 +26,10 @@
 #include "QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h"
 #include "QMCWaveFunctions/SPOSetScannerT.h"
 #if OHMMS_DIM == 3
+#include "QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h"
 #include "QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h"
-
 #if defined(QMC_COMPLEX)
 #include "QMCWaveFunctions/EinsplineSpinorSetBuilder.h"
-#include "QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h"
 #endif
 
 #if defined(HAVE_EINSPLINE)
@@ -45,6 +44,29 @@
 
 namespace qmcplusplus
 {
+template <typename T>
+struct LCAOSpinorBuilderMaker // primary template: any T that is not std::complex<...>
+{
+    template <typename... TArgs>
+    std::unique_ptr<LCAOSpinorBuilderT<T>>
+    operator()(TArgs&&...) const // arguments are ignored; this overload always throws
+    {
+        throw std::runtime_error(
+            "lcao spinors not compatible with non-complex value types");
+    }
+};
+
+template <typename T>
+struct LCAOSpinorBuilderMaker<std::complex<T>> // specialization for complex value types
+{
+    template <typename... TArgs>
+    std::unique_ptr<LCAOSpinorBuilderT<std::complex<T>>>
+    operator()(TArgs&&... args) const // perfect-forwards args to the builder constructor
+    {
+        return std::make_unique<LCAOSpinorBuilderT<std::complex<T>>>(
+            std::forward<TArgs>(args)...);
+    }
+};
 
 template <typename T>
 const SPOSetT<T>*
@@ -67,7 +89,7 @@ SPOSetBuilderFactoryT<T>::getSPOSet(const std::string& name) const
  */
 template <typename T>
 SPOSetBuilderFactoryT<T>::SPOSetBuilderFactoryT(
-    Communicate* comm, ParticleSet& els, const PSetMap& psets) :
+    Communicate* comm, ParticleSetT<T>& els, const PSetMap& psets) :
     MPIObjectBase(comm),
     targetPtcl(els),
     ptclPool(psets)
@@ -145,21 +167,22 @@ SPOSetBuilderFactoryT<T>::createSPOSetBuilder(xmlNodePtr rootNode)
         }
     }
     else if (type == "molecularorbital" || type == "mo") {
-        ParticleSet* ions = nullptr;
+        ParticleSetT<T>* ions = nullptr;
         // initialize with the source tag
         auto pit(ptclPool.find(sourceOpt));
         if (pit == ptclPool.end())
             PRE.error("Missing basisset/@source.", true);
         else
             ions = pit->second.get();
-        if (targetPtcl.isSpinor())
-#ifdef QMC_COMPLEX
-            bb = std::make_unique<LCAOSpinorBuilderT<T>>(
-                targetPtcl, *ions, myComm, rootNode);
-#else
-            PRE.error("Use of lcao spinors requires QMC_COMPLEX=1.  Rebuild "
-                      "with this option");
-#endif
+        if (targetPtcl.isSpinor()) {
+            try {
+                bb = LCAOSpinorBuilderMaker<T>{}(
+                    targetPtcl, *ions, myComm, rootNode);
+            }
+            catch (const std::exception& e) {
+                PRE.error(e.what());
+            }
+        }
         else
             bb = std::make_unique<LCAOrbitalBuilderT<T>>(
                 targetPtcl, *ions, myComm, rootNode);
@@ -253,11 +276,8 @@ SPOSetBuilderFactoryT<T>::addSPOSet(std::unique_ptr<SPOSetT<T>> spo)
 template <typename T>
 std::string SPOSetBuilderFactoryT<T>::basisset_tag = "basisset";
 
-#ifdef QMC_COMPLEX
 template class SPOSetBuilderFactoryT<std::complex<double>>;
 template class SPOSetBuilderFactoryT<std::complex<float>>;
-#else
 template class SPOSetBuilderFactoryT<double>;
 template class SPOSetBuilderFactoryT<float>;
-#endif
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.h b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.h
index ce1e9b89da..9841988d00 100644
--- a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.h
+++ b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.h
@@ -29,7 +29,8 @@ class SPOSetBuilderFactoryT : public MPIObjectBase
 {
 public:
     using SPOMap = typename SPOSetT<T>::SPOMap;
-    using PSetMap = std::map<std::string, const std::unique_ptr<ParticleSet>>;
+    using PSetMap =
+        std::map<std::string, const std::unique_ptr<ParticleSetT<T>>>;
 
     /** constructor
      * \param comm communicator
@@ -37,7 +38,7 @@ class SPOSetBuilderFactoryT : public MPIObjectBase
      * \param ions reference to the ions
      */
     SPOSetBuilderFactoryT(
-        Communicate* comm, ParticleSet& els, const PSetMap& psets);
+        Communicate* comm, ParticleSetT<T>& els, const PSetMap& psets);
 
     ~SPOSetBuilderFactoryT();
 
@@ -74,7 +75,7 @@ class SPOSetBuilderFactoryT : public MPIObjectBase
 
 private:
     /// reference to the target particle
-    ParticleSet& targetPtcl;
+    ParticleSetT<T>& targetPtcl;
 
     /// reference to the particle pool
     const PSetMap& ptclPool;
diff --git a/src/QMCWaveFunctions/SPOSetScannerT.h b/src/QMCWaveFunctions/SPOSetScannerT.h
index 9a3bb418a1..e4841b90bb 100644
--- a/src/QMCWaveFunctions/SPOSetScannerT.h
+++ b/src/QMCWaveFunctions/SPOSetScannerT.h
@@ -1,6 +1,6 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
@@ -9,207 +9,276 @@
 // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
 //////////////////////////////////////////////////////////////////////////////////////
 
-
 #ifndef QMCPLUSPLUS_SPOSET_SCANNERT_H
 #define QMCPLUSPLUS_SPOSET_SCANNERT_H
 
-#include "Particle/ParticleSet.h"
+#include "OhmmsData/AttributeSet.h"
+#include "Particle/ParticleSetT.h"
 #include "QMCWaveFunctions/OrbitalSetTraits.h"
 #include "QMCWaveFunctions/SPOSetT.h"
-#include "OhmmsData/AttributeSet.h"
 
 namespace qmcplusplus
 {
+template <typename T>
+struct OutputReportMakerBase
+{
+    using ValueVector = typename SPOSetT<T>::ValueVector;
+    using GradVector = typename SPOSetT<T>::GradVector;
+    // Inputs of the report writers; the vectors are referenced, not copied.
+    const ValueVector& SPO_v_avg; ///< per-orbital accumulated values
+    const ValueVector& SPO_l_avg; ///< per-orbital accumulated Laplacians
+    const GradVector& SPO_g_avg;  ///< per-orbital accumulated gradients
+    int nknots;                   ///< the writers scale each entry by 1/nknots
+};
+
+template <typename T>
+struct OutputReportMaker : OutputReportMakerBase<T>
+{
+    using RealType = typename SPOSetT<T>::RealType;
+    /// Write the header, then one row per orbital of accumulators / nknots.
+    void
+    operator()(std::ofstream& output_report) const
+    {
+        const auto one_over_nknots = static_cast<RealType>(1.0 / this->nknots);
+        output_report
+            << "#   Report: Orb   Value_avg   Gradients_avg   Laplacian_avg"
+            << std::endl;
+        for (int iorb = 0; iorb < this->SPO_v_avg.size(); iorb++) {
+            output_report << "\t" << iorb << "    " << std::scientific
+                          << this->SPO_v_avg[iorb] * one_over_nknots << "   "
+                          << this->SPO_g_avg[iorb][0] * one_over_nknots << "   "
+                          << this->SPO_g_avg[iorb][1] * one_over_nknots << "   "
+                          << this->SPO_g_avg[iorb][2] * one_over_nknots << "   "
+                          << this->SPO_l_avg[iorb] * one_over_nknots
+                          << std::fixed << std::endl;
+        }
+    }
+};
+
+template <typename T>
+struct OutputReportMaker<std::complex<T>> :
+    OutputReportMakerBase<std::complex<T>>
+{
+    using RealType = typename SPOSetT<std::complex<T>>::RealType;
+    /// Write header, then per orbital the averages and imag/real of the value.
+    void
+    operator()(std::ofstream& output_report) const
+    {
+        const auto one_over_nknots = static_cast<RealType>(1.0 / this->nknots);
+        output_report
+            << "#   Report: Orb   Value_avg I/R  Gradients_avg  Laplacian_avg"
+            << std::endl;
+        for (int iorb = 0; iorb < this->SPO_v_avg.size(); iorb++) {
+            const auto& v_avg = this->SPO_v_avg[iorb];
+            output_report << "\t" << iorb << "    " << std::scientific
+                          << v_avg * one_over_nknots << "   "
+                          << v_avg.imag() / v_avg.real() << "   "
+                          << this->SPO_g_avg[iorb][0] * one_over_nknots << "   "
+                          << this->SPO_g_avg[iorb][1] * one_over_nknots << "   "
+                          << this->SPO_g_avg[iorb][2] * one_over_nknots << "   "
+                          << this->SPO_l_avg[iorb] * one_over_nknots
+                          << std::fixed << std::endl;
+        }
+    }
+};
+
 /** a scanner for all the SPO sets.
-   */
+ */
 template <typename T>
 class SPOSetScannerT
 {
 public:
-  using PtclPool    = std::map<std::string, const std::unique_ptr<ParticleSet>>;
-  using SPOSetMap   = typename SPOSetT<T>::SPOMap;
-  using RealType    = typename SPOSetT<T>::RealType;
-  using ValueVector = typename SPOSetT<T>::ValueVector;
-  using GradVector  = typename SPOSetT<T>::GradVector;
-  using HessVector  = typename SPOSetT<T>::HessVector;
-
-  RealType myfabs(RealType s) { return std::fabs(s); }
-  template<typename U>
-  std::complex<U> myfabs(std::complex<U>& s)
-  {
-    return std::complex<U>(myfabs(s.real()), myfabs(s.imag()));
-  }
-  template<typename U>
-  TinyVector<U, OHMMS_DIM> myfabs(TinyVector<U, OHMMS_DIM>& s)
-  {
-    return TinyVector<U, OHMMS_DIM>(myfabs(s[0]), myfabs(s[1]), myfabs(s[2]));
-  }
-
-  const SPOSetMap& sposets;
-  ParticleSet& target;
-  const PtclPool& ptcl_pool_;
-  ParticleSet* ions;
-
-  // construction/destruction
-  SPOSetScannerT(const SPOSetMap& sposets_in, ParticleSet& targetPtcl, const PtclPool& psets)
-      : sposets(sposets_in), target(targetPtcl), ptcl_pool_(psets), ions(0){};
-  //~SPOSetScannerT(){};
-
-  // processing scanning
-  void put(xmlNodePtr cur)
-  {
-    app_log() << "Entering the SPO set scanner!" << std::endl;
-    // check in the source particle set and search for it in the pool.
-    std::string sourcePtcl("ion0");
-    OhmmsAttributeSet aAttrib;
-    aAttrib.add(sourcePtcl, "source");
-    aAttrib.put(cur);
-    auto pit(ptcl_pool_.find(sourcePtcl));
-    if (pit == ptcl_pool_.end())
-      app_log() << "Source particle set not found. Can not be used as reference point." << std::endl;
-    else
-      ions = pit->second.get();
-
-    // scanning the SPO sets
-    xmlNodePtr cur_save = cur;
-    for (const auto& [name, sposet] : sposets)
+    using PtclPool =
+        std::map<std::string, const std::unique_ptr<ParticleSetT<T>>>;
+    using SPOSetMap = typename SPOSetT<T>::SPOMap;
+    using RealType = typename SPOSetT<T>::RealType;
+    using ValueVector = typename SPOSetT<T>::ValueVector;
+    using GradVector = typename SPOSetT<T>::GradVector;
+    using HessVector = typename SPOSetT<T>::HessVector;
+
+    RealType
+    myfabs(RealType s) // absolute value of a real scalar
+    {
+        return std::fabs(s);
+    }
+    template <typename U>
+    std::complex<U>
+    myfabs(const std::complex<U>& s) // |real| and |imag| taken separately
+    {
+        return std::complex<U>(myfabs(s.real()), myfabs(s.imag()));
+    }
+    template <typename U>
+    TinyVector<U, OHMMS_DIM>
+    myfabs(const TinyVector<U, OHMMS_DIM>& s) // component-wise, 3 components
+    {
+        return TinyVector<U, OHMMS_DIM>(
+            myfabs(s[0]), myfabs(s[1]), myfabs(s[2]));
+    }
+
+    const SPOSetMap& sposets;
+    ParticleSetT<T>& target;
+    const PtclPool& ptcl_pool_;
+    ParticleSetT<T>* ions;
+
+    /// Store references to the SPO sets, target and pool; ions is set by put().
+    SPOSetScannerT(const SPOSetMap& sposets_in, ParticleSetT<T>& targetPtcl,
+        const PtclPool& psets) :
+        sposets(sposets_in),
+        target(targetPtcl),
+        ptcl_pool_(psets),
+        ions(nullptr)
+    {}
+
+    // Entry point: resolve the source particle set, then scan every <path>.
+    void
+    put(xmlNodePtr cur)
     {
-      app_log() << "  Processing SPO " << sposet->getName() << std::endl;
-      // scanning the paths
-      cur = cur_save->children;
-      while (cur != NULL)
-      {
-        std::string trace_name("no name");
+        app_log() << "Entering the SPO set scanner!" << std::endl;
+        // Look up the "source" attribute (default "ion0") in the particle pool.
+        std::string sourcePtcl("ion0");
         OhmmsAttributeSet aAttrib;
-        aAttrib.add(trace_name, "name");
+        aAttrib.add(sourcePtcl, "source");
         aAttrib.put(cur);
-        std::string cname(getNodeName(cur));
-        std::string prefix(sposet->getName() + "_" + cname + "_" + trace_name);
-        if (cname == "path")
-        {
-          app_log() << "    Scanning a " << cname << " called " << trace_name << " and writing to "
-                    << prefix + "_v/g/l/report.dat" << std::endl;
-          auto spo = sposet->makeClone();
-          scan_path(cur, *spo, prefix);
-        }
+        auto pit(ptcl_pool_.find(sourcePtcl));
+        if (pit == ptcl_pool_.end())
+            app_log() << "Source particle set not found. Can not be used as "
+                         "reference point."
+                      << std::endl;
         else
-        {
-          if (cname != "text" && cname != "comment")
-            app_log() << "    Unknown type of scanning " << cname << std::endl;
+            ions = pit->second.get();
+
+        // Every SPO set is scanned along every <path> child of the node.
+        xmlNodePtr cur_save = cur;
+        for (const auto& [name, sposet] : sposets) {
+            app_log() << "  Processing SPO " << sposet->getName() << std::endl;
+            // walk the children of the scanner node
+            cur = cur_save->children;
+            while (cur != nullptr) {
+                std::string trace_name("no name");
+                OhmmsAttributeSet pathAttrib;
+                pathAttrib.add(trace_name, "name");
+                pathAttrib.put(cur);
+                std::string cname(getNodeName(cur));
+                std::string prefix(
+                    sposet->getName() + "_" + cname + "_" + trace_name);
+                if (cname == "path") {
+                    app_log() << "    Scanning a " << cname << " called "
+                              << trace_name << " and writing to "
+                              << prefix + "_v/g/l/report.dat" << std::endl;
+                    auto spo = sposet->makeClone();
+                    scan_path(cur, *spo, prefix);
+                }
+                else {
+                    if (cname != "text" && cname != "comment")
+                        app_log() << "    Unknown type of scanning " << cname
+                                  << std::endl;
+                }
+                cur = cur->next;
+            }
         }
-        cur = cur->next;
-      }
-    }
-    app_log() << "Exiting the SPO set scanner!" << std::endl << std::endl;
-  }
-
-  // scanning a path
-  void scan_path(xmlNodePtr cur, SPOSetT<T>& sposet, std::string prefix)
-  {
-    std::string file_name;
-    file_name = prefix + "_v.dat";
-    std::ofstream output_v(file_name.c_str());
-    file_name = prefix + "_g.dat";
-    std::ofstream output_g(file_name.c_str());
-    file_name = prefix + "_l.dat";
-    std::ofstream output_l(file_name.c_str());
-    file_name = prefix + "_report.dat";
-    std::ofstream output_report(file_name.c_str());
-
-    int nknots(2);
-    int from_atom(-1);
-    int to_atom(-1);
-    TinyVector<double, OHMMS_DIM> from_pos(0.0, 0.0, 0.0);
-    TinyVector<double, OHMMS_DIM> to_pos(0.0, 0.0, 0.0);
-
-    OhmmsAttributeSet aAttrib;
-    aAttrib.add(nknots, "nknots");
-    aAttrib.add(from_atom, "from_atom");
-    aAttrib.add(to_atom, "to_atom");
-    aAttrib.add(from_pos, "from_pos");
-    aAttrib.add(to_pos, "to_pos");
-    aAttrib.put(cur);
-
-    // sanity check
-    if (nknots < 2)
-      nknots = 2;
-    // check out the reference atom coordinates
-    if (ions)
-    {
-      if (from_atom >= 0 && from_atom < ions->R.size())
-        from_pos = ions->R[from_atom];
-      if (to_atom >= 0 && to_atom < ions->R.size())
-        to_pos = ions->R[to_atom];
+        app_log() << "Exiting the SPO set scanner!" << std::endl << std::endl;
     }
 
-    // prepare a fake particle set
-    ValueVector SPO_v, SPO_l, SPO_v_avg, SPO_l_avg;
-    GradVector SPO_g, SPO_g_avg;
-    int OrbitalSize(sposet.size());
-    SPO_v.resize(OrbitalSize);
-    SPO_g.resize(OrbitalSize);
-    SPO_l.resize(OrbitalSize);
-    SPO_v_avg.resize(OrbitalSize);
-    SPO_g_avg.resize(OrbitalSize);
-    SPO_l_avg.resize(OrbitalSize);
-    SPO_v_avg      = 0.0;
-    SPO_g_avg      = 0.0;
-    SPO_l_avg      = 0.0;
-    double Delta   = 1.0 / (nknots - 1);
-    int elec_count = target.R.size();
-    auto R_saved   = target.R;
-    ParticleSet::SingleParticlePos zero_pos(0.0, 0.0, 0.0);
-    for (int icount = 0, ind = 0; icount < nknots; icount++, ind++)
+    /** Scan one <path> node: evaluate sposet at nknots points on the segment
+     *  from from_pos to to_pos, writing values, gradients and Laplacians to
+     *  prefix + "_v.dat", "_g.dat", "_l.dat" and a summary to "_report.dat".
+     */
+    void
+    scan_path(xmlNodePtr cur, SPOSetT<T>& sposet, const std::string& prefix)
     {
-      if (ind == elec_count)
-        ind = 0;
-      target.R[ind][0] = (to_pos[0] - from_pos[0]) * Delta * icount + from_pos[0];
-      target.R[ind][1] = (to_pos[1] - from_pos[1]) * Delta * icount + from_pos[1];
-      target.R[ind][2] = (to_pos[2] - from_pos[2]) * Delta * icount + from_pos[2];
-      target.makeMove(ind, zero_pos);
-      sposet.evaluateVGL(target, ind, SPO_v, SPO_g, SPO_l);
-      std::ostringstream o;
-      o << "x_y_z  " << std::fixed << std::setprecision(7) << target.R[ind][0] << " " << target.R[ind][1] << " "
-        << target.R[ind][2];
-      output_v << o.str() << " : " << std::scientific << std::setprecision(12);
-      output_g << o.str() << " : " << std::scientific << std::setprecision(12);
-      output_l << o.str() << " : " << std::scientific << std::setprecision(12);
-      for (int iorb = 0; iorb < OrbitalSize; iorb++)
-      {
-        SPO_v_avg[iorb] += myfabs(SPO_v[iorb]);
-        SPO_g_avg[iorb] += myfabs(SPO_g[iorb]);
-        SPO_l_avg[iorb] += myfabs(SPO_l[iorb]);
-        output_v << SPO_v[iorb] << "  ";
-        output_g << SPO_g[iorb][0] << "  " << SPO_g[iorb][1] << "  " << SPO_g[iorb][2] << "  ";
-        output_l << SPO_l[iorb] << "  ";
-      }
-      output_v << std::endl;
-      output_g << std::endl;
-      output_l << std::endl;
+        // The scan moves particles of the target, so it needs at least one.
+        const int elec_count = target.R.size();
+        if (elec_count <= 0) {
+            app_log() << "    Empty target; scan skipped." << std::endl;
+            return;
+        }
+
+        std::ofstream output_v(prefix + "_v.dat");
+        std::ofstream output_g(prefix + "_g.dat");
+        std::ofstream output_l(prefix + "_l.dat");
+        std::ofstream output_report(prefix + "_report.dat");
+
+        int nknots(2);
+        int from_atom(-1);
+        int to_atom(-1);
+        TinyVector<double, OHMMS_DIM> from_pos(0.0, 0.0, 0.0);
+        TinyVector<double, OHMMS_DIM> to_pos(0.0, 0.0, 0.0);
+
+        OhmmsAttributeSet aAttrib;
+        aAttrib.add(nknots, "nknots");
+        aAttrib.add(from_atom, "from_atom");
+        aAttrib.add(to_atom, "to_atom");
+        aAttrib.add(from_pos, "from_pos");
+        aAttrib.add(to_pos, "to_pos");
+        aAttrib.put(cur);
+
+        // a segment needs at least its two end points
+        if (nknots < 2)
+            nknots = 2;
+        // a valid atom index overrides the corresponding explicit position
+        if (ions) {
+            if (from_atom >= 0 && from_atom < ions->R.size())
+                from_pos = ions->R[from_atom];
+            if (to_atom >= 0 && to_atom < ions->R.size())
+                to_pos = ions->R[to_atom];
+        }
+
+        // per-orbital work buffers and accumulators for the report
+        ValueVector SPO_v, SPO_l, SPO_v_avg, SPO_l_avg;
+        GradVector SPO_g, SPO_g_avg;
+        int OrbitalSize(sposet.size());
+        SPO_v.resize(OrbitalSize);
+        SPO_g.resize(OrbitalSize);
+        SPO_l.resize(OrbitalSize);
+        SPO_v_avg.resize(OrbitalSize);
+        SPO_g_avg.resize(OrbitalSize);
+        SPO_l_avg.resize(OrbitalSize);
+        SPO_v_avg = 0.0;
+        SPO_g_avg = 0.0;
+        SPO_l_avg = 0.0;
+        double Delta = 1.0 / (nknots - 1);
+        auto R_saved = target.R;
+        typename ParticleSetT<T>::SingleParticlePos zero_pos(0.0, 0.0, 0.0);
+        for (int icount = 0, ind = 0; icount < nknots; icount++, ind++) {
+            if (ind == elec_count)
+                ind = 0; // wrap around: particles are reused cyclically
+            target.R[ind][0] =
+                (to_pos[0] - from_pos[0]) * Delta * icount + from_pos[0];
+            target.R[ind][1] =
+                (to_pos[1] - from_pos[1]) * Delta * icount + from_pos[1];
+            target.R[ind][2] =
+                (to_pos[2] - from_pos[2]) * Delta * icount + from_pos[2];
+            target.makeMove(ind, zero_pos);
+            sposet.evaluateVGL(target, ind, SPO_v, SPO_g, SPO_l);
+            std::ostringstream o;
+            o << "x_y_z  " << std::fixed << std::setprecision(7)
+              << target.R[ind][0] << " " << target.R[ind][1] << " "
+              << target.R[ind][2];
+            output_v << o.str() << " : " << std::scientific
+                     << std::setprecision(12);
+            output_g << o.str() << " : " << std::scientific
+                     << std::setprecision(12);
+            output_l << o.str() << " : " << std::scientific
+                     << std::setprecision(12);
+            for (int iorb = 0; iorb < OrbitalSize; iorb++) {
+                SPO_v_avg[iorb] += myfabs(SPO_v[iorb]);
+                SPO_g_avg[iorb] += myfabs(SPO_g[iorb]);
+                SPO_l_avg[iorb] += myfabs(SPO_l[iorb]);
+                output_v << SPO_v[iorb] << "  ";
+                output_g << SPO_g[iorb][0] << "  " << SPO_g[iorb][1] << "  "
+                         << SPO_g[iorb][2] << "  ";
+                output_l << SPO_l[iorb] << "  ";
+            }
+            output_v << std::endl;
+            output_g << std::endl;
+            output_l << std::endl;
+        }
+        // restore the target (files are closed by the stream destructors)
+        target.R = R_saved;
+        target.update();
+        OutputReportMaker<T>{SPO_v_avg, SPO_l_avg, SPO_g_avg, nknots}(
+            output_report);
     }
-    // restore the whole target.
-    target.R = R_saved;
-    target.update();
-#ifdef QMC_COMPLEX
-    output_report << "#   Report: Orb   Value_avg I/R  Gradients_avg  Laplacian_avg" << std::endl;
-#else
-    output_report << "#   Report: Orb   Value_avg   Gradients_avg   Laplacian_avg" << std::endl;
-#endif
-    for (int iorb = 0; iorb < OrbitalSize; iorb++)
-      output_report << "\t" << iorb << "    " << std::scientific
-                    << SPO_v_avg[iorb] * static_cast<RealType>(1.0 / nknots) << "   "
-#ifdef QMC_COMPLEX
-                    << SPO_v_avg[iorb].imag() / SPO_v_avg[iorb].real() << "   "
-#endif
-                    << SPO_g_avg[iorb][0] * static_cast<RealType>(1.0 / nknots) << "   "
-                    << SPO_g_avg[iorb][1] * static_cast<RealType>(1.0 / nknots) << "   "
-                    << SPO_g_avg[iorb][2] * static_cast<RealType>(1.0 / nknots) << "   "
-                    << SPO_l_avg[iorb] * static_cast<RealType>(1.0 / nknots) << std::fixed << std::endl;
-    output_v.close();
-    output_g.close();
-    output_l.close();
-    output_report.close();
-  }
 };
 } // namespace qmcplusplus
 
diff --git a/src/QMCWaveFunctions/SPOSetT.cpp b/src/QMCWaveFunctions/SPOSetT.cpp
index c20bda6513..2e6521e115 100644
--- a/src/QMCWaveFunctions/SPOSetT.cpp
+++ b/src/QMCWaveFunctions/SPOSetT.cpp
@@ -1,20 +1,25 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
-//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
-//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
-//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
-//                    Ying Wai Li, yingwaili@ornl.gov, Oak Ridge National Laboratory
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
-//                    William F. Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at
+//                    Urbana-Champaign;
+//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore
+//                    National Laboratory; Raymond Clay III,
+//                    j.k.rofling@gmail.com, Lawrence Livermore National
+//                    Laboratory; Jeremy McMinnis, jmcminis@gmail.com, University
+//                    of Illinois at Urbana-Champaign; Jaron T. Krogel,
+//                    krogeljt@ornl.gov, Oak Ridge National Laboratory; Jeongnim
+//                    Kim, jeongnim.kim@gmail.com, University of Illinois at
+//                    Urbana-Champaign; Ying Wai Li, yingwaili@ornl.gov, Oak
+//                    Ridge National Laboratory; Mark A. Berrill,
+//                    berrillma@ornl.gov, Oak Ridge National Laboratory; William
+//                    F. Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "SPOSetT.h"
@@ -24,409 +29,382 @@
 namespace qmcplusplus
 {
 
-template<class T>
-SPOSetT<T>::SPOSetT(const std::string& my_name) : my_name_(my_name), OrbitalSetSize(0)
-{}
-
-template<class T>
-void SPOSetT<T>::extractOptimizableObjectRefs(UniqueOptObjRefs&)
+template <class T>
+SPOSetT<T>::SPOSetT(const std::string& my_name) :
+    my_name_(my_name),
+    OrbitalSetSize(0)
 {
-  if (isOptimizable())
-    throw std::logic_error("Bug!! " + getClassName() +
-                           "::extractOptimizableObjectRefs "
-                           "must be overloaded when the SPOSet is optimizable.");
 }
 
-template<class T>
-void SPOSetT<T>::checkOutVariables(const opt_variables_type& active)
+template <class T>
+void
+SPOSetT<T>::extractOptimizableObjectRefs(UniqueOptObjRefsT<T>&)
 {
-  if (isOptimizable())
-    throw std::logic_error("Bug!! " + getClassName() +
-                           "::checkOutVariables "
-                           "must be overloaded when the SPOSet is optimizable.");
+    if (isOptimizable())
+        throw std::logic_error("Bug!! " + getClassName() +
+            "::extractOptimizableObjectRefs "
+            "must be overloaded when the SPOSet is optimizable.");
 }
 
-template<class T>
-void SPOSetT<T>::evaluateDetRatios(const VirtualParticleSet& VP,
-                                   ValueVector& psi,
-                                   const ValueVector& psiinv,
-                                   std::vector<T>& ratios)
+template <class T>
+void
+SPOSetT<T>::checkOutVariables(const OptVariablesType<T>& active)
 {
-  assert(psi.size() == psiinv.size());
-  for (int iat = 0; iat < VP.getTotalNum(); ++iat)
-  {
-    evaluateValue(VP, iat, psi);
-    ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size());
-  }
+    if (isOptimizable())
+        throw std::logic_error("Bug!! " + getClassName() +
+            "::checkOutVariables "
+            "must be overloaded when the SPOSet is optimizable.");
 }
 
-
-template<class T>
-void SPOSetT<T>::mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                      const RefVectorWithLeader<const VirtualParticleSet>& vp_list,
-                                      const RefVector<ValueVector>& psi_list,
-                                      const std::vector<const T*>& invRow_ptr_list,
-                                      std::vector<std::vector<T>>& ratios_list) const
+template <class T>
+void
+SPOSetT<T>::evaluateDetRatios(const VirtualParticleSetT<T>& VP,
+    ValueVector& psi, const ValueVector& psiinv, std::vector<T>& ratios)
 {
-  assert(this == &spo_list.getLeader());
-  for (int iw = 0; iw < spo_list.size(); iw++)
-  {
-    Vector<T> invRow(const_cast<T*>(invRow_ptr_list[iw]), psi_list[iw].get().size());
-    spo_list[iw].evaluateDetRatios(vp_list[iw], psi_list[iw], invRow, ratios_list[iw]);
-  }
+    assert(psi.size() == psiinv.size());
+    for (int iat = 0; iat < VP.getTotalNum(); ++iat) {
+        evaluateValue(VP, iat, psi);
+        ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size());
+    }
 }
 
-template<class T>
-void SPOSetT<T>::evaluateVGL_spin(const ParticleSet& P,
-                                  int iat,
-                                  ValueVector& psi,
-                                  GradVector& dpsi,
-                                  ValueVector& d2psi,
-                                  ValueVector& dspin)
+template <class T>
+void
+SPOSetT<T>::mw_evaluateDetRatios(
+    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+    const RefVectorWithLeader<const VirtualParticleSetT<T>>& vp_list,
+    const RefVector<ValueVector>& psi_list,
+    const std::vector<const T*>& invRow_ptr_list,
+    std::vector<std::vector<T>>& ratios_list) const
 {
-  throw std::runtime_error("Need specialization of SPOSet::evaluateVGL_spin");
+    assert(this == &spo_list.getLeader());
+    for (int iw = 0; iw < spo_list.size(); iw++) {
+        Vector<T> invRow(
+            const_cast<T*>(invRow_ptr_list[iw]), psi_list[iw].get().size());
+        spo_list[iw].evaluateDetRatios(
+            vp_list[iw], psi_list[iw], invRow, ratios_list[iw]);
+    }
 }
 
-template<class T>
-void SPOSetT<T>::mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                const RefVectorWithLeader<ParticleSet>& P_list,
-                                int iat,
-                                const RefVector<ValueVector>& psi_v_list,
-                                const RefVector<GradVector>& dpsi_v_list,
-                                const RefVector<ValueVector>& d2psi_v_list) const
+template <class T>
+void
+SPOSetT<T>::evaluateVGL_spin(const ParticleSetT<T>& P, int iat,
+    ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, ValueVector& dspin)
 {
-  assert(this == &spo_list.getLeader());
-  for (int iw = 0; iw < spo_list.size(); iw++)
-    spo_list[iw].evaluateVGL(P_list[iw], iat, psi_v_list[iw], dpsi_v_list[iw], d2psi_v_list[iw]);
+    throw std::runtime_error("Need specialization of SPOSet::evaluateVGL_spin");
 }
 
-template<class T>
-void SPOSetT<T>::mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                  const RefVectorWithLeader<ParticleSet>& P_list,
-                                  int iat,
-                                  const RefVector<ValueVector>& psi_v_list) const
+template <class T>
+void
+SPOSetT<T>::mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
+    const RefVector<ValueVector>& psi_v_list,
+    const RefVector<GradVector>& dpsi_v_list,
+    const RefVector<ValueVector>& d2psi_v_list) const
 {
-  assert(this == &spo_list.getLeader());
-  for (int iw = 0; iw < spo_list.size(); iw++)
-    spo_list[iw].evaluateValue(P_list[iw], iat, psi_v_list[iw]);
+    assert(this == &spo_list.getLeader());
+    for (int iw = 0; iw < spo_list.size(); iw++)
+        spo_list[iw].evaluateVGL(
+            P_list[iw], iat, psi_v_list[iw], dpsi_v_list[iw], d2psi_v_list[iw]);
 }
 
-template<class T>
-void SPOSetT<T>::mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                        const RefVectorWithLeader<ParticleSet>& P_list,
-                                        int iat,
-                                        const RefVector<ValueVector>& psi_v_list,
-                                        const RefVector<GradVector>& dpsi_v_list,
-                                        const RefVector<ValueVector>& d2psi_v_list,
-                                        OffloadMatrix<ComplexType>& mw_dspin) const
+template <class T>
+void
+SPOSetT<T>::mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
+    const RefVector<ValueVector>& psi_v_list) const
 {
-  throw std::runtime_error(getClassName() + "::mw_evaluateVGLWithSpin() is not supported. \n");
+    assert(this == &spo_list.getLeader());
+    for (int iw = 0; iw < spo_list.size(); iw++)
+        spo_list[iw].evaluateValue(P_list[iw], iat, psi_v_list[iw]);
 }
 
+template <class T>
+void
+SPOSetT<T>::mw_evaluateVGLWithSpin(
+    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
+    const RefVector<ValueVector>& psi_v_list,
+    const RefVector<GradVector>& dpsi_v_list,
+    const RefVector<ValueVector>& d2psi_v_list,
+    OffloadMatrix<ComplexType>& mw_dspin) const
+{
+    throw std::runtime_error(
+        getClassName() + "::mw_evaluateVGLWithSpin() is not supported. \n");
+}
 
-template<class T>
-void SPOSetT<T>::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                                const RefVectorWithLeader<ParticleSet>& P_list,
-                                                int iat,
-                                                const std::vector<const T*>& invRow_ptr_list,
-                                                OffloadMWVGLArray& phi_vgl_v,
-                                                std::vector<T>& ratios,
-                                                std::vector<GradType>& grads) const
+template <class T>
+void
+SPOSetT<T>::mw_evaluateVGLandDetRatioGrads(
+    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
+    const std::vector<const T*>& invRow_ptr_list, OffloadMWVGLArray& phi_vgl_v,
+    std::vector<T>& ratios, std::vector<GradType>& grads) const
 {
-  assert(this == &spo_list.getLeader());
-  assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL);
-  assert(phi_vgl_v.size(1) == spo_list.size());
-  const size_t nw             = spo_list.size();
-  const size_t norb_requested = phi_vgl_v.size(2);
-  GradVector dphi_v(norb_requested);
-  for (int iw = 0; iw < nw; iw++)
-  {
-    ValueVector phi_v(phi_vgl_v.data_at(0, iw, 0), norb_requested);
-    ValueVector d2phi_v(phi_vgl_v.data_at(4, iw, 0), norb_requested);
-    spo_list[iw].evaluateVGL(P_list[iw], iat, phi_v, dphi_v, d2phi_v);
-
-    ratios[iw] = simd::dot(invRow_ptr_list[iw], phi_v.data(), norb_requested);
-    grads[iw]  = simd::dot(invRow_ptr_list[iw], dphi_v.data(), norb_requested) / ratios[iw];
-
-    // transpose the array of gradients to SoA in phi_vgl_v
-    for (size_t idim = 0; idim < DIM; idim++)
-    {
-      T* phi_g = phi_vgl_v.data_at(idim + 1, iw, 0);
-      for (size_t iorb = 0; iorb < norb_requested; iorb++)
-        phi_g[iorb] = dphi_v[iorb][idim];
+    assert(this == &spo_list.getLeader());
+    assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL);
+    assert(phi_vgl_v.size(1) == spo_list.size());
+    const size_t nw = spo_list.size();
+    const size_t norb_requested = phi_vgl_v.size(2);
+    GradVector dphi_v(norb_requested);
+    for (int iw = 0; iw < nw; iw++) {
+        ValueVector phi_v(phi_vgl_v.data_at(0, iw, 0), norb_requested);
+        ValueVector d2phi_v(phi_vgl_v.data_at(4, iw, 0), norb_requested);
+        spo_list[iw].evaluateVGL(P_list[iw], iat, phi_v, dphi_v, d2phi_v);
+
+        ratios[iw] =
+            simd::dot(invRow_ptr_list[iw], phi_v.data(), norb_requested);
+        grads[iw] =
+            simd::dot(invRow_ptr_list[iw], dphi_v.data(), norb_requested) /
+            ratios[iw];
+
+        // transpose the array of gradients to SoA in phi_vgl_v
+        for (size_t idim = 0; idim < DIM; idim++) {
+            T* phi_g = phi_vgl_v.data_at(idim + 1, iw, 0);
+            for (size_t iorb = 0; iorb < norb_requested; iorb++)
+                phi_g[iorb] = dphi_v[iorb][idim];
+        }
     }
-  }
-  phi_vgl_v.updateTo();
+    phi_vgl_v.updateTo();
 }
 
-template<class T>
-void SPOSetT<T>::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                                        const RefVectorWithLeader<ParticleSet>& P_list,
-                                                        int iat,
-                                                        const std::vector<const T*>& invRow_ptr_list,
-                                                        OffloadMWVGLArray& phi_vgl_v,
-                                                        std::vector<T>& ratios,
-                                                        std::vector<GradType>& grads,
-                                                        std::vector<T>& spingrads) const
+template <class T>
+void
+SPOSetT<T>::mw_evaluateVGLandDetRatioGradsWithSpin(
+    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
+    const std::vector<const T*>& invRow_ptr_list, OffloadMWVGLArray& phi_vgl_v,
+    std::vector<T>& ratios, std::vector<GradType>& grads,
+    std::vector<T>& spingrads) const
 {
-  throw std::runtime_error("Need specialization of " + getClassName() +
-                           "::mw_evaluateVGLandDetRatioGradsWithSpin(). \n");
+    throw std::runtime_error("Need specialization of " + getClassName() +
+        "::mw_evaluateVGLandDetRatioGradsWithSpin(). \n");
 }
 
-template<class T>
-void SPOSetT<T>::evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& grad_grad_grad_logdet)
+template <class T>
+void
+SPOSetT<T>::evaluateThirdDeriv(const ParticleSetT<T>& P, int first, int last,
+    GGGMatrix& grad_grad_grad_logdet)
 {
-  throw std::runtime_error("Need specialization of SPOSet::evaluateThirdDeriv(). \n");
+    throw std::runtime_error(
+        "Need specialization of SPOSet::evaluateThirdDeriv(). \n");
 }
 
-template<class T>
-void SPOSetT<T>::evaluate_notranspose_spin(const ParticleSet& P,
-                                           int first,
-                                           int last,
-                                           ValueMatrix& logdet,
-                                           GradMatrix& dlogdet,
-                                           ValueMatrix& d2logdet,
-                                           ValueMatrix& dspinlogdet)
+template <class T>
+void
+SPOSetT<T>::evaluate_notranspose_spin(const ParticleSetT<T>& P, int first,
+    int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet,
+    ValueMatrix& dspinlogdet)
 {
-  throw std::runtime_error("Need specialization of " + getClassName() +
-                           "::evaluate_notranspose_spin(P,iat,psi,dpsi,d2logdet, dspin_logdet) (vector quantities)\n");
+    throw std::runtime_error("Need specialization of " + getClassName() +
+        "::evaluate_notranspose_spin(P,iat,psi,dpsi,d2logdet, dspin_logdet) "
+        "(vector quantities)\n");
 }
 
-template<class T>
-void SPOSetT<T>::mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                         const RefVectorWithLeader<ParticleSet>& P_list,
-                                         int first,
-                                         int last,
-                                         const RefVector<ValueMatrix>& logdet_list,
-                                         const RefVector<GradMatrix>& dlogdet_list,
-                                         const RefVector<ValueMatrix>& d2logdet_list) const
+template <class T>
+void
+SPOSetT<T>::mw_evaluate_notranspose(
+    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int first, int last,
+    const RefVector<ValueMatrix>& logdet_list,
+    const RefVector<GradMatrix>& dlogdet_list,
+    const RefVector<ValueMatrix>& d2logdet_list) const
 {
-  assert(this == &spo_list.getLeader());
-  for (int iw = 0; iw < spo_list.size(); iw++)
-    spo_list[iw].evaluate_notranspose(P_list[iw], first, last, logdet_list[iw], dlogdet_list[iw], d2logdet_list[iw]);
+    assert(this == &spo_list.getLeader());
+    for (int iw = 0; iw < spo_list.size(); iw++)
+        spo_list[iw].evaluate_notranspose(P_list[iw], first, last,
+            logdet_list[iw], dlogdet_list[iw], d2logdet_list[iw]);
 }
 
-template<class T>
-void SPOSetT<T>::evaluate_notranspose(const ParticleSet& P,
-                                      int first,
-                                      int last,
-                                      ValueMatrix& logdet,
-                                      GradMatrix& dlogdet,
-                                      HessMatrix& grad_grad_logdet)
+template <class T>
+void
+SPOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+    ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet)
 {
-  throw std::runtime_error("Need specialization of SPOSet::evaluate_notranspose() for grad_grad_logdet. \n");
+    throw std::runtime_error(
+        "Need specialization of SPOSet::evaluate_notranspose() for "
+        "grad_grad_logdet. \n");
 }
 
-template<class T>
-void SPOSetT<T>::evaluate_notranspose(const ParticleSet& P,
-                                      int first,
-                                      int last,
-                                      ValueMatrix& logdet,
-                                      GradMatrix& dlogdet,
-                                      HessMatrix& grad_grad_logdet,
-                                      GGGMatrix& grad_grad_grad_logdet)
+template <class T>
+void
+SPOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+    ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet,
+    GGGMatrix& grad_grad_grad_logdet)
 {
-  throw std::runtime_error("Need specialization of SPOSet::evaluate_notranspose() for grad_grad_grad_logdet. \n");
+    throw std::runtime_error(
+        "Need specialization of SPOSet::evaluate_notranspose() for "
+        "grad_grad_grad_logdet. \n");
 }
 
-template<class T>
-std::unique_ptr<SPOSetT<T>> SPOSetT<T>::makeClone() const
+template <class T>
+std::unique_ptr<SPOSetT<T>>
+SPOSetT<T>::makeClone() const
 {
-  throw std::runtime_error("Missing  SPOSet::makeClone for " + getClassName());
+    throw std::runtime_error(
+        "Missing  SPOSet::makeClone for " + getClassName());
 }
 
-template<class T>
-void SPOSetT<T>::basic_report(const std::string& pad) const
+template <class T>
+void
+SPOSetT<T>::basic_report(const std::string& pad) const
 {
-  app_log() << pad << "size = " << size() << std::endl;
-  app_log() << pad << "state info:" << std::endl;
-  //states.report(pad+"  ");
-  app_log().flush();
+    app_log() << pad << "size = " << size() << std::endl;
+    app_log() << pad << "state info:" << std::endl;
+    // states.report(pad+"  ");
+    app_log().flush();
 }
 
-template<class T>
-void SPOSetT<T>::evaluateVGH(const ParticleSet& P,
-                             int iat,
-                             ValueVector& psi,
-                             GradVector& dpsi,
-                             HessVector& grad_grad_psi)
+template <class T>
+void
+SPOSetT<T>::evaluateVGH(const ParticleSetT<T>& P, int iat, ValueVector& psi,
+    GradVector& dpsi, HessVector& grad_grad_psi)
 {
-  throw std::runtime_error("Need specialization of " + getClassName() +
-                           "::evaluate(P,iat,psi,dpsi,dhpsi) (vector quantities)\n");
+    throw std::runtime_error("Need specialization of " + getClassName() +
+        "::evaluate(P,iat,psi,dpsi,dhpsi) (vector quantities)\n");
 }
 
-template<class T>
-void SPOSetT<T>::evaluateVGHGH(const ParticleSet& P,
-                               int iat,
-                               ValueVector& psi,
-                               GradVector& dpsi,
-                               HessVector& grad_grad_psi,
-                               GGGVector& grad_grad_grad_psi)
+template <class T>
+void
+SPOSetT<T>::evaluateVGHGH(const ParticleSetT<T>& P, int iat, ValueVector& psi,
+    GradVector& dpsi, HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi)
 {
-  throw std::runtime_error("Need specialization of " + getClassName() +
-                           "::evaluate(P,iat,psi,dpsi,dhpsi,dghpsi) (vector quantities)\n");
+    throw std::runtime_error("Need specialization of " + getClassName() +
+        "::evaluate(P,iat,psi,dpsi,dhpsi,dghpsi) (vector quantities)\n");
 }
 
-template<class T>
-void SPOSetT<T>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy)
+template <class T>
+void
+SPOSetT<T>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy)
 {
-  if (isRotationSupported())
-    throw std::logic_error("Bug!! " + getClassName() +
-                           "::applyRotation "
-                           "must be overloaded when the SPOSet supports rotation.");
+    if (isRotationSupported())
+        throw std::logic_error("Bug!! " + getClassName() +
+            "::applyRotation "
+            "must be overloaded when the SPOSet supports rotation.");
 }
 
-template<class T>
-void SPOSetT<T>::evaluateDerivatives(ParticleSet& P,
-                                     const opt_variables_type& optvars,
-                                     Vector<T>& dlogpsi,
-                                     Vector<T>& dhpsioverpsi,
-                                     const int& FirstIndex,
-                                     const int& LastIndex)
+template <class T>
+void
+SPOSetT<T>::evaluateDerivatives(ParticleSetT<T>& P,
+    const OptVariablesType<T>& optvars, Vector<T>& dlogpsi,
+    Vector<T>& dhpsioverpsi, const int& FirstIndex, const int& LastIndex)
 {
-  if (isOptimizable())
-    throw std::logic_error("Bug!! " + getClassName() +
-                           "::evaluateDerivatives "
-                           "must be overloaded when the SPOSet is optimizable.");
+    if (isOptimizable())
+        throw std::logic_error("Bug!! " + getClassName() +
+            "::evaluateDerivatives "
+            "must be overloaded when the SPOSet is optimizable.");
 }
 
-template<class T>
-void SPOSetT<T>::evaluateDerivativesWF(ParticleSet& P,
-                                       const opt_variables_type& optvars,
-                                       Vector<T>& dlogpsi,
-                                       int FirstIndex,
-                                       int LastIndex)
+template <class T>
+void
+SPOSetT<T>::evaluateDerivativesWF(ParticleSetT<T>& P,
+    const OptVariablesType<T>& optvars, Vector<T>& dlogpsi, int FirstIndex,
+    int LastIndex)
 {
-  if (isOptimizable())
-    throw std::logic_error("Bug!! " + getClassName() +
-                           "::evaluateDerivativesWF "
-                           "must be overloaded when the SPOSet is optimizable.");
+    if (isOptimizable())
+        throw std::logic_error("Bug!! " + getClassName() +
+            "::evaluateDerivativesWF "
+            "must be overloaded when the SPOSet is optimizable.");
 }
 
-template<class T>
-void SPOSetT<T>::evaluateDerivRatios(const VirtualParticleSet& VP,
-                                     const opt_variables_type& optvars,
-                                     ValueVector& psi,
-                                     const ValueVector& psiinv,
-                                     std::vector<T>& ratios,
-                                     Matrix<T>& dratios,
-                                     int FirstIndex,
-                                     int LastIndex)
+template <class T>
+void
+SPOSetT<T>::evaluateDerivRatios(const VirtualParticleSetT<T>& VP,
+    const OptVariablesType<T>& optvars, ValueVector& psi,
+    const ValueVector& psiinv, std::vector<T>& ratios, Matrix<T>& dratios,
+    int FirstIndex, int LastIndex)
 {
-  // Match the fallback in WaveFunctionComponent that evaluates just the ratios
-  evaluateDetRatios(VP, psi, psiinv, ratios);
-
-  if (isOptimizable())
-    throw std::logic_error("Bug!! " + getClassName() +
-                           "::evaluateDerivRatios "
-                           "must be overloaded when the SPOSet is optimizable.");
+    // Match the fallback in WaveFunctionComponent that evaluates just the
+    // ratios
+    evaluateDetRatios(VP, psi, psiinv, ratios);
+
+    if (isOptimizable())
+        throw std::logic_error("Bug!! " + getClassName() +
+            "::evaluateDerivRatios "
+            "must be overloaded when the SPOSet is optimizable.");
 }
 
-template<class T>
-void SPOSetT<T>::evaluateDerivatives(ParticleSet& P,
-                                     const opt_variables_type& optvars,
-                                     Vector<T>& dlogpsi,
-                                     Vector<T>& dhpsioverpsi,
-                                     const T& psiCurrent,
-                                     const std::vector<T>& Coeff,
-                                     const std::vector<size_t>& C2node_up,
-                                     const std::vector<size_t>& C2node_dn,
-                                     const ValueVector& detValues_up,
-                                     const ValueVector& detValues_dn,
-                                     const GradMatrix& grads_up,
-                                     const GradMatrix& grads_dn,
-                                     const ValueMatrix& lapls_up,
-                                     const ValueMatrix& lapls_dn,
-                                     const ValueMatrix& M_up,
-                                     const ValueMatrix& M_dn,
-                                     const ValueMatrix& Minv_up,
-                                     const ValueMatrix& Minv_dn,
-                                     const GradMatrix& B_grad,
-                                     const ValueMatrix& B_lapl,
-                                     const std::vector<int>& detData_up,
-                                     const size_t N1,
-                                     const size_t N2,
-                                     const size_t NP1,
-                                     const size_t NP2,
-                                     const std::vector<std::vector<int>>& lookup_tbl)
+template <class T>
+void
+SPOSetT<T>::evaluateDerivatives(ParticleSetT<T>& P,
+    const OptVariablesType<T>& optvars, Vector<T>& dlogpsi,
+    Vector<T>& dhpsioverpsi, const T& psiCurrent, const std::vector<T>& Coeff,
+    const std::vector<size_t>& C2node_up, const std::vector<size_t>& C2node_dn,
+    const ValueVector& detValues_up, const ValueVector& detValues_dn,
+    const GradMatrix& grads_up, const GradMatrix& grads_dn,
+    const ValueMatrix& lapls_up, const ValueMatrix& lapls_dn,
+    const ValueMatrix& M_up, const ValueMatrix& M_dn,
+    const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn,
+    const GradMatrix& B_grad, const ValueMatrix& B_lapl,
+    const std::vector<int>& detData_up, const size_t N1, const size_t N2,
+    const size_t NP1, const size_t NP2,
+    const std::vector<std::vector<int>>& lookup_tbl)
 {
-  if (isOptimizable())
-    throw std::logic_error("Bug!! " + getClassName() +
-                           "::evaluateDerivatives "
-                           "must be overloaded when the SPOSet is optimizable.");
+    if (isOptimizable())
+        throw std::logic_error("Bug!! " + getClassName() +
+            "::evaluateDerivatives "
+            "must be overloaded when the SPOSet is optimizable.");
 }
 
-template<class T>
-void SPOSetT<T>::evaluateDerivativesWF(ParticleSet& P,
-                                       const opt_variables_type& optvars,
-                                       Vector<ValueType>& dlogpsi,
-                                       const ValueType& psiCurrent,
-                                       const std::vector<T>& Coeff,
-                                       const std::vector<size_t>& C2node_up,
-                                       const std::vector<size_t>& C2node_dn,
-                                       const ValueVector& detValues_up,
-                                       const ValueVector& detValues_dn,
-                                       const ValueMatrix& M_up,
-                                       const ValueMatrix& M_dn,
-                                       const ValueMatrix& Minv_up,
-                                       const ValueMatrix& Minv_dn,
-                                       const std::vector<int>& detData_up,
-                                       const std::vector<std::vector<int>>& lookup_tbl)
+template <class T>
+void
+SPOSetT<T>::evaluateDerivativesWF(ParticleSetT<T>& P,
+    const OptVariablesType<T>& optvars, Vector<ValueType>& dlogpsi,
+    const ValueType& psiCurrent, const std::vector<T>& Coeff,
+    const std::vector<size_t>& C2node_up, const std::vector<size_t>& C2node_dn,
+    const ValueVector& detValues_up, const ValueVector& detValues_dn,
+    const ValueMatrix& M_up, const ValueMatrix& M_dn,
+    const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn,
+    const std::vector<int>& detData_up,
+    const std::vector<std::vector<int>>& lookup_tbl)
 {
-  if (isOptimizable())
-    throw std::logic_error("Bug!! " + getClassName() +
-                           "::evaluateDerivativesWF "
-                           "must be overloaded when the SPOSet is optimizable.");
+    if (isOptimizable())
+        throw std::logic_error("Bug!! " + getClassName() +
+            "::evaluateDerivativesWF "
+            "must be overloaded when the SPOSet is optimizable.");
 }
 
-template<class T>
-void SPOSetT<T>::evaluateGradSource(const ParticleSet& P,
-                                    int first,
-                                    int last,
-                                    const ParticleSet& source,
-                                    int iat_src,
-                                    GradMatrix& gradphi)
+template <class T>
+void
+SPOSetT<T>::evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
+    const ParticleSetT<T>& source, int iat_src, GradMatrix& gradphi)
 {
-  if (hasIonDerivs())
-    throw std::logic_error("Bug!! " + getClassName() +
-                           "::evaluateGradSource "
-                           "must be overloaded when the SPOSet has ion derivatives.");
+    if (hasIonDerivs())
+        throw std::logic_error("Bug!! " + getClassName() +
+            "::evaluateGradSource "
+            "must be overloaded when the SPOSet has ion derivatives.");
 }
 
-template<class T>
-void SPOSetT<T>::evaluateGradSource(const ParticleSet& P,
-                                    int first,
-                                    int last,
-                                    const ParticleSet& source,
-                                    int iat_src,
-                                    GradMatrix& grad_phi,
-                                    HessMatrix& grad_grad_phi,
-                                    GradMatrix& grad_lapl_phi)
+template <class T>
+void
+SPOSetT<T>::evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
+    const ParticleSetT<T>& source, int iat_src, GradMatrix& grad_phi,
+    HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi)
 {
-  if (hasIonDerivs())
-    throw std::logic_error("Bug!! " + getClassName() +
-                           "::evaluateGradSource "
-                           "must be overloaded when the SPOSet has ion derivatives.");
+    if (hasIonDerivs())
+        throw std::logic_error("Bug!! " + getClassName() +
+            "::evaluateGradSource "
+            "must be overloaded when the SPOSet has ion derivatives.");
 }
 
-template<class T>
-void SPOSetT<T>::evaluateGradSourceRow(const ParticleSet& P,
-                                       int iel,
-                                       const ParticleSet& source,
-                                       int iat_src,
-                                       GradVector& gradphi)
+template <class T>
+void
+SPOSetT<T>::evaluateGradSourceRow(const ParticleSetT<T>& P, int iel,
+    const ParticleSetT<T>& source, int iat_src, GradVector& gradphi)
 {
-  if (hasIonDerivs())
-    throw std::logic_error("Bug!! " + getClassName() +
-                           "::evaluateGradSourceRow "
-                           "must be overloaded when the SPOSet has ion derivatives.");
+    if (hasIonDerivs())
+        throw std::logic_error("Bug!! " + getClassName() +
+            "::evaluateGradSourceRow "
+            "must be overloaded when the SPOSet has ion derivatives.");
 }
 
-template<class T>
-void SPOSetT<T>::evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi)
+template <class T>
+void
+SPOSetT<T>::evaluate_spin(
+    const ParticleSetT<T>& P, int iat, ValueVector& psi, ValueVector& dpsi)
 {
-  throw std::runtime_error("Need specialization of " + getClassName() +
-                           "::evaluate_spin(P,iat,psi,dpsi) (vector quantities)\n");
+    throw std::runtime_error("Need specialization of " + getClassName() +
+        "::evaluate_spin(P,iat,psi,dpsi) (vector quantities)\n");
 }
 
 // Class concrete types from ValueType
diff --git a/src/QMCWaveFunctions/SPOSetT.h b/src/QMCWaveFunctions/SPOSetT.h
index 6e12c3e929..f3fd993c5c 100644
--- a/src/QMCWaveFunctions/SPOSetT.h
+++ b/src/QMCWaveFunctions/SPOSetT.h
@@ -1,581 +1,647 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
-//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
-//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
-//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
-//                    Ying Wai Li, yingwaili@ornl.gov, Oak Ridge National Laboratory
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
-//                    William F. Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
+//                    Miguel Morales, moralessilva2@llnl.gov,
+//                      Lawrence Livermore National Laboratory
+//                    Raymond Clay III, j.k.rofling@gmail.com,
+//                      Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com,
+//                      University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com,
+//                      University of Illinois at Urbana-Champaign
+//                    Ying Wai Li, yingwaili@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    William F. Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
-
 #ifndef QMCPLUSPLUS_SPOSETT_H
 #define QMCPLUSPLUS_SPOSETT_H
 
+#include "DualAllocatorAliases.hpp"
+#include "OMPTarget/OffloadAlignedAllocators.hpp"
 #include "OhmmsPETE/OhmmsArray.h"
-#include "Particle/ParticleSet.h"
-#include "Particle/VirtualParticleSet.h"
+#include "OptimizableObjectT.h"
+#include "Particle/ParticleSetT.h"
+#include "Particle/VirtualParticleSetT.h"
 #include "QMCWaveFunctions/OrbitalSetTraits.h"
-#include "OptimizableObject.h"
-#include "OMPTarget/OffloadAlignedAllocators.hpp"
-#include "DualAllocatorAliases.hpp"
 
 namespace qmcplusplus
 {
 class ResourceCollection;
 
-template<class T>
+template <class T>
 class SPOSetT;
 namespace testing
 {
-opt_variables_type& getMyVars(SPOSetT<float>& spo);
-opt_variables_type& getMyVars(SPOSetT<double>& spo);
-opt_variables_type& getMyVars(SPOSetT<std::complex<float>>& spo);
-opt_variables_type& getMyVars(SPOSetT<std::complex<double>>& spo);
+OptVariablesType<float>&
+getMyVars(SPOSetT<float>& spo);
+OptVariablesType<double>&
+getMyVars(SPOSetT<double>& spo);
+OptVariablesType<std::complex<float>>&
+getMyVars(SPOSetT<std::complex<float>>& spo);
+OptVariablesType<std::complex<double>>&
+getMyVars(SPOSetT<std::complex<double>>& spo);
 } // namespace testing
 
-
 /** base class for Single-particle orbital sets
  *
  * SPOSet stands for S(ingle)P(article)O(rbital)Set which contains
- * a number of single-particle orbitals with capabilities of evaluating \f$ \psi_j({\bf r}_i)\f$
+ * a number of single-particle orbitals with capabilities of evaluating
+ * \f$ \psi_j({\bf r}_i)\f$
  */
-template<class T>
+template <class T>
 class SPOSetT : public QMCTraits
 {
 public:
-  using ValueVector       = typename OrbitalSetTraits<T>::ValueVector;
-  using ValueMatrix       = typename OrbitalSetTraits<T>::ValueMatrix;
-  using GradVector        = typename OrbitalSetTraits<T>::GradVector;
-  using GradMatrix        = typename OrbitalSetTraits<T>::GradMatrix;
-  using GradType          = TinyVector<T, DIM>;
-  using HessVector        = typename OrbitalSetTraits<T>::HessVector;
-  using HessMatrix        = typename OrbitalSetTraits<T>::HessMatrix;
-  using GGGVector         = typename OrbitalSetTraits<T>::GradHessVector;
-  using GGGMatrix         = typename OrbitalSetTraits<T>::GradHessMatrix;
-  using SPOMap            = std::map<std::string, const std::unique_ptr<const SPOSetT<T>>>;
-  using OffloadMWVGLArray = Array<T, 3, OffloadPinnedAllocator<T>>; // [VGL, walker, Orbs]
-  using OffloadMWVArray   = Array<T, 2, OffloadPinnedAllocator<T>>; // [walker, Orbs]
-  using PosType           = typename OrbitalSetTraits<T>::PosType;
-  using RealType          = typename OrbitalSetTraits<T>::RealType;
-  using ValueType         = typename OrbitalSetTraits<T>::ValueType;
-  using FullRealType      = typename OrbitalSetTraits<double>::RealType;
-  template<typename DT>
-  using OffloadMatrix = Matrix<DT, OffloadPinnedAllocator<DT>>;
-
-  /** constructor */
-  SPOSetT<T>(const std::string& my_name);
-
-  /** destructor
-   *
-   * Derived class destructor needs to pay extra attention to freeing memory shared among clones of SPOSet.
-   */
-  virtual ~SPOSetT<T>() = default;
-
-  /** return the size of the orbital set
-   * Ye: this needs to be replaced by getOrbitalSetSize();
-   */
-  inline int size() const { return OrbitalSetSize; }
-
-  /** print basic SPOSet information
-   */
-  void basic_report(const std::string& pad = "") const;
-
-  /** print SPOSet information
-   */
-  virtual void report(const std::string& pad = "") const { basic_report(pad); }
-
-
-  /** return the size of the orbitals
-   */
-  inline int getOrbitalSetSize() const { return OrbitalSetSize; }
-
-  /// Query if this SPOSet is optimizable
-  virtual bool isOptimizable() const { return false; }
-
-  /** extract underlying OptimizableObject references
-   * @param opt_obj_refs aggregated list of optimizable object references
-   */
-  virtual void extractOptimizableObjectRefs(UniqueOptObjRefs& opt_obj_refs);
-
-  /** check out variational optimizable variables
-   * @param active a super set of optimizable variables
-   */
-  virtual void checkOutVariables(const opt_variables_type& active);
-
-  /// Query if this SPOSet uses OpenMP offload
-  virtual bool isOMPoffload() const { return false; }
-
-  /** Query if this SPOSet has an explicit ion dependence. returns true if it does.
-  */
-  virtual bool hasIonDerivs() const { return false; }
-
-  /// check a few key parameters before putting the SPO into a determinant
-  virtual void checkObject() const {}
-
-  /// return true if this SPOSet can be wrappered by RotatedSPO
-  virtual bool isRotationSupported() const { return false; }
-  /// store parameters before getting destroyed by rotation.
-  virtual void storeParamsBeforeRotation() {}
-  /// apply rotation to all the orbitals
-  virtual void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy = false);
-
-  /// Parameter derivatives of the wavefunction and the Laplacian of the wavefunction
-  virtual void evaluateDerivatives(ParticleSet& P,
-                                   const opt_variables_type& optvars,
-                                   Vector<T>& dlogpsi,
-                                   Vector<T>& dhpsioverpsi,
-                                   const int& FirstIndex,
-                                   const int& LastIndex);
-
-  /// Parameter derivatives of the wavefunction
-  virtual void evaluateDerivativesWF(ParticleSet& P,
-                                     const opt_variables_type& optvars,
-                                     Vector<T>& dlogpsi,
-                                     int FirstIndex,
-                                     int LastIndex);
-
-  /** Evaluate the derivative of the optimized orbitals with respect to the parameters
-   *  this is used only for MSD, to be refined for better serving both single and multi SD
-   */
-  virtual void evaluateDerivatives(ParticleSet& P,
-                                   const opt_variables_type& optvars,
-                                   Vector<T>& dlogpsi,
-                                   Vector<T>& dhpsioverpsi,
-                                   const T& psiCurrent,
-                                   const std::vector<T>& Coeff,
-                                   const std::vector<size_t>& C2node_up,
-                                   const std::vector<size_t>& C2node_dn,
-                                   const ValueVector& detValues_up,
-                                   const ValueVector& detValues_dn,
-                                   const GradMatrix& grads_up,
-                                   const GradMatrix& grads_dn,
-                                   const ValueMatrix& lapls_up,
-                                   const ValueMatrix& lapls_dn,
-                                   const ValueMatrix& M_up,
-                                   const ValueMatrix& M_dn,
-                                   const ValueMatrix& Minv_up,
-                                   const ValueMatrix& Minv_dn,
-                                   const GradMatrix& B_grad,
-                                   const ValueMatrix& B_lapl,
-                                   const std::vector<int>& detData_up,
-                                   const size_t N1,
-                                   const size_t N2,
-                                   const size_t NP1,
-                                   const size_t NP2,
-                                   const std::vector<std::vector<int>>& lookup_tbl);
-
-  /** Evaluate the derivative of the optimized orbitals with respect to the parameters
-   *  this is used only for MSD, to be refined for better serving both single and multi SD
-   */
-  virtual void evaluateDerivativesWF(ParticleSet& P,
-                                     const opt_variables_type& optvars,
-                                     Vector<ValueType>& dlogpsi,
-                                     const ValueType& psiCurrent,
-                                     const std::vector<T>& Coeff,
-                                     const std::vector<size_t>& C2node_up,
-                                     const std::vector<size_t>& C2node_dn,
-                                     const ValueVector& detValues_up,
-                                     const ValueVector& detValues_dn,
-                                     const ValueMatrix& M_up,
-                                     const ValueMatrix& M_dn,
-                                     const ValueMatrix& Minv_up,
-                                     const ValueMatrix& Minv_dn,
-                                     const std::vector<int>& detData_up,
-                                     const std::vector<std::vector<int>>& lookup_tbl);
-
-  /** set the OrbitalSetSize
-   * @param norbs number of single-particle orbitals
-   * Ye: I prefer to remove this interface in the future. SPOSet builders need to handle the size correctly.
-   * It doesn't make sense allowing to set the value at any place in the code.
-   * @TODO make it purely virtual
-   */
-  virtual void setOrbitalSetSize(int norbs){};
-
-  /** evaluate the values of this single-particle orbital set
-   * @param P current ParticleSet
-   * @param iat active particle
-   * @param psi values of the SPO
-   * @TODO make it purely virtual
-   */
-  virtual void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi){};
-
-  /** evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP
-   * @param VP virtual particle set
-   * @param psi values of the SPO, used as a scratch space if needed
-   * @param psiinv the row of inverse slater matrix corresponding to the particle moved virtually
-   * @param ratios return determinant ratios
-   */
-  virtual void evaluateDetRatios(const VirtualParticleSet& VP,
-                                 ValueVector& psi,
-                                 const ValueVector& psiinv,
-                                 std::vector<T>& ratios);
-
-
-  /// Determinant ratios and parameter derivatives of the wavefunction for virtual moves
-  virtual void evaluateDerivRatios(const VirtualParticleSet& VP,
-                                   const opt_variables_type& optvars,
-                                   ValueVector& psi,
-                                   const ValueVector& psiinv,
-                                   std::vector<T>& ratios,
-                                   Matrix<T>& dratios,
-                                   int FirstIndex,
-                                   int LastIndex);
-
-
-  /** evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP, of multiple walkers
-   * @param spo_list the list of SPOSet pointers in a walker batch
-   * @param vp_list a list of virtual particle sets in a walker batch
-   * @param psi_list a list of values of the SPO, used as a scratch space if needed
-   * @param invRow_ptr_list a list of pointers to the rows of inverse slater matrix corresponding to the particles moved virtually
-   * @param ratios_list a list of returning determinant ratios
-   */
-  virtual void mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                    const RefVectorWithLeader<const VirtualParticleSet>& vp_list,
-                                    const RefVector<ValueVector>& psi_list,
-                                    const std::vector<const T*>& invRow_ptr_list,
-                                    std::vector<std::vector<T>>& ratios_list) const;
-
-  /** evaluate the values, gradients and laplacians of this single-particle orbital set
-   * @param P current ParticleSet
-   * @param iat active particle
-   * @param psi values of the SPO
-   * @param dpsi gradients of the SPO
-   * @param d2psi laplacians of the SPO
-   * @TODO make this purely virtual
-   */
-  virtual void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi){};
-
-  /** evaluate the values, gradients and laplacians and spin gradient of this single-particle orbital set
-   * @param P current ParticleSet
-   * @param iat active particle
-   * @param psi values of the SPO
-   * @param dpsi gradients of the SPO
-   * @param d2psi laplacians of the SPO
-   * @param dspin spin gradients of the SPO
-   */
-  virtual void evaluateVGL_spin(const ParticleSet& P,
-                                int iat,
-                                ValueVector& psi,
-                                GradVector& dpsi,
-                                ValueVector& d2psi,
-                                ValueVector& dspin);
-
-  /** evaluate the values this single-particle orbital sets of multiple walkers
-   * @param spo_list the list of SPOSet pointers in a walker batch
-   * @param P_list the list of ParticleSet pointers in a walker batch
-   * @param iat active particle
-   * @param psi_v_list the list of value vector pointers in a walker batch
-   */
-  virtual void mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                const RefVectorWithLeader<ParticleSet>& P_list,
-                                int iat,
-                                const RefVector<ValueVector>& psi_v_list) const;
-
-  /** evaluate the values, gradients and laplacians of this single-particle orbital sets of multiple walkers
-   * @param spo_list the list of SPOSet pointers in a walker batch
-   * @param P_list the list of ParticleSet pointers in a walker batch
-   * @param iat active particle
-   * @param psi_v_list the list of value vector pointers in a walker batch
-   * @param dpsi_v_list the list of gradient vector pointers in a walker batch
-   * @param d2psi_v_list the list of laplacian vector pointers in a walker batch
-   */
-  virtual void mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                              const RefVectorWithLeader<ParticleSet>& P_list,
-                              int iat,
-                              const RefVector<ValueVector>& psi_v_list,
-                              const RefVector<GradVector>& dpsi_v_list,
-                              const RefVector<ValueVector>& d2psi_v_list) const;
-
-  /** evaluate the values, gradients and laplacians and spin gradient of this single-particle orbital sets of multiple walkers
-   * @param spo_list the list of SPOSet pointers in a walker batch
-   * @param P_list the list of ParticleSet pointers in a walker batch
-   * @param iat active particle
-   * @param psi_v_list the list of value vector pointers in a walker batch
-   * @param dpsi_v_list the list of gradient vector pointers in a walker batch
-   * @param d2psi_v_list the list of laplacian vector pointers in a walker batch
-   * @param mw_dspin is a dual matrix of spin gradients [nw][norb]
-   * Note that the device side of mw_dspin is up to date
-   */
-  virtual void mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                      const RefVectorWithLeader<ParticleSet>& P_list,
-                                      int iat,
-                                      const RefVector<ValueVector>& psi_v_list,
-                                      const RefVector<GradVector>& dpsi_v_list,
-                                      const RefVector<ValueVector>& d2psi_v_list,
-                                      OffloadMatrix<ComplexType>& mw_dspin) const;
-
-  /** evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio
-   *  and grads of multiple walkers. Device data of phi_vgl_v must be up-to-date upon return
-   * @param spo_list the list of SPOSet pointers in a walker batch
-   * @param P_list the list of ParticleSet pointers in a walker batch
-   * @param iat active particle
-   * @param phi_vgl_v orbital values, gradients and laplacians of all the walkers
-   * @param psi_ratio_grads_v determinant ratio and grads of all the walkers
-   */
-  virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                              const RefVectorWithLeader<ParticleSet>& P_list,
-                                              int iat,
-                                              const std::vector<const T*>& invRow_ptr_list,
-                                              OffloadMWVGLArray& phi_vgl_v,
-                                              std::vector<T>& ratios,
-                                              std::vector<GradType>& grads) const;
-
-  /** evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio
-   *  and grads of multiple walkers. Device data of phi_vgl_v must be up-to-date upon return.
-   *  Includes spin gradients
-   * @param spo_list the list of SPOSet pointers in a walker batch
-   * @param P_list the list of ParticleSet pointers in a walker batch
-   * @param iat active particle
-   * @param phi_vgl_v orbital values, gradients and laplacians of all the walkers
-   * @param ratios, ratios of all walkers
-   * @param grads, spatial gradients of all walkers
-   * @param spingrads, spin gradients of all walkers
-   */
-  virtual void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                                      const RefVectorWithLeader<ParticleSet>& P_list,
-                                                      int iat,
-                                                      const std::vector<const T*>& invRow_ptr_list,
-                                                      OffloadMWVGLArray& phi_vgl_v,
-                                                      std::vector<T>& ratios,
-                                                      std::vector<GradType>& grads,
-                                                      std::vector<T>& spingrads) const;
-
-  /** evaluate the values, gradients and hessians of this single-particle orbital set
-   * @param P current ParticleSet
-   * @param iat active particle
-   * @param psi values of the SPO
-   * @param dpsi gradients of the SPO
-   * @param grad_grad_psi hessians of the SPO
-   */
-  virtual void evaluateVGH(const ParticleSet& P,
-                           int iat,
-                           ValueVector& psi,
-                           GradVector& dpsi,
-                           HessVector& grad_grad_psi);
-
-  /** evaluate the values, gradients, hessians, and grad hessians of this single-particle orbital set
-   * @param P current ParticleSet
-   * @param iat active particle
-   * @param psi values of the SPO
-   * @param dpsi gradients of the SPO
-   * @param grad_grad_psi hessians of the SPO
-   * @param grad_grad_grad_psi grad hessians of the SPO
-   */
-  virtual void evaluateVGHGH(const ParticleSet& P,
-                             int iat,
-                             ValueVector& psi,
-                             GradVector& dpsi,
-                             HessVector& grad_grad_psi,
-                             GGGVector& grad_grad_grad_psi);
-
-  /** evaluate the values of this single-particle orbital set
-   * @param P current ParticleSet
-   * @param iat active particle
-   * @param psi values of the SPO
-   */
-  virtual void evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi);
-
-  /** evaluate the third derivatives of this single-particle orbital set
-   * @param P current ParticleSet
-   * @param first first particle
-   * @param last last particle
-   * @param grad_grad_grad_logdet third derivatives of the SPO
-   */
-  virtual void evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& grad_grad_grad_logdet);
-
-  /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles
-   * @param[in] P current ParticleSet
-   * @param[in] first starting index of the particles
-   * @param[in] last ending index of the particles
-   * @param[out] logdet determinant matrix to be inverted
-   * @param[out] dlogdet gradients
-   * @param[out] d2logdet laplacians
-   * @TODO make this pure virtual
-   */
-  virtual void evaluate_notranspose(const ParticleSet& P,
-                                    int first,
-                                    int last,
-                                    ValueMatrix& logdet,
-                                    GradMatrix& dlogdet,
-                                    ValueMatrix& d2logdet){};
-
-  /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles, including the spin gradient
-   * @param P current ParticleSet
-   * @param first starting index of the particles
-   * @param last ending index of the particles
-   * @param logdet determinant matrix to be inverted
-   * @param dlogdet gradients
-   * @param d2logdet laplacians
-   * @param dspinlogdet, spin gradients
-   *
-   * default implementation will abort for all SPOSets except SpinorSet
-   *
-   */
-  virtual void evaluate_notranspose_spin(const ParticleSet& P,
-                                         int first,
-                                         int last,
-                                         ValueMatrix& logdet,
-                                         GradMatrix& dlogdet,
-                                         ValueMatrix& d2logdet,
-                                         ValueMatrix& dspinlogdet);
-
-  virtual void mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                       const RefVectorWithLeader<ParticleSet>& P_list,
-                                       int first,
-                                       int last,
-                                       const RefVector<ValueMatrix>& logdet_list,
-                                       const RefVector<GradMatrix>& dlogdet_list,
-                                       const RefVector<ValueMatrix>& d2logdet_list) const;
-
-  /** evaluate the values, gradients and hessians of this single-particle orbital for [first,last) particles
-   * @param P current ParticleSet
-   * @param first starting index of the particles
-   * @param last ending index of the particles
-   * @param logdet determinant matrix to be inverted
-   * @param dlogdet gradients
-   * @param grad_grad_logdet hessians
-   *
-   */
-  virtual void evaluate_notranspose(const ParticleSet& P,
-                                    int first,
-                                    int last,
-                                    ValueMatrix& logdet,
-                                    GradMatrix& dlogdet,
-                                    HessMatrix& grad_grad_logdet);
-
-  /** evaluate the values, gradients, hessians and third derivatives of this single-particle orbital for [first,last) particles
-   * @param P current ParticleSet
-   * @param first starting index of the particles
-   * @param last ending index of the particles
-   * @param logdet determinant matrix to be inverted
-   * @param dlogdet gradients
-   * @param grad_grad_logdet hessians
-   * @param grad_grad_grad_logdet third derivatives
-   *
-   */
-  virtual void evaluate_notranspose(const ParticleSet& P,
-                                    int first,
-                                    int last,
-                                    ValueMatrix& logdet,
-                                    GradMatrix& dlogdet,
-                                    HessMatrix& grad_grad_logdet,
-                                    GGGMatrix& grad_grad_grad_logdet);
-
-  /** evaluate the gradients of this single-particle orbital
-   *  for [first,last) target particles with respect to the given source particle
-   * @param P current ParticleSet
-   * @param first starting index of the particles
-   * @param last ending index of the particles
-   * @param iat_src source particle index
-   * @param gradphi gradients
-   *
-   */
-  virtual void evaluateGradSource(const ParticleSet& P,
-                                  int first,
-                                  int last,
-                                  const ParticleSet& source,
-                                  int iat_src,
-                                  GradMatrix& gradphi);
-
-  /** evaluate the gradients of values, gradients, laplacians of this single-particle orbital
-   *  for [first,last) target particles with respect to the given source particle
-   * @param P current ParticleSet
-   * @param first starting index of the particles
-   * @param last ending index of the particles
-   * @param iat_src source particle index
-   * @param gradphi gradients of values
-   * @param grad_grad_phi gradients of gradients
-   * @param grad_lapl_phi gradients of laplacians
-   *
-   */
-  virtual void evaluateGradSource(const ParticleSet& P,
-                                  int first,
-                                  int last,
-                                  const ParticleSet& source,
-                                  int iat_src,
-                                  GradMatrix& grad_phi,
-                                  HessMatrix& grad_grad_phi,
-                                  GradMatrix& grad_lapl_phi);
-
-  /** @brief Returns a row of d/dR_iat phi_j(r) evaluated at position r.  
-   *
-   *  @param[in] P particle set.
-   *  @param[in] iel The electron at which to evaluate phi(r_iel)
-   *  @param[in] source ion particle set.
-   *  @param[in] iat_src ion ID w.r.t. which to take derivative.
-   *  @param[in,out] gradphi Vector of d/dR_iat phi_j(r).
-   *  @return Void
-   */
-  virtual void evaluateGradSourceRow(const ParticleSet& P,
-                                     int iel,
-                                     const ParticleSet& source,
-                                     int iat_src,
-                                     GradVector& gradphi);
-
-  /** access the k point related to the given orbital */
-  virtual PosType get_k(int orb) { return PosType(); }
-
-  /** initialize a shared resource and hand it to collection
-   */
-  virtual void createResource(ResourceCollection& collection) const {}
-
-  /** acquire a shared resource from collection
-   */
-  virtual void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const {}
-
-  /** return a shared resource to collection
-   */
-  virtual void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const {}
-
-  /** make a clone of itself
-   * every derived class must implement this to have threading working correctly.
-   */
-  [[noreturn]] virtual std::unique_ptr<SPOSetT<T>> makeClone() const;
-
-  /** Used only by cusp correction in AOS LCAO.
-   * Ye: the SoA LCAO moves all this responsibility to the builder.
-   * This interface should be removed with AoS.
-   */
-  virtual bool transformSPOSet() { return true; }
-
-  /** finalize the construction of SPOSet
-   *
-   * for example, classes serving accelerators may need to transfer data from host to device
-   * after the host side objects are built.
-   */
-  virtual void finalizeConstruction() {}
-
-  /// return object name
-  const std::string& getName() const { return my_name_; }
-
-  /// @TODO make this purely virutal return class name
-  virtual std::string getClassName() const { return ""; };
+    using ValueVector = typename OrbitalSetTraits<T>::ValueVector;
+    using ValueMatrix = typename OrbitalSetTraits<T>::ValueMatrix;
+    using GradVector = typename OrbitalSetTraits<T>::GradVector;
+    using GradMatrix = typename OrbitalSetTraits<T>::GradMatrix;
+    using GradType = TinyVector<T, DIM>;
+    using HessVector = typename OrbitalSetTraits<T>::HessVector;
+    using HessMatrix = typename OrbitalSetTraits<T>::HessMatrix;
+    using GGGVector = typename OrbitalSetTraits<T>::GradHessVector;
+    using GGGMatrix = typename OrbitalSetTraits<T>::GradHessMatrix;
+    using SPOMap =
+        std::map<std::string, const std::unique_ptr<const SPOSetT<T>>>;
+    using OffloadMWVGLArray =
+        Array<T, 3, OffloadPinnedAllocator<T>>; // [VGL, walker, Orbs]
+    using OffloadMWVArray =
+        Array<T, 2, OffloadPinnedAllocator<T>>; // [walker, Orbs]
+    using PosType = typename OrbitalSetTraits<T>::PosType;
+    using RealType = typename OrbitalSetTraits<T>::RealType;
+    using ValueType = typename OrbitalSetTraits<T>::ValueType;
+    using FullRealType = typename OrbitalSetTraits<double>::RealType;
+    template <typename DT>
+    using OffloadMatrix = Matrix<DT, OffloadPinnedAllocator<DT>>;
+
+    /** constructor; a template-id declarator here is ill-formed in C++20 */
+    SPOSetT(const std::string& my_name);
+
+    /** destructor (declared with the injected-class-name for C++20)
+     *
+     * Derived class destructors must take extra care when freeing memory
+     * shared among clones of SPOSet.
+     */
+    virtual ~SPOSetT() = default;
+
+    /** return the size of the orbital set
+     * Ye: this needs to be replaced by getOrbitalSetSize();
+     */
+    inline int
+    size() const
+    {
+        return OrbitalSetSize;
+    }
+
+    /** print basic SPOSet information
+     */
+    void
+    basic_report(const std::string& pad = "") const;
+
+    /** print SPOSet information
+     */
+    virtual void
+    report(const std::string& pad = "") const
+    {
+        basic_report(pad);
+    }
+
+    /** return the size of the orbitals
+     */
+    inline int
+    getOrbitalSetSize() const
+    {
+        return OrbitalSetSize;
+    }
+
+    /// Query if this SPOSet is optimizable
+    virtual bool
+    isOptimizable() const
+    {
+        return false;
+    }
+
+    /** extract underlying OptimizableObject references
+     * @param opt_obj_refs aggregated list of optimizable object references
+     */
+    virtual void
+    extractOptimizableObjectRefs(UniqueOptObjRefsT<T>& opt_obj_refs);
+
+    /** check out variational optimizable variables
+     * @param active a super set of optimizable variables
+     */
+    virtual void
+    checkOutVariables(const OptVariablesType<T>& active);
+
+    /// Query if this SPOSet uses OpenMP offload
+    virtual bool
+    isOMPoffload() const
+    {
+        return false;
+    }
+
+    /** Query if this SPOSet has an explicit ion dependence. Returns true if it
+     * does.
+     */
+    virtual bool
+    hasIonDerivs() const
+    {
+        return false;
+    }
+
+    /// check a few key parameters before putting the SPO into a determinant
+    virtual void
+    checkObject() const
+    {
+    }
+
+    /// return true if this SPOSet can be wrapped by RotatedSPO
+    virtual bool
+    isRotationSupported() const
+    {
+        return false;
+    }
+    /// store parameters before getting destroyed by rotation.
+    virtual void
+    storeParamsBeforeRotation()
+    {
+    }
+    /// apply rotation to all the orbitals
+    virtual void
+    applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy = false);
+
+    /// Parameter derivatives of the wavefunction and the Laplacian of the
+    /// wavefunction
+    virtual void
+    evaluateDerivatives(ParticleSetT<T>& P, const OptVariablesType<T>& optvars,
+        Vector<T>& dlogpsi, Vector<T>& dhpsioverpsi, const int& FirstIndex,
+        const int& LastIndex);
+
+    /// Parameter derivatives of the wavefunction
+    virtual void
+    evaluateDerivativesWF(ParticleSetT<T>& P,
+        const OptVariablesType<T>& optvars, Vector<T>& dlogpsi, int FirstIndex,
+        int LastIndex);
+
+    /** Evaluate the derivative of the optimized orbitals with respect to the
+     * parameters. This is used only for MSD; to be refined to better serve
+     * both single and multi SD.
+     */
+    virtual void
+    evaluateDerivatives(ParticleSetT<T>& P, const OptVariablesType<T>& optvars,
+        Vector<T>& dlogpsi, Vector<T>& dhpsioverpsi, const T& psiCurrent,
+        const std::vector<T>& Coeff, const std::vector<size_t>& C2node_up,
+        const std::vector<size_t>& C2node_dn, const ValueVector& detValues_up,
+        const ValueVector& detValues_dn, const GradMatrix& grads_up,
+        const GradMatrix& grads_dn, const ValueMatrix& lapls_up,
+        const ValueMatrix& lapls_dn, const ValueMatrix& M_up,
+        const ValueMatrix& M_dn, const ValueMatrix& Minv_up,
+        const ValueMatrix& Minv_dn, const GradMatrix& B_grad,
+        const ValueMatrix& B_lapl, const std::vector<int>& detData_up,
+        const size_t N1, const size_t N2, const size_t NP1, const size_t NP2,
+        const std::vector<std::vector<int>>& lookup_tbl);
+
+    /** Evaluate the derivative of the optimized orbitals with respect to the
+     * parameters. This is used only for MSD; to be refined to better serve
+     * both single and multi SD.
+     */
+    virtual void
+    evaluateDerivativesWF(ParticleSetT<T>& P,
+        const OptVariablesType<T>& optvars, Vector<ValueType>& dlogpsi,
+        const ValueType& psiCurrent, const std::vector<T>& Coeff,
+        const std::vector<size_t>& C2node_up,
+        const std::vector<size_t>& C2node_dn, const ValueVector& detValues_up,
+        const ValueVector& detValues_dn, const ValueMatrix& M_up,
+        const ValueMatrix& M_dn, const ValueMatrix& Minv_up,
+        const ValueMatrix& Minv_dn, const std::vector<int>& detData_up,
+        const std::vector<std::vector<int>>& lookup_tbl);
+
+    /** Set the OrbitalSetSize; this base implementation does nothing.
+     * @param norbs number of single-particle orbitals
+     * Ye: this interface should go away in the future. SPOSet builders must
+     * handle the size correctly; letting arbitrary code set the value makes
+     * no sense.
+     * @TODO make it pure virtual
+     */
+    virtual void
+    setOrbitalSetSize(int norbs) {}
+
+    /** Evaluate the orbital values; this base implementation does nothing.
+     * @param P current ParticleSet
+     * @param iat active particle
+     * @param psi values of the SPO
+     * @TODO make it pure virtual
+     */
+    virtual void
+    evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) {}
+
+    /** evaluate determinant ratios for virtual moves, e.g., sphere move for
+     * nonlocalPP
+     * @param VP virtual particle set
+     * @param psi values of the SPO, used as a scratch space if needed
+     * @param psiinv the row of inverse slater matrix corresponding to the
+     * particle moved virtually
+     * @param ratios return determinant ratios
+     */
+    virtual void
+    evaluateDetRatios(const VirtualParticleSetT<T>& VP, ValueVector& psi,
+        const ValueVector& psiinv, std::vector<T>& ratios);
+
+    /// Determinant ratios and parameter derivatives of the wavefunction for
+    /// virtual moves
+    virtual void
+    evaluateDerivRatios(const VirtualParticleSetT<T>& VP,
+        const OptVariablesType<T>& optvars, ValueVector& psi,
+        const ValueVector& psiinv, std::vector<T>& ratios, Matrix<T>& dratios,
+        int FirstIndex, int LastIndex);
+
+    /** evaluate determinant ratios for virtual moves, e.g., sphere move for
+     * nonlocalPP, of multiple walkers
+     * @param spo_list the list of SPOSet pointers in a walker batch
+     * @param vp_list a list of virtual particle sets in a walker batch
+     * @param psi_list a list of values of the SPO, used as a scratch space if
+     * needed
+     * @param invRow_ptr_list a list of pointers to the rows of inverse slater
+     * matrix corresponding to the particles moved virtually
+     * @param ratios_list a list of returning determinant ratios
+     */
+    virtual void
+    mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+        const RefVectorWithLeader<const VirtualParticleSetT<T>>& vp_list,
+        const RefVector<ValueVector>& psi_list,
+        const std::vector<const T*>& invRow_ptr_list,
+        std::vector<std::vector<T>>& ratios_list) const;
+
+    /** Evaluate values, gradients and laplacians of this single-particle
+     * orbital set; this base implementation does nothing.
+     * @param P current ParticleSet
+     * @param iat active particle
+     * @param psi values of the SPO
+     * @param dpsi gradients of the SPO
+     * @param d2psi laplacians of the SPO
+     * @TODO make this pure virtual
+     */
+    virtual void
+    evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
+        GradVector& dpsi, ValueVector& d2psi) {}
+
+    /** evaluate the values, gradients and laplacians and spin gradient of this
+     * single-particle orbital set
+     * @param P current ParticleSet
+     * @param iat active particle
+     * @param psi values of the SPO
+     * @param dpsi gradients of the SPO
+     * @param d2psi laplacians of the SPO
+     * @param dspin spin gradients of the SPO
+     */
+    virtual void
+    evaluateVGL_spin(const ParticleSetT<T>& P, int iat, ValueVector& psi,
+        GradVector& dpsi, ValueVector& d2psi, ValueVector& dspin);
+
+    /** evaluate the values of this single-particle orbital set for multiple
+     * walkers
+     * @param spo_list the list of SPOSet pointers in a walker batch
+     * @param P_list the list of ParticleSet pointers in a walker batch
+     * @param iat active particle
+     * @param psi_v_list the list of value vector pointers in a walker batch
+     */
+    virtual void
+    mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
+        const RefVector<ValueVector>& psi_v_list) const;
+
+    /** evaluate the values, gradients and laplacians of this single-particle
+     * orbital sets of multiple walkers
+     * @param spo_list the list of SPOSet pointers in a walker batch
+     * @param P_list the list of ParticleSet pointers in a walker batch
+     * @param iat active particle
+     * @param psi_v_list the list of value vector pointers in a walker batch
+     * @param dpsi_v_list the list of gradient vector pointers in a walker batch
+     * @param d2psi_v_list the list of laplacian vector pointers in a walker
+     * batch
+     */
+    virtual void
+    mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
+        const RefVector<ValueVector>& psi_v_list,
+        const RefVector<GradVector>& dpsi_v_list,
+        const RefVector<ValueVector>& d2psi_v_list) const;
+
+    /** evaluate the values, gradients and laplacians and spin gradient of this
+     * single-particle orbital sets of multiple walkers
+     * @param spo_list the list of SPOSet pointers in a walker batch
+     * @param P_list the list of ParticleSet pointers in a walker batch
+     * @param iat active particle
+     * @param psi_v_list the list of value vector pointers in a walker batch
+     * @param dpsi_v_list the list of gradient vector pointers in a walker batch
+     * @param d2psi_v_list the list of laplacian vector pointers in a walker
+     * batch
+     * @param mw_dspin is a dual matrix of spin gradients [nw][norb]
+     * Note that the device side of mw_dspin is up to date
+     */
+    virtual void
+    mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
+        const RefVector<ValueVector>& psi_v_list,
+        const RefVector<GradVector>& dpsi_v_list,
+        const RefVector<ValueVector>& d2psi_v_list,
+        OffloadMatrix<ComplexType>& mw_dspin) const;
+
+    /** evaluate the values, gradients and laplacians of this single-particle
+     * orbital sets and determinant ratio and grads of multiple walkers. Device
+     * data of phi_vgl_v must be up-to-date upon return
+     * @param spo_list the list of SPOSet pointers in a walker batch
+     * @param P_list the list of ParticleSet pointers in a walker batch
+     * @param iat active particle
+     * @param phi_vgl_v orbital values, gradients and laplacians of all walkers
+     * @param ratios determinant ratios of all the walkers
+     * @param grads determinant gradients of all the walkers
+     */
+    virtual void
+    mw_evaluateVGLandDetRatioGrads(
+        const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
+        const std::vector<const T*>& invRow_ptr_list,
+        OffloadMWVGLArray& phi_vgl_v, std::vector<T>& ratios,
+        std::vector<GradType>& grads) const;
+
+    /** evaluate the values, gradients and laplacians of this single-particle
+     * orbital sets and determinant ratio and grads of multiple walkers. Device
+     * data of phi_vgl_v must be up-to-date upon return. Includes spin gradients
+     * @param spo_list the list of SPOSet pointers in a walker batch
+     * @param P_list the list of ParticleSet pointers in a walker batch
+     * @param iat active particle
+     * @param phi_vgl_v orbital values, gradients and laplacians of all the
+     * walkers
+     * @param ratios ratios of all walkers
+     * @param grads spatial gradients of all walkers
+     * @param spingrads spin gradients of all walkers
+     */
+    virtual void
+    mw_evaluateVGLandDetRatioGradsWithSpin(
+        const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
+        const std::vector<const T*>& invRow_ptr_list,
+        OffloadMWVGLArray& phi_vgl_v, std::vector<T>& ratios,
+        std::vector<GradType>& grads, std::vector<T>& spingrads) const;
+
+    /** evaluate the values, gradients and hessians of this single-particle
+     * orbital set
+     * @param P current ParticleSet
+     * @param iat active particle
+     * @param psi values of the SPO
+     * @param dpsi gradients of the SPO
+     * @param grad_grad_psi hessians of the SPO
+     */
+    virtual void
+    evaluateVGH(const ParticleSetT<T>& P, int iat, ValueVector& psi,
+        GradVector& dpsi, HessVector& grad_grad_psi);
+
+    /** evaluate the values, gradients, hessians, and grad hessians of this
+     * single-particle orbital set
+     * @param P current ParticleSet
+     * @param iat active particle
+     * @param psi values of the SPO
+     * @param dpsi gradients of the SPO
+     * @param grad_grad_psi hessians of the SPO
+     * @param grad_grad_grad_psi grad hessians of the SPO
+     */
+    virtual void
+    evaluateVGHGH(const ParticleSetT<T>& P, int iat, ValueVector& psi,
+        GradVector& dpsi, HessVector& grad_grad_psi,
+        GGGVector& grad_grad_grad_psi);
+
+    /** evaluate the values of this single-particle orbital set
+     * @param P current ParticleSet
+     * @param iat active particle
+     * @param psi values of the SPO
+     */
+    virtual void
+    evaluate_spin(
+        const ParticleSetT<T>& P, int iat, ValueVector& psi, ValueVector& dpsi);
+
+    /** evaluate the third derivatives of this single-particle orbital set
+     * @param P current ParticleSet
+     * @param first first particle
+     * @param last last particle
+     * @param grad_grad_grad_logdet third derivatives of the SPO
+     */
+    virtual void
+    evaluateThirdDeriv(const ParticleSetT<T>& P, int first, int last,
+        GGGMatrix& grad_grad_grad_logdet);
+
+    /** evaluate the values, gradients and laplacians of this single-particle
+     * orbital for [first,last) particles
+     * @param[in] P current ParticleSet
+     * @param[in] first starting index of the particles
+     * @param[in] last ending index of the particles
+     * @param[out] logdet determinant matrix to be inverted
+     * @param[out] dlogdet gradients
+     * @param[out] d2logdet laplacians
+     * @TODO make this pure virtual
+     */
+    virtual void
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+        ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet){};
+
+    /** evaluate the values, gradients and laplacians of this single-particle
+     * orbital for [first,last) particles, including the spin gradient
+     * @param P current ParticleSet
+     * @param first starting index of the particles
+     * @param last ending index of the particles
+     * @param logdet determinant matrix to be inverted
+     * @param dlogdet gradients
+     * @param d2logdet laplacians
+     * @param dspinlogdet spin gradients
+     *
+     * default implementation will abort for all SPOSets except SpinorSet
+     *
+     */
+    virtual void
+    evaluate_notranspose_spin(const ParticleSetT<T>& P, int first, int last,
+        ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet,
+        ValueMatrix& dspinlogdet);
+
+    virtual void
+    mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int first, int last,
+        const RefVector<ValueMatrix>& logdet_list,
+        const RefVector<GradMatrix>& dlogdet_list,
+        const RefVector<ValueMatrix>& d2logdet_list) const;
+
+    /** evaluate the values, gradients and hessians of this single-particle
+     * orbital for [first,last) particles
+     * @param P current ParticleSet
+     * @param first starting index of the particles
+     * @param last ending index of the particles
+     * @param logdet determinant matrix to be inverted
+     * @param dlogdet gradients
+     * @param grad_grad_logdet hessians
+     *
+     */
+    virtual void
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+        ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet);
+
+    /** evaluate the values, gradients, hessians and third derivatives of this
+     * single-particle orbital for [first,last) particles
+     * @param P current ParticleSet
+     * @param first starting index of the particles
+     * @param last ending index of the particles
+     * @param logdet determinant matrix to be inverted
+     * @param dlogdet gradients
+     * @param grad_grad_logdet hessians
+     * @param grad_grad_grad_logdet third derivatives
+     *
+     */
+    virtual void
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+        ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet,
+        GGGMatrix& grad_grad_grad_logdet);
+
+    /** evaluate the gradients of this single-particle orbital
+     *  for [first,last) target particles with respect to the given source
+     * particle
+     * @param P current ParticleSet
+     * @param first starting index of the particles
+     * @param last ending index of the particles
+     * @param iat_src source particle index
+     * @param gradphi gradients
+     *
+     */
+    virtual void
+    evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
+        const ParticleSetT<T>& source, int iat_src, GradMatrix& gradphi);
+
+    /** evaluate the gradients of values, gradients, laplacians of this
+     * single-particle orbital for [first,last) target particles with respect to
+     * the given source particle
+     * @param P current ParticleSet
+     * @param first starting index of the particles
+     * @param last ending index of the particles
+     * @param iat_src source particle index
+     * @param grad_phi gradients of values
+     * @param grad_grad_phi gradients of gradients
+     * @param grad_lapl_phi gradients of laplacians
+     *
+     */
+    virtual void
+    evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
+        const ParticleSetT<T>& source, int iat_src, GradMatrix& grad_phi,
+        HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi);
+
+    /** @brief Returns a row of d/dR_iat phi_j(r) evaluated at position r.
+     *
+     *  @param[in] P particle set.
+     *  @param[in] iel The electron at which to evaluate phi(r_iel)
+     *  @param[in] source ion particle set.
+     *  @param[in] iat_src ion ID w.r.t. which to take derivative.
+     *  @param[in,out] gradphi Vector of d/dR_iat phi_j(r).
+     *  @return Void
+     */
+    virtual void
+    evaluateGradSourceRow(const ParticleSetT<T>& P, int iel,
+        const ParticleSetT<T>& source, int iat_src, GradVector& gradphi);
+
+    /** access the k point related to the given orbital */
+    virtual PosType
+    get_k(int orb)
+    {
+        return PosType();
+    }
+
+    /** initialize a shared resource and hand it to collection
+     */
+    virtual void
+    createResource(ResourceCollection& collection) const
+    {
+    }
+
+    /** acquire a shared resource from collection
+     */
+    virtual void
+    acquireResource(ResourceCollection& collection,
+        const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+    {
+    }
+
+    /** return a shared resource to collection
+     */
+    virtual void
+    releaseResource(ResourceCollection& collection,
+        const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+    {
+    }
+
+    /** make a clone of itself
+     * every derived class must implement this to have threading working
+     * correctly.
+     */
+    [[noreturn]] virtual std::unique_ptr<SPOSetT<T>>
+    makeClone() const;
+
+    /** Used only by cusp correction in AOS LCAO.
+     * Ye: the SoA LCAO moves all this responsibility to the builder.
+     * This interface should be removed with AoS.
+     */
+    virtual bool
+    transformSPOSet()
+    {
+        return true;
+    }
+
+    /** finalize the construction of SPOSet
+     *
+     * for example, classes serving accelerators may need to transfer data from
+     * host to device after the host side objects are built.
+     */
+    virtual void
+    finalizeConstruction()
+    {
+    }
+
+    /// return object name
+    const std::string&
+    getName() const
+    {
+        return my_name_;
+    }
+
+    /// @TODO make this pure virtual and return the class name
+    virtual std::string
+    getClassName() const
+    {
+        return "";
+    };
 
 protected:
-  /// name of the object, unique identifier
-  const std::string my_name_;
-  ///number of Single-particle orbitals
-  IndexType OrbitalSetSize;
-  /// Optimizable variables
-  opt_variables_type myVars;
-
-  friend opt_variables_type& testing::getMyVars(SPOSetT<float>& spo);
-  friend opt_variables_type& testing::getMyVars(SPOSetT<double>& spo);
-  friend opt_variables_type& testing::getMyVars(SPOSetT<std::complex<float>>& spo);
-  friend opt_variables_type& testing::getMyVars(SPOSetT<std::complex<double>>& spo);
+    /// name of the object, unique identifier
+    const std::string my_name_;
+    /// number of Single-particle orbitals
+    IndexType OrbitalSetSize;
+    /// Optimizable variables
+    OptVariablesType<T> myVars;
+
+    friend OptVariablesType<float>&
+    testing::getMyVars(SPOSetT<float>& spo);
+    friend OptVariablesType<double>&
+    testing::getMyVars(SPOSetT<double>& spo);
+    friend OptVariablesType<std::complex<float>>&
+    testing::getMyVars(SPOSetT<std::complex<float>>& spo);
+    friend OptVariablesType<std::complex<double>>&
+    testing::getMyVars(SPOSetT<std::complex<double>>& spo);
 };
 
-template<class T>
+template <class T>
 using SPOSetTPtr = SPOSetT<T>*;
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/SpinorSetT.cpp b/src/QMCWaveFunctions/SpinorSetT.cpp
index 64d7d3d6b1..1090397ad1 100644
--- a/src/QMCWaveFunctions/SpinorSetT.cpp
+++ b/src/QMCWaveFunctions/SpinorSetT.cpp
@@ -1,583 +1,621 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2022 QMCPACK developers
 //
-// File developed by: Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories
-//                    Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories
+// File developed by: Raymond Clay III, rclay@sandia.gov, Sandia National
+// Laboratories
+//                    Cody A. Melton, cmelton@sandia.gov, Sandia National
+//                    Laboratories
 //
-// File created by:  Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories
+// File created by:  Raymond Clay III, rclay@sandia.gov, Sandia National
+// Laboratories
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "SpinorSetT.h"
-#include "Utilities/ResourceCollection.h"
+
 #include "Platforms/OMPTarget/OMPTargetMath.hpp"
+#include "Utilities/ResourceCollection.h"
 
 namespace qmcplusplus
 {
-template<class T>
+template <class T>
 struct SpinorSetT<T>::SpinorSetMultiWalkerResource : public Resource
 {
-  SpinorSetMultiWalkerResource() : Resource("SpinorSet") {}
-  SpinorSetMultiWalkerResource(const SpinorSetMultiWalkerResource&) : SpinorSetMultiWalkerResource() {}
-  std::unique_ptr<Resource> makeClone() const override { return std::make_unique<SpinorSetMultiWalkerResource>(*this); }
-  OffloadMWVGLArray up_phi_vgl_v, dn_phi_vgl_v;
-  std::vector<T> up_ratios, dn_ratios;
-  std::vector<GradType> up_grads, dn_grads;
-  std::vector<RealType> spins;
+    SpinorSetMultiWalkerResource() : Resource("SpinorSet")
+    {
+    }
+    SpinorSetMultiWalkerResource(const SpinorSetMultiWalkerResource&) :
+        SpinorSetMultiWalkerResource()
+    {
+    }
+    std::unique_ptr<Resource>
+    makeClone() const override
+    {
+        return std::make_unique<SpinorSetMultiWalkerResource>(*this);
+    }
+    OffloadMWVGLArray up_phi_vgl_v, dn_phi_vgl_v;
+    std::vector<T> up_ratios, dn_ratios;
+    std::vector<GradType> up_grads, dn_grads;
+    std::vector<RealType> spins;
 };
 
-template<class T>
-SpinorSetT<T>::SpinorSetT(const std::string& my_name) : SPOSetT<T>(my_name), spo_up(nullptr), spo_dn(nullptr)
-{}
+template <class T>
+SpinorSetT<T>::SpinorSetT(const std::string& my_name) :
+    SPOSetT<T>(my_name),
+    spo_up(nullptr),
+    spo_dn(nullptr)
+{
+}
 
-template<class T>
+template <class T>
 SpinorSetT<T>::~SpinorSetT() = default;
 
-template<class T>
-void SpinorSetT<T>::set_spos(std::unique_ptr<SPOSetT<T>>&& up, std::unique_ptr<SPOSetT<T>>&& dn)
+template <class T>
+void
+SpinorSetT<T>::set_spos(
+    std::unique_ptr<SPOSetT<T>>&& up, std::unique_ptr<SPOSetT<T>>&& dn)
 {
-  //Sanity check for input SPO's.  They need to be the same size or
-  IndexType spo_size_up   = up->getOrbitalSetSize();
-  IndexType spo_size_down = dn->getOrbitalSetSize();
+    // Sanity check for input SPOs: up and down must have the same size.
+    IndexType spo_size_up = up->getOrbitalSetSize();
+    IndexType spo_size_down = dn->getOrbitalSetSize();
 
-  if (spo_size_up != spo_size_down)
-    throw std::runtime_error("SpinorSet::set_spos(...):  up and down SPO components have different sizes.");
+    if (spo_size_up != spo_size_down)
+        throw std::runtime_error("SpinorSet::set_spos(...):  up and down SPO "
+                                 "components have different sizes.");
 
-  setOrbitalSetSize(spo_size_up);
+    setOrbitalSetSize(spo_size_up);
 
-  spo_up = std::move(up);
-  spo_dn = std::move(dn);
+    spo_up = std::move(up);
+    spo_dn = std::move(dn);
 
-  psi_work_up.resize(this->OrbitalSetSize);
-  psi_work_down.resize(this->OrbitalSetSize);
+    psi_work_up.resize(this->OrbitalSetSize);
+    psi_work_down.resize(this->OrbitalSetSize);
 
-  dpsi_work_up.resize(this->OrbitalSetSize);
-  dpsi_work_down.resize(this->OrbitalSetSize);
+    dpsi_work_up.resize(this->OrbitalSetSize);
+    dpsi_work_down.resize(this->OrbitalSetSize);
 
-  d2psi_work_up.resize(this->OrbitalSetSize);
-  d2psi_work_down.resize(this->OrbitalSetSize);
+    d2psi_work_up.resize(this->OrbitalSetSize);
+    d2psi_work_down.resize(this->OrbitalSetSize);
 }
 
-template<class T>
-void SpinorSetT<T>::setOrbitalSetSize(int norbs)
+template <class T>
+void
+SpinorSetT<T>::setOrbitalSetSize(int norbs)
 {
-  this->OrbitalSetSize = norbs;
+    this->OrbitalSetSize = norbs;
 };
 
-template<class T>
-void SpinorSetT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+template <class T>
+void
+SpinorSetT<T>::evaluateValue(
+    const ParticleSetT<T>& P, int iat, ValueVector& psi)
 {
-  psi_work_up   = 0.0;
-  psi_work_down = 0.0;
+    psi_work_up = 0.0;
+    psi_work_down = 0.0;
 
-  spo_up->evaluateValue(P, iat, psi_work_up);
-  spo_dn->evaluateValue(P, iat, psi_work_down);
+    spo_up->evaluateValue(P, iat, psi_work_up);
+    spo_dn->evaluateValue(P, iat, psi_work_down);
 
-  ParticleSet::Scalar_t s = P.activeSpin(iat);
+    typename ParticleSetT<T>::Scalar_t s = P.activeSpin(iat);
 
-  RealType coss(0.0), sins(0.0);
+    RealType coss(0.0), sins(0.0);
 
-  coss = std::cos(s);
-  sins = std::sin(s);
+    coss = std::cos(s);
+    sins = std::sin(s);
 
-  //This is only supported in the complex build, so T is some complex number depending on the precision.
-  T eis(coss, sins);
-  T emis(coss, -sins);
+    // This is only supported in the complex build, so T is some complex number
+    // depending on the precision.
+    T eis(coss, sins);
+    T emis(coss, -sins);
 
-  psi = eis * psi_work_up + emis * psi_work_down;
+    psi = eis * psi_work_up + emis * psi_work_down;
 }
 
-template<class T>
-void SpinorSetT<T>::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+template <class T>
+void
+SpinorSetT<T>::evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
+    GradVector& dpsi, ValueVector& d2psi)
 {
-  psi_work_up     = 0.0;
-  psi_work_down   = 0.0;
-  dpsi_work_up    = 0.0;
-  dpsi_work_down  = 0.0;
-  d2psi_work_up   = 0.0;
-  d2psi_work_down = 0.0;
+    psi_work_up = 0.0;
+    psi_work_down = 0.0;
+    dpsi_work_up = 0.0;
+    dpsi_work_down = 0.0;
+    d2psi_work_up = 0.0;
+    d2psi_work_down = 0.0;
 
-  spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up);
-  spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down);
+    spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up);
+    spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down);
 
-  ParticleSet::Scalar_t s = P.activeSpin(iat);
+    typename ParticleSetT<T>::Scalar_t s = P.activeSpin(iat);
 
-  RealType coss(0.0), sins(0.0);
+    RealType coss(0.0), sins(0.0);
 
-  coss = std::cos(s);
-  sins = std::sin(s);
+    coss = std::cos(s);
+    sins = std::sin(s);
 
-  T eis(coss, sins);
-  T emis(coss, -sins);
+    T eis(coss, sins);
+    T emis(coss, -sins);
 
-  psi   = eis * psi_work_up + emis * psi_work_down;
-  dpsi  = eis * dpsi_work_up + emis * dpsi_work_down;
-  d2psi = eis * d2psi_work_up + emis * d2psi_work_down;
+    psi = eis * psi_work_up + emis * psi_work_down;
+    dpsi = eis * dpsi_work_up + emis * dpsi_work_down;
+    d2psi = eis * d2psi_work_up + emis * d2psi_work_down;
 }
 
-template<class T>
-void SpinorSetT<T>::evaluateVGL_spin(const ParticleSet& P,
-                                     int iat,
-                                     ValueVector& psi,
-                                     GradVector& dpsi,
-                                     ValueVector& d2psi,
-                                     ValueVector& dspin)
+template <class T>
+void
+SpinorSetT<T>::evaluateVGL_spin(const ParticleSetT<T>& P, int iat,
+    ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, ValueVector& dspin)
 {
-  psi_work_up     = 0.0;
-  psi_work_down   = 0.0;
-  dpsi_work_up    = 0.0;
-  dpsi_work_down  = 0.0;
-  d2psi_work_up   = 0.0;
-  d2psi_work_down = 0.0;
-
-  spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up);
-  spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down);
+    psi_work_up = 0.0;
+    psi_work_down = 0.0;
+    dpsi_work_up = 0.0;
+    dpsi_work_down = 0.0;
+    d2psi_work_up = 0.0;
+    d2psi_work_down = 0.0;
 
-  ParticleSet::Scalar_t s = P.activeSpin(iat);
+    spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up);
+    spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down);
 
-  RealType coss(0.0), sins(0.0);
+    typename ParticleSetT<T>::Scalar_t s = P.activeSpin(iat);
 
-  coss = std::cos(s);
-  sins = std::sin(s);
-
-  T eis(coss, sins);
-  T emis(coss, -sins);
-  T eye(0, 1.0);
-
-  psi   = eis * psi_work_up + emis * psi_work_down;
-  dpsi  = eis * dpsi_work_up + emis * dpsi_work_down;
-  d2psi = eis * d2psi_work_up + emis * d2psi_work_down;
-  dspin = eye * (eis * psi_work_up - emis * psi_work_down);
-}
+    RealType coss(0.0), sins(0.0);
 
-template<class T>
-void SpinorSetT<T>::mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                           const RefVectorWithLeader<ParticleSet>& P_list,
-                                           int iat,
-                                           const RefVector<ValueVector>& psi_v_list,
-                                           const RefVector<GradVector>& dpsi_v_list,
-                                           const RefVector<ValueVector>& d2psi_v_list,
-                                           OffloadMatrix<QMCTraits::ComplexType>& mw_dspin) const
-{
-  auto& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
-  auto& P_leader   = P_list.getLeader();
-  assert(this == &spo_leader);
-
-  IndexType nw                    = spo_list.size();
-  auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
-  auto& up_spo_leader             = up_spo_list.getLeader();
-  auto& dn_spo_leader             = dn_spo_list.getLeader();
-
-  RefVector<ValueVector> up_psi_v_list, dn_psi_v_list;
-  RefVector<GradVector> up_dpsi_v_list, dn_dpsi_v_list;
-  RefVector<ValueVector> up_d2psi_v_list, dn_d2psi_v_list;
-  for (int iw = 0; iw < nw; iw++)
-  {
-    auto& spo = spo_list.template getCastedElement<SpinorSetT<T>>(iw);
-    up_psi_v_list.push_back(spo.psi_work_up);
-    dn_psi_v_list.push_back(spo.psi_work_down);
-    up_dpsi_v_list.push_back(spo.dpsi_work_up);
-    dn_dpsi_v_list.push_back(spo.dpsi_work_down);
-    up_d2psi_v_list.push_back(spo.d2psi_work_up);
-    dn_d2psi_v_list.push_back(spo.d2psi_work_down);
-  }
-
-  up_spo_leader.mw_evaluateVGL(up_spo_list, P_list, iat, up_psi_v_list, up_dpsi_v_list, up_d2psi_v_list);
-  dn_spo_leader.mw_evaluateVGL(dn_spo_list, P_list, iat, dn_psi_v_list, dn_dpsi_v_list, dn_d2psi_v_list);
-
-  for (int iw = 0; iw < nw; iw++)
-  {
-    ParticleSet::Scalar_t s = P_list[iw].activeSpin(iat);
-    RealType coss           = std::cos(s);
-    RealType sins           = std::sin(s);
+    coss = std::cos(s);
+    sins = std::sin(s);
 
     T eis(coss, sins);
     T emis(coss, -sins);
     T eye(0, 1.0);
 
-    psi_v_list[iw].get()   = eis * up_psi_v_list[iw].get() + emis * dn_psi_v_list[iw].get();
-    dpsi_v_list[iw].get()  = eis * up_dpsi_v_list[iw].get() + emis * dn_dpsi_v_list[iw].get();
-    d2psi_v_list[iw].get() = eis * up_d2psi_v_list[iw].get() + emis * dn_d2psi_v_list[iw].get();
-    for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++)
-      mw_dspin(iw, iorb) = eye * (eis * (up_psi_v_list[iw].get())[iorb] - emis * (dn_psi_v_list[iw].get())[iorb]);
-  }
-  //Data above is all on host, but since mw_dspin is DualMatrix we need to sync the host and device
-  mw_dspin.updateTo();
+    psi = eis * psi_work_up + emis * psi_work_down;
+    dpsi = eis * dpsi_work_up + emis * dpsi_work_down;
+    d2psi = eis * d2psi_work_up + emis * d2psi_work_down;
+    dspin = eye * (eis * psi_work_up - emis * psi_work_down);
 }
 
-template<class T>
-void SpinorSetT<T>::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                                           const RefVectorWithLeader<ParticleSet>& P_list,
-                                                           int iat,
-                                                           const std::vector<const T*>& invRow_ptr_list,
-                                                           OffloadMWVGLArray& phi_vgl_v,
-                                                           std::vector<T>& ratios,
-                                                           std::vector<GradType>& grads,
-                                                           std::vector<T>& spingrads) const
+template <class T>
+void
+SpinorSetT<T>::mw_evaluateVGLWithSpin(
+    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
+    const RefVector<ValueVector>& psi_v_list,
+    const RefVector<GradVector>& dpsi_v_list,
+    const RefVector<ValueVector>& d2psi_v_list,
+    OffloadMatrix<QMCTraits::ComplexType>& mw_dspin) const
 {
-  auto& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
-  auto& P_leader   = P_list.getLeader();
-  assert(this == &spo_leader);
-  assert(phi_vgl_v.size(0) == DIM_VGL);
-  assert(phi_vgl_v.size(1) == spo_list.size());
-  const size_t nw             = spo_list.size();
-  const size_t norb_requested = phi_vgl_v.size(2);
-
-  auto& mw_res       = spo_leader.mw_res_handle_.getResource();
-  auto& up_phi_vgl_v = mw_res.up_phi_vgl_v;
-  auto& dn_phi_vgl_v = mw_res.dn_phi_vgl_v;
-  auto& up_ratios    = mw_res.up_ratios;
-  auto& dn_ratios    = mw_res.dn_ratios;
-  auto& up_grads     = mw_res.up_grads;
-  auto& dn_grads     = mw_res.dn_grads;
-  auto& spins        = mw_res.spins;
-
-  up_phi_vgl_v.resize(QMCTraits::DIM_VGL, nw, norb_requested);
-  dn_phi_vgl_v.resize(QMCTraits::DIM_VGL, nw, norb_requested);
-  up_ratios.resize(nw);
-  dn_ratios.resize(nw);
-  up_grads.resize(nw);
-  dn_grads.resize(nw);
-  spins.resize(nw);
-
-  auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
-  auto& up_spo_leader             = up_spo_list.getLeader();
-  auto& dn_spo_leader             = dn_spo_list.getLeader();
-
-  up_spo_leader.mw_evaluateVGLandDetRatioGrads(up_spo_list, P_list, iat, invRow_ptr_list, up_phi_vgl_v, up_ratios,
-                                               up_grads);
-  dn_spo_leader.mw_evaluateVGLandDetRatioGrads(dn_spo_list, P_list, iat, invRow_ptr_list, dn_phi_vgl_v, dn_ratios,
-                                               dn_grads);
-  for (int iw = 0; iw < nw; iw++)
-  {
-    ParticleSet::Scalar_t s = P_list[iw].activeSpin(iat);
-    spins[iw]               = s;
-    RealType coss           = std::cos(s);
-    RealType sins           = std::sin(s);
-
-    T eis(coss, sins);
-    T emis(coss, -sins);
-    T eye(0, 1.0);
+    auto& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
+    auto& P_leader = P_list.getLeader();
+    assert(this == &spo_leader);
+
+    IndexType nw = spo_list.size();
+    auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
+    auto& up_spo_leader = up_spo_list.getLeader();
+    auto& dn_spo_leader = dn_spo_list.getLeader();
+
+    RefVector<ValueVector> up_psi_v_list, dn_psi_v_list;
+    RefVector<GradVector> up_dpsi_v_list, dn_dpsi_v_list;
+    RefVector<ValueVector> up_d2psi_v_list, dn_d2psi_v_list;
+    for (int iw = 0; iw < nw; iw++) {
+        auto& spo = spo_list.template getCastedElement<SpinorSetT<T>>(iw);
+        up_psi_v_list.push_back(spo.psi_work_up);
+        dn_psi_v_list.push_back(spo.psi_work_down);
+        up_dpsi_v_list.push_back(spo.dpsi_work_up);
+        dn_dpsi_v_list.push_back(spo.dpsi_work_down);
+        up_d2psi_v_list.push_back(spo.d2psi_work_up);
+        dn_d2psi_v_list.push_back(spo.d2psi_work_down);
+    }
 
-    ratios[iw]    = eis * up_ratios[iw] + emis * dn_ratios[iw];
-    grads[iw]     = (eis * up_grads[iw] * up_ratios[iw] + emis * dn_grads[iw] * dn_ratios[iw]) / ratios[iw];
-    spingrads[iw] = eye * (eis * up_ratios[iw] - emis * dn_ratios[iw]) / ratios[iw];
-  }
-
-  auto* spins_ptr = spins.data();
-  //This data lives on the device
-  auto* phi_vgl_ptr    = phi_vgl_v.data();
-  auto* up_phi_vgl_ptr = up_phi_vgl_v.data();
-  auto* dn_phi_vgl_ptr = dn_phi_vgl_v.data();
-  PRAGMA_OFFLOAD("omp target teams distribute map(to:spins_ptr[0:nw])")
-  for (int iw = 0; iw < nw; iw++)
-  {
-    RealType c, s;
-    omptarget::sincos(spins_ptr[iw], &s, &c);
-    T eis(c, s), emis(c, -s);
-    PRAGMA_OFFLOAD("omp parallel for collapse(2)")
-    for (int idim = 0; idim < QMCTraits::DIM_VGL; idim++)
-      for (int iorb = 0; iorb < norb_requested; iorb++)
-      {
-        auto offset         = idim * nw * norb_requested + iw * norb_requested + iorb;
-        phi_vgl_ptr[offset] = eis * up_phi_vgl_ptr[offset] + emis * dn_phi_vgl_ptr[offset];
-      }
-  }
+    up_spo_leader.mw_evaluateVGL(up_spo_list, P_list, iat, up_psi_v_list,
+        up_dpsi_v_list, up_d2psi_v_list);
+    dn_spo_leader.mw_evaluateVGL(dn_spo_list, P_list, iat, dn_psi_v_list,
+        dn_dpsi_v_list, dn_d2psi_v_list);
+
+    for (int iw = 0; iw < nw; iw++) {
+        typename ParticleSetT<T>::Scalar_t s = P_list[iw].activeSpin(iat);
+        RealType coss = std::cos(s);
+        RealType sins = std::sin(s);
+
+        T eis(coss, sins);
+        T emis(coss, -sins);
+        T eye(0, 1.0);
+
+        psi_v_list[iw].get() =
+            eis * up_psi_v_list[iw].get() + emis * dn_psi_v_list[iw].get();
+        dpsi_v_list[iw].get() =
+            eis * up_dpsi_v_list[iw].get() + emis * dn_dpsi_v_list[iw].get();
+        d2psi_v_list[iw].get() =
+            eis * up_d2psi_v_list[iw].get() + emis * dn_d2psi_v_list[iw].get();
+        for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++)
+            mw_dspin(iw, iorb) = eye *
+                (eis * (up_psi_v_list[iw].get())[iorb] -
+                    emis * (dn_psi_v_list[iw].get())[iorb]);
+    }
+    // Data above is all on host, but since mw_dspin is an OffloadMatrix we
+    // need to sync the host and device
+    mw_dspin.updateTo();
 }
 
-template<class T>
-void SpinorSetT<T>::evaluate_notranspose(const ParticleSet& P,
-                                         int first,
-                                         int last,
-                                         ValueMatrix& logdet,
-                                         GradMatrix& dlogdet,
-                                         ValueMatrix& d2logdet)
+template <class T>
+void
+SpinorSetT<T>::mw_evaluateVGLandDetRatioGradsWithSpin(
+    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
+    const std::vector<const T*>& invRow_ptr_list, OffloadMWVGLArray& phi_vgl_v,
+    std::vector<T>& ratios, std::vector<GradType>& grads,
+    std::vector<T>& spingrads) const
 {
-  IndexType nelec = P.getTotalNum();
+    auto& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
+    auto& P_leader = P_list.getLeader();
+    assert(this == &spo_leader);
+    assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL);
+    assert(phi_vgl_v.size(1) == spo_list.size());
+    const size_t nw = spo_list.size();
+    const size_t norb_requested = phi_vgl_v.size(2);
+
+    auto& mw_res = spo_leader.mw_res_handle_.getResource();
+    auto& up_phi_vgl_v = mw_res.up_phi_vgl_v;
+    auto& dn_phi_vgl_v = mw_res.dn_phi_vgl_v;
+    auto& up_ratios = mw_res.up_ratios;
+    auto& dn_ratios = mw_res.dn_ratios;
+    auto& up_grads = mw_res.up_grads;
+    auto& dn_grads = mw_res.dn_grads;
+    auto& spins = mw_res.spins;
+
+    up_phi_vgl_v.resize(QMCTraits::DIM_VGL, nw, norb_requested);
+    dn_phi_vgl_v.resize(QMCTraits::DIM_VGL, nw, norb_requested);
+    up_ratios.resize(nw);
+    dn_ratios.resize(nw);
+    up_grads.resize(nw);
+    dn_grads.resize(nw);
+    spins.resize(nw);
+
+    auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
+    auto& up_spo_leader = up_spo_list.getLeader();
+    auto& dn_spo_leader = dn_spo_list.getLeader();
+
+    up_spo_leader.mw_evaluateVGLandDetRatioGrads(up_spo_list, P_list, iat,
+        invRow_ptr_list, up_phi_vgl_v, up_ratios, up_grads);
+    dn_spo_leader.mw_evaluateVGLandDetRatioGrads(dn_spo_list, P_list, iat,
+        invRow_ptr_list, dn_phi_vgl_v, dn_ratios, dn_grads);
+    for (int iw = 0; iw < nw; iw++) {
+        typename ParticleSetT<T>::Scalar_t s = P_list[iw].activeSpin(iat);
+        spins[iw] = s;
+        RealType coss = std::cos(s);
+        RealType sins = std::sin(s);
+
+        T eis(coss, sins);
+        T emis(coss, -sins);
+        T eye(0, 1.0);
+
+        ratios[iw] = eis * up_ratios[iw] + emis * dn_ratios[iw];
+        grads[iw] = (eis * up_grads[iw] * up_ratios[iw] +
+                        emis * dn_grads[iw] * dn_ratios[iw]) /
+            ratios[iw];
+        spingrads[iw] =
+            eye * (eis * up_ratios[iw] - emis * dn_ratios[iw]) / ratios[iw];
+    }
 
-  logpsi_work_up.resize(nelec, this->OrbitalSetSize);
-  logpsi_work_down.resize(nelec, this->OrbitalSetSize);
+    auto* spins_ptr = spins.data();
+    // This data lives on the device
+    auto* phi_vgl_ptr = phi_vgl_v.data();
+    auto* up_phi_vgl_ptr = up_phi_vgl_v.data();
+    auto* dn_phi_vgl_ptr = dn_phi_vgl_v.data();
+    PRAGMA_OFFLOAD("omp target teams distribute map(to:spins_ptr[0:nw])")
+    for (int iw = 0; iw < nw; iw++) {
+        RealType c, s;
+        omptarget::sincos(spins_ptr[iw], &s, &c);
+        T eis(c, s), emis(c, -s);
+        PRAGMA_OFFLOAD("omp parallel for collapse(2)")
+        for (int idim = 0; idim < QMCTraits::DIM_VGL; idim++)
+            for (int iorb = 0; iorb < norb_requested; iorb++) {
+                auto offset =
+                    idim * nw * norb_requested + iw * norb_requested + iorb;
+                phi_vgl_ptr[offset] = eis * up_phi_vgl_ptr[offset] +
+                    emis * dn_phi_vgl_ptr[offset];
+            }
+    }
+}
 
-  dlogpsi_work_up.resize(nelec, this->OrbitalSetSize);
-  dlogpsi_work_down.resize(nelec, this->OrbitalSetSize);
+template <class T>
+void
+SpinorSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
+    int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet)
+{
+    const IndexType num_elec = P.getTotalNum(); // row count of work matrices
 
-  d2logpsi_work_up.resize(nelec, this->OrbitalSetSize);
-  d2logpsi_work_down.resize(nelec, this->OrbitalSetSize);
+    logpsi_work_up.resize(num_elec, this->OrbitalSetSize);
+    logpsi_work_down.resize(num_elec, this->OrbitalSetSize);
 
-  spo_up->evaluate_notranspose(P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up);
-  spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down, dlogpsi_work_down, d2logpsi_work_down);
+    dlogpsi_work_up.resize(num_elec, this->OrbitalSetSize);
+    dlogpsi_work_down.resize(num_elec, this->OrbitalSetSize);
 
+    d2logpsi_work_up.resize(num_elec, this->OrbitalSetSize);
+    d2logpsi_work_down.resize(num_elec, this->OrbitalSetSize);
 
-  for (int iat = 0; iat < nelec; iat++)
-  {
-    ParticleSet::Scalar_t s = P.activeSpin(iat);
+    spo_up->evaluate_notranspose(
+        P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up);
+    spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down,
+        dlogpsi_work_down, d2logpsi_work_down);
 
-    RealType coss(0.0), sins(0.0);
+    for (int ielec = 0; ielec < num_elec; ++ielec) {
+        const typename ParticleSetT<T>::Scalar_t spin = P.activeSpin(ielec);
 
-    coss = std::cos(s);
-    sins = std::sin(s);
+        // Phase factors: eis = exp(+i*spin), emis = exp(-i*spin).
 
-    T eis(coss, sins);
-    T emis(coss, -sins);
+        const RealType cos_s = std::cos(spin);
+        const RealType sin_s = std::sin(spin);
 
-    for (int no = 0; no < this->OrbitalSetSize; no++)
-    {
-      logdet(iat, no)   = eis * logpsi_work_up(iat, no) + emis * logpsi_work_down(iat, no);
-      dlogdet(iat, no)  = eis * dlogpsi_work_up(iat, no) + emis * dlogpsi_work_down(iat, no);
-      d2logdet(iat, no) = eis * d2logpsi_work_up(iat, no) + emis * d2logpsi_work_down(iat, no);
+        const T eis(cos_s, sin_s);
+        const T emis(cos_s, -sin_s);
+
+        for (int iorb = 0; iorb < this->OrbitalSetSize; ++iorb) {
+            logdet(ielec, iorb) = eis * logpsi_work_up(ielec, iorb) +
+                emis * logpsi_work_down(ielec, iorb);
+            dlogdet(ielec, iorb) = eis * dlogpsi_work_up(ielec, iorb) +
+                emis * dlogpsi_work_down(ielec, iorb);
+            d2logdet(ielec, iorb) = eis * d2logpsi_work_up(ielec, iorb) +
+                emis * d2logpsi_work_down(ielec, iorb);
+        }
     }
-  }
 }
 
-template<class T>
-void SpinorSetT<T>::mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                            const RefVectorWithLeader<ParticleSet>& P_list,
-                                            int first,
-                                            int last,
-                                            const RefVector<ValueMatrix>& logdet_list,
-                                            const RefVector<GradMatrix>& dlogdet_list,
-                                            const RefVector<ValueMatrix>& d2logdet_list) const
+template <class T>
+void
+SpinorSetT<T>::mw_evaluate_notranspose(
+    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int first, int last,
+    const RefVector<ValueMatrix>& logdet_list,
+    const RefVector<GradMatrix>& dlogdet_list,
+    const RefVector<ValueMatrix>& d2logdet_list) const
 {
-  auto& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
-  auto& P_leader   = P_list.getLeader();
-  assert(this == &spo_leader);
-
-  IndexType nw    = spo_list.size();
-  IndexType nelec = P_leader.getTotalNum();
-
-  auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
-  auto& up_spo_leader             = up_spo_list.getLeader();
-  auto& dn_spo_leader             = dn_spo_list.getLeader();
-
-  std::vector<ValueMatrix> mw_up_logdet, mw_dn_logdet;
-  std::vector<GradMatrix> mw_up_dlogdet, mw_dn_dlogdet;
-  std::vector<ValueMatrix> mw_up_d2logdet, mw_dn_d2logdet;
-  mw_up_logdet.reserve(nw);
-  mw_dn_logdet.reserve(nw);
-  mw_up_dlogdet.reserve(nw);
-  mw_dn_dlogdet.reserve(nw);
-  mw_up_d2logdet.reserve(nw);
-  mw_dn_d2logdet.reserve(nw);
-
-  RefVector<ValueMatrix> up_logdet_list, dn_logdet_list;
-  RefVector<GradMatrix> up_dlogdet_list, dn_dlogdet_list;
-  RefVector<ValueMatrix> up_d2logdet_list, dn_d2logdet_list;
-  up_logdet_list.reserve(nw);
-  dn_logdet_list.reserve(nw);
-  up_dlogdet_list.reserve(nw);
-  dn_dlogdet_list.reserve(nw);
-  up_d2logdet_list.reserve(nw);
-  dn_d2logdet_list.reserve(nw);
-
-  ValueMatrix tmp_val_mat(nelec, this->OrbitalSetSize);
-  GradMatrix tmp_grad_mat(nelec, this->OrbitalSetSize);
-  for (int iw = 0; iw < nw; iw++)
-  {
-    mw_up_logdet.emplace_back(tmp_val_mat);
-    up_logdet_list.emplace_back(mw_up_logdet.back());
-    mw_dn_logdet.emplace_back(tmp_val_mat);
-    dn_logdet_list.emplace_back(mw_dn_logdet.back());
-
-    mw_up_dlogdet.emplace_back(tmp_grad_mat);
-    up_dlogdet_list.emplace_back(mw_up_dlogdet.back());
-    mw_dn_dlogdet.emplace_back(tmp_grad_mat);
-    dn_dlogdet_list.emplace_back(mw_dn_dlogdet.back());
-
-    mw_up_d2logdet.emplace_back(tmp_val_mat);
-    up_d2logdet_list.emplace_back(mw_up_d2logdet.back());
-    mw_dn_d2logdet.emplace_back(tmp_val_mat);
-    dn_d2logdet_list.emplace_back(mw_dn_d2logdet.back());
-  }
-
-  up_spo_leader.mw_evaluate_notranspose(up_spo_list, P_list, first, last, up_logdet_list, up_dlogdet_list,
-                                        up_d2logdet_list);
-  dn_spo_leader.mw_evaluate_notranspose(dn_spo_list, P_list, first, last, dn_logdet_list, dn_dlogdet_list,
-                                        dn_d2logdet_list);
-
-  for (int iw = 0; iw < nw; iw++)
-    for (int iat = 0; iat < nelec; iat++)
-    {
-      ParticleSet::Scalar_t s = P_list[iw].activeSpin(iat);
-      RealType coss           = std::cos(s);
-      RealType sins           = std::sin(s);
-      T eis(coss, sins);
-      T emis(coss, -sins);
-
-      for (int no = 0; no < this->OrbitalSetSize; no++)
-      {
-        logdet_list[iw].get()(iat, no) =
-            eis * up_logdet_list[iw].get()(iat, no) + emis * dn_logdet_list[iw].get()(iat, no);
-        dlogdet_list[iw].get()(iat, no) =
-            eis * up_dlogdet_list[iw].get()(iat, no) + emis * dn_dlogdet_list[iw].get()(iat, no);
-        d2logdet_list[iw].get()(iat, no) =
-            eis * up_d2logdet_list[iw].get()(iat, no) + emis * dn_d2logdet_list[iw].get()(iat, no);
-      }
+    auto& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
+    auto& P_leader = P_list.getLeader();
+    assert(this == &spo_leader);
+
+    const IndexType num_walkers = spo_list.size();
+    const IndexType num_elec = P_leader.getTotalNum();
+
+    auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
+    auto& up_spo_leader = up_spo_list.getLeader();
+    auto& dn_spo_leader = dn_spo_list.getLeader();
+    // Owning scratch storage; reserve() keeps the references below valid.
+    std::vector<ValueMatrix> up_logdet_store, dn_logdet_store;
+    up_logdet_store.reserve(num_walkers);
+    dn_logdet_store.reserve(num_walkers);
+    std::vector<GradMatrix> up_dlogdet_store, dn_dlogdet_store;
+    up_dlogdet_store.reserve(num_walkers);
+    dn_dlogdet_store.reserve(num_walkers);
+    std::vector<ValueMatrix> up_d2logdet_store, dn_d2logdet_store;
+    up_d2logdet_store.reserve(num_walkers);
+    dn_d2logdet_store.reserve(num_walkers);
+    // Reference views over the storage, passed to the component SPO sets.
+    RefVector<ValueMatrix> up_logdet_list, dn_logdet_list;
+    up_logdet_list.reserve(num_walkers);
+    dn_logdet_list.reserve(num_walkers);
+    RefVector<GradMatrix> up_dlogdet_list, dn_dlogdet_list;
+    up_dlogdet_list.reserve(num_walkers);
+    dn_dlogdet_list.reserve(num_walkers);
+    RefVector<ValueMatrix> up_d2logdet_list, dn_d2logdet_list;
+    up_d2logdet_list.reserve(num_walkers);
+    dn_d2logdet_list.reserve(num_walkers);
+
+    ValueMatrix value_template(num_elec, this->OrbitalSetSize);
+    GradMatrix grad_template(num_elec, this->OrbitalSetSize);
+    for (int iw = 0; iw < num_walkers; iw++) {
+        // emplace_back (C++17) returns the element it just built, so the
+        // reference view can be bound in the same statement.
+        up_logdet_list.emplace_back(
+            up_logdet_store.emplace_back(value_template));
+        dn_logdet_list.emplace_back(
+            dn_logdet_store.emplace_back(value_template));
+        up_dlogdet_list.emplace_back(
+            up_dlogdet_store.emplace_back(grad_template));
+        dn_dlogdet_list.emplace_back(
+            dn_dlogdet_store.emplace_back(grad_template));
+        up_d2logdet_list.emplace_back(
+            up_d2logdet_store.emplace_back(value_template));
+        dn_d2logdet_list.emplace_back(
+            dn_d2logdet_store.emplace_back(value_template));
     }
+
+    up_spo_leader.mw_evaluate_notranspose(up_spo_list, P_list, first, last,
+        up_logdet_list, up_dlogdet_list, up_d2logdet_list);
+    dn_spo_leader.mw_evaluate_notranspose(dn_spo_list, P_list, first, last,
+        dn_logdet_list, dn_dlogdet_list, dn_d2logdet_list);
+
+    // Spin-mix per walker and electron: out = eis * up + emis * dn.
+    for (int iw = 0; iw < num_walkers; ++iw)
+        for (int ielec = 0; ielec < num_elec; ++ielec) {
+            const typename ParticleSetT<T>::Scalar_t spin =
+                P_list[iw].activeSpin(ielec);
+            const RealType cos_s = std::cos(spin), sin_s = std::sin(spin);
+            const T eis(cos_s, sin_s), emis(cos_s, -sin_s);
+
+            for (int iorb = 0; iorb < this->OrbitalSetSize; ++iorb) {
+                logdet_list[iw].get()(ielec, iorb) =
+                    eis * up_logdet_list[iw].get()(ielec, iorb) +
+                    emis * dn_logdet_list[iw].get()(ielec, iorb);
+                dlogdet_list[iw].get()(ielec, iorb) =
+                    eis * up_dlogdet_list[iw].get()(ielec, iorb) +
+                    emis * dn_dlogdet_list[iw].get()(ielec, iorb);
+                d2logdet_list[iw].get()(ielec, iorb) =
+                    eis * up_d2logdet_list[iw].get()(ielec, iorb) +
+                    emis * dn_d2logdet_list[iw].get()(ielec, iorb);
+            }
+        }
 }
 
-template<class T>
-void SpinorSetT<T>::evaluate_notranspose_spin(const ParticleSet& P,
-                                              int first,
-                                              int last,
-                                              ValueMatrix& logdet,
-                                              GradMatrix& dlogdet,
-                                              ValueMatrix& d2logdet,
-                                              ValueMatrix& dspinlogdet)
+template <class T>
+void
+SpinorSetT<T>::evaluate_notranspose_spin(const ParticleSetT<T>& P, int first,
+    int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet,
+    ValueMatrix& dspinlogdet)
 {
-  IndexType nelec = P.getTotalNum();
-
-  logpsi_work_up.resize(nelec, this->OrbitalSetSize);
-  logpsi_work_down.resize(nelec, this->OrbitalSetSize);
+    const IndexType num_elec = P.getTotalNum(); // row count of work matrices
 
-  dlogpsi_work_up.resize(nelec, this->OrbitalSetSize);
-  dlogpsi_work_down.resize(nelec, this->OrbitalSetSize);
+    logpsi_work_up.resize(num_elec, this->OrbitalSetSize);
+    logpsi_work_down.resize(num_elec, this->OrbitalSetSize);
 
-  d2logpsi_work_up.resize(nelec, this->OrbitalSetSize);
-  d2logpsi_work_down.resize(nelec, this->OrbitalSetSize);
+    dlogpsi_work_up.resize(num_elec, this->OrbitalSetSize);
+    dlogpsi_work_down.resize(num_elec, this->OrbitalSetSize);
 
-  spo_up->evaluate_notranspose(P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up);
-  spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down, dlogpsi_work_down, d2logpsi_work_down);
+    d2logpsi_work_up.resize(num_elec, this->OrbitalSetSize);
+    d2logpsi_work_down.resize(num_elec, this->OrbitalSetSize);
 
+    spo_up->evaluate_notranspose(
+        P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up);
+    spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down,
+        dlogpsi_work_down, d2logpsi_work_down);
 
-  for (int iat = 0; iat < nelec; iat++)
-  {
-    ParticleSet::Scalar_t s = P.activeSpin(iat);
+    for (int ielec = 0; ielec < num_elec; ++ielec) {
+        const typename ParticleSetT<T>::Scalar_t spin = P.activeSpin(ielec);
 
-    RealType coss(0.0), sins(0.0);
+        // Phase factors eis = exp(+i*spin), emis = exp(-i*spin); eye = i.
 
-    coss = std::cos(s);
-    sins = std::sin(s);
+        const RealType cos_s = std::cos(spin);
+        const RealType sin_s = std::sin(spin);
 
-    T eis(coss, sins);
-    T emis(coss, -sins);
-    T eye(0, 1.0);
+        const T eis(cos_s, sin_s);
+        const T emis(cos_s, -sin_s);
+        const T eye(0, 1.0);
 
-    for (int no = 0; no < this->OrbitalSetSize; no++)
-    {
-      logdet(iat, no)      = eis * logpsi_work_up(iat, no) + emis * logpsi_work_down(iat, no);
-      dlogdet(iat, no)     = eis * dlogpsi_work_up(iat, no) + emis * dlogpsi_work_down(iat, no);
-      d2logdet(iat, no)    = eis * d2logpsi_work_up(iat, no) + emis * d2logpsi_work_down(iat, no);
-      dspinlogdet(iat, no) = eye * (eis * logpsi_work_up(iat, no) - emis * logpsi_work_down(iat, no));
+        for (int iorb = 0; iorb < this->OrbitalSetSize; ++iorb) {
+            logdet(ielec, iorb) = eis * logpsi_work_up(ielec, iorb) +
+                emis * logpsi_work_down(ielec, iorb);
+            dlogdet(ielec, iorb) = eis * dlogpsi_work_up(ielec, iorb) +
+                emis * dlogpsi_work_down(ielec, iorb);
+            d2logdet(ielec, iorb) = eis * d2logpsi_work_up(ielec, iorb) +
+                emis * d2logpsi_work_down(ielec, iorb);
+            dspinlogdet(ielec, iorb) = eye *
+                (eis * logpsi_work_up(ielec, iorb) -
+                    emis * logpsi_work_down(ielec, iorb));
+        }
     }
-  }
 }
 
-template<class T>
-void SpinorSetT<T>::evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi)
+template <class T>
+void
+SpinorSetT<T>::evaluate_spin(
+    const ParticleSetT<T>& P, int iat, ValueVector& psi, ValueVector& dpsi)
 {
-  psi_work_up   = 0.0;
-  psi_work_down = 0.0;
+    psi_work_up = 0.0; // clear, then fill with the up-channel values
+    spo_up->evaluateValue(P, iat, psi_work_up);
 
-  spo_up->evaluateValue(P, iat, psi_work_up);
-  spo_dn->evaluateValue(P, iat, psi_work_down);
+    psi_work_down = 0.0; // same for the down channel
+    spo_dn->evaluateValue(P, iat, psi_work_down);
 
-  ParticleSet::Scalar_t s = P.activeSpin(iat);
+    const typename ParticleSetT<T>::Scalar_t spin = P.activeSpin(iat);
 
-  RealType coss(0.0), sins(0.0);
+    // Phase factors eis = exp(+i*spin), emis = exp(-i*spin); eye = i.
 
-  coss = std::cos(s);
-  sins = std::sin(s);
+    const RealType cos_s = std::cos(spin);
+    const RealType sin_s = std::sin(spin);
 
-  T eis(coss, sins);
-  T emis(coss, -sins);
-  T eye(0, 1.0);
+    const T eis(cos_s, sin_s);
+    const T emis(cos_s, -sin_s);
+    const T eye(0, 1.0);
 
-  psi  = eis * psi_work_up + emis * psi_work_down;
-  dpsi = eye * (eis * psi_work_up - emis * psi_work_down);
+    psi = eis * psi_work_up + emis * psi_work_down;
+    dpsi = eye * (eis * psi_work_up - emis * psi_work_down);
 }
 
-template<class T>
-void SpinorSetT<T>::evaluateGradSource(const ParticleSet& P,
-                                       int first,
-                                       int last,
-                                       const ParticleSet& source,
-                                       int iat_src,
-                                       GradMatrix& gradphi)
+template <class T>
+void
+SpinorSetT<T>::evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
+    const ParticleSetT<T>& source, int iat_src, GradMatrix& gradphi)
 {
-  IndexType nelec = P.getTotalNum();
-
-  GradMatrix gradphi_up(nelec, this->OrbitalSetSize);
-  GradMatrix gradphi_dn(nelec, this->OrbitalSetSize);
-  spo_up->evaluateGradSource(P, first, last, source, iat_src, gradphi_up);
-  spo_dn->evaluateGradSource(P, first, last, source, iat_src, gradphi_dn);
-
-  for (int iat = 0; iat < nelec; iat++)
-  {
-    ParticleSet::Scalar_t s = P.activeSpin(iat);
-    RealType coss           = std::cos(s);
-    RealType sins           = std::sin(s);
-    T eis(coss, sins);
-    T emis(coss, -sins);
-    for (int imo = 0; imo < this->OrbitalSetSize; imo++)
-      gradphi(iat, imo) = gradphi_up(iat, imo) * eis + gradphi_dn(iat, imo) * emis;
-  }
+    const IndexType num_elec = P.getTotalNum();
+    // Channel gradients go into temporaries, then are spin-mixed below.
+    GradMatrix up_grad(num_elec, this->OrbitalSetSize);
+    GradMatrix dn_grad(num_elec, this->OrbitalSetSize);
+    spo_up->evaluateGradSource(P, first, last, source, iat_src, up_grad);
+    spo_dn->evaluateGradSource(P, first, last, source, iat_src, dn_grad);
+
+    for (int ielec = 0; ielec < num_elec; ++ielec) {
+        const typename ParticleSetT<T>::Scalar_t spin = P.activeSpin(ielec);
+        const RealType cos_s = std::cos(spin);
+        const RealType sin_s = std::sin(spin);
+        const T eis(cos_s, sin_s);   // exp(+i*spin)
+        const T emis(cos_s, -sin_s); // exp(-i*spin)
+        for (int iorb = 0; iorb < this->OrbitalSetSize; ++iorb)
+            gradphi(ielec, iorb) =
+                up_grad(ielec, iorb) * eis + dn_grad(ielec, iorb) * emis;
+    }
 }
 
-template<class T>
-std::unique_ptr<SPOSetT<T>> SpinorSetT<T>::makeClone() const
+template <class T>
+std::unique_ptr<SPOSetT<T>>
+SpinorSetT<T>::makeClone() const
 {
-  auto myclone = std::make_unique<SpinorSetT<T>>(this->my_name_);
-  std::unique_ptr<SPOSetT<T>> cloneup(spo_up->makeClone());
-  std::unique_ptr<SPOSetT<T>> clonedn(spo_dn->makeClone());
-  myclone->set_spos(std::move(cloneup), std::move(clonedn));
-  return myclone;
+    auto spinor_copy = std::make_unique<SpinorSetT<T>>(this->my_name_);
+    auto up_copy = spo_up->makeClone(); // each channel is cloned on its own
+    auto dn_copy = spo_dn->makeClone();
+    spinor_copy->set_spos(std::move(up_copy), std::move(dn_copy));
+    return spinor_copy;
 }
 
-template<class T>
-void SpinorSetT<T>::createResource(ResourceCollection& collection) const
+template <class T>
+void
+SpinorSetT<T>::createResource(ResourceCollection& collection) const
 {
-  spo_up->createResource(collection);
-  spo_dn->createResource(collection);
-  auto index = collection.addResource(std::make_unique<SpinorSetMultiWalkerResource>());
+    spo_up->createResource(collection);
+    spo_dn->createResource(collection);
+    // Added after both channels, matching the order acquireResource() lends.
+    collection.addResource(std::make_unique<SpinorSetMultiWalkerResource>());
 }
 
-template<class T>
-void SpinorSetT<T>::acquireResource(ResourceCollection& collection,
-                                    const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+template <class T>
+void
+SpinorSetT<T>::acquireResource(ResourceCollection& collection,
+    const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
 {
-  auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
-  auto& spo_leader                = spo_list.template getCastedLeader<SpinorSetT<T>>();
-  auto& up_spo_leader             = up_spo_list.getLeader();
-  auto& dn_spo_leader             = dn_spo_list.getLeader();
-  up_spo_leader.acquireResource(collection, up_spo_list);
-  dn_spo_leader.acquireResource(collection, dn_spo_list);
-  spo_leader.mw_res_handle_ = collection.lendResource<SpinorSetMultiWalkerResource>();
+    // Lend order: up channel, down channel, then this spinor set's resource.
+    auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
+    up_spo_list.getLeader().acquireResource(collection, up_spo_list);
+    dn_spo_list.getLeader().acquireResource(collection, dn_spo_list);
+
+    auto& spinor_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
+    spinor_leader.mw_res_handle_ =
+        collection.lendResource<SpinorSetMultiWalkerResource>();
 }
 
-template<class T>
-void SpinorSetT<T>::releaseResource(ResourceCollection& collection,
-                                    const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+template <class T>
+void
+SpinorSetT<T>::releaseResource(ResourceCollection& collection,
+    const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
 {
-  auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
-  auto& spo_leader                = spo_list.template getCastedLeader<SpinorSetT<T>>();
-  auto& up_spo_leader             = up_spo_list.getLeader();
-  auto& dn_spo_leader             = dn_spo_list.getLeader();
-  up_spo_leader.releaseResource(collection, up_spo_list);
-  dn_spo_leader.releaseResource(collection, dn_spo_list);
-  collection.takebackResource(spo_leader.mw_res_handle_);
+    // Release order: up channel, down channel, then this set's own handle.
+    auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
+    up_spo_list.getLeader().releaseResource(collection, up_spo_list);
+    dn_spo_list.getLeader().releaseResource(collection, dn_spo_list);
+
+    auto& spinor_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
+    collection.takebackResource(spinor_leader.mw_res_handle_);
 }
 
-template<class T>
-std::pair<RefVectorWithLeader<SPOSetT<T>>, RefVectorWithLeader<SPOSetT<T>>> SpinorSetT<T>::extractSpinComponentRefList(
+template <class T>
+std::pair<RefVectorWithLeader<SPOSetT<T>>, RefVectorWithLeader<SPOSetT<T>>>
+SpinorSetT<T>::extractSpinComponentRefList(
     const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
 {
-  SpinorSetT<T>& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
-  IndexType nw              = spo_list.size();
-  SPOSetT<T>& up_spo_leader = *(spo_leader.spo_up);
-  SPOSetT<T>& dn_spo_leader = *(spo_leader.spo_dn);
-  RefVectorWithLeader<SPOSetT<T>> up_spo_list(up_spo_leader);
-  RefVectorWithLeader<SPOSetT<T>> dn_spo_list(dn_spo_leader);
-  up_spo_list.reserve(nw);
-  dn_spo_list.reserve(nw);
-  for (int iw = 0; iw < nw; iw++)
-  {
-    SpinorSetT<T>& spinor = spo_list.template getCastedElement<SpinorSetT<T>>(iw);
-    up_spo_list.emplace_back(*(spinor.spo_up));
-    dn_spo_list.emplace_back(*(spinor.spo_dn));
-  }
-  return std::make_pair(up_spo_list, dn_spo_list);
+    // Split the spinor batch into per-walker up/down component batches.
+    SpinorSetT<T>& spo_leader =
+        spo_list.template getCastedLeader<SpinorSetT<T>>();
+    const IndexType nw = spo_list.size();
+    RefVectorWithLeader<SPOSetT<T>> up_spo_list(*(spo_leader.spo_up));
+    RefVectorWithLeader<SPOSetT<T>> dn_spo_list(*(spo_leader.spo_dn));
+    up_spo_list.reserve(nw);
+    dn_spo_list.reserve(nw);
+    for (int iw = 0; iw < nw; iw++) {
+        SpinorSetT<T>& spinor =
+            spo_list.template getCastedElement<SpinorSetT<T>>(iw);
+        up_spo_list.emplace_back(*(spinor.spo_up));
+        dn_spo_list.emplace_back(*(spinor.spo_dn));
+    }
+    // Move the local lists out instead of copying both reference vectors.
+    return std::make_pair(std::move(up_spo_list), std::move(dn_spo_list));
 }
 
 template class SpinorSetT<std::complex<double>>;
diff --git a/src/QMCWaveFunctions/SpinorSetT.h b/src/QMCWaveFunctions/SpinorSetT.h
index fe50a256fe..08990e350b 100644
--- a/src/QMCWaveFunctions/SpinorSetT.h
+++ b/src/QMCWaveFunctions/SpinorSetT.h
@@ -1,13 +1,16 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2022 QMCPACK developers
 //
-// File developed by: Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories
-//                    Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories
+// File developed by: Raymond Clay III, rclay@sandia.gov, Sandia National
+// Laboratories
+//                    Cody A. Melton, cmelton@sandia.gov, Sandia National
+//                    Laboratories
 //
-// File created by:  Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories
+// File created by:  Raymond Clay III, rclay@sandia.gov, Sandia National
+// Laboratories
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_SPINORSETT_H
@@ -21,208 +24,244 @@ namespace qmcplusplus
 /** Class for Melton & Mitas style Spinors.
  *
  */
-template<class T>
+template <class T>
 class SpinorSetT : public SPOSetT<T>
 {
 public:
-  using ValueMatrix       = typename SPOSetT<T>::ValueMatrix;
-  using ValueVector       = typename SPOSetT<T>::ValueVector;
-  using GradMatrix        = typename SPOSetT<T>::GradMatrix;
-  using GradType          = typename SPOSetT<T>::GradType;
-  using GradVector        = typename SPOSetT<T>::GradVector;
-  using OffloadMWVGLArray = Array<T, 3, OffloadPinnedAllocator<T>>; // [VGL, walker, Orbs]
-  //using OffloadMWVGLArray = typename SPOSetT<T>::template OffloadMWCGLArray;
-  template<typename DT>
-  using OffloadMatrix = typename SPOSetT<T>::template OffloadMatrix<DT>;
-  using RealType      = typename SPOSetT<T>::RealType;
-  using IndexType     = OHMMS_INDEXTYPE;
-
-  /** constructor */
-  SpinorSetT(const std::string& my_name);
-  ~SpinorSetT() override;
-
-  std::string getClassName() const override { return "SpinorSet"; }
-  bool isOptimizable() const override { return spo_up->isOptimizable() || spo_dn->isOptimizable(); }
-  bool isOMPoffload() const override { return spo_up->isOMPoffload() || spo_dn->isOMPoffload(); }
-  bool hasIonDerivs() const override { return spo_up->hasIonDerivs() || spo_dn->hasIonDerivs(); }
-
-  //This class is initialized by separately building the up and down channels of the spinor set and
-  //then registering them.
-  void set_spos(std::unique_ptr<SPOSetT<T>>&& up, std::unique_ptr<SPOSetT<T>>&& dn);
-
-  /** set the OrbitalSetSize
-   * @param norbs number of single-particle orbitals
-   */
-  void setOrbitalSetSize(int norbs) override;
-
-  /** evaluate the values of this spinor set
-   * @param P current ParticleSet
-   * @param iat active particle
-   * @param psi values of the SPO
-   */
-  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override;
-
-  /** evaluate the values, gradients and laplacians of this single-particle orbital set
-   * @param P current ParticleSet
-   * @param iat active particle
-   * @param psi values of the SPO
-   * @param dpsi gradients of the SPO
-   * @param d2psi laplacians of the SPO
-   */
-  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override;
-
-  /** evaluate the values, gradients and laplacians of this single-particle orbital set
-   * @param P current ParticleSet
-   * @param iat active particle
-   * @param psi values of the SPO
-   * @param dpsi gradients of the SPO
-   * @param d2psi laplacians of the SPO
-   * @param dspin spin gradient of the SPO
-   */
-  void evaluateVGL_spin(const ParticleSet& P,
-                        int iat,
-                        ValueVector& psi,
-                        GradVector& dpsi,
-                        ValueVector& d2psi,
-                        ValueVector& dspin) override;
-
-  /** evaluate the values, gradients and laplacians and spin gradient of this single-particle orbital sets of multiple walkers
-   * @param spo_list the list of SPOSet pointers in a walker batch
-   * @param P_list the list of ParticleSet pointers in a walker batch
-   * @param iat active particle
-   * @param psi_v_list the list of value vector pointers in a walker batch
-   * @param dpsi_v_list the list of gradient vector pointers in a walker batch
-   * @param d2psi_v_list the list of laplacian vector pointers in a walker batch
-   * @param mw_dspin dual matrix of spin gradients. nw x num_orbitals
-   */
-  void mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                              const RefVectorWithLeader<ParticleSet>& P_list,
-                              int iat,
-                              const RefVector<ValueVector>& psi_v_list,
-                              const RefVector<GradVector>& dpsi_v_list,
-                              const RefVector<ValueVector>& d2psi_v_list,
-                              OffloadMatrix<QMCTraits::ComplexType>& mw_dspin) const override;
-
-
-  /** evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio
-   *  and grads of multiple walkers. Device data of phi_vgl_v must be up-to-date upon return.
-   *  Includes spin gradients
-   * @param spo_list the list of SPOSet pointers in a walker batch
-   * @param P_list the list of ParticleSet pointers in a walker batch
-   * @param iat active particle
-   * @param phi_vgl_v orbital values, gradients and laplacians of all the walkers
-   * @param ratios, ratios of all walkers
-   * @param grads, spatial gradients of all walkers
-   * @param spingrads, spin gradients of all walkers
-   */
-  void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                                              const RefVectorWithLeader<ParticleSet>& P_list,
-                                              int iat,
-                                              const std::vector<const T*>& invRow_ptr_list,
-                                              OffloadMWVGLArray& phi_vgl_v,
-                                              std::vector<T>& ratios,
-                                              std::vector<GradType>& grads,
-                                              std::vector<T>& spingrads) const override;
-
-  /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles
-   * @param P current ParticleSet
-   * @param first starting index of the particles
-   * @param last ending index of the particles
-   * @param logdet determinant matrix to be inverted
-   * @param dlogdet gradients
-   * @param d2logdet laplacians
-   *
-   */
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            ValueMatrix& logdet,
-                            GradMatrix& dlogdet,
-                            ValueMatrix& d2logdet) override;
-
-  void mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-                               const RefVectorWithLeader<ParticleSet>& P_list,
-                               int first,
-                               int last,
-                               const RefVector<ValueMatrix>& logdet_list,
-                               const RefVector<GradMatrix>& dlogdet_list,
-                               const RefVector<ValueMatrix>& d2logdet_list) const override;
-
-  void evaluate_notranspose_spin(const ParticleSet& P,
-                                 int first,
-                                 int last,
-                                 ValueMatrix& logdet,
-                                 GradMatrix& dlogdet,
-                                 ValueMatrix& d2logdet,
-                                 ValueMatrix& dspinlogdet) override;
-  /** Evaluate the values, spin gradients, and spin laplacians of single particle spinors corresponding to electron iat.
-   *  @param P current particle set.
-   *  @param iat electron index.
-   *  @param spinor values.
-   *  @param spin gradient values. d/ds phi(r,s).
-   *
-   */
-  void evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi) override;
-
-  /** evaluate the gradients of this single-particle orbital
-   *  for [first,last) target particles with respect to the given source particle
-   * @param P current ParticleSet
-   * @param first starting index of the particles
-   * @param last ending index of the particles
-   * @param iat_src source particle index
-   * @param gradphi gradients
-   *
-   */
-  virtual void evaluateGradSource(const ParticleSet& P,
-                                  int first,
-                                  int last,
-                                  const ParticleSet& source,
-                                  int iat_src,
-                                  GradMatrix& gradphi) override;
-
-  std::unique_ptr<SPOSetT<T>> makeClone() const override;
-
-  void createResource(ResourceCollection& collection) const override;
-
-  void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const override;
-
-  void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const override;
-
-  /// check if the multi walker resource is owned. For testing only.
-  bool isResourceOwned() const { return bool(mw_res_handle_); }
+    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+    using ValueVector = typename SPOSetT<T>::ValueVector;
+    using GradMatrix = typename SPOSetT<T>::GradMatrix;
+    using GradType = typename SPOSetT<T>::GradType;
+    using GradVector = typename SPOSetT<T>::GradVector;
+    using OffloadMWVGLArray =
+        Array<T, 3, OffloadPinnedAllocator<T>>; // [VGL, walker, Orbs]
+    // using OffloadMWVGLArray = typename SPOSetT<T>::template
+    // OffloadMWCGLArray;
+    template <typename DT>
+    using OffloadMatrix = typename SPOSetT<T>::template OffloadMatrix<DT>;
+    using RealType = typename SPOSetT<T>::RealType;
+    using IndexType = OHMMS_INDEXTYPE;
+
+    /** constructor */
+    SpinorSetT(const std::string& my_name);
+    ~SpinorSetT() override;
+
+    /// Class name string reported by this SPO set.
+    std::string getClassName() const override
+    {
+        return std::string("SpinorSet");
+    }
+    /// True if the up or the down channel SPO set reports itself optimizable.
+    bool isOptimizable() const override
+    {
+        return spo_up->isOptimizable() || spo_dn->isOptimizable();
+    }
+    /// True if the up or the down channel SPO set reports OMP offload.
+    bool isOMPoffload() const override
+    {
+        return spo_up->isOMPoffload() || spo_dn->isOMPoffload();
+    }
+    /// True if the up or the down channel SPO set reports ion derivatives.
+    bool hasIonDerivs() const override
+    {
+        return spo_up->hasIonDerivs() || spo_dn->hasIonDerivs();
+    }
+
+    // This class is initialized by separately building the up and down channels
+    // of the spinor set and then registering them.
+    void
+    set_spos(
+        std::unique_ptr<SPOSetT<T>>&& up, std::unique_ptr<SPOSetT<T>>&& dn);
+
+    /** set the OrbitalSetSize
+     * @param norbs number of single-particle orbitals
+     */
+    void
+    setOrbitalSetSize(int norbs) override;
+
+    /** evaluate the values of this spinor set
+     * @param P current ParticleSet
+     * @param iat active particle
+     * @param psi values of the SPO
+     */
+    void
+    evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) override;
+
+    /** evaluate the values, gradients and laplacians of this single-particle
+     * orbital set
+     * @param P current ParticleSet
+     * @param iat active particle
+     * @param psi values of the SPO
+     * @param dpsi gradients of the SPO
+     * @param d2psi laplacians of the SPO
+     */
+    void
+    evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
+        GradVector& dpsi, ValueVector& d2psi) override;
+
+    /** evaluate the values, gradients and laplacians of this single-particle
+     * orbital set
+     * @param P current ParticleSet
+     * @param iat active particle
+     * @param psi values of the SPO
+     * @param dpsi gradients of the SPO
+     * @param d2psi laplacians of the SPO
+     * @param dspin spin gradient of the SPO
+     */
+    void
+    evaluateVGL_spin(const ParticleSetT<T>& P, int iat, ValueVector& psi,
+        GradVector& dpsi, ValueVector& d2psi, ValueVector& dspin) override;
+
+    /** evaluate the values, gradients and laplacians and spin gradient of this
+     * single-particle orbital sets of multiple walkers
+     * @param spo_list the list of SPOSet pointers in a walker batch
+     * @param P_list the list of ParticleSet pointers in a walker batch
+     * @param iat active particle
+     * @param psi_v_list the list of value vector pointers in a walker batch
+     * @param dpsi_v_list the list of gradient vector pointers in a walker batch
+     * @param d2psi_v_list the list of laplacian vector pointers in a walker
+     * batch
+     * @param mw_dspin dual matrix of spin gradients. nw x num_orbitals
+     */
+    void
+    mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
+        const RefVector<ValueVector>& psi_v_list,
+        const RefVector<GradVector>& dpsi_v_list,
+        const RefVector<ValueVector>& d2psi_v_list,
+        OffloadMatrix<QMCTraits::ComplexType>& mw_dspin) const override;
+
+    /** evaluate the values, gradients and laplacians of this single-particle
+     * orbital sets and determinant ratio and grads of multiple walkers. Device
+     * data of phi_vgl_v must be up-to-date upon return. Includes spin gradients
+     * @param spo_list the list of SPOSet pointers in a walker batch
+     * @param P_list the list of ParticleSet pointers in a walker batch
+     * @param iat active particle
+     * @param phi_vgl_v orbital values, gradients and laplacians of all the
+     * walkers
+     * @param ratios ratios of all walkers
+     * @param grads spatial gradients of all walkers
+     * @param spingrads spin gradients of all walkers
+     */
+    void
+    mw_evaluateVGLandDetRatioGradsWithSpin(
+        const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
+        const std::vector<const T*>& invRow_ptr_list,
+        OffloadMWVGLArray& phi_vgl_v, std::vector<T>& ratios,
+        std::vector<GradType>& grads, std::vector<T>& spingrads) const override;
+
+    /** evaluate the values, gradients and laplacians of this single-particle
+     * orbital for [first,last) particles
+     * @param P current ParticleSet
+     * @param first starting index of the particles
+     * @param last ending index of the particles
+     * @param logdet determinant matrix to be inverted
+     * @param dlogdet gradients
+     * @param d2logdet laplacians
+     *
+     */
+    void
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+        ValueMatrix& logdet, GradMatrix& dlogdet,
+        ValueMatrix& d2logdet) override;
+
+    void
+    mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int first, int last,
+        const RefVector<ValueMatrix>& logdet_list,
+        const RefVector<GradMatrix>& dlogdet_list,
+        const RefVector<ValueMatrix>& d2logdet_list) const override;
+
+    void
+    evaluate_notranspose_spin(const ParticleSetT<T>& P, int first, int last,
+        ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet,
+        ValueMatrix& dspinlogdet) override;
+    /** Evaluate the values, spin gradients, and spin laplacians of single
+     * particle spinors corresponding to electron iat.
+     *  @param P current particle set.
+     *  @param iat electron index.
+     *  @param psi spinor values.
+     *  @param dpsi spin gradient values. d/ds phi(r,s).
+     *
+     */
+    void
+    evaluate_spin(const ParticleSetT<T>& P, int iat, ValueVector& psi,
+        ValueVector& dpsi) override;
+
+    /** evaluate the gradients of this single-particle orbital for
+     * [first,last) target particles with respect to the given source particle
+     * @param P current ParticleSet
+     * @param first starting index of the particles
+     * @param last ending index of the particles
+     * @param source ParticleSet of the source particle (presumably iat_src
+     *        indexes into it -- confirm against the implementation)
+     * @param iat_src source particle index
+     * @param gradphi gradients
+     */
+    void
+    evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
+        const ParticleSetT<T>& source, int iat_src,
+        GradMatrix& gradphi) override;
+
+    std::unique_ptr<SPOSetT<T>>
+    makeClone() const override;
+
+    void
+    createResource(ResourceCollection& collection) const override;
+
+    void
+    acquireResource(ResourceCollection& collection,
+        const RefVectorWithLeader<SPOSetT<T>>& spo_list) const override;
+
+    void
+    releaseResource(ResourceCollection& collection,
+        const RefVectorWithLeader<SPOSetT<T>>& spo_list) const override;
+
+    /// Check whether the multi-walker resource is owned, i.e. whether the
+    /// resource handle converts to true. For testing only.
+    bool isResourceOwned() const
+    {
+        return static_cast<bool>(mw_res_handle_);
+    }
 
 private:
-  struct SpinorSetMultiWalkerResource;
-  ResourceHandle<SpinorSetMultiWalkerResource> mw_res_handle_;
-
-  std::pair<RefVectorWithLeader<SPOSetT<T>>, RefVectorWithLeader<SPOSetT<T>>> extractSpinComponentRefList(
-      const RefVectorWithLeader<SPOSetT<T>>& spo_list) const;
-
-  //Sposet for the up and down channels of our spinors.
-  std::unique_ptr<SPOSetT<T>> spo_up;
-  std::unique_ptr<SPOSetT<T>> spo_dn;
-
-  //temporary arrays for holding the values of the up and down channels respectively.
-  ValueVector psi_work_up;
-  ValueVector psi_work_down;
-
-  //temporary arrays for holding the gradients of the up and down channels respectively.
-  GradVector dpsi_work_up;
-  GradVector dpsi_work_down;
-
-  //temporary arrays for holding the laplacians of the up and down channels respectively.
-  ValueVector d2psi_work_up;
-  ValueVector d2psi_work_down;
-
-  //Same as above, but these are the full matrices containing all spinor/particle combinations.
-  ValueMatrix logpsi_work_up;
-  ValueMatrix logpsi_work_down;
-
-  GradMatrix dlogpsi_work_up;
-  GradMatrix dlogpsi_work_down;
-
-  ValueMatrix d2logpsi_work_up;
-  ValueMatrix d2logpsi_work_down;
+    struct SpinorSetMultiWalkerResource;
+    ResourceHandle<SpinorSetMultiWalkerResource> mw_res_handle_;
+
+    std::pair<RefVectorWithLeader<SPOSetT<T>>, RefVectorWithLeader<SPOSetT<T>>>
+    extractSpinComponentRefList(
+        const RefVectorWithLeader<SPOSetT<T>>& spo_list) const;
+
+    // Sposet for the up and down channels of our spinors.
+    std::unique_ptr<SPOSetT<T>> spo_up;
+    std::unique_ptr<SPOSetT<T>> spo_dn;
+
+    // temporary arrays for holding the values of the up and down channels
+    // respectively.
+    ValueVector psi_work_up;
+    ValueVector psi_work_down;
+
+    // temporary arrays for holding the gradients of the up and down channels
+    // respectively.
+    GradVector dpsi_work_up;
+    GradVector dpsi_work_down;
+
+    // temporary arrays for holding the laplacians of the up and down channels
+    // respectively.
+    ValueVector d2psi_work_up;
+    ValueVector d2psi_work_down;
+
+    // Same as above, but these are the full matrices containing all
+    // spinor/particle combinations.
+    ValueMatrix logpsi_work_up;
+    ValueMatrix logpsi_work_down;
+
+    GradMatrix dlogpsi_work_up;
+    GradMatrix dlogpsi_work_down;
+
+    ValueMatrix d2logpsi_work_up;
+    ValueMatrix d2logpsi_work_down;
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/VariableSetT.cpp b/src/QMCWaveFunctions/VariableSetT.cpp
new file mode 100644
index 0000000000..064ac26a13
--- /dev/null
+++ b/src/QMCWaveFunctions/VariableSetT.cpp
@@ -0,0 +1,346 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of
+//                    Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of
+//                    Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "VariableSetT.h"
+
+#include "Host/sysutil.h"
+#include "io/hdf/hdf_archive.h"
+
+#include <algorithm>
+#include <iomanip>
+#include <ios>
+#include <map>
+#include <stdexcept>
+
+using std::setw;
+
+namespace optimize
+{
+/// Remove every stored variable and reset the active-variable count.
+template <typename T>
+void VariableSetT<T>::clear()
+{
+    NameAndValue.clear();
+    ParameterType.clear();
+    Recompute.clear();
+    Index.clear();
+    num_active_vars = 0;
+}
+
+/// Merge `input`: known names take its value, new names are appended.
+template <typename T>
+void VariableSetT<T>::insertFrom(const VariableSetT& input)
+{
+    for (size_type k = 0; k < input.size(); ++k) {
+        const iterator hit = find(input.name(k));
+        if (hit != NameAndValue.end()) {
+            hit->second = input.NameAndValue[k].second;
+            continue;
+        }
+        Index.push_back(input.Index[k]);
+        NameAndValue.push_back(input.NameAndValue[k]);
+        ParameterType.push_back(input.ParameterType[k]);
+        Recompute.push_back(input.Recompute[k]);
+    }
+    num_active_vars = input.num_active_vars; // adopt input's active count
+}
+
+/// Set this object's values to input_1 + input_2, entry by entry.
+///
+/// Entries are paired by position and must carry identical names. Names not
+/// yet stored here are appended with input_1's Index, ParameterType and
+/// Recompute data. Throws std::runtime_error on any inconsistency.
+template <typename T>
+void
+VariableSetT<T>::insertFromSum(
+    const VariableSetT& input_1, const VariableSetT& input_2)
+{
+    // Active counts must agree, and input_2 must hold at least as many
+    // entries as input_1 because it is indexed by input_1's positions below.
+    if (input_1.num_active_vars != input_2.num_active_vars ||
+        input_2.size() < input_1.size())
+        throw std::runtime_error(
+            "Inconsistent number of parameters in two provided "
+            "variable sets.");
+
+    for (size_type i = 0; i < input_1.size(); ++i) {
+        const pair_type& lhs = input_1.NameAndValue[i];
+        const pair_type& rhs = input_2.NameAndValue[i];
+        // Equivalent variables must have the same name - otherwise we
+        // certainly shouldn't be adding them.
+        if (lhs.first != rhs.first)
+            throw std::runtime_error(
+                "Inconsistent parameters exist in the two provided "
+                "variable sets.");
+
+        const value_type sum_val = lhs.second + rhs.second;
+        const iterator loc = find(lhs.first);
+        if (loc != NameAndValue.end()) {
+            loc->second = sum_val;
+            continue;
+        }
+
+        // New name: metadata comes from input_1, the value is the sum.
+        Index.push_back(input_1.Index[i]);
+        ParameterType.push_back(input_1.ParameterType[i]);
+        Recompute.push_back(input_1.Recompute[i]);
+        NameAndValue.push_back(pair_type(lhs.first, sum_val));
+    }
+    // Both inputs were checked above to share this active count.
+    num_active_vars = input_1.num_active_vars;
+}
+
+/// Set this object's values to input_1 - input_2, entry by entry.
+///
+/// Entries are paired by position and must carry identical names. Names not
+/// yet stored here are appended with input_1's Index, ParameterType and
+/// Recompute data. Throws std::runtime_error on any inconsistency.
+template <typename T>
+void
+VariableSetT<T>::insertFromDiff(
+    const VariableSetT& input_1, const VariableSetT& input_2)
+{
+    // Active counts must agree, and input_2 must hold at least as many
+    // entries as input_1 because it is indexed by input_1's positions below.
+    if (input_1.num_active_vars != input_2.num_active_vars ||
+        input_2.size() < input_1.size())
+        throw std::runtime_error(
+            "Inconsistent number of parameters in two provided "
+            "variable sets.");
+
+    for (size_type i = 0; i < input_1.size(); ++i) {
+        const pair_type& lhs = input_1.NameAndValue[i];
+        const pair_type& rhs = input_2.NameAndValue[i];
+        // Equivalent variables must have the same name - otherwise we
+        // certainly shouldn't be subtracting them.
+        if (lhs.first != rhs.first)
+            throw std::runtime_error(
+                "Inconsistent parameters exist in the two provided "
+                "variable sets.");
+
+        const value_type diff_val = lhs.second - rhs.second;
+        const iterator loc = find(lhs.first);
+        if (loc != NameAndValue.end()) {
+            loc->second = diff_val;
+            continue;
+        }
+
+        // New name: metadata comes from input_1, the value is the difference.
+        Index.push_back(input_1.Index[i]);
+        ParameterType.push_back(input_1.ParameterType[i]);
+        Recompute.push_back(input_1.Recompute[i]);
+        NameAndValue.push_back(pair_type(lhs.first, diff_val));
+    }
+    // Both inputs were checked above to share this active count.
+    num_active_vars = input_1.num_active_vars;
+}
+
+/// Drop every variable with a negative Index; renumber the rest from 0.
+template <typename T>
+void VariableSetT<T>::removeInactive()
+{
+    std::vector<int> old_index;
+    std::vector<pair_type> old_values;
+    std::vector<index_pair_type> old_recompute, old_types;
+    old_index.swap(Index); // swapping empties the member without a copy
+    old_values.swap(NameAndValue);
+    old_recompute.swap(Recompute);
+    old_types.swap(ParameterType);
+    num_active_vars = 0;
+    for (size_type i = 0; i < old_index.size(); ++i) {
+        if (old_index[i] < 0)
+            continue;
+        Index.push_back(num_active_vars++);
+        NameAndValue.push_back(old_values[i]);
+        Recompute.push_back(old_recompute[i]);
+        ParameterType.push_back(old_types[i]);
+    }
+}
+
+/// Renumber active entries consecutively from 0; inactive ones become -1.
+template <typename T>
+void VariableSetT<T>::resetIndex()
+{
+    num_active_vars = 0;
+    for (int& slot : Index) {
+        slot = (slot < 0) ? -1 : num_active_vars++;
+    }
+}
+
+/// Look up each name's Index in `selected` and recount the active ones.
+template <typename T>
+void VariableSetT<T>::getIndex(const VariableSetT& selected)
+{
+    num_active_vars = 0;
+    for (size_type k = 0; k < NameAndValue.size(); ++k) {
+        const int mapped = selected.getIndex(NameAndValue[k].first);
+        Index[k] = mapped;
+        num_active_vars += (mapped >= 0) ? 1 : 0;
+    }
+}
+
+/// Return the Index entry of the first variable named `vname`, or -1 if
+/// no variable carries that name.
+template <typename T>
+int VariableSetT<T>::getIndex(const std::string& vname) const
+{
+    for (size_type pos = 0; pos < NameAndValue.size(); ++pos) {
+        if (NameAndValue[pos].first != vname)
+            continue;
+        return Index[pos];
+    }
+    return -1;
+}
+
+/// Give every variable its own position as Index value.
+template <typename T>
+void VariableSetT<T>::setIndexDefault()
+{
+    for (int next = 0; next < static_cast<int>(Index.size()); ++next)
+        Index[next] = next;
+}
+
+/// Write the variables to `os` as a table with one row per variable: name,
+/// value (scientific, precision 6), parameter type, recompute flag, ON/OFF
+/// and, for ON rows, the Index value.
+/// @param leftPadSpaces number of blanks written in front of every line
+/// @param printHeader   when true, a title row and a dashed rule come first
+template <typename T>
+void VariableSetT<T>::print(
+    std::ostream& os, int leftPadSpaces, bool printHeader) const
+{
+    const std::string pad_str(leftPadSpaces, ' ');
+
+    // Column widths: the name column fits the longest stored name, and with
+    // a header every column is at least as wide as its title.
+    int max_name_len = 0;
+    for (const pair_type& entry : NameAndValue)
+        max_name_len =
+            std::max(max_name_len, static_cast<int>(entry.first.length()));
+    if (printHeader)
+        max_name_len = std::max(max_name_len, 4); // size of "Name" header
+    const int max_value_len = 28; // width of the value column
+    const int max_type_len = printHeader ? 4 : 1;
+    const int max_recompute_len = printHeader ? 9 : 1;
+    const int max_use_len = 3;
+    const int max_index_len = printHeader ? 5 : 1;
+
+    if (printHeader) {
+        os << pad_str << setw(max_name_len) << "Name"
+           << " " << setw(max_value_len) << "Value"
+           << " " << setw(max_type_len) << "Type"
+           << " " << setw(max_recompute_len) << "Recompute"
+           << " " << setw(max_use_len) << "Use"
+           << " " << setw(max_index_len) << "Index" << std::endl;
+        os << pad_str << std::setfill('-') << setw(max_name_len) << ""
+           << " " << setw(max_value_len) << ""
+           << " " << setw(max_type_len) << ""
+           << " " << setw(max_recompute_len) << ""
+           << " " << setw(max_use_len) << ""
+           << " " << setw(max_index_len) << "" << std::endl;
+        os << std::setfill(' ');
+    }
+
+    for (size_type i = 0; i < NameAndValue.size(); ++i) {
+        os << pad_str << setw(max_name_len) << NameAndValue[i].first << " "
+           << std::setprecision(6) << std::scientific << setw(max_value_len)
+           << NameAndValue[i].second << " " << setw(max_type_len)
+           << ParameterType[i].second << " " << setw(max_recompute_len)
+           << Recompute[i].second << " ";
+        os << std::defaultfloat;
+        if (Index[i] < 0)
+            os << setw(max_use_len) << "OFF" << std::endl;
+        else
+            os << setw(max_use_len) << "ON"
+               << " " << setw(max_index_len) << Index[i] << std::endl;
+    }
+}
+
+/// Save all parameter names and values, plus a file-format version and a
+/// timestamp, to the HDF file `filename` created through `hout`.
+template <typename T>
+void VariableSetT<T>::writeToHDF(
+    const std::string& filename, qmcplusplus::hdf_archive& hout) const
+{
+    hout.create(filename);
+
+    // File Versioning
+    // 1.0.0  Initial file version
+    // 1.1.0  Files could have object-specific data from
+    // OptimizableObject::read/writeVariationalParameters
+    std::vector<int> vp_file_version{1, 1, 0};
+    hout.write(vp_file_version, "version");
+
+    std::string timestamp(getDateAndTime("%Y-%m-%d %H:%M:%S %Z"));
+    hout.write(timestamp, "timestamp");
+
+    hout.push("name_value_lists");
+    // Split the (name, value) pairs into two parallel lists.
+    std::vector<std::string> param_names;
+    std::vector<value_type> param_values;
+    for (size_type i = 0; i < NameAndValue.size(); ++i) {
+        param_names.push_back(NameAndValue[i].first);
+        param_values.push_back(NameAndValue[i].second);
+    }
+    hout.write(param_names, "parameter_names");
+    hout.write(param_values, "parameter_values");
+    hout.pop();
+}
+
+/// Load values from HDF file `filename` for the names already in this set.
+/// Throws std::runtime_error on open failure, missing group or bad lists.
+template <typename T>
+void VariableSetT<T>::readFromHDF(
+    const std::string& filename, qmcplusplus::hdf_archive& hin)
+{
+    if (!hin.open(filename, H5F_ACC_RDONLY)) {
+        std::ostringstream err_msg;
+        err_msg << "Unable to open VP file: " << filename;
+        throw std::runtime_error(err_msg.str());
+    }
+
+    try {
+        hin.push("name_value_lists", false);
+    }
+    catch (std::runtime_error&) {
+        std::ostringstream err_msg;
+        err_msg << "The group name_value_lists in not present in file: "
+                << filename;
+        throw std::runtime_error(err_msg.str());
+    }
+
+    std::vector<value_type> param_values;
+    hin.read(param_values, "parameter_values");
+    std::vector<std::string> param_names;
+    hin.read(param_names, "parameter_names");
+    if (param_values.size() < param_names.size()) // paired by position
+        throw std::runtime_error(
+            "Fewer parameter values than names in VP file: " + filename);
+
+    for (size_type i = 0; i < param_names.size(); ++i) {
+        const iterator loc = find(param_names[i]);
+        if (loc != end())
+            loc->second = param_values[i]; // unknown names are skipped
+    }
+    hin.pop();
+}
+
+template struct VariableSetT<double>; // explicit instantiations: the member
+template struct VariableSetT<float>;  // functions are defined in this file
+template struct VariableSetT<std::complex<double>>;
+template struct VariableSetT<std::complex<float>>;
+
+} // namespace optimize
diff --git a/src/QMCWaveFunctions/VariableSetT.h b/src/QMCWaveFunctions/VariableSetT.h
new file mode 100644
index 0000000000..9a0675a184
--- /dev/null
+++ b/src/QMCWaveFunctions/VariableSetT.h
@@ -0,0 +1,336 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef QMCPLUSPLUS_OPTIMIZE_VARIABLESETT_H
+#define QMCPLUSPLUS_OPTIMIZE_VARIABLESETT_H
+#include "config.h"
+#include <map>
+#include <vector>
+#include <iostream>
+#include <complex>
+#include "VariableSet.h"
+#include "OrbitalSetTraits.h"
+
+namespace qmcplusplus
+{
+class hdf_archive;
+}
+
+namespace optimize
+{
+/** An enum useful for determining the type of parameter being optimized.
+*   Knowing this in the opt routine can reduce the computational load.
+*/
+// enum
+// {
+//   OTHER_P = 0,
+//   LOGLINEAR_P, //B-spline Jastrows
+//   LOGLINEAR_K, //K space Jastrows
+//   LINEAR_P,    //Multi-determinant coefficients
+//   SPO_P,       //SPO set Parameters
+//   BACKFLOW_P   //Backflow parameters
+// };
+
+/** A serialized container of named variables that can be modified during
+ * optimizations. Values have type OrbitalSetTraits<T>::ValueType; Index,
+ * NameAndValue, ParameterType and Recompute hold one entry per variable.
+ */
+template <typename T>
+struct VariableSetT
+{
+  using value_type = typename qmcplusplus::OrbitalSetTraits<T>::ValueType;
+  using real_type  = typename qmcplusplus::OrbitalSetTraits<T>::RealType;
+
+  using pair_type       = std::pair<std::string, value_type>;
+  using index_pair_type = std::pair<std::string, int>;
+  using iterator        = typename std::vector<pair_type>::iterator;
+  using const_iterator  = typename std::vector<pair_type>::const_iterator;
+  using size_type       = typename std::vector<pair_type>::size_type;
+
+  ///number of active variables
+  int num_active_vars;
+  /** store locator of the named variable
+   *
+   * if(Index[i]  == -1), the named variable is not active
+   */
+  std::vector<int> Index;
+  std::vector<pair_type> NameAndValue; ///< (name, value) of every variable
+  std::vector<index_pair_type> ParameterType; ///< (name, parameter type) of every variable
+  std::vector<index_pair_type> Recompute; ///< (name, recompute flag) of every variable
+
+  ///default constructor
+  inline VariableSetT() : num_active_vars(0) {}
+  ///virtual destructor for safety
+  virtual ~VariableSetT() = default;
+  /** return true if at least one variable is active (num_active_vars > 0)
+   */
+  inline bool is_optimizable() const { return num_active_vars > 0; }
+  ///return the number of active variables
+  inline int size_of_active() const { return num_active_vars; }
+  ///return the first const_iterator
+  inline const_iterator begin() const { return NameAndValue.begin(); }
+  ///return the last const_iterator
+  inline const_iterator end() const { return NameAndValue.end(); }
+  ///return the first iterator
+  inline iterator begin() { return NameAndValue.begin(); }
+  ///return the last iterator
+  inline iterator end() { return NameAndValue.end(); }
+  ///return the size
+  inline size_type size() const { return NameAndValue.size(); }
+  ///return the locator of the i-th Index
+  inline int where(int i) const { return Index[i]; }
+  /** return the iterator of a named parameter
+   * @param vname name of a parameter
+   * @return the locator of vname
+   *
+   * If vname is not found among the Names, return NameAndValue.end()
+   * so that ::end() member function can be used to validate the iterator.
+   */
+  inline iterator find(const std::string& vname)
+  {
+    return std::find_if(NameAndValue.begin(), NameAndValue.end(),
+                        [&vname](const auto& value) { return value.first == vname; });
+  }
+
+  /** return the Index value for the named parameter
+   * @param vname name of the variable
+   *
+   * If vname is not found in this variables, return -1;
+   */
+  int getIndex(const std::string& vname) const;
+
+  /** return the NameAndValue index for the named parameter
+   * @param vname name of the variable
+   *
+   * Differs from getIndex by not relying on the indices cached in Index
+   * myVars[i] will always return the value of the parameter if it is stored
+   * regardless of whether or not the Index array has been correctly reset
+   *
+   * if vname is not found, return -1
+   *
+   */
+  inline int getLoc(const std::string& vname) const
+  {
+    int loc = 0;
+    while (loc != NameAndValue.size())
+    {
+      if (NameAndValue[loc].first == vname)
+        return loc;
+      ++loc;
+    }
+    return -1;
+  }
+
+  inline void insert(const std::string& vname, value_type v, bool enable = true, int type = OTHER_P)
+  {
+    iterator loc = find(vname); // an already stored name keeps its current value
+    int ind_loc  = loc - NameAndValue.begin();
+    if (loc == NameAndValue.end()) // name not stored yet: append a new variable
+    {
+      Index.push_back(ind_loc); // ind_loc equals the old size, i.e. the new entry's position
+      NameAndValue.push_back(pair_type(vname, v));
+      ParameterType.push_back(index_pair_type(vname, type));
+      Recompute.push_back(index_pair_type(vname, 1));
+    }
+    //disable it if enable == false (this also applies to an already stored name)
+    if (!enable)
+      Index[ind_loc] = -1;
+  }
+
+  inline void setParameterType(int type)
+  {
+    std::vector<index_pair_type>::iterator PTit(ParameterType.begin()), PTend(ParameterType.end());
+    while (PTit != PTend) // overwrite the type of every variable
+    {
+      (*PTit).second = type;
+      PTit++;
+    }
+  }
+
+  inline void getParameterTypeList(std::vector<int>& types) const
+  {
+    auto ptit(ParameterType.begin()), ptend(ParameterType.end());
+    types.resize(ptend - ptit); // one type per variable, in storage order
+    auto tit(types.begin());
+    while (ptit != ptend)
+      (*tit++) = (*ptit++).second;
+  }
+
+
+  /** equivalent to std::map<std::string,T>[string] operator:
+   * a name that is not stored yet is appended as an inactive variable with value 0 */
+  inline value_type& operator[](const std::string& vname)
+  {
+    iterator loc = find(vname);
+    if (loc == NameAndValue.end())
+    {
+      Index.push_back(-1);
+      NameAndValue.push_back(pair_type(vname, 0));
+      ParameterType.push_back(index_pair_type(vname, 0));
+      Recompute.push_back(index_pair_type(vname, 1));
+      return NameAndValue.back().second;
+    }
+    return (*loc).second;
+  }
+
+
+  /** return the name of i-th variable
+   * @param i index
+   */
+  const std::string& name(int i) const { return NameAndValue[i].first; }
+
+  /** return the i-th value
+   * @param i index
+   */
+  inline value_type operator[](int i) const { return NameAndValue[i].second; }
+
+  /** assign the i-th value
+   * @param i index
+   */
+  inline value_type& operator[](int i) { return NameAndValue[i].second; }
+
+  /** get the i-th parameter's type
+  * @param i index
+  */
+  inline int getType(int i) const { return ParameterType[i].second; }
+
+  inline bool recompute(int i) const { return (Recompute[i].second == 1); }
+
+  inline int& recompute(int i) { return Recompute[i].second; }
+
+  inline void setComputed()
+  {
+    for (int i = 0; i < Recompute.size(); i++)
+    {
+      if (ParameterType[i].second == LOGLINEAR_P)
+        Recompute[i].second = 0; // recompute(i) will now report false
+      else if (ParameterType[i].second == LOGLINEAR_K)
+        Recompute[i].second = 0;
+      else
+        Recompute[i].second = 1; // every other type is flagged for recompute
+    }
+  }
+
+  inline void setRecompute()
+  {
+    for (int i = 0; i < Recompute.size(); i++)
+      Recompute[i].second = 1;
+  }
+
+  /** clear the variable set
+   *
+   * Remove all the data.
+   */
+  void clear();
+
+  /** insert a VariableSet to the list
+   * @param input variables
+   */
+  void insertFrom(const VariableSetT& input);
+
+  /** sum together the values of the optimizable parameter values in
+   *  two VariableSet objects, and set this object's values to equal them.
+   *  @param input_1 first set of input variables
+   *  @param input_2 second set of input variables
+   */
+  void insertFromSum(const VariableSetT& input_1, const VariableSetT& input_2);
+
+  /** take the difference (input_1-input_2) of values of the optimizable
+   *  parameter values in two VariableSet objects, and set this object's
+   *  values to equal them.
+   *  @param input_1 first set of input variables
+   *  @param input_2 second set of input variables
+   */
+  void insertFromDiff(const VariableSetT& input_1, const VariableSetT& input_2);
+
+  /** activate variables for optimization
+   * @param first iterator of the first name
+   * @param last iterator of the last name
+   * @param reindex if true, Index is updated
+   *
+   * The status of a variable that is not included in the [first,last)
+   * remains the same.
+   */
+  template<typename ForwardIterator>
+  void activate(ForwardIterator first, ForwardIterator last, bool reindex)
+  {
+    while (first != last)
+    {
+      iterator loc = find(*first++);
+      if (loc != NameAndValue.end())
+      {
+        int i = loc - NameAndValue.begin();
+        if (Index[i] < 0)
+          Index[i] = num_active_vars++;
+      }
+    }
+    if (reindex)
+    {
+      removeInactive();
+      resetIndex();
+    }
+  }
+
+  /** deactivate variables for optimization
+   * @param first iterator of the first name
+   * @param last iterator of the last name
+   * @param reindex if true, the variables are removed and Index is updated
+   */
+  template<typename ForwardIterator>
+  void disable(ForwardIterator first, ForwardIterator last, bool reindex)
+  {
+    while (first != last)
+    {
+      int loc = find(*first++) - NameAndValue.begin();
+      if (loc < NameAndValue.size())
+        Index[loc] = -1;
+    }
+    if (reindex)
+    {
+      removeInactive();
+      resetIndex();
+    }
+  }
+
+  /** reset Index
+   */
+  void resetIndex();
+  /** remove inactive variables and trim the internal data
+   */
+  void removeInactive();
+
+  /** set the index table of this VariableSet
+   * @param selected input variables
+   *
+   * This VariableSet is a subset of selected.
+   */
+  void getIndex(const VariableSetT& selected);
+
+  /** set default Indices, namely all the variables are active
+   */
+  void setIndexDefault();
+
+  void print(std::ostream& os, int leftPadSpaces = 0, bool printHeader = false) const;
+
+  // Save variational parameters to an HDF file
+  void writeToHDF(const std::string& filename, qmcplusplus::hdf_archive& hout) const;
+
+  /// Read variational parameters from an HDF file.
+  /// This assumes VariableSet is already set up.
+  void readFromHDF(const std::string& filename, qmcplusplus::hdf_archive& hin);
+};
+} // namespace optimize
+
+#endif
diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp b/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp
index 49e5070241..ecdb5dd696 100644
--- a/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp
+++ b/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp
@@ -1,10 +1,11 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2023 Raymond Clay and QMCPACK developers.
 //
-// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories
+// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National
+// Laboratories
 //
 // File created by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories
 //////////////////////////////////////////////////////////////////////////////////////
@@ -14,106 +15,110 @@
 namespace qmcplusplus
 {
 
-template<class T>
-ConstantSPOSetT<T>::ConstantSPOSetT(const std::string& my_name, const int nparticles, const int norbitals)
-    : SPOSetT<T>(my_name), numparticles_(nparticles)
+template <class T>
+ConstantSPOSetT<T>::ConstantSPOSetT(
+    const std::string& my_name, const int nparticles, const int norbitals) :
+    SPOSetT<T>(my_name),
+    numparticles_(nparticles)
 {
-  this->OrbitalSetSize = norbitals;
-  ref_psi_.resize(numparticles_, this->OrbitalSetSize);
-  ref_egrad_.resize(numparticles_, this->OrbitalSetSize);
-  ref_elapl_.resize(numparticles_, this->OrbitalSetSize);
+    this->OrbitalSetSize = norbitals;
+    ref_psi_.resize(numparticles_, this->OrbitalSetSize);
+    ref_egrad_.resize(numparticles_, this->OrbitalSetSize);
+    ref_elapl_.resize(numparticles_, this->OrbitalSetSize);
 
-  ref_psi_   = 0.0;
-  ref_egrad_ = 0.0;
-  ref_elapl_ = 0.0;
+    ref_psi_ = 0.0;
+    ref_egrad_ = 0.0;
+    ref_elapl_ = 0.0;
 }
 
-template<class T>
-std::unique_ptr<SPOSetT<T>> ConstantSPOSetT<T>::makeClone() const
+template <class T>
+std::unique_ptr<SPOSetT<T>>
+ConstantSPOSetT<T>::makeClone() const
 {
-  auto myclone = std::make_unique<ConstantSPOSetT<T>>(this->my_name_, numparticles_, this->OrbitalSetSize);
-  myclone->setRefVals(ref_psi_);
-  myclone->setRefEGrads(ref_egrad_);
-  myclone->setRefELapls(ref_elapl_);
-  return myclone;
+    auto myclone = std::make_unique<ConstantSPOSetT<T>>(
+        this->my_name_, numparticles_, this->OrbitalSetSize);
+    myclone->setRefVals(ref_psi_);
+    myclone->setRefEGrads(ref_egrad_);
+    myclone->setRefELapls(ref_elapl_);
+    return myclone;
 }
 
-template<class T>
-void ConstantSPOSetT<T>::checkOutVariables(const opt_variables_type& active)
+template <class T>
+void
+ConstantSPOSetT<T>::checkOutVariables(const OptVariablesType<T>& active)
 {
-  APP_ABORT("ConstantSPOSet should not call checkOutVariables");
+    APP_ABORT("ConstantSPOSet should not call checkOutVariables");
 };
 
-template<class T>
-void ConstantSPOSetT<T>::setOrbitalSetSize(int norbs)
+template <class T>
+void
+ConstantSPOSetT<T>::setOrbitalSetSize(int norbs)
 {
-  APP_ABORT("ConstantSPOSet should not call setOrbitalSetSize()");
+    APP_ABORT("ConstantSPOSet should not call setOrbitalSetSize()");
 }
 
-template<class T>
-void ConstantSPOSetT<T>::setRefVals(const ValueMatrix& vals)
+template <class T>
+void
+ConstantSPOSetT<T>::setRefVals(const ValueMatrix& vals)
 {
-  assert(vals.cols() == this->OrbitalSetSize);
-  assert(vals.rows() == numparticles_);
-  ref_psi_ = vals;
+    assert(vals.cols() == this->OrbitalSetSize);
+    assert(vals.rows() == numparticles_);
+    ref_psi_ = vals;
 }
 
-template<class T>
-void ConstantSPOSetT<T>::setRefEGrads(const GradMatrix& grads)
+template <class T>
+void
+ConstantSPOSetT<T>::setRefEGrads(const GradMatrix& grads)
 {
-  assert(grads.cols() == this->OrbitalSetSize);
-  assert(grads.rows() == numparticles_);
-  ref_egrad_ = grads;
+    assert(grads.cols() == this->OrbitalSetSize);
+    assert(grads.rows() == numparticles_);
+    ref_egrad_ = grads;
 }
 
-template<class T>
-void ConstantSPOSetT<T>::setRefELapls(const ValueMatrix& lapls)
+template <class T>
+void
+ConstantSPOSetT<T>::setRefELapls(const ValueMatrix& lapls)
 {
-  assert(lapls.cols() == this->OrbitalSetSize);
-  assert(lapls.rows() == numparticles_);
-  ref_elapl_ = lapls;
+    assert(lapls.cols() == this->OrbitalSetSize);
+    assert(lapls.rows() == numparticles_);
+    ref_elapl_ = lapls;
 }
 
-template<class T>
-void ConstantSPOSetT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+template <class T>
+void
+ConstantSPOSetT<T>::evaluateValue(
+    const ParticleSetT<T>& P, int iat, ValueVector& psi)
 {
-  const auto* vp = dynamic_cast<const VirtualParticleSet*>(&P);
-  int ptcl       = vp ? vp->refPtcl : iat;
-  assert(psi.size() == this->OrbitalSetSize);
-  for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++)
-    psi[iorb] = ref_psi_(ptcl, iorb);
+    const auto* vp = dynamic_cast<const VirtualParticleSetT<T>*>(&P);
+    int ptcl = vp ? vp->refPtcl : iat;
+    assert(psi.size() == this->OrbitalSetSize);
+    for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++)
+        psi[iorb] = ref_psi_(ptcl, iorb);
 }
 
-template<class T>
-void ConstantSPOSetT<T>::evaluateVGL(const ParticleSet& P,
-                                     int iat,
-                                     ValueVector& psi,
-                                     GradVector& dpsi,
-                                     ValueVector& d2psi)
+template <class T>
+void
+ConstantSPOSetT<T>::evaluateVGL(const ParticleSetT<T>& P, int iat,
+    ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
 {
-  for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++)
-  {
-    psi[iorb]   = ref_psi_(iat, iorb);
-    dpsi[iorb]  = ref_egrad_(iat, iorb);
-    d2psi[iorb] = ref_elapl_(iat, iorb);
-  }
+    for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++) {
+        psi[iorb] = ref_psi_(iat, iorb);
+        dpsi[iorb] = ref_egrad_(iat, iorb);
+        d2psi[iorb] = ref_elapl_(iat, iorb);
+    }
 }
 
-template<class T>
-void ConstantSPOSetT<T>::evaluate_notranspose(const ParticleSet& P,
-                                              int first,
-                                              int last,
-                                              ValueMatrix& logdet,
-                                              GradMatrix& dlogdet,
-                                              ValueMatrix& d2logdet)
+template <class T>
+void
+ConstantSPOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
+    int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet)
 {
-  for (int iat = first, i = 0; iat < last; ++iat, ++i)
-  {
-    ValueVector v(logdet[i], logdet.cols());
-    GradVector g(dlogdet[i], dlogdet.cols());
-    ValueVector l(d2logdet[i], d2logdet.cols());
-    evaluateVGL(P, iat, v, g, l);
-  }
+    for (int iat = first, i = 0; iat < last; ++iat, ++i) {
+        ValueVector v(logdet[i], logdet.cols());
+        GradVector g(dlogdet[i], dlogdet.cols());
+        ValueVector l(d2logdet[i], d2logdet.cols());
+        evaluateVGL(P, iat, v, g, l);
+    }
 }
 
 template class ConstantSPOSetT<float>;
@@ -121,4 +126,4 @@ template class ConstantSPOSetT<double>;
 template class ConstantSPOSetT<std::complex<float>>;
 template class ConstantSPOSetT<std::complex<double>>;
 
-} //namespace qmcplusplus
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSetT.h b/src/QMCWaveFunctions/tests/ConstantSPOSetT.h
index 483136360a..d1ee5b24f7 100644
--- a/src/QMCWaveFunctions/tests/ConstantSPOSetT.h
+++ b/src/QMCWaveFunctions/tests/ConstantSPOSetT.h
@@ -1,15 +1,15 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2023 Raymond Clay and QMCPACK developers.
 //
-// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories
+// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National
+// Laboratories
 //
 // File created by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories
 //////////////////////////////////////////////////////////////////////////////////////
 
-
 #ifndef QMCPLUSPLUS_CONSTANTSPOSETT_H
 #define QMCPLUSPLUS_CONSTANTSPOSETT_H
 
@@ -17,77 +17,92 @@
 
 namespace qmcplusplus
 {
-/** Constant SPOSet for testing purposes.  Fixed N_elec x N_orb matrices storing value, gradients, and laplacians, etc.,
-   *  These values are accessed through standard SPOSet calls like evaluateValue, evaluateVGL, etc.
-   *  Exists to provide deterministic and known output to objects requiring SPOSet evaluations.      
-   *
-   */
-template<class T>
+/** Constant SPOSet for testing purposes.  Fixed N_elec x N_orb matrices storing
+ * value, gradients, and laplacians, etc. These values are accessed through
+ * standard SPOSet calls like evaluateValue, evaluateVGL, etc. Exists to provide
+ * deterministic and known output to objects requiring SPOSet evaluations.
+ *
+ */
+template <class T>
 class ConstantSPOSetT : public SPOSetT<T>
 {
 public:
-  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
-  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
-  using ValueVector = typename SPOSetT<T>::ValueVector;
-  using GradVector  = typename SPOSetT<T>::GradVector;
-
-  ConstantSPOSetT(const std::string& my_name) = delete;
-
-  //Constructor needs number of particles and number of orbitals.  This is the minimum
-  //amount of information needed to sanely construct all data members and perform size
-  //checks later.
-  ConstantSPOSetT(const std::string& my_name, const int nparticles, const int norbitals);
-
-  std::unique_ptr<SPOSetT<T>> makeClone() const final;
-
-  std::string getClassName() const final { return "ConstantSPOSet"; };
-
-  void checkOutVariables(const opt_variables_type& active) final;
-
-  void setOrbitalSetSize(int norbs) final;
-
-  /**
-  * @brief Setter method to set \phi_j(r_i). Stores input matrix in ref_psi_.
-  * @param Nelec x Nion ValueType matrix of \phi_j(r_i)
-  * @return void
-  */
-  void setRefVals(const ValueMatrix& vals);
-  /**
-  * @brief Setter method to set \nabla_i \phi_j(r_i). Stores input matrix in ref_egrad_.
-  * @param Nelec x Nion GradType matrix of \grad_i \phi_j(r_i)
-  * @return void
-  */
-  void setRefEGrads(const GradMatrix& grads);
-  /**
-  * @brief Setter method to set \nabla^2_i \phi_j(r_i). Stores input matrix in ref_elapl_.
-  * @param Nelec x Nion GradType matrix of \grad^2_i \phi_j(r_i)
-  * @return void
-  */
-  void setRefELapls(const ValueMatrix& lapls);
-
-  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) final;
-
-  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final;
-
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            ValueMatrix& logdet,
-                            GradMatrix& dlogdet,
-                            ValueMatrix& d2logdet) final;
+    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+    using GradMatrix = typename SPOSetT<T>::GradMatrix;
+    using ValueVector = typename SPOSetT<T>::ValueVector;
+    using GradVector = typename SPOSetT<T>::GradVector;
+
+    ConstantSPOSetT(const std::string& my_name) = delete;
+
+    // Constructor needs number of particles and number of orbitals.  This is
+    // the minimum amount of information needed to sanely construct all data
+    // members and perform size checks later.
+    ConstantSPOSetT(
+        const std::string& my_name, const int nparticles, const int norbitals);
+
+    std::unique_ptr<SPOSetT<T>>
+    makeClone() const final;
+
+    std::string
+    getClassName() const final
+    {
+        return "ConstantSPOSet";
+    };
+
+    void
+    checkOutVariables(const OptVariablesType<T>& active) final;
+
+    void
+    setOrbitalSetSize(int norbs) final;
+
+    /**
+     * @brief Setter method to set \phi_j(r_i). Stores input matrix in ref_psi_.
+     * @param vals Nelec x Norb ValueType matrix of \phi_j(r_i)
+     * @return void
+     */
+    void
+    setRefVals(const ValueMatrix& vals);
+    /**
+     * @brief Setter method to set \nabla_i \phi_j(r_i). Stores input matrix in
+     * ref_egrad_.
+     * @param grads Nelec x Norb GradType matrix of \grad_i \phi_j(r_i)
+     * @return void
+     */
+    void
+    setRefEGrads(const GradMatrix& grads);
+    /**
+     * @brief Setter method to set \nabla^2_i \phi_j(r_i). Stores input matrix
+     * in ref_elapl_.
+     * @param lapls Nelec x Norb ValueType matrix of \grad^2_i \phi_j(r_i)
+     * @return void
+     */
+    void
+    setRefELapls(const ValueMatrix& lapls);
+
+    void
+    evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) final;
+
+    void
+    evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
+        GradVector& dpsi, ValueVector& d2psi) final;
+
+    void
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+        ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) final;
 
 private:
-  const int numparticles_; /// evaluate_notranspose arrays are nparticle x norb matrices.
-                           /// To ensure consistent array sizing and enforcement,
-                           /// we agree at construction how large these matrices will be.
-                           /// norb is stored in SPOSet::OrbitalSetSize.
-
-  //Value, electron gradient, and electron laplacian at "reference configuration".
-  //i.e. before any attempted moves.
-
-  ValueMatrix ref_psi_;
-  GradMatrix ref_egrad_;
-  ValueMatrix ref_elapl_;
+    const int numparticles_; /// evaluate_notranspose arrays are nparticle x
+                             /// norb matrices. To ensure consistent array
+                             /// sizing and enforcement, we agree at
+                             /// construction how large these matrices will be.
+                             /// norb is stored in SPOSet::OrbitalSetSize.
+
+    // Value, electron gradient, and electron laplacian at "reference
+    // configuration". i.e. before any attempted moves.
+
+    ValueMatrix ref_psi_;
+    GradMatrix ref_egrad_;
+    ValueMatrix ref_elapl_;
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCWaveFunctions/tests/FakeSPOT.cpp b/src/QMCWaveFunctions/tests/FakeSPOT.cpp
index fcf1637682..85678ce5f3 100644
--- a/src/QMCWaveFunctions/tests/FakeSPOT.cpp
+++ b/src/QMCWaveFunctions/tests/FakeSPOT.cpp
@@ -1,6 +1,6 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2020 QMCPACK developers.
 //
@@ -13,142 +13,135 @@
 
 namespace qmcplusplus
 {
-template<class T>
+template <class T>
 FakeSPOT<T>::FakeSPOT() : SPOSetT<T>("one_FakeSPO")
 {
-  a.resize(3, 3);
+    a.resize(3, 3);
 
-  a(0, 0) = 2.3;
-  a(0, 1) = 4.5;
-  a(0, 2) = 2.6;
-  a(1, 0) = 0.5;
-  a(1, 1) = 8.5;
-  a(1, 2) = 3.3;
-  a(2, 0) = 1.8;
-  a(2, 1) = 4.4;
-  a(2, 2) = 4.9;
+    a(0, 0) = 2.3;
+    a(0, 1) = 4.5;
+    a(0, 2) = 2.6;
+    a(1, 0) = 0.5;
+    a(1, 1) = 8.5;
+    a(1, 2) = 3.3;
+    a(2, 0) = 1.8;
+    a(2, 1) = 4.4;
+    a(2, 2) = 4.9;
 
-  v.resize(3);
-  v[0] = 1.9;
-  v[1] = 2.0;
-  v[2] = 3.1;
+    v.resize(3);
+    v[0] = 1.9;
+    v[1] = 2.0;
+    v[2] = 3.1;
 
+    a2.resize(4, 4);
+    a2(0, 0) = 2.3;
+    a2(0, 1) = 4.5;
+    a2(0, 2) = 2.6;
+    a2(0, 3) = 1.2;
+    a2(1, 0) = 0.5;
+    a2(1, 1) = 8.5;
+    a2(1, 2) = 3.3;
+    a2(1, 3) = 0.3;
+    a2(2, 0) = 1.8;
+    a2(2, 1) = 4.4;
+    a2(2, 2) = 4.9;
+    a2(2, 3) = 2.8;
+    a2(3, 0) = 0.8;
+    a2(3, 1) = 4.1;
+    a2(3, 2) = 3.2;
+    a2(3, 3) = 1.1;
 
-  a2.resize(4, 4);
-  a2(0, 0) = 2.3;
-  a2(0, 1) = 4.5;
-  a2(0, 2) = 2.6;
-  a2(0, 3) = 1.2;
-  a2(1, 0) = 0.5;
-  a2(1, 1) = 8.5;
-  a2(1, 2) = 3.3;
-  a2(1, 3) = 0.3;
-  a2(2, 0) = 1.8;
-  a2(2, 1) = 4.4;
-  a2(2, 2) = 4.9;
-  a2(2, 3) = 2.8;
-  a2(3, 0) = 0.8;
-  a2(3, 1) = 4.1;
-  a2(3, 2) = 3.2;
-  a2(3, 3) = 1.1;
+    v2.resize(4, 4);
 
-  v2.resize(4, 4);
+    v2(0, 0) = 3.2;
+    v2(0, 1) = 0.5;
+    v2(0, 2) = 5.9;
+    v2(0, 3) = 3.7;
+    v2(1, 0) = 0.3;
+    v2(1, 1) = 1.4;
+    v2(1, 2) = 3.9;
+    v2(1, 3) = 8.2;
+    v2(2, 0) = 3.3;
+    v2(2, 1) = 5.4;
+    v2(2, 2) = 4.9;
+    v2(2, 3) = 2.2;
+    v2(3, 1) = 5.4;
+    v2(3, 2) = 4.9;
+    v2(3, 3) = 2.2;
 
-  v2(0, 0) = 3.2;
-  v2(0, 1) = 0.5;
-  v2(0, 2) = 5.9;
-  v2(0, 3) = 3.7;
-  v2(1, 0) = 0.3;
-  v2(1, 1) = 1.4;
-  v2(1, 2) = 3.9;
-  v2(1, 3) = 8.2;
-  v2(2, 0) = 3.3;
-  v2(2, 1) = 5.4;
-  v2(2, 2) = 4.9;
-  v2(2, 3) = 2.2;
-  v2(3, 1) = 5.4;
-  v2(3, 2) = 4.9;
-  v2(3, 3) = 2.2;
-
-  gv.resize(4);
-  gv[0] = GradType(1.0, 0.0, 0.1);
-  gv[1] = GradType(1.0, 2.0, 0.1);
-  gv[2] = GradType(2.0, 1.0, 0.1);
-  gv[3] = GradType(0.4, 0.3, 0.1);
+    gv.resize(4);
+    gv[0] = GradType(1.0, 0.0, 0.1);
+    gv[1] = GradType(1.0, 2.0, 0.1);
+    gv[2] = GradType(2.0, 1.0, 0.1);
+    gv[3] = GradType(0.4, 0.3, 0.1);
 }
-template<class T>
-std::unique_ptr<SPOSetT<T>> FakeSPOT<T>::makeClone() const
+template <class T>
+std::unique_ptr<SPOSetT<T>>
+FakeSPOT<T>::makeClone() const
 {
-  return std::make_unique<FakeSPOT<T>>(*this);
+    return std::make_unique<FakeSPOT<T>>(*this);
 }
 
-template<class T>
-void FakeSPOT<T>::setOrbitalSetSize(int norbs)
+template <class T>
+void
+FakeSPOT<T>::setOrbitalSetSize(int norbs)
 {
-  this->OrbitalSetSize = norbs;
+    this->OrbitalSetSize = norbs;
 }
 
-template<class T>
-void FakeSPOT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+template <class T>
+void
+FakeSPOT<T>::evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi)
 {
-  if (iat < 0)
-    for (int i = 0; i < psi.size(); i++)
-      psi[i] = 1.2 * i - i * i;
-  else if (this->OrbitalSetSize == 3)
-    for (int i = 0; i < 3; i++)
-      psi[i] = a(iat, i);
-  else if (this->OrbitalSetSize == 4)
-    for (int i = 0; i < 4; i++)
-      psi[i] = a2(iat, i);
+    if (iat < 0)
+        for (int i = 0; i < psi.size(); i++)
+            psi[i] = 1.2 * i - i * i;
+    else if (this->OrbitalSetSize == 3)
+        for (int i = 0; i < 3; i++)
+            psi[i] = a(iat, i);
+    else if (this->OrbitalSetSize == 4)
+        for (int i = 0; i < 4; i++)
+            psi[i] = a2(iat, i);
 }
 
-template<class T>
-void FakeSPOT<T>::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+template <class T>
+void
+FakeSPOT<T>::evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
+    GradVector& dpsi, ValueVector& d2psi)
 {
-  if (this->OrbitalSetSize == 3)
-  {
-    for (int i = 0; i < 3; i++)
-    {
-      psi[i]  = v[i];
-      dpsi[i] = gv[i];
+    if (this->OrbitalSetSize == 3) {
+        for (int i = 0; i < 3; i++) {
+            psi[i] = v[i];
+            dpsi[i] = gv[i];
+        }
     }
-  }
-  else if (this->OrbitalSetSize == 4)
-  {
-    for (int i = 0; i < 4; i++)
-    {
-      psi[i]  = v2(iat, i);
-      dpsi[i] = gv[i];
+    else if (this->OrbitalSetSize == 4) {
+        for (int i = 0; i < 4; i++) {
+            psi[i] = v2(iat, i);
+            dpsi[i] = gv[i];
+        }
     }
-  }
 }
 
-template<class T>
-void FakeSPOT<T>::evaluate_notranspose(const ParticleSet& P,
-                                       int first,
-                                       int last,
-                                       ValueMatrix& logdet,
-                                       GradMatrix& dlogdet,
-                                       ValueMatrix& d2logdet)
+template <class T>
+void
+FakeSPOT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+    ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet)
 {
-  if (this->OrbitalSetSize == 3)
-  {
-    for (int i = 0; i < 3; i++)
-      for (int j = 0; j < 3; j++)
-      {
-        logdet(j, i)  = a(i, j);
-        dlogdet[i][j] = gv[j] + GradType(i);
-      }
-  }
-  else if (this->OrbitalSetSize == 4)
-  {
-    for (int i = 0; i < 4; i++)
-      for (int j = 0; j < 4; j++)
-      {
-        logdet(j, i)  = a2(i, j);
-        dlogdet[i][j] = gv[j] + GradType(i);
-      }
-  }
+    if (this->OrbitalSetSize == 3) {
+        for (int i = 0; i < 3; i++)
+            for (int j = 0; j < 3; j++) {
+                logdet(j, i) = a(i, j);
+                dlogdet[i][j] = gv[j] + GradType(i);
+            }
+    }
+    else if (this->OrbitalSetSize == 4) {
+        for (int i = 0; i < 4; i++)
+            for (int j = 0; j < 4; j++) {
+                logdet(j, i) = a2(i, j);
+                dlogdet[i][j] = gv[j] + GradType(i);
+            }
+    }
 }
 
 // Class concrete types from ValueType
diff --git a/src/QMCWaveFunctions/tests/FakeSPOT.h b/src/QMCWaveFunctions/tests/FakeSPOT.h
index dfa6689bd6..f0a6f1ef80 100644
--- a/src/QMCWaveFunctions/tests/FakeSPOT.h
+++ b/src/QMCWaveFunctions/tests/FakeSPOT.h
@@ -1,6 +1,6 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2020 QMCPACK developers.
 //
@@ -9,7 +9,6 @@
 // File created by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
 //////////////////////////////////////////////////////////////////////////////////////
 
-
 #ifndef QMCPLUSPLUS_FAKESPOTT_H
 #define QMCPLUSPLUS_FAKESPOTT_H
 
@@ -17,45 +16,55 @@
 
 namespace qmcplusplus
 {
-template<class T>
+template <class T>
 class FakeSPOT : public SPOSetT<T>
 {
 public:
-  Matrix<T> a;
-  Matrix<T> a2;
-  Vector<T> v;
-  Matrix<T> v2;
+    Matrix<T> a;
+    Matrix<T> a2;
+    Vector<T> v;
+    Matrix<T> v2;
 
-  using ValueVector = typename SPOSetT<T>::ValueVector;
-  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
-  using GradVector  = typename SPOSetT<T>::GradVector;
-  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
-  using GradType    = typename SPOSetT<T>::GradType;
+    using ValueVector = typename SPOSetT<T>::ValueVector;
+    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+    using GradVector = typename SPOSetT<T>::GradVector;
+    using GradMatrix = typename SPOSetT<T>::GradMatrix;
+    using GradType = typename SPOSetT<T>::GradType;
 
-  typename SPOSetT<T>::GradVector gv;
+    typename SPOSetT<T>::GradVector gv;
 
-  FakeSPOT();
+    FakeSPOT();
 
-  ~FakeSPOT() override = default;
+    ~FakeSPOT() override = default;
 
-  std::string getClassName() const override { return "FakeSPO"; }
+    std::string
+    getClassName() const override
+    {
+        return "FakeSPO";
+    }
 
-  std::unique_ptr<SPOSetT<T>> makeClone() const override;
+    std::unique_ptr<SPOSetT<T>>
+    makeClone() const override;
 
-  virtual void report() {}
+    virtual void
+    report()
+    {
+    }
 
-  void setOrbitalSetSize(int norbs) override;
+    void
+    setOrbitalSetSize(int norbs) override;
 
-  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override;
+    void
+    evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) override;
 
-  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override;
+    void
+    evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
+        GradVector& dpsi, ValueVector& d2psi) override;
 
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            ValueMatrix& logdet,
-                            GradMatrix& dlogdet,
-                            ValueMatrix& d2logdet) override;
+    void
+    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
+        ValueMatrix& logdet, GradMatrix& dlogdet,
+        ValueMatrix& d2logdet) override;
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/tests/test_ConstantSPOSetT.cpp b/src/QMCWaveFunctions/tests/test_ConstantSPOSetT.cpp
index 87425bbb91..56d5b22e8a 100644
--- a/src/QMCWaveFunctions/tests/test_ConstantSPOSetT.cpp
+++ b/src/QMCWaveFunctions/tests/test_ConstantSPOSetT.cpp
@@ -63,8 +63,8 @@ TEST_CASE("ConstantSPOSetT", "[wavefunction]")
   }
 
 
-  const SimulationCell simulation_cell;
-  ParticleSet elec(simulation_cell);
+  const SimulationCellT<Value> simulation_cell;
+  ParticleSetT<Value> elec(simulation_cell);
 
   elec.create({nelec});
 
diff --git a/src/type_traits/complex_help.hpp b/src/type_traits/complex_help.hpp
index 79e0e920a4..83aecc96d4 100644
--- a/src/type_traits/complex_help.hpp
+++ b/src/type_traits/complex_help.hpp
@@ -12,6 +12,9 @@
 #ifndef QMCPLUSPLUS_COMPLEX_HELP_HPP
 #define QMCPLUSPLUS_COMPLEX_HELP_HPP
 
+#include <complex>
+#include <type_traits>
+
 namespace qmcplusplus
 {
 template<typename T>