From c04b48de2e493c817b318d9ec1c935157b3aa547 Mon Sep 17 00:00:00 2001
From: William F Godoy <williamfgc@yahoo.com>
Date: Mon, 24 Jul 2023 15:09:01 -0400
Subject: [PATCH 01/17] Implement SPOSetT template class

Assess the effort to change this.
Currently without consumers or tests.

Concretize friend class declaration

Define testing::getMyVars for SPOSetT
---
 src/QMCWaveFunctions/CMakeLists.txt           |   1 +
 src/QMCWaveFunctions/SPOSetT.cpp              | 399 ++++++++++++
 src/QMCWaveFunctions/SPOSetT.h                | 575 ++++++++++++++++++
 .../tests/test_RotatedSPOs.cpp                |   5 +
 4 files changed, 980 insertions(+)
 create mode 100644 src/QMCWaveFunctions/SPOSetT.cpp
 create mode 100644 src/QMCWaveFunctions/SPOSetT.h
diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt
index 8a0611a6f5..11da0d4cf9 100644
--- a/src/QMCWaveFunctions/CMakeLists.txt
+++ b/src/QMCWaveFunctions/CMakeLists.txt
@@ -32,6 +32,7 @@ set(WFBASE_SRCS
     SPOSetInfo.cpp
     SPOSetInputInfo.cpp
     SPOSet.cpp
+    SPOSetT.cpp
     CompositeSPOSet.cpp
     HarmonicOscillator/SHOSet.cpp
     HarmonicOscillator/SHOSetBuilder.cpp
diff --git a/src/QMCWaveFunctions/SPOSetT.cpp b/src/QMCWaveFunctions/SPOSetT.cpp
new file mode 100644
index 0000000000..6d488d4bea
--- /dev/null
+++ b/src/QMCWaveFunctions/SPOSetT.cpp
@@ -0,0 +1,399 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
+//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Ying Wai Li, yingwaili@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    William F. Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "SPOSetT.h"
+
+#include "CPU/SIMD/simd.hpp" // simd::dot
+
+namespace qmcplusplus
+{
+
+template<typename T>
+SPOSetT<T>::SPOSetT(const std::string& my_name) : my_name_{my_name}, OrbitalSetSize{0}
+{} // only the name is supplied by the caller; the orbital count starts at zero
+
+template<typename T>
+void SPOSetT<T>::extractOptimizableObjectRefs(UniqueOptObjRefs&)
+{
+  if (!isOptimizable())
+    return; // nothing to collect from a non-optimizable orbital set
+  throw std::logic_error("Bug!! " + getClassName() + "::extractOptimizableObjectRefs "
+                         "must be overloaded when the SPOSet is optimizable.");
+}
+
+template<typename T>
+void SPOSetT<T>::checkOutVariables(const opt_variables_type& active)
+{
+  if (!isOptimizable())
+    return; // a non-optimizable set ignores `active` entirely
+  throw std::logic_error("Bug!! " + getClassName() + "::checkOutVariables "
+                         "must be overloaded when the SPOSet is optimizable.");
+}
+
+template<typename T>
+void SPOSetT<T>::evaluateDetRatios(const VirtualParticleSet& VP,
+                                   ValueVector& psi,
+                                   const ValueVector& psiinv,
+                                   std::vector<T>& ratios)
+{
+  assert(psi.size() == psiinv.size());
+  for (int ivp = 0; ivp < VP.getTotalNum(); ivp++)
+  { // psi is scratch: refilled for each virtual position, then dotted with the inverse row
+    evaluateValue(VP, ivp, psi);
+    ratios[ivp] = simd::dot(psi.data(), psiinv.data(), psi.size());
+  }
+}
+
+
+template<typename T>
+void SPOSetT<T>::mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                      const RefVectorWithLeader<const VirtualParticleSet>& vp_list,
+                                      const RefVector<ValueVector>& psi_list,
+                                      const std::vector<const T*>& invRow_ptr_list,
+                                      std::vector<std::vector<T>>& ratios_list) const
+{
+  assert(this == &spo_list.getLeader());
+  for (int walker = 0; walker < spo_list.size(); ++walker)
+  { // the raw inverse-row pointer is wrapped in a Vector as long as this walker's psi scratch
+    Vector<T> inv_row(const_cast<T*>(invRow_ptr_list[walker]), psi_list[walker].get().size());
+    spo_list[walker].evaluateDetRatios(vp_list[walker], psi_list[walker], inv_row, ratios_list[walker]);
+  }
+}
+
+template<class T>
+void SPOSetT<T>::evaluateVGL_spin(const ParticleSet& P,
+                                  int iat,
+                                  ValueVector& psi,
+                                  GradVector& dpsi,
+                                  ValueVector& d2psi,
+                                  ValueVector& dspin)
+{ // base-class stub: always throws std::runtime_error naming the concrete class that lacks an override
+  throw std::runtime_error("Need specialization of " + getClassName() + "::evaluateVGL_spin");
+}
+
+template<typename T>
+void SPOSetT<T>::mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                const RefVectorWithLeader<ParticleSet>& P_list,
+                                int iat,
+                                const RefVector<ValueVector>& psi_v_list,
+                                const RefVector<GradVector>& dpsi_v_list,
+                                const RefVector<ValueVector>& d2psi_v_list) const
+{ // generic fallback: loop over the batch and call the single-walker evaluateVGL
+  assert(this == &spo_list.getLeader());
+  for (int walker = 0; walker < spo_list.size(); ++walker)
+    spo_list[walker].evaluateVGL(P_list[walker], iat, psi_v_list[walker], dpsi_v_list[walker], d2psi_v_list[walker]);
+}
+
+template<typename T>
+void SPOSetT<T>::mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                  const RefVectorWithLeader<ParticleSet>& P_list,
+                                  int iat,
+                                  const RefVector<ValueVector>& psi_v_list) const
+{ // generic fallback: loop over the batch and call the single-walker evaluateValue
+  assert(this == &spo_list.getLeader());
+  for (int walker = 0; walker < spo_list.size(); ++walker)
+    spo_list[walker].evaluateValue(P_list[walker], iat, psi_v_list[walker]);
+}
+
+template<typename T>
+void SPOSetT<T>::mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                        const RefVectorWithLeader<ParticleSet>& P_list,
+                                        int iat,
+                                        const RefVector<ValueVector>& psi_v_list,
+                                        const RefVector<GradVector>& dpsi_v_list,
+                                        const RefVector<ValueVector>& d2psi_v_list,
+                                        OffloadMatrix<ComplexType>& mw_dspin) const
+{ // no generic implementation: always throws, prefixed with the concrete class name
+  throw std::runtime_error(getClassName() + "::mw_evaluateVGLWithSpin() is not supported. \n");
+}
+
+
+template<class T>
+void SPOSetT<T>::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                                const RefVectorWithLeader<ParticleSet>& P_list,
+                                                int iat,
+                                                const std::vector<const T*>& invRow_ptr_list,
+                                                OffloadMWVGLArray& phi_vgl_v,
+                                                std::vector<T>& ratios,
+                                                std::vector<GradType>& grads) const
+{ // generic fallback: one evaluateVGL per walker, results packed into phi_vgl_v[VGL][walker][orb]
+  assert(this == &spo_list.getLeader());
+  assert(phi_vgl_v.size(0) == DIM_VGL);
+  assert(phi_vgl_v.size(1) == spo_list.size());
+  const size_t nw             = spo_list.size();
+  const size_t norb_requested = phi_vgl_v.size(2);
+  assert(invRow_ptr_list.size() >= nw && ratios.size() >= nw && grads.size() >= nw); // indexed by iw below
+  GradVector dphi_v(norb_requested); // AoS gradient scratch, reused by every walker
+  for (size_t iw = 0; iw < nw; iw++)
+  {
+    ValueVector phi_v(phi_vgl_v.data_at(0, iw, 0), norb_requested);         // row 0: values
+    ValueVector d2phi_v(phi_vgl_v.data_at(DIM + 1, iw, 0), norb_requested); // row after the DIM gradient rows
+    spo_list[iw].evaluateVGL(P_list[iw], iat, phi_v, dphi_v, d2phi_v);
+
+    ratios[iw] = simd::dot(invRow_ptr_list[iw], phi_v.data(), norb_requested);
+    grads[iw]  = simd::dot(invRow_ptr_list[iw], dphi_v.data(), norb_requested) / ratios[iw];
+    // transpose the array of gradients to SoA in phi_vgl_v (rows 1..DIM)
+    for (size_t idim = 0; idim < DIM; idim++)
+    {
+      T* phi_g = phi_vgl_v.data_at(idim + 1, iw, 0);
+      for (size_t iorb = 0; iorb < norb_requested; iorb++)
+        phi_g[iorb] = dphi_v[iorb][idim];
+    }
+  }
+  phi_vgl_v.updateTo();
+}
+
+template<typename T>
+void SPOSetT<T>::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                                        const RefVectorWithLeader<ParticleSet>& P_list,
+                                                        int iat,
+                                                        const std::vector<const T*>& invRow_ptr_list,
+                                                        OffloadMWVGLArray& phi_vgl_v,
+                                                        std::vector<T>& ratios,
+                                                        std::vector<GradType>& grads,
+                                                        std::vector<T>& spingrads) const
+{ // base-class stub: always throws; no argument is read or written
+  std::string what = "Need specialization of " + getClassName();
+  throw std::runtime_error(what += "::mw_evaluateVGLandDetRatioGradsWithSpin(). \n");
+}
+
+template<class T>
+void SPOSetT<T>::evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& grad_grad_grad_logdet)
+{ // base-class stub: always throws std::runtime_error naming the concrete class that lacks an override
+  throw std::runtime_error("Need specialization of " + getClassName() + "::evaluateThirdDeriv(). \n");
+}
+
+template<typename T>
+void SPOSetT<T>::evaluate_notranspose_spin(const ParticleSet& P,
+                                           int first,
+                                           int last,
+                                           ValueMatrix& logdet,
+                                           GradMatrix& dlogdet,
+                                           ValueMatrix& d2logdet,
+                                           ValueMatrix& dspinlogdet)
+{ // base-class stub: always throws; no output matrix is touched
+  std::string what = "Need specialization of " + getClassName();
+  throw std::runtime_error(what += "::evaluate_notranspose_spin(P,iat,psi,dpsi,d2logdet, dspin_logdet) (vector quantities)\n");
+}
+
+template<typename T>
+void SPOSetT<T>::mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                         const RefVectorWithLeader<ParticleSet>& P_list,
+                                         int first,
+                                         int last,
+                                         const RefVector<ValueMatrix>& logdet_list,
+                                         const RefVector<GradMatrix>& dlogdet_list,
+                                         const RefVector<ValueMatrix>& d2logdet_list) const
+{ // generic fallback: loop over the batch and call the single-walker evaluate_notranspose
+  assert(this == &spo_list.getLeader());
+  for (int walker = 0; walker < spo_list.size(); ++walker)
+    spo_list[walker].evaluate_notranspose(P_list[walker], first, last, logdet_list[walker], dlogdet_list[walker], d2logdet_list[walker]);
+}
+
+template<typename T>
+std::unique_ptr<SPOSetT<T>> SPOSetT<T>::makeClone() const
+{ // the base class cannot clone itself: always throws, reporting the concrete class name
+  throw std::runtime_error("Missing  SPOSet::makeClone for " + getClassName());
+}
+
+template<typename T>
+void SPOSetT<T>::basic_report(const std::string& pad) const
+{
+  auto& log = app_log(); // both lines go to the application log, each prefixed with pad
+  log << pad << "size = " << size() << std::endl << pad << "state info:" << std::endl;
+  //states.report(pad+"  ");
+  log.flush();
+}
+
+template<typename T>
+void SPOSetT<T>::evaluateVGH(const ParticleSet& P,
+                             int iat,
+                             ValueVector& psi,
+                             GradVector& dpsi,
+                             HessVector& grad_grad_psi)
+{ // base-class stub: always throws; no output vector is touched
+  std::string what = "Need specialization of " + getClassName();
+  throw std::runtime_error(what += "::evaluate(P,iat,psi,dpsi,dhpsi) (vector quantities)\n");
+}
+
+template<typename T>
+void SPOSetT<T>::evaluateVGHGH(const ParticleSet& P,
+                               int iat,
+                               ValueVector& psi,
+                               GradVector& dpsi,
+                               HessVector& grad_grad_psi,
+                               GGGVector& grad_grad_grad_psi)
+{ // base-class stub: always throws; no output vector is touched
+  std::string what = "Need specialization of " + getClassName();
+  throw std::runtime_error(what += "::evaluate(P,iat,psi,dpsi,dhpsi,dghpsi) (vector quantities)\n");
+}
+
+template<typename T>
+void SPOSetT<T>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy)
+{
+  if (!isRotationSupported())
+    return; // silently a no-op when rotation is not supported; both arguments are ignored
+  throw std::logic_error("Bug!! " + getClassName() + "::applyRotation "
+                         "must be overloaded when the SPOSet supports rotation.");
+}
+
+template<typename T>
+void SPOSetT<T>::evaluateDerivatives(ParticleSet& P,
+                                     const opt_variables_type& optvars,
+                                     Vector<T>& dlogpsi,
+                                     Vector<T>& dhpsioverpsi,
+                                     const int& FirstIndex,
+                                     const int& LastIndex)
+{
+  if (!isOptimizable())
+    return; // non-optimizable set: dlogpsi and dhpsioverpsi are left unchanged
+  throw std::logic_error("Bug!! " + getClassName() + "::evaluateDerivatives "
+                         "must be overloaded when the SPOSet is optimizable.");
+}
+
+template<typename T>
+void SPOSetT<T>::evaluateDerivativesWF(ParticleSet& P,
+                                       const opt_variables_type& optvars,
+                                       Vector<T>& dlogpsi,
+                                       int FirstIndex,
+                                       int LastIndex)
+{
+  if (!isOptimizable())
+    return; // non-optimizable set: dlogpsi is left unchanged
+  throw std::logic_error("Bug!! " + getClassName() + "::evaluateDerivativesWF "
+                         "must be overloaded when the SPOSet is optimizable.");
+}
+
+template<typename T>
+void SPOSetT<T>::evaluateDerivRatios(const VirtualParticleSet& VP,
+                                     const opt_variables_type& optvars,
+                                     ValueVector& psi,
+                                     const ValueVector& psiinv,
+                                     std::vector<T>& ratios,
+                                     Matrix<T>& dratios,
+                                     int FirstIndex,
+                                     int LastIndex)
+{
+  // The ratios are computed first in every case, matching the WaveFunctionComponent fallback that
+  // evaluates just the ratios; dratios is never written here.
+  evaluateDetRatios(VP, psi, psiinv, ratios);
+  if (!isOptimizable())
+    return;
+  throw std::logic_error("Bug!! " + getClassName() + "::evaluateDerivRatios "
+                         "must be overloaded when the SPOSet is optimizable.");
+}
+
+template<typename T>
+void SPOSetT<T>::evaluateDerivatives(ParticleSet& P,
+                                     const opt_variables_type& optvars,
+                                     Vector<T>& dlogpsi,
+                                     Vector<T>& dhpsioverpsi,
+                                     const T& psiCurrent,
+                                     const std::vector<T>& Coeff,
+                                     const std::vector<size_t>& C2node_up,
+                                     const std::vector<size_t>& C2node_dn,
+                                     const ValueVector& detValues_up,
+                                     const ValueVector& detValues_dn,
+                                     const GradMatrix& grads_up,
+                                     const GradMatrix& grads_dn,
+                                     const ValueMatrix& lapls_up,
+                                     const ValueMatrix& lapls_dn,
+                                     const ValueMatrix& M_up,
+                                     const ValueMatrix& M_dn,
+                                     const ValueMatrix& Minv_up,
+                                     const ValueMatrix& Minv_dn,
+                                     const GradMatrix& B_grad,
+                                     const ValueMatrix& B_lapl,
+                                     const std::vector<int>& detData_up,
+                                     const size_t N1,
+                                     const size_t N2,
+                                     const size_t NP1,
+                                     const size_t NP2,
+                                     const std::vector<std::vector<int>>& lookup_tbl)
+{
+  if (!isOptimizable())
+    return; // non-optimizable set: every argument is ignored and the outputs stay unchanged
+  throw std::logic_error("Bug!! " + getClassName() + "::evaluateDerivatives "
+                         "must be overloaded when the SPOSet is optimizable.");
+}
+
+template<typename T>
+void SPOSetT<T>::evaluateDerivativesWF(ParticleSet& P,
+                                       const opt_variables_type& optvars,
+                                       Vector<T>& dlogpsi,
+                                       const T& psiCurrent,
+                                       const std::vector<T>& Coeff,
+                                       const std::vector<size_t>& C2node_up,
+                                       const std::vector<size_t>& C2node_dn,
+                                       const ValueVector& detValues_up,
+                                       const ValueVector& detValues_dn,
+                                       const ValueMatrix& M_up,
+                                       const ValueMatrix& M_dn,
+                                       const ValueMatrix& Minv_up,
+                                       const ValueMatrix& Minv_dn,
+                                       const std::vector<int>& detData_up,
+                                       const std::vector<std::vector<int>>& lookup_tbl)
+{
+  if (!isOptimizable())
+    return; // non-optimizable set: every argument is ignored and dlogpsi stays unchanged
+  throw std::logic_error("Bug!! " + getClassName() + "::evaluateDerivativesWF "
+                         "must be overloaded when the SPOSet is optimizable.");
+}
+
+template<typename T>
+void SPOSetT<T>::evaluateGradSource(const ParticleSet& P,
+                                    int first,
+                                    int last,
+                                    const ParticleSet& source,
+                                    int iat_src,
+                                    GradMatrix& gradphi)
+{
+  if (!hasIonDerivs())
+    return; // no ion derivatives reported: gradphi is left unchanged
+  throw std::logic_error("Bug!! " + getClassName() + "::evaluateGradSource "
+                         "must be overloaded when the SPOSet has ion derivatives.");
+}
+
+template<typename T>
+void SPOSetT<T>::evaluateGradSourceRow(const ParticleSet& P,
+                                       int iel,
+                                       const ParticleSet& source,
+                                       int iat_src,
+                                       GradVector& gradphi)
+{
+  if (!hasIonDerivs())
+    return; // no ion derivatives reported: gradphi is left unchanged
+  throw std::logic_error("Bug!! " + getClassName() + "::evaluateGradSourceRow "
+                         "must be overloaded when the SPOSet has ion derivatives.");
+}
+
+template<typename T>
+void SPOSetT<T>::evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi)
+{ // base-class stub: always throws; psi and dpsi are not touched
+  std::string what = "Need specialization of " + getClassName();
+  throw std::runtime_error(what += "::evaluate_spin(P,iat,psi,dpsi) (vector quantities)\n");
+}
+
+// Explicit instantiations: the member definitions live in this .cpp, so these are the value types provided
+template class SPOSetT<double>;
+template class SPOSetT<float>;
+template class SPOSetT<std::complex<double>>;
+template class SPOSetT<std::complex<float>>;
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/SPOSetT.h b/src/QMCWaveFunctions/SPOSetT.h
new file mode 100644
index 0000000000..a985df0183
--- /dev/null
+++ b/src/QMCWaveFunctions/SPOSetT.h
@@ -0,0 +1,575 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
+//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Ying Wai Li, yingwaili@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    William F. Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef QMCPLUSPLUS_SPOSETT_H
+#define QMCPLUSPLUS_SPOSETT_H
+
+#include "OhmmsPETE/OhmmsArray.h"
+#include "Particle/ParticleSet.h"
+#include "Particle/VirtualParticleSet.h"
+#include "QMCWaveFunctions/OrbitalSetTraits.h"
+#include "OptimizableObject.h"
+#include "OMPTarget/OffloadAlignedAllocators.hpp"
+#include "DualAllocatorAliases.hpp"
+
+namespace qmcplusplus
+{
+class ResourceCollection;
+
+template<typename T>
+class SPOSetT;
+namespace testing
+{ // one getMyVars overload per value type that SPOSetT.cpp explicitly instantiates
+opt_variables_type& getMyVars(SPOSetT<double>& spo);
+opt_variables_type& getMyVars(SPOSetT<float>& spo);
+opt_variables_type& getMyVars(SPOSetT<std::complex<double>>& spo);
+opt_variables_type& getMyVars(SPOSetT<std::complex<float>>& spo);
+} // namespace testing
+
+
+/** base class for Single-particle orbital sets
+ *
+ * SPOSet stands for S(ingle)P(article)O(rbital)Set which contains
+ * a number of single-particle orbitals with capabilities of evaluating \f$ \psi_j({\bf r}_i)\f$
+ */
+template<class T>
+class SPOSetT : public QMCTraits
+{
+public:
+  using ValueVector       = typename OrbitalSetTraits<T>::ValueVector;
+  using ValueMatrix       = typename OrbitalSetTraits<T>::ValueMatrix;
+  using GradVector        = typename OrbitalSetTraits<T>::GradVector;
+  using GradMatrix        = typename OrbitalSetTraits<T>::GradMatrix;
+  using GradType          = TinyVector<T, DIM>;
+  using HessVector        = typename OrbitalSetTraits<T>::HessVector;
+  using HessMatrix        = typename OrbitalSetTraits<T>::HessMatrix;
+  using GGGVector         = typename OrbitalSetTraits<T>::GradHessVector;
+  using GGGMatrix         = typename OrbitalSetTraits<T>::GradHessMatrix;
+  using SPOMap            = std::map<std::string, const std::unique_ptr<const SPOSetT<T>>>;
+  using OffloadMWVGLArray = Array<T, 3, OffloadPinnedAllocator<T>>; // [VGL, walker, Orbs]
+  using OffloadMWVArray   = Array<T, 2, OffloadPinnedAllocator<T>>; // [walker, Orbs]
+  template<typename DT>
+  using OffloadMatrix = Matrix<DT, OffloadPinnedAllocator<DT>>;
+
+  /** constructor */
+  SPOSetT<T>(const std::string& my_name);
+
+  /** destructor
+   *
+   * Derived class destructor needs to pay extra attention to freeing memory shared among clones of SPOSet.
+   */
+  virtual ~SPOSetT<T>() = default;
+
+  /** return the size of the orbital set
+   * Ye: this needs to be replaced by getOrbitalSetSize();
+   */
+  inline int size() const { return OrbitalSetSize; }
+
+  /** print basic SPOSet information
+   */
+  void basic_report(const std::string& pad = "") const;
+
+  /** print SPOSet information
+   */
+  virtual void report(const std::string& pad = "") const { basic_report(pad); }
+
+
+  /** return the size of the orbitals
+   */
+  inline int getOrbitalSetSize() const { return OrbitalSetSize; }
+
+  /// Query if this SPOSet is optimizable
+  virtual bool isOptimizable() const { return false; }
+
+  /** extract underlying OptimizableObject references
+   * @param opt_obj_refs aggregated list of optimizable object references
+   */
+  virtual void extractOptimizableObjectRefs(UniqueOptObjRefs& opt_obj_refs);
+
+  /** check out variational optimizable variables
+   * @param active a super set of optimizable variables
+   */
+  virtual void checkOutVariables(const opt_variables_type& active);
+
+  /// Query if this SPOSet uses OpenMP offload
+  virtual bool isOMPoffload() const { return false; }
+
+  /** Query if this SPOSet has an explicit ion dependence. returns true if it does.
+  */
+  virtual bool hasIonDerivs() const { return false; }
+
+  /// check a few key parameters before putting the SPO into a determinant
+  virtual void checkObject() const {}
+
+  /// return true if this SPOSet can be wrappered by RotatedSPO
+  virtual bool isRotationSupported() const { return false; }
+  /// store parameters before getting destroyed by rotation.
+  virtual void storeParamsBeforeRotation() {}
+  /// apply rotation to all the orbitals
+  virtual void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy = false);
+
+  /// Parameter derivatives of the wavefunction and the Laplacian of the wavefunction
+  virtual void evaluateDerivatives(ParticleSet& P,
+                                   const opt_variables_type& optvars,
+                                   Vector<T>& dlogpsi,
+                                   Vector<T>& dhpsioverpsi,
+                                   const int& FirstIndex,
+                                   const int& LastIndex);
+
+  /// Parameter derivatives of the wavefunction
+  virtual void evaluateDerivativesWF(ParticleSet& P,
+                                     const opt_variables_type& optvars,
+                                     Vector<T>& dlogpsi,
+                                     int FirstIndex,
+                                     int LastIndex);
+
+  /** Evaluate the derivative of the optimized orbitals with respect to the parameters
+   *  this is used only for MSD, to be refined for better serving both single and multi SD
+   */
+  virtual void evaluateDerivatives(ParticleSet& P,
+                                   const opt_variables_type& optvars,
+                                   Vector<T>& dlogpsi,
+                                   Vector<T>& dhpsioverpsi,
+                                   const T& psiCurrent,
+                                   const std::vector<T>& Coeff,
+                                   const std::vector<size_t>& C2node_up,
+                                   const std::vector<size_t>& C2node_dn,
+                                   const ValueVector& detValues_up,
+                                   const ValueVector& detValues_dn,
+                                   const GradMatrix& grads_up,
+                                   const GradMatrix& grads_dn,
+                                   const ValueMatrix& lapls_up,
+                                   const ValueMatrix& lapls_dn,
+                                   const ValueMatrix& M_up,
+                                   const ValueMatrix& M_dn,
+                                   const ValueMatrix& Minv_up,
+                                   const ValueMatrix& Minv_dn,
+                                   const GradMatrix& B_grad,
+                                   const ValueMatrix& B_lapl,
+                                   const std::vector<int>& detData_up,
+                                   const size_t N1,
+                                   const size_t N2,
+                                   const size_t NP1,
+                                   const size_t NP2,
+                                   const std::vector<std::vector<int>>& lookup_tbl);
+
+  /** Evaluate the derivative of the optimized orbitals with respect to the parameters
+   *  this is used only for MSD, to be refined for better serving both single and multi SD
+   */
+  virtual void evaluateDerivativesWF(ParticleSet& P,
+                                     const opt_variables_type& optvars,
+                                     Vector<T>& dlogpsi,
+                                     const T& psiCurrent,
+                                     const std::vector<T>& Coeff,
+                                     const std::vector<size_t>& C2node_up,
+                                     const std::vector<size_t>& C2node_dn,
+                                     const ValueVector& detValues_up,
+                                     const ValueVector& detValues_dn,
+                                     const ValueMatrix& M_up,
+                                     const ValueMatrix& M_dn,
+                                     const ValueMatrix& Minv_up,
+                                     const ValueMatrix& Minv_dn,
+                                     const std::vector<int>& detData_up,
+                                     const std::vector<std::vector<int>>& lookup_tbl);
+
+  /** set the OrbitalSetSize
+   * @param norbs number of single-particle orbitals
+   * Ye: I prefer to remove this interface in the future. SPOSet builders need to handle the size correctly.
+   * It doesn't make sense allowing to set the value at any place in the code.
+   */
+  virtual void setOrbitalSetSize(int norbs) = 0;
+
+  /** evaluate the values of this single-particle orbital set
+   * @param P current ParticleSet
+   * @param iat active particle
+   * @param psi values of the SPO
+   */
+  virtual void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) = 0;
+
+  /** evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP
+   * @param VP virtual particle set
+   * @param psi values of the SPO, used as a scratch space if needed
+   * @param psiinv the row of inverse slater matrix corresponding to the particle moved virtually
+   * @param ratios return determinant ratios
+   */
+  virtual void evaluateDetRatios(const VirtualParticleSet& VP,
+                                 ValueVector& psi,
+                                 const ValueVector& psiinv,
+                                 std::vector<T>& ratios);
+
+
+  /// Determinant ratios and parameter derivatives of the wavefunction for virtual moves
+  virtual void evaluateDerivRatios(const VirtualParticleSet& VP,
+                                   const opt_variables_type& optvars,
+                                   ValueVector& psi,
+                                   const ValueVector& psiinv,
+                                   std::vector<T>& ratios,
+                                   Matrix<T>& dratios,
+                                   int FirstIndex,
+                                   int LastIndex);
+
+
+  /** evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP, of multiple walkers
+   * @param spo_list the list of SPOSet pointers in a walker batch
+   * @param vp_list a list of virtual particle sets in a walker batch
+   * @param psi_list a list of values of the SPO, used as a scratch space if needed
+   * @param invRow_ptr_list a list of pointers to the rows of inverse slater matrix corresponding to the particles moved virtually
+   * @param ratios_list a list of returning determinant ratios
+   */
+  virtual void mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                    const RefVectorWithLeader<const VirtualParticleSet>& vp_list,
+                                    const RefVector<ValueVector>& psi_list,
+                                    const std::vector<const T*>& invRow_ptr_list,
+                                    std::vector<std::vector<T>>& ratios_list) const;
+
+  /** evaluate the values, gradients and laplacians of this single-particle orbital set
+   * @param P current ParticleSet
+   * @param iat active particle
+   * @param psi values of the SPO
+   * @param dpsi gradients of the SPO
+   * @param d2psi laplacians of the SPO
+   */
+  virtual void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) = 0;
+
+  /** evaluate the values, gradients and laplacians and spin gradient of this single-particle orbital set
+   * @param P current ParticleSet
+   * @param iat active particle
+   * @param psi values of the SPO
+   * @param dpsi gradients of the SPO
+   * @param d2psi laplacians of the SPO
+   * @param dspin spin gradients of the SPO
+   */
+  virtual void evaluateVGL_spin(const ParticleSet& P,
+                                int iat,
+                                ValueVector& psi,
+                                GradVector& dpsi,
+                                ValueVector& d2psi,
+                                ValueVector& dspin);
+
+  /** evaluate the values of this single-particle orbital sets of multiple walkers
+   * @param spo_list the list of SPOSet pointers in a walker batch
+   * @param P_list the list of ParticleSet pointers in a walker batch
+   * @param iat active particle
+   * @param psi_v_list the list of value vector pointers in a walker batch
+   */
+  virtual void mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                const RefVectorWithLeader<ParticleSet>& P_list,
+                                int iat,
+                                const RefVector<ValueVector>& psi_v_list) const;
+
+  /** evaluate the values, gradients and laplacians of this single-particle orbital sets of multiple walkers
+   * @param spo_list the list of SPOSet pointers in a walker batch
+   * @param P_list the list of ParticleSet pointers in a walker batch
+   * @param iat active particle
+   * @param psi_v_list the list of value vector pointers in a walker batch
+   * @param dpsi_v_list the list of gradient vector pointers in a walker batch
+   * @param d2psi_v_list the list of laplacian vector pointers in a walker batch
+   */
+  virtual void mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                              const RefVectorWithLeader<ParticleSet>& P_list,
+                              int iat,
+                              const RefVector<ValueVector>& psi_v_list,
+                              const RefVector<GradVector>& dpsi_v_list,
+                              const RefVector<ValueVector>& d2psi_v_list) const;
+
+  /** evaluate the values, gradients and laplacians and spin gradient of this single-particle orbital sets of multiple walkers
+   * @param spo_list the list of SPOSet pointers in a walker batch
+   * @param P_list the list of ParticleSet pointers in a walker batch
+   * @param iat active particle
+   * @param psi_v_list the list of value vector pointers in a walker batch
+   * @param dpsi_v_list the list of gradient vector pointers in a walker batch
+   * @param d2psi_v_list the list of laplacian vector pointers in a walker batch
+   * @param mw_dspin is a dual matrix of spin gradients [nw][norb]
+   * Note that the device side of mw_dspin is up to date
+   */
+  virtual void mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                      const RefVectorWithLeader<ParticleSet>& P_list,
+                                      int iat,
+                                      const RefVector<ValueVector>& psi_v_list,
+                                      const RefVector<GradVector>& dpsi_v_list,
+                                      const RefVector<ValueVector>& d2psi_v_list,
+                                      OffloadMatrix<ComplexType>& mw_dspin) const;
+
+  /** evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio
+   *  and grads of multiple walkers. Device data of phi_vgl_v must be up-to-date upon return
+   * @param spo_list the list of SPOSet pointers in a walker batch
+   * @param P_list the list of ParticleSet pointers in a walker batch
+   * @param iat active particle
+   * @param phi_vgl_v orbital values, gradients and laplacians of all the walkers
+   * @param ratios,grads determinant ratios and grads of all the walkers
+   */
+  virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                              const RefVectorWithLeader<ParticleSet>& P_list,
+                                              int iat,
+                                              const std::vector<const T*>& invRow_ptr_list,
+                                              OffloadMWVGLArray& phi_vgl_v,
+                                              std::vector<T>& ratios,
+                                              std::vector<GradType>& grads) const;
+
+  /** evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio
+   *  and grads of multiple walkers. Device data of phi_vgl_v must be up-to-date upon return.
+   *  Includes spin gradients
+   * @param spo_list the list of SPOSet pointers in a walker batch
+   * @param P_list the list of ParticleSet pointers in a walker batch
+   * @param iat active particle
+   * @param phi_vgl_v orbital values, gradients and laplacians of all the walkers
+   * @param ratios ratios of all walkers
+   * @param grads spatial gradients of all walkers
+   * @param spingrads spin gradients of all walkers
+   */
+  virtual void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                                      const RefVectorWithLeader<ParticleSet>& P_list,
+                                                      int iat,
+                                                      const std::vector<const T*>& invRow_ptr_list,
+                                                      OffloadMWVGLArray& phi_vgl_v,
+                                                      std::vector<T>& ratios,
+                                                      std::vector<GradType>& grads,
+                                                      std::vector<T>& spingrads) const;
+
+  /** evaluate the values, gradients and hessians of this single-particle orbital set
+   * @param P current ParticleSet
+   * @param iat active particle
+   * @param psi values of the SPO
+   * @param dpsi gradients of the SPO
+   * @param grad_grad_psi hessians of the SPO
+   */
+  virtual void evaluateVGH(const ParticleSet& P,
+                           int iat,
+                           ValueVector& psi,
+                           GradVector& dpsi,
+                           HessVector& grad_grad_psi);
+
+  /** evaluate the values, gradients, hessians, and grad hessians of this single-particle orbital set
+   * @param P current ParticleSet
+   * @param iat active particle
+   * @param psi values of the SPO
+   * @param dpsi gradients of the SPO
+   * @param grad_grad_psi hessians of the SPO
+   * @param grad_grad_grad_psi grad hessians of the SPO
+   */
+  virtual void evaluateVGHGH(const ParticleSet& P,
+                             int iat,
+                             ValueVector& psi,
+                             GradVector& dpsi,
+                             HessVector& grad_grad_psi,
+                             GGGVector& grad_grad_grad_psi);
+
+  /** evaluate the values of this single-particle orbital set
+   * @param P current ParticleSet
+   * @param iat active particle
+   * @param psi values of the SPO
+   */
+  virtual void evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi);
+
+  /** evaluate the third derivatives of this single-particle orbital set
+   * @param P current ParticleSet
+   * @param first first particle
+   * @param last last particle
+   * @param grad_grad_grad_logdet third derivatives of the SPO
+   */
+  virtual void evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& grad_grad_grad_logdet);
+
+  /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles
+   * @param[in] P current ParticleSet
+   * @param[in] first starting index of the particles
+   * @param[in] last ending index of the particles
+   * @param[out] logdet determinant matrix to be inverted
+   * @param[out] dlogdet gradients
+   * @param[out] d2logdet laplacians
+   *
+   */
+  virtual void evaluate_notranspose(const ParticleSet& P,
+                                    int first,
+                                    int last,
+                                    ValueMatrix& logdet,
+                                    GradMatrix& dlogdet,
+                                    ValueMatrix& d2logdet) = 0;
+
+  /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles, including the spin gradient
+   * @param P current ParticleSet
+   * @param first starting index of the particles
+   * @param last ending index of the particles
+   * @param logdet determinant matrix to be inverted
+   * @param dlogdet gradients
+   * @param d2logdet laplacians
+   * @param dspinlogdet spin gradients
+   *
+   * default implementation will abort for all SPOSets except SpinorSet
+   *
+   */
+  virtual void evaluate_notranspose_spin(const ParticleSet& P,
+                                         int first,
+                                         int last,
+                                         ValueMatrix& logdet,
+                                         GradMatrix& dlogdet,
+                                         ValueMatrix& d2logdet,
+                                         ValueMatrix& dspinlogdet);
+
+  virtual void mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                       const RefVectorWithLeader<ParticleSet>& P_list,
+                                       int first,
+                                       int last,
+                                       const RefVector<ValueMatrix>& logdet_list,
+                                       const RefVector<GradMatrix>& dlogdet_list,
+                                       const RefVector<ValueMatrix>& d2logdet_list) const;
+
+  /** evaluate the values, gradients and hessians of this single-particle orbital for [first,last) particles
+   * @param P current ParticleSet
+   * @param first starting index of the particles
+   * @param last ending index of the particles
+   * @param logdet determinant matrix to be inverted
+   * @param dlogdet gradients
+   * @param grad_grad_logdet hessians
+   *
+   */
+  virtual void evaluate_notranspose(const ParticleSet& P,
+                                    int first,
+                                    int last,
+                                    ValueMatrix& logdet,
+                                    GradMatrix& dlogdet,
+                                    HessMatrix& grad_grad_logdet);
+
+  /** evaluate the values, gradients, hessians and third derivatives of this single-particle orbital for [first,last) particles
+   * @param P current ParticleSet
+   * @param first starting index of the particles
+   * @param last ending index of the particles
+   * @param logdet determinant matrix to be inverted
+   * @param dlogdet gradients
+   * @param grad_grad_logdet hessians
+   * @param grad_grad_grad_logdet third derivatives
+   *
+   */
+  virtual void evaluate_notranspose(const ParticleSet& P,
+                                    int first,
+                                    int last,
+                                    ValueMatrix& logdet,
+                                    GradMatrix& dlogdet,
+                                    HessMatrix& grad_grad_logdet,
+                                    GGGMatrix& grad_grad_grad_logdet);
+
+  /** evaluate the gradients of this single-particle orbital
+   *  for [first,last) target particles with respect to the given source particle
+   * @param P current ParticleSet
+   * @param first starting index of the particles
+   * @param last ending index of the particles
+   * @param iat_src source particle index
+   * @param gradphi gradients
+   *
+   */
+  virtual void evaluateGradSource(const ParticleSet& P,
+                                  int first,
+                                  int last,
+                                  const ParticleSet& source,
+                                  int iat_src,
+                                  GradMatrix& gradphi);
+
+  /** evaluate the gradients of values, gradients, laplacians of this single-particle orbital
+   *  for [first,last) target particles with respect to the given source particle
+   * @param P current ParticleSet
+   * @param first starting index of the particles
+   * @param last ending index of the particles
+   * @param iat_src source particle index
+   * @param grad_phi gradients of values
+   * @param grad_grad_phi gradients of gradients
+   * @param grad_lapl_phi gradients of laplacians
+   *
+   */
+  virtual void evaluateGradSource(const ParticleSet& P,
+                                  int first,
+                                  int last,
+                                  const ParticleSet& source,
+                                  int iat_src,
+                                  GradMatrix& grad_phi,
+                                  HessMatrix& grad_grad_phi,
+                                  GradMatrix& grad_lapl_phi);
+
+  /** @brief Returns a row of d/dR_iat phi_j(r) evaluated at position r.  
+   *
+   *  @param[in] P particle set.
+   *  @param[in] iel The electron at which to evaluate phi(r_iel)
+   *  @param[in] source ion particle set.
+   *  @param[in] iat_src ion ID w.r.t. which to take derivative.
+   *  @param[in,out] gradphi Vector of d/dR_iat phi_j(r).
+   *  @return Void
+   */
+  virtual void evaluateGradSourceRow(const ParticleSet& P,
+                                     int iel,
+                                     const ParticleSet& source,
+                                     int iat_src,
+                                     GradVector& gradphi);
+
+  /** access the k point related to the given orbital */
+  virtual PosType get_k(int orb) { return PosType(); }
+
+  /** initialize a shared resource and hand it to collection
+   */
+  virtual void createResource(ResourceCollection& collection) const {}
+
+  /** acquire a shared resource from collection
+   */
+  virtual void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const {}
+
+  /** return a shared resource to collection
+   */
+  virtual void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const {}
+
+  /** make a clone of itself
+   * every derived class must implement this to have threading working correctly.
+   */
+  [[noreturn]] virtual std::unique_ptr<SPOSetT<T>> makeClone() const;
+
+  /** Used only by cusp correction in AOS LCAO.
+   * Ye: the SoA LCAO moves all this responsibility to the builder.
+   * This interface should be removed with AoS.
+   */
+  virtual bool transformSPOSet() { return true; }
+
+  /** finalize the construction of SPOSet
+   *
+   * for example, classes serving accelerators may need to transfer data from host to device
+   * after the host side objects are built.
+   */
+  virtual void finalizeConstruction() {}
+
+  /// return object name
+  const std::string& getName() const { return my_name_; }
+
+  /// return class name
+  virtual std::string getClassName() const = 0;
+
+protected:
+  /// name of the object, unique identifier
+  const std::string my_name_;
+  ///number of Single-particle orbitals
+  IndexType OrbitalSetSize;
+  /// Optimizable variables
+  opt_variables_type myVars;
+
+  friend opt_variables_type& testing::getMyVars(SPOSetT<float>& spo);
+  friend opt_variables_type& testing::getMyVars(SPOSetT<double>& spo);
+  friend opt_variables_type& testing::getMyVars(SPOSetT<std::complex<float>>& spo);
+  friend opt_variables_type& testing::getMyVars(SPOSetT<std::complex<double>>& spo);
+};
+
+template<class T>
+using SPOSetTPtr = SPOSetT<T>*;
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp b/src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp
index 39e35c9c70..c162b9985c 100644
--- a/src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp
+++ b/src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp
@@ -20,6 +20,7 @@
 #include "QMCWaveFunctions/WaveFunctionComponent.h"
 #include "QMCWaveFunctions/EinsplineSetBuilder.h"
 #include "QMCWaveFunctions/RotatedSPOs.h"
+#include "QMCWaveFunctions/SPOSetT.h"
 #include "checkMatrix.hpp"
 #include "FakeSPO.h"
 #include <ResourceCollection.h>
@@ -645,6 +646,10 @@ TEST_CASE("RotatedSPOs construct delta matrix", "[wavefunction]")
 namespace testing
 {
 opt_variables_type& getMyVars(SPOSet& rot) { return rot.myVars; }
+opt_variables_type& getMyVars(SPOSetT<float>& rot) { return rot.myVars; }
+opt_variables_type& getMyVars(SPOSetT<double>& rot) { return rot.myVars; }
+opt_variables_type& getMyVars(SPOSetT<std::complex<float>>& rot) { return rot.myVars; }
+opt_variables_type& getMyVars(SPOSetT<std::complex<double>>& rot) { return rot.myVars; }
 opt_variables_type& getMyVarsFull(RotatedSPOs& rot) { return rot.myVarsFull; }
 std::vector<std::vector<QMCTraits::RealType>>& getHistoryParams(RotatedSPOs& rot) { return rot.history_params_; }
 } // namespace testing

From 7a7034a4e1aec167e594460bf1533a7a6441071d Mon Sep 17 00:00:00 2001
From: William F Godoy <williamfgc@yahoo.com>
Date: Mon, 24 Jul 2023 17:48:15 -0400
Subject: [PATCH 02/17] Mark todo for purely virtual functions

TODO when implementing derived classes
---
 src/QMCWaveFunctions/SPOSetT.cpp | 39 ++++++++++++++++++++++++++++++++
 src/QMCWaveFunctions/SPOSetT.h   | 17 ++++++++------
 2 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/src/QMCWaveFunctions/SPOSetT.cpp b/src/QMCWaveFunctions/SPOSetT.cpp
index 6d488d4bea..e61f4cace1 100644
--- a/src/QMCWaveFunctions/SPOSetT.cpp
+++ b/src/QMCWaveFunctions/SPOSetT.cpp
@@ -206,6 +206,29 @@ void SPOSetT<T>::mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>&
     spo_list[iw].evaluate_notranspose(P_list[iw], first, last, logdet_list[iw], dlogdet_list[iw], d2logdet_list[iw]);
 }
 
+template<class T>
+void SPOSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                      int first,
+                                      int last,
+                                      ValueMatrix& logdet,
+                                      GradMatrix& dlogdet,
+                                      HessMatrix& grad_grad_logdet)
+{
+  throw std::runtime_error("Need specialization of SPOSet::evaluate_notranspose() for grad_grad_logdet. \n");
+}
+
+template<class T>
+void SPOSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                      int first,
+                                      int last,
+                                      ValueMatrix& logdet,
+                                      GradMatrix& dlogdet,
+                                      HessMatrix& grad_grad_logdet,
+                                      GGGMatrix& grad_grad_grad_logdet)
+{
+  throw std::runtime_error("Need specialization of SPOSet::evaluate_notranspose() for grad_grad_grad_logdet. \n");
+}
+
 template<class T>
 std::unique_ptr<SPOSetT<T>> SPOSetT<T>::makeClone() const
 {
@@ -370,6 +393,22 @@ void SPOSetT<T>::evaluateGradSource(const ParticleSet& P,
                            "must be overloaded when the SPOSet has ion derivatives.");
 }
 
+template<class T>
+void SPOSetT<T>::evaluateGradSource(const ParticleSet& P,
+                                    int first,
+                                    int last,
+                                    const ParticleSet& source,
+                                    int iat_src,
+                                    GradMatrix& grad_phi,
+                                    HessMatrix& grad_grad_phi,
+                                    GradMatrix& grad_lapl_phi)
+{
+  if (hasIonDerivs())
+    throw std::logic_error("Bug!! " + getClassName() +
+                           "::evaluateGradSource "
+                           "must be overloaded when the SPOSet has ion derivatives.");
+}
+
 template<class T>
 void SPOSetT<T>::evaluateGradSourceRow(const ParticleSet& P,
                                        int iel,
diff --git a/src/QMCWaveFunctions/SPOSetT.h b/src/QMCWaveFunctions/SPOSetT.h
index a985df0183..95643fd3c5 100644
--- a/src/QMCWaveFunctions/SPOSetT.h
+++ b/src/QMCWaveFunctions/SPOSetT.h
@@ -193,15 +193,17 @@ class SPOSetT : public QMCTraits
    * @param norbs number of single-particle orbitals
    * Ye: I prefer to remove this interface in the future. SPOSet builders need to handle the size correctly.
    * It doesn't make sense allowing to set the value at any place in the code.
+   * @TODO make it purely virtual
    */
-  virtual void setOrbitalSetSize(int norbs) = 0;
+  virtual void setOrbitalSetSize(int norbs){};
 
   /** evaluate the values of this single-particle orbital set
    * @param P current ParticleSet
    * @param iat active particle
    * @param psi values of the SPO
+   * @TODO make it purely virtual
    */
-  virtual void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) = 0;
+  virtual void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi){};
 
   /** evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP
    * @param VP virtual particle set
@@ -245,8 +247,9 @@ class SPOSetT : public QMCTraits
    * @param psi values of the SPO
    * @param dpsi gradients of the SPO
    * @param d2psi laplacians of the SPO
+   * @TODO make this purely virtual
    */
-  virtual void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) = 0;
+  virtual void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi){};
 
   /** evaluate the values, gradients and laplacians and spin gradient of this single-particle orbital set
    * @param P current ParticleSet
@@ -393,14 +396,14 @@ class SPOSetT : public QMCTraits
    * @param[out] logdet determinant matrix to be inverted
    * @param[out] dlogdet gradients
    * @param[out] d2logdet laplacians
-   *
+   * @TODO make this pure virtual
    */
   virtual void evaluate_notranspose(const ParticleSet& P,
                                     int first,
                                     int last,
                                     ValueMatrix& logdet,
                                     GradMatrix& dlogdet,
-                                    ValueMatrix& d2logdet) = 0;
+                                    ValueMatrix& d2logdet){};
 
   /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles, including the spin gradient
    * @param P current ParticleSet
@@ -551,8 +554,8 @@ class SPOSetT : public QMCTraits
   /// return object name
   const std::string& getName() const { return my_name_; }
 
-  /// return class name
-  virtual std::string getClassName() const = 0;
+  /// @TODO make this purely virutal return class name
+  virtual std::string getClassName() const { return ""; };
 
 protected:
   /// name of the object, unique identifier

From 26ce50bc394f92fb559b12e1e5db1d636e416bb7 Mon Sep 17 00:00:00 2001
From: Steven Hahn <hahnse@ornl.gov>
Date: Fri, 4 Aug 2023 14:57:43 -0400
Subject: [PATCH 03/17] Add FakeSPOT class

Move SpinorSet to a templated class

Refactor FreeOrbital class

Base type aliases in SPOSet<T> on OrbitalSetTraits<T>

Add FullRealType in SPOSet and RotatedSPOs

Add this in templated meta class

Add explicit function instantiations for FreeOrbital

Add templated class SHOSetT

Signed-off-by: Steven Hahn <hahnse@ornl.gov>

Add PWRealOrbitalSetT template class

Revert test_RotatedSPOs.cpp
---
 src/QMCWaveFunctions/CMakeLists.txt           |    9 +-
 .../ElectronGas/FreeOrbitalT.cpp              |  718 +++++++
 .../ElectronGas/FreeOrbitalT.h                |   88 +
 .../HarmonicOscillator/SHOSetT.cpp            |  577 ++++++
 .../HarmonicOscillator/SHOSetT.h              |  158 ++
 .../PlaneWave/PWRealOrbitalSetT.cpp           |  165 ++
 .../PlaneWave/PWRealOrbitalSetT.h             |  143 ++
 src/QMCWaveFunctions/RotatedSPOsT.cpp         | 1690 +++++++++++++++++
 src/QMCWaveFunctions/RotatedSPOsT.h           |  420 ++++
 src/QMCWaveFunctions/SPOSetT.cpp              |    6 +-
 src/QMCWaveFunctions/SPOSetT.h                |    6 +-
 src/QMCWaveFunctions/SpinorSetT.cpp           |  586 ++++++
 src/QMCWaveFunctions/SpinorSetT.h             |  229 +++
 src/QMCWaveFunctions/tests/CMakeLists.txt     |    2 +-
 src/QMCWaveFunctions/tests/FakeSPOT.cpp       |  160 ++
 src/QMCWaveFunctions/tests/FakeSPOT.h         |   62 +
 .../tests/test_RotatedSPOs.cpp                |   63 +
 17 files changed, 5073 insertions(+), 9 deletions(-)
 create mode 100644 src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp
 create mode 100644 src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h
 create mode 100644 src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp
 create mode 100644 src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h
 create mode 100644 src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.cpp
 create mode 100644 src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.h
 create mode 100644 src/QMCWaveFunctions/RotatedSPOsT.cpp
 create mode 100644 src/QMCWaveFunctions/RotatedSPOsT.h
 create mode 100644 src/QMCWaveFunctions/SpinorSetT.cpp
 create mode 100644 src/QMCWaveFunctions/SpinorSetT.h
 create mode 100644 src/QMCWaveFunctions/tests/FakeSPOT.cpp
 create mode 100644 src/QMCWaveFunctions/tests/FakeSPOT.h

diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt
index 11da0d4cf9..959e7743ae 100644
--- a/src/QMCWaveFunctions/CMakeLists.txt
+++ b/src/QMCWaveFunctions/CMakeLists.txt
@@ -35,16 +35,17 @@ set(WFBASE_SRCS
     SPOSetT.cpp
     CompositeSPOSet.cpp
     HarmonicOscillator/SHOSet.cpp
+    HarmonicOscillator/SHOSetT.cpp
     HarmonicOscillator/SHOSetBuilder.cpp
     ExampleHeBuilder.cpp
     ExampleHeComponent.cpp)
 
 if(NOT QMC_COMPLEX)
-  set(WFBASE_SRCS ${WFBASE_SRCS} RotatedSPOs.cpp)
+  set(WFBASE_SRCS ${WFBASE_SRCS} RotatedSPOs.cpp RotatedSPOsT.cpp)
 endif(NOT QMC_COMPLEX)
 
 if(QMC_COMPLEX)
-  set(WFBASE_SRCS ${WFBASE_SRCS} SpinorSet.cpp)
+  set(WFBASE_SRCS ${WFBASE_SRCS} SpinorSet.cpp SpinorSetT.cpp)
 endif(QMC_COMPLEX)
 ########################
 # build jastrows
@@ -63,7 +64,7 @@ set(JASTROW_SRCS
 set(JASTROW_OMPTARGET_SRCS
     Jastrow/TwoBodyJastrow.cpp
     Jastrow/BsplineFunctor.cpp)
-set(FERMION_SRCS ${FERMION_SRCS} ElectronGas/FreeOrbital.cpp ElectronGas/FreeOrbitalBuilder.cpp)
+set(FERMION_SRCS ${FERMION_SRCS} ElectronGas/FreeOrbital.cpp ElectronGas/FreeOrbitalT.cpp ElectronGas/FreeOrbitalBuilder.cpp)
 
 # wavefunctions only availbale to 3-dim problems
 if(OHMMS_DIM MATCHES 3)
@@ -114,7 +115,7 @@ if(OHMMS_DIM MATCHES 3)
   if(QMC_COMPLEX)
     set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWOrbitalSet.cpp)
   else()
-    set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWRealOrbitalSet.cpp)
+    set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWRealOrbitalSet.cpp PlaneWave/PWRealOrbitalSetT.cpp)
   endif(QMC_COMPLEX)
 
   if(NOT QMC_COMPLEX)
diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp
new file mode 100644
index 0000000000..81bc37cc79
--- /dev/null
+++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp
@@ -0,0 +1,718 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2022 QMCPACK developers.
+//
+// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Yubo "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron
+//                    William F Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "FreeOrbitalT.h"
+
+namespace qmcplusplus
+{
+
+template<class T> // primary-template constructor; float, double and both complex types are specialized below
+FreeOrbitalT<T>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart) : SPOSetT<T>(my_name)
+{} // only forwards my_name to SPOSetT<T>; kpts_cart is not used here
+
+//Explicit template specialization
+template<> // real single precision: slot 0 plus one (cos, sin) pair for each remaining k-vector
+FreeOrbitalT<float>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
+    : SPOSetT<float>(my_name),
+      kvecs(kpts_cart),
+      mink(1), // k loops start at 1; index 0 (k=0) is handled separately
+      maxk(kpts_cart.size()),
+      k2neg(maxk)
+{
+  this->OrbitalSetSize = maxk + maxk - 1; // SPOSetT member: two orbitals per k, but only one for k=0
+  for (int idx = 0; idx < maxk; ++idx)
+    k2neg[idx] = -dot(kvecs[idx], kvecs[idx]); // cache -|k|^2
+}
+
+template<> // real double precision: slot 0 plus one (cos, sin) pair for each remaining k-vector
+FreeOrbitalT<double>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
+    : SPOSetT<double>(my_name),
+      kvecs(kpts_cart),
+      mink(1), // k loops start at 1; index 0 (k=0) is handled separately
+      maxk(kpts_cart.size()),
+      k2neg(maxk)
+{
+  this->OrbitalSetSize = maxk + maxk - 1; // SPOSetT member: two orbitals per k, but only one for k=0
+  for (int idx = 0; idx < maxk; ++idx)
+    k2neg[idx] = -dot(kvecs[idx], kvecs[idx]); // cache -|k|^2
+}
+
+template<> // complex single precision: exactly one orbital per k-vector
+FreeOrbitalT<std::complex<float>>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
+    : SPOSetT<std::complex<float>>(my_name),
+      kvecs(kpts_cart),
+      mink(0), // k loops start at 0: no k-vector is skipped in the complex layout
+      maxk(kpts_cart.size()),
+      k2neg(maxk)
+{
+  this->OrbitalSetSize = maxk; // SPOSetT member: one orbital per k-vector
+  for (int idx = 0; idx < maxk; ++idx)
+    k2neg[idx] = -dot(kvecs[idx], kvecs[idx]); // cache -|k|^2
+}
+
+template<> // complex double precision: exactly one orbital per k-vector
+FreeOrbitalT<std::complex<double>>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
+    : SPOSetT<std::complex<double>>(my_name),
+      kvecs(kpts_cart),
+      mink(0), // k loops start at 0: no k-vector is skipped in the complex layout
+      maxk(kpts_cart.size()),
+      k2neg(maxk)
+{
+  this->OrbitalSetSize = maxk; // SPOSetT member: one orbital per k-vector
+  for (int idx = 0; idx < maxk; ++idx)
+    k2neg[idx] = -dot(kvecs[idx], kvecs[idx]); // cache -|k|^2
+}
+
+
+template<class T> // primary template; every explicitly instantiated type has its own specialization below
+void FreeOrbitalT<T>::evaluateVGL(const ParticleSet& P,
+                                  int iat,
+                                  ValueVector& pvec,
+                                  GradVector& dpvec,
+                                  ValueVector& d2pvec)
+{} // no-op: leaves pvec, dpvec and d2pvec untouched
+
+template<> // real float: value, gradient and Laplacian of all orbitals at P.activeR(iat)
+void FreeOrbitalT<float>::evaluateVGL(const ParticleSet& P,
+                                      int iat,
+                                      ValueVector& pvec,
+                                      GradVector& dpvec,
+                                      ValueVector& d2pvec)
+{
+  pvec[0]   = 1.0; // slot 0 is the constant orbital: value 1, zero derivatives
+  dpvec[0]  = 0.0;
+  d2pvec[0] = 0.0;
+  const PosType& pos = P.activeR(iat);
+  RealType sin_kr, cos_kr;
+  for (int kidx = mink; kidx < maxk; ++kidx)
+  {
+    sincos(dot(kvecs[kidx], pos), &sin_kr, &cos_kr);
+    const int isin = 2 * kidx; // sin(k.r) slot; the cos(k.r) slot sits just before it
+    const int icos = isin - 1;
+    pvec[icos]     = cos_kr;
+    pvec[isin]     = sin_kr;
+    dpvec[icos]    = -sin_kr * kvecs[kidx];
+    dpvec[isin]    = cos_kr * kvecs[kidx];
+    d2pvec[icos]   = k2neg[kidx] * cos_kr;
+    d2pvec[isin]   = k2neg[kidx] * sin_kr;
+  }
+}
+
+template<> // real double: value, gradient and Laplacian of all orbitals at P.activeR(iat)
+void FreeOrbitalT<double>::evaluateVGL(const ParticleSet& P,
+                                       int iat,
+                                       ValueVector& pvec,
+                                       GradVector& dpvec,
+                                       ValueVector& d2pvec)
+{
+  pvec[0]   = 1.0; // slot 0 is the constant orbital: value 1, zero derivatives
+  dpvec[0]  = 0.0;
+  d2pvec[0] = 0.0;
+  const PosType& pos = P.activeR(iat);
+  RealType sin_kr, cos_kr;
+  for (int kidx = mink; kidx < maxk; ++kidx)
+  {
+    sincos(dot(kvecs[kidx], pos), &sin_kr, &cos_kr);
+    const int isin = 2 * kidx; // sin(k.r) slot; the cos(k.r) slot sits just before it
+    const int icos = isin - 1;
+    pvec[icos]     = cos_kr;
+    pvec[isin]     = sin_kr;
+    dpvec[icos]    = -sin_kr * kvecs[kidx];
+    dpvec[isin]    = cos_kr * kvecs[kidx];
+    d2pvec[icos]   = k2neg[kidx] * cos_kr;
+    d2pvec[isin]   = k2neg[kidx] * sin_kr;
+  }
+}
+
+
+template<> // complex float: value, gradient and Laplacian of exp(i k.r) for every k-vector
+void FreeOrbitalT<std::complex<float>>::evaluateVGL(const ParticleSet& P,
+                                                    int iat,
+                                                    ValueVector& pvec,
+                                                    GradVector& dpvec,
+                                                    ValueVector& d2pvec)
+{
+  const PosType& pos = P.activeR(iat);
+  RealType sin_kr, cos_kr;
+  for (int kidx = mink; kidx < maxk; ++kidx)
+  {
+    sincos(dot(kvecs[kidx], pos), &sin_kr, &cos_kr);
+    // phi = cos + i sin; gradient = i k phi; Laplacian = -|k|^2 phi
+    pvec[kidx]   = ValueType(cos_kr, sin_kr);
+    dpvec[kidx]  = ValueType(-sin_kr, cos_kr) * kvecs[kidx];
+    d2pvec[kidx] = ValueType(k2neg[kidx] * cos_kr, k2neg[kidx] * sin_kr);
+  }
+}
+
+template<> // complex double: value, gradient and Laplacian of exp(i k.r) for every k-vector
+void FreeOrbitalT<std::complex<double>>::evaluateVGL(const ParticleSet& P,
+                                                     int iat,
+                                                     ValueVector& pvec,
+                                                     GradVector& dpvec,
+                                                     ValueVector& d2pvec)
+{
+  const PosType& pos = P.activeR(iat);
+  RealType sin_kr, cos_kr;
+  for (int kidx = mink; kidx < maxk; ++kidx)
+  {
+    sincos(dot(kvecs[kidx], pos), &sin_kr, &cos_kr);
+    // phi = cos + i sin; gradient = i k phi; Laplacian = -|k|^2 phi
+    pvec[kidx]   = ValueType(cos_kr, sin_kr);
+    dpvec[kidx]  = ValueType(-sin_kr, cos_kr) * kvecs[kidx];
+    d2pvec[kidx] = ValueType(k2neg[kidx] * cos_kr, k2neg[kidx] * sin_kr);
+  }
+}
+
+
+template<> // real float: orbital values only, at P.activeR(iat)
+void FreeOrbitalT<float>::evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec)
+{
+  pvec[0] = 1.0; // constant orbital in slot 0
+  const PosType& pos = P.activeR(iat);
+  RealType sin_kr, cos_kr;
+  for (int kidx = mink; kidx < maxk; ++kidx)
+  {
+    sincos(dot(kvecs[kidx], pos), &sin_kr, &cos_kr);
+    const int isin = 2 * kidx; // sin(k.r) slot; the cos(k.r) slot sits just before it
+    const int icos = isin - 1;
+    pvec[icos]     = cos_kr;
+    pvec[isin]     = sin_kr;
+  }
+}
+
+template<> // real double: orbital values only, at P.activeR(iat)
+void FreeOrbitalT<double>::evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec)
+{
+  pvec[0] = 1.0; // constant orbital in slot 0
+  const PosType& pos = P.activeR(iat);
+  RealType sin_kr, cos_kr;
+  for (int kidx = mink; kidx < maxk; ++kidx)
+  {
+    sincos(dot(kvecs[kidx], pos), &sin_kr, &cos_kr);
+    const int isin = 2 * kidx; // sin(k.r) slot; the cos(k.r) slot sits just before it
+    const int icos = isin - 1;
+    pvec[icos]     = cos_kr;
+    pvec[isin]     = sin_kr;
+  }
+}
+
+// Complex plane-wave values: pvec[ik] = exp(i k.r) = cos(k.r) + i sin(k.r).
+// Only indices [mink, maxk) are written. The (cos, sin) split into slots
+// 2*ik-1 / 2*ik belongs to the real-valued specializations; applying it here
+// would write pvec[-1] for ik = 0 and clobber the complex values stored so far.
+template<>
+void FreeOrbitalT<std::complex<float>>::evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec)
+{
+  const PosType& r = P.activeR(iat);
+  RealType sinkr, coskr;
+  for (int ik = mink; ik < maxk; ik++)
+  {
+    sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+    // one complex orbital per k-vector, same layout as the complex evaluateVGL
+    pvec[ik] = std::complex<float>(coskr, sinkr);
+  }
+}
+
+// Complex plane-wave values: pvec[ik] = exp(i k.r) = cos(k.r) + i sin(k.r).
+// Only indices [mink, maxk) are written. The (cos, sin) split into slots
+// 2*ik-1 / 2*ik belongs to the real-valued specializations; applying it here
+// would write pvec[-1] for ik = 0 and clobber the complex values stored so far.
+template<>
+void FreeOrbitalT<std::complex<double>>::evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec)
+{
+  const PosType& r = P.activeR(iat);
+  RealType sinkr, coskr;
+  for (int ik = mink; ik < maxk; ik++)
+  {
+    sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+    // one complex orbital per k-vector, same layout as the complex evaluateVGL
+    pvec[ik] = std::complex<double>(coskr, sinkr);
+  }
+}
+
+template<class T> // primary template of the Hessian overload; the four specializations below do the work
+void FreeOrbitalT<T>::evaluate_notranspose(const ParticleSet& P,
+                                           int first,
+                                           int last,
+                                           ValueMatrix& phi,
+                                           GradMatrix& dphi,
+                                           HessMatrix& d2phi_mat)
+{} // no-op: none of the output matrices is written
+
+
+template<> // real float: values, gradients and Hessians for particles [first, last)
+void FreeOrbitalT<float>::evaluate_notranspose(const ParticleSet& P,
+                                               int first,
+                                               int last,
+                                               ValueMatrix& phi,
+                                               GradMatrix& dphi,
+                                               HessMatrix& d2phi_mat)
+{
+  RealType sinkr, coskr;
+  for (int iat = first, i = 0; iat < last; iat++, i++)
+  {
+    ValueVector p(phi[i], this->OrbitalSetSize);
+    GradVector dp(dphi[i], this->OrbitalSetSize);
+    HessVector hess(d2phi_mat[i], this->OrbitalSetSize);
+
+    const PosType& r = P.activeR(iat);
+    for (int ik = mink; ik < maxk; ik++)
+    {
+      const PosType& k = kvecs[ik]; // bound once per k-vector instead of re-indexing kvecs in every term
+      sincos(dot(k, r), &sinkr, &coskr);
+      const int j2 = 2 * ik; // sin(k.r) slot
+      const int j1 = j2 - 1; // cos(k.r) slot
+      p[j1]        = coskr;
+      p[j2]        = sinkr;
+      dp[j1]       = -sinkr * k;
+      dp[j2]       = coskr * k;
+      for (int la = 0; la < OHMMS_DIM; la++)
+      {
+        hess[j1](la, la) = -coskr * k[la] * k[la];
+        hess[j2](la, la) = -sinkr * k[la] * k[la];
+        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
+        {
+          hess[j1](la, lb) = -coskr * k[la] * k[lb];
+          hess[j2](la, lb) = -sinkr * k[la] * k[lb];
+          hess[j1](lb, la) = hess[j1](la, lb); // mirror the upper triangle
+          hess[j2](lb, la) = hess[j2](la, lb);
+        }
+      }
+    }
+    p[0]    = 1.0; // slot 0: constant orbital with vanishing derivatives
+    dp[0]   = 0.0;
+    hess[0] = 0.0;
+  }
+}
+
+template<> // real double: values, gradients and Hessians for particles [first, last)
+void FreeOrbitalT<double>::evaluate_notranspose(const ParticleSet& P,
+                                                int first,
+                                                int last,
+                                                ValueMatrix& phi,
+                                                GradMatrix& dphi,
+                                                HessMatrix& d2phi_mat)
+{
+  RealType sinkr, coskr;
+  for (int iat = first, i = 0; iat < last; iat++, i++)
+  {
+    ValueVector p(phi[i], this->OrbitalSetSize);
+    GradVector dp(dphi[i], this->OrbitalSetSize);
+    HessVector hess(d2phi_mat[i], this->OrbitalSetSize);
+
+    const PosType& r = P.activeR(iat);
+    for (int ik = mink; ik < maxk; ik++)
+    {
+      const PosType& k = kvecs[ik]; // bound once per k-vector instead of re-indexing kvecs in every term
+      sincos(dot(k, r), &sinkr, &coskr);
+      const int j2 = 2 * ik; // sin(k.r) slot
+      const int j1 = j2 - 1; // cos(k.r) slot
+      p[j1]        = coskr;
+      p[j2]        = sinkr;
+      dp[j1]       = -sinkr * k;
+      dp[j2]       = coskr * k;
+      for (int la = 0; la < OHMMS_DIM; la++)
+      {
+        hess[j1](la, la) = -coskr * k[la] * k[la];
+        hess[j2](la, la) = -sinkr * k[la] * k[la];
+        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
+        {
+          hess[j1](la, lb) = -coskr * k[la] * k[lb];
+          hess[j2](la, lb) = -sinkr * k[la] * k[lb];
+          hess[j1](lb, la) = hess[j1](la, lb); // mirror the upper triangle
+          hess[j2](lb, la) = hess[j2](la, lb);
+        }
+      }
+    }
+    p[0]    = 1.0; // slot 0: constant orbital with vanishing derivatives
+    dp[0]   = 0.0;
+    hess[0] = 0.0;
+  }
+}
+
+
+template<> // complex float: values, gradients and Hessians for particles [first, last)
+void FreeOrbitalT<std::complex<float>>::evaluate_notranspose(const ParticleSet& P,
+                                                             int first,
+                                                             int last,
+                                                             ValueMatrix& phi,
+                                                             GradMatrix& dphi,
+                                                             HessMatrix& d2phi_mat)
+{
+  RealType sin_kr, cos_kr;
+  for (int iat = first, row = 0; iat < last; ++iat, ++row)
+  {
+    // wrap row `row` of each output matrix (presumably non-owning views; confirm in the vector type)
+    ValueVector vals(phi[row], this->OrbitalSetSize);
+    GradVector grads(dphi[row], this->OrbitalSetSize);
+    HessVector hess(d2phi_mat[row], this->OrbitalSetSize);
+
+    const PosType& pos = P.activeR(iat);
+    for (int kidx = mink; kidx < maxk; ++kidx)
+    {
+      sincos(dot(kvecs[kidx], pos), &sin_kr, &cos_kr);
+
+      const std::complex<float> plane_wave(cos_kr, sin_kr); // exp(i k.r)
+      vals[kidx]  = plane_wave;
+      grads[kidx] = std::complex<float>(-sin_kr, cos_kr) * kvecs[kidx];
+      // Hessian entry (a, b) is -k_a k_b exp(i k.r): fill the upper triangle and mirror it
+      for (int a = 0; a < OHMMS_DIM; ++a)
+      {
+        hess[kidx](a, a) = -plane_wave * (kvecs[kidx])[a] * (kvecs[kidx])[a];
+        for (int b = a + 1; b < OHMMS_DIM; ++b)
+        {
+          hess[kidx](a, b) = -plane_wave * (kvecs[kidx])[a] * (kvecs[kidx])[b];
+          hess[kidx](b, a) = hess[kidx](a, b);
+        }
+      }
+    }
+  }
+}
+
+template<> // complex double: values, gradients and Hessians for particles [first, last)
+void FreeOrbitalT<std::complex<double>>::evaluate_notranspose(const ParticleSet& P,
+                                                              int first,
+                                                              int last,
+                                                              ValueMatrix& phi,
+                                                              GradMatrix& dphi,
+                                                              HessMatrix& d2phi_mat)
+{
+  RealType sin_kr, cos_kr;
+  for (int iat = first, row = 0; iat < last; ++iat, ++row)
+  {
+    // wrap row `row` of each output matrix (presumably non-owning views; confirm in the vector type)
+    ValueVector vals(phi[row], this->OrbitalSetSize);
+    GradVector grads(dphi[row], this->OrbitalSetSize);
+    HessVector hess(d2phi_mat[row], this->OrbitalSetSize);
+
+    const PosType& pos = P.activeR(iat);
+    for (int kidx = mink; kidx < maxk; ++kidx)
+    {
+      sincos(dot(kvecs[kidx], pos), &sin_kr, &cos_kr);
+
+      const std::complex<double> plane_wave(cos_kr, sin_kr); // exp(i k.r)
+      vals[kidx]  = plane_wave;
+      grads[kidx] = std::complex<double>(-sin_kr, cos_kr) * kvecs[kidx];
+      // Hessian entry (a, b) is -k_a k_b exp(i k.r): fill the upper triangle and mirror it
+      for (int a = 0; a < OHMMS_DIM; ++a)
+      {
+        hess[kidx](a, a) = -plane_wave * (kvecs[kidx])[a] * (kvecs[kidx])[a];
+        for (int b = a + 1; b < OHMMS_DIM; ++b)
+        {
+          hess[kidx](a, b) = -plane_wave * (kvecs[kidx])[a] * (kvecs[kidx])[b];
+          hess[kidx](b, a) = hess[kidx](a, b);
+        }
+      }
+    }
+  }
+}
+
+template<class T> // primary template of the Hessian + third-derivative overload; specialized below for all four types
+void FreeOrbitalT<T>::evaluate_notranspose(const ParticleSet& P,
+                                           int first,
+                                           int last,
+                                           ValueMatrix& phi,
+                                           GradMatrix& dphi,
+                                           HessMatrix& d2phi_mat,
+                                           GGGMatrix& d3phi_mat)
+{} // no-op: none of the output matrices is written
+
+template<> // real float: values, gradients, Hessians and third derivatives for particles [first, last)
+void FreeOrbitalT<float>::evaluate_notranspose(const ParticleSet& P,
+                                               int first,
+                                               int last,
+                                               ValueMatrix& phi,
+                                               GradMatrix& dphi,
+                                               HessMatrix& d2phi_mat,
+                                               GGGMatrix& d3phi_mat)
+{
+  RealType sinkr, coskr;
+  for (int iat = first, i = 0; iat < last; iat++, i++)
+  {
+    ValueVector p(phi[i], OrbitalSetSize);
+    GradVector dp(dphi[i], OrbitalSetSize);
+    HessVector hess(d2phi_mat[i], OrbitalSetSize);
+    GGGVector ggg(d3phi_mat[i], OrbitalSetSize);
+
+    const PosType& r = P.activeR(iat);
+    for (int ik = mink; ik < maxk; ik++)
+    {
+      const PosType& k = kvecs[ik]; // bound once per k-vector instead of re-indexing kvecs in every term
+      sincos(dot(k, r), &sinkr, &coskr);
+      const int j2 = 2 * ik; // sin(k.r) slot
+      const int j1 = j2 - 1; // cos(k.r) slot
+      p[j1]        = coskr;
+      p[j2]        = sinkr;
+      dp[j1]       = -sinkr * k;
+      dp[j2]       = coskr * k;
+      for (int la = 0; la < OHMMS_DIM; la++)
+      {
+        hess[j1](la, la)    = -coskr * k[la] * k[la];
+        hess[j2](la, la)    = -sinkr * k[la] * k[la];
+        ggg[j1][la](la, la) = sinkr * k[la] * k[la] * k[la];
+        ggg[j2][la](la, la) = -coskr * k[la] * k[la] * k[la];
+        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
+        {
+          hess[j1](la, lb)    = -coskr * k[la] * k[lb];
+          hess[j2](la, lb)    = -sinkr * k[la] * k[lb];
+          hess[j1](lb, la)    = hess[j1](la, lb);
+          hess[j2](lb, la)    = hess[j2](la, lb);
+          ggg[j1][la](lb, la) = sinkr * k[la] * k[lb] * k[la];
+          ggg[j2][la](lb, la) = -coskr * k[la] * k[lb] * k[la];
+          ggg[j1][la](la, lb) = ggg[j1][la](lb, la);
+          ggg[j2][la](la, lb) = ggg[j2][la](lb, la);
+          ggg[j1][lb](la, la) = ggg[j1][la](lb, la);
+          ggg[j2][lb](la, la) = ggg[j2][la](lb, la);
+          ggg[j1][la](lb, lb) = sinkr * k[la] * k[lb] * k[lb];
+          ggg[j2][la](lb, lb) = -coskr * k[la] * k[lb] * k[lb];
+          ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb);
+          ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb);
+          ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb);
+          ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb);
+          for (int lc = lb + 1; lc < OHMMS_DIM; lc++)
+          {
+            ggg[j1][la](lb, lc) = sinkr * k[la] * k[lb] * k[lc];
+            ggg[j2][la](lb, lc) = -coskr * k[la] * k[lb] * k[lc];
+            ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc);
+            ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc);
+            ggg[j1][lb](la, lc) = ggg[j1][la](lb, lc);
+            ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc);
+            ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc);
+            ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc);
+            ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc);
+            ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc);
+            ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc);
+            ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc);
+          }
+        }
+      }
+    }
+
+    p[0]    = 1.0; // slot 0: constant orbital with vanishing derivatives
+    dp[0]   = 0.0;
+    hess[0] = 0.0;
+    ggg[0]  = 0.0;
+  }
+}
+
+template<> // real double: values, gradients, Hessians and third derivatives for particles [first, last)
+void FreeOrbitalT<double>::evaluate_notranspose(const ParticleSet& P,
+                                                int first,
+                                                int last,
+                                                ValueMatrix& phi,
+                                                GradMatrix& dphi,
+                                                HessMatrix& d2phi_mat,
+                                                GGGMatrix& d3phi_mat)
+{
+  RealType sinkr, coskr;
+  for (int iat = first, i = 0; iat < last; iat++, i++)
+  {
+    ValueVector p(phi[i], OrbitalSetSize);
+    GradVector dp(dphi[i], OrbitalSetSize);
+    HessVector hess(d2phi_mat[i], OrbitalSetSize);
+    GGGVector ggg(d3phi_mat[i], OrbitalSetSize);
+
+    const PosType& r = P.activeR(iat);
+    for (int ik = mink; ik < maxk; ik++)
+    {
+      const PosType& k = kvecs[ik]; // bound once per k-vector instead of re-indexing kvecs in every term
+      sincos(dot(k, r), &sinkr, &coskr);
+      const int j2 = 2 * ik; // sin(k.r) slot
+      const int j1 = j2 - 1; // cos(k.r) slot
+      p[j1]        = coskr;
+      p[j2]        = sinkr;
+      dp[j1]       = -sinkr * k;
+      dp[j2]       = coskr * k;
+      for (int la = 0; la < OHMMS_DIM; la++)
+      {
+        hess[j1](la, la)    = -coskr * k[la] * k[la];
+        hess[j2](la, la)    = -sinkr * k[la] * k[la];
+        ggg[j1][la](la, la) = sinkr * k[la] * k[la] * k[la];
+        ggg[j2][la](la, la) = -coskr * k[la] * k[la] * k[la];
+        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
+        {
+          hess[j1](la, lb)    = -coskr * k[la] * k[lb];
+          hess[j2](la, lb)    = -sinkr * k[la] * k[lb];
+          hess[j1](lb, la)    = hess[j1](la, lb);
+          hess[j2](lb, la)    = hess[j2](la, lb);
+          ggg[j1][la](lb, la) = sinkr * k[la] * k[lb] * k[la];
+          ggg[j2][la](lb, la) = -coskr * k[la] * k[lb] * k[la];
+          ggg[j1][la](la, lb) = ggg[j1][la](lb, la);
+          ggg[j2][la](la, lb) = ggg[j2][la](lb, la);
+          ggg[j1][lb](la, la) = ggg[j1][la](lb, la);
+          ggg[j2][lb](la, la) = ggg[j2][la](lb, la);
+          ggg[j1][la](lb, lb) = sinkr * k[la] * k[lb] * k[lb];
+          ggg[j2][la](lb, lb) = -coskr * k[la] * k[lb] * k[lb];
+          ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb);
+          ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb);
+          ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb);
+          ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb);
+          for (int lc = lb + 1; lc < OHMMS_DIM; lc++)
+          {
+            ggg[j1][la](lb, lc) = sinkr * k[la] * k[lb] * k[lc];
+            ggg[j2][la](lb, lc) = -coskr * k[la] * k[lb] * k[lc];
+            ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc);
+            ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc);
+            ggg[j1][lb](la, lc) = ggg[j1][la](lb, lc);
+            ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc);
+            ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc);
+            ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc);
+            ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc);
+            ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc);
+            ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc);
+            ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc);
+          }
+        }
+      }
+    }
+
+    p[0]    = 1.0; // slot 0: constant orbital with vanishing derivatives
+    dp[0]   = 0.0;
+    hess[0] = 0.0;
+    ggg[0]  = 0.0;
+  }
+}
+
+template<> // complex float: values, gradients, Hessians and Hessian derivatives for particles [first, last)
+void FreeOrbitalT<std::complex<float>>::evaluate_notranspose(const ParticleSet& P,
+                                                             int first,
+                                                             int last,
+                                                             ValueMatrix& phi,
+                                                             GradMatrix& dphi,
+                                                             HessMatrix& d2phi_mat,
+                                                             GGGMatrix& d3phi_mat)
+{
+  const ValueType compi(0, 1); // imaginary unit
+  RealType sin_kr, cos_kr;
+  for (int iat = first, row = 0; iat < last; ++iat, ++row)
+  {
+    ValueVector vals(phi[row], OrbitalSetSize);
+    GradVector grads(dphi[row], OrbitalSetSize);
+    HessVector hess(d2phi_mat[row], OrbitalSetSize);
+    GGGVector ggg(d3phi_mat[row], OrbitalSetSize);
+
+    const PosType& pos = P.activeR(iat);
+    for (int kidx = mink; kidx < maxk; ++kidx)
+    {
+      sincos(dot(kvecs[kidx], pos), &sin_kr, &cos_kr);
+      const ValueType plane_wave(cos_kr, sin_kr); // exp(i k.r)
+      vals[kidx]  = plane_wave;
+      grads[kidx] = compi * plane_wave * kvecs[kidx];
+      // Hessian entry (a, b) is -k_a k_b exp(i k.r): fill the upper triangle and mirror it
+      for (int a = 0; a < OHMMS_DIM; ++a)
+      {
+        hess[kidx](a, a) = -plane_wave * (kvecs[kidx])[a] * (kvecs[kidx])[a];
+        for (int b = a + 1; b < OHMMS_DIM; ++b)
+        {
+          hess[kidx](a, b) = -plane_wave * (kvecs[kidx])[a] * (kvecs[kidx])[b];
+          hess[kidx](b, a) = hess[kidx](a, b);
+        }
+      }
+      for (int a = 0; a < OHMMS_DIM; ++a)
+      {
+        ggg[kidx][a] = compi * (kvecs[kidx])[a] * hess[kidx]; // derivative along a of the full Hessian
+      }
+    }
+  }
+}
+
+template<> // complex double: values, gradients, Hessians and Hessian derivatives for particles [first, last)
+void FreeOrbitalT<std::complex<double>>::evaluate_notranspose(const ParticleSet& P,
+                                                              int first,
+                                                              int last,
+                                                              ValueMatrix& phi,
+                                                              GradMatrix& dphi,
+                                                              HessMatrix& d2phi_mat,
+                                                              GGGMatrix& d3phi_mat)
+{
+  const ValueType compi(0, 1); // imaginary unit
+  RealType sin_kr, cos_kr;
+  for (int iat = first, row = 0; iat < last; ++iat, ++row)
+  {
+    ValueVector vals(phi[row], OrbitalSetSize);
+    GradVector grads(dphi[row], OrbitalSetSize);
+    HessVector hess(d2phi_mat[row], OrbitalSetSize);
+    GGGVector ggg(d3phi_mat[row], OrbitalSetSize);
+
+    const PosType& pos = P.activeR(iat);
+    for (int kidx = mink; kidx < maxk; ++kidx)
+    {
+      sincos(dot(kvecs[kidx], pos), &sin_kr, &cos_kr);
+      const ValueType plane_wave(cos_kr, sin_kr); // exp(i k.r)
+      vals[kidx]  = plane_wave;
+      grads[kidx] = compi * plane_wave * kvecs[kidx];
+      // Hessian entry (a, b) is -k_a k_b exp(i k.r): fill the upper triangle and mirror it
+      for (int a = 0; a < OHMMS_DIM; ++a)
+      {
+        hess[kidx](a, a) = -plane_wave * (kvecs[kidx])[a] * (kvecs[kidx])[a];
+        for (int b = a + 1; b < OHMMS_DIM; ++b)
+        {
+          hess[kidx](a, b) = -plane_wave * (kvecs[kidx])[a] * (kvecs[kidx])[b];
+          hess[kidx](b, a) = hess[kidx](a, b);
+        }
+      }
+      for (int a = 0; a < OHMMS_DIM; ++a)
+      {
+        ggg[kidx][a] = compi * (kvecs[kidx])[a] * hess[kidx]; // derivative along a of the full Hessian
+      }
+    }
+  }
+}
+
+// generic implementation
+
+// Nothing to release by hand: the std::vector members clean up after themselves.
+template<class T>
+FreeOrbitalT<T>::~FreeOrbitalT() = default;
+
+template<class T> // row-by-row driver built on the single-particle evaluateVGL
+void FreeOrbitalT<T>::evaluate_notranspose(const ParticleSet& P,
+                                           int first,
+                                           int last,
+                                           ValueMatrix& phi,
+                                           GradMatrix& dphi,
+                                           ValueMatrix& d2phi)
+{
+  for (int row = 0; first + row < last; ++row)
+  {
+    ValueVector values(phi[row], this->OrbitalSetSize);
+    GradVector grads(dphi[row], this->OrbitalSetSize);
+    ValueVector lapls(d2phi[row], this->OrbitalSetSize);
+    this->evaluateVGL(P, first + row, values, grads, lapls); // particle first + row fills output row `row`
+  }
+}
+
+
+template<class T>
+void FreeOrbitalT<T>::report(const std::string& pad) const
+{
+  // Writes the k-vector table to app_log(), one "<index> <vector>" line each, every line prefixed by pad.
+  app_log() << pad << "FreeOrbital report" << std::endl;
+  int index = 0;
+  for (const auto& kvec : kvecs)
+    app_log() << pad << index++ << " " << kvec << std::endl;
+  app_log() << pad << "end FreeOrbital report" << std::endl;
+  app_log().flush();
+}
+
+template class FreeOrbitalT<float>; // explicit instantiations for the four value types specialized above
+template class FreeOrbitalT<double>;
+template class FreeOrbitalT<std::complex<float>>;
+template class FreeOrbitalT<std::complex<double>>;
+
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h
new file mode 100644
index 0000000000..c73ab26a2a
--- /dev/null
+++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h
@@ -0,0 +1,88 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2022 QMCPACK developers.
+//
+// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Yubo "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron
+//                    William F Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_FREE_ORBITALT_H // named after this header so it stays distinct from any FreeOrbital.h guard
+#define QMCPLUSPLUS_FREE_ORBITALT_H
+
+#include "QMCWaveFunctions/SPOSetT.h"
+
+namespace qmcplusplus
+{
+template<class T> // plane-wave orbital set: real T stores (cos, sin) pairs, complex T stores exp(i k.r)
+class FreeOrbitalT : public SPOSetT<T>
+{
+public:
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using HessVector  = typename SPOSetT<T>::HessVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using HessMatrix  = typename SPOSetT<T>::HessMatrix;
+  using GGGMatrix   = typename SPOSetT<T>::GGGMatrix;
+  using RealType    = typename SPOSetT<T>::RealType;
+  using PosType     = typename SPOSetT<T>::PosType;
+  using ValueType   = typename SPOSetT<T>::ValueType;
+
+  FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart);
+  ~FreeOrbitalT();
+
+  inline std::string getClassName() const final { return "FreeOrbital"; }
+
+  // phi[i][j] is phi_j(r_i), i.e. electron i in orbital j;
+  //  rows are filled for particles in [first, last), starting at row 0
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& phi,
+                            GradMatrix& dphi,
+                            ValueMatrix& d2phi) final;
+
+  // evaluate all orbitals at the active position of one particle
+  void evaluateVGL(const ParticleSet& P, int i, ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec) final;
+  void evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec) final;
+
+  // hessian matrix is needed by backflow
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& phi,
+                            GradMatrix& dphi,
+                            HessMatrix& d2phi_mat) final;
+
+  // derivative of hessian is needed to optimize backflow
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& phi,
+                            GradMatrix& dphi,
+                            HessMatrix& d2phi_mat,
+                            GGGMatrix& d3phi_mat) override;
+
+  void report(const std::string& pad) const override;
+  // ---- begin required overrides
+  std::unique_ptr<SPOSetT<T>> makeClone() const final { return std::make_unique<FreeOrbitalT<T>>(*this); }
+  void setOrbitalSetSize(int norbs) final { throw std::runtime_error("not implemented"); }
+  // required overrides end ---- (the set size is fixed by the constructor, so resizing always throws)
+private:
+  const std::vector<PosType> kvecs; // k-vectors, copied from kpts_cart
+  const int mink;                   // first k index visited by the evaluation loops (1 for real T, 0 for complex T)
+  const int maxk;                   // number of k-vectors, i.e. kpts_cart.size()
+  std::vector<RealType> k2neg;      // -|k|^2 for each k-vector
+};
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp
new file mode 100644
index 0000000000..76a606151d
--- /dev/null
+++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp
@@ -0,0 +1,577 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#include "SHOSetT.h"
+#include "Utilities/string_utils.h"
+
+namespace qmcplusplus
+{
+template <typename T>
+SHOSetT<T>::SHOSetT(const std::string& my_name, RealType l, PosType c, const std::vector<SHOState*>& sho_states)
+    : SPOSetT<T>(my_name), length(l), center(c)
+{
+  // Keep private copies of the pointed-to states; the pointers themselves are not stored.
+  for (const SHOState* state : sho_states)
+    state_info.push_back(*state);
+  initialize();
+}
+
+template <typename T>
+void SHOSetT<T>::initialize()
+{
+  using std::sqrt;
+  this->OrbitalSetSize = state_info.size();
+
+  // qn_max[d]: one more than the largest quantum number requested along dimension d.
+  qn_max = -1;
+  for (const SHOState& state : state_info)
+    for (int dim = 0; dim < QMCTraits::DIM; ++dim)
+      qn_max[dim] = std::max(qn_max[dim], state.quantum_number[dim]);
+  qn_max += 1;
+
+  // nmax: largest per-dimension count; it sizes every 1D work table below.
+  nmax = -1;
+  for (int dim = 0; dim < QMCTraits::DIM; ++dim)
+    nmax = std::max(nmax, qn_max[dim]);
+  prefactors.resize(nmax);
+  hermite.resize(QMCTraits::DIM, nmax);
+  bvalues.resize(QMCTraits::DIM, nmax);
+
+  // Prefactor recurrence c_n = c_{n-1} / sqrt(2 n), seeded with c_0 = 1 / sqrt(sqrt(pi) * length).
+  if (nmax <= 0)
+    return;
+  prefactors[0] = 1.0 / (sqrt(sqrt(M_PI) * length));
+  for (int n = 1; n < nmax; ++n)
+    prefactors[n] = prefactors[n - 1] / sqrt(2. * n);
+}
+
+template <typename T>
+SHOSetT<T>::~SHOSetT() = default; // compiler-generated: no cleanup code of its own
+
+template <typename T> // makeClone: returns a new SHOSetT copy-constructed from *this
+std::unique_ptr<SPOSetT<T>> SHOSetT<T>::makeClone() const { return std::make_unique<SHOSetT<T>>(*this); }
+
+template <typename T>
+void SHOSetT<T>::report(const std::string& pad) const
+{
+  // Scalar settings first (one per line), then one line per stored state, all prefixed with pad.
+  app_log() << pad << "SHOSet report" << std::endl
+            << pad << "  length    = " << length << std::endl
+            << pad << "  center    = " << center << std::endl
+            << pad << "  nmax      = " << nmax << std::endl
+            << pad << "  qn_max    = " << qn_max << std::endl
+            << pad << "  # states  = " << state_info.size() << std::endl
+            << pad << "  states" << std::endl;
+  for (int s = 0; s < state_info.size(); ++s)
+    state_info[s].sho_report(pad + "    " + int2string(s) + " ");
+  (app_log() << pad << "end SHOSet report" << std::endl).flush();
+}
+
+template <typename T>
+void SHOSetT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+{
+  // Built from psi's data pointer and size(); the values are expected to reach psi through it.
+  ValueVector values(&psi[0], this->size());
+  evaluate_v(P.activeR(iat), values);
+}
+
+template <typename T>
+void SHOSetT<T>::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+{
+  // Each wrapper is built from the caller's data pointer and limited to size() orbitals.
+  ValueVector values(&psi[0], this->size());
+  GradVector gradients(&dpsi[0], this->size());
+  ValueVector laplacians(&d2psi[0], this->size());
+  evaluate_vgl(P.activeR(iat), values, gradients, laplacians);
+}
+
+template <typename T>
+void SHOSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                      int first,
+                                      int last,
+                                      ValueMatrix& logdet,
+                                      GradMatrix& dlogdet,
+                                      ValueMatrix& d2logdet)
+{
+  for (int row = 0; first + row < last; ++row) // output row `row` <-> particle first + row
+  {
+    ValueVector values(logdet[row], this->size());
+    GradVector gradients(dlogdet[row], this->size());
+    ValueVector laplacians(d2logdet[row], this->size());
+    evaluate_vgl(P.R[first + row], values, gradients, laplacians);
+  }
+}
+
+template <typename T>
+void SHOSetT<T>::evaluate_v(PosType r, ValueVector& psi)
+{
+  r = (r - center) / length; // r is a by-value copy: reuse it as the dimensionless coordinate
+  evaluate_hermite(r);
+  evaluate_d0(r, psi);
+}
+
+template <typename T>
+void SHOSetT<T>::evaluate_vgl(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+{
+  r = (r - center) / length;  // r is a by-value copy: reuse it as the dimensionless coordinate
+  evaluate_hermite(r);        // fills the hermite table read by d0 and d1
+  evaluate_d0(r, psi);        // fills psi, which d1 and d2 multiply into their results
+  evaluate_d1(r, psi, dpsi);  // overwrites bvalues
+  evaluate_d2(r, psi, d2psi); // overwrites bvalues again
+}
+
+template <typename T>
+void SHOSetT<T>::evaluate_hermite(const PosType& xpos)
+{
+  // Tabulate H_n(x_d) per dimension with the recurrence H_n = 2 (x H_{n-1} - (n - 1) H_{n-2}).
+  for (int dim = 0; dim < QMCTraits::DIM; ++dim)
+  {
+    const int count = qn_max[dim];
+    if (count <= 0)
+      continue; // nothing requested along this dimension
+    const RealType x = xpos[dim];
+    RealType older   = 0.0; // H_{n-2}
+    RealType newer   = 1.0; // H_{n-1}, starting from H_0
+    hermite(dim, 0)  = 1.0;
+    for (int n = 1; n < count; ++n)
+    {
+      const RealType current = 2 * (x * newer - (n - 1) * older);
+      hermite(dim, n)        = current;
+      older                  = newer;
+      newer                  = current;
+    }
+  }
+}
+
+template <typename T>
+void SHOSetT<T>::evaluate_d0(const PosType& xpos, ValueVector& psi)
+{
+  using std::exp;
+  // Stage 1: 1D factors b(d, n) = prefactor_n * exp(-x_d^2 / 2) * H_n(x_d).
+  for (int dim = 0; dim < QMCTraits::DIM; ++dim)
+  {
+    const RealType x        = xpos[dim];
+    const RealType gaussian = exp(-.5 * x * x);
+    for (int n = 0; n < qn_max[dim]; ++n)
+      bvalues(dim, n) = prefactors[n] * gaussian * hermite(dim, n);
+  }
+  // Stage 2: every orbital is the product of its 1D factors.
+  for (int s = 0; s < state_info.size(); ++s)
+  {
+    const SHOState& state = state_info[s];
+    RealType product      = 1.0;
+    for (int dim = 0; dim < QMCTraits::DIM; ++dim)
+      product *= bvalues(dim, state.quantum_number[dim]);
+    psi[s] = product;
+  }
+}
+
+template <typename T>
+void SHOSetT<T>::evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi)
+{
+  const RealType inv_length = 1.0 / length; // chain-rule factor of x = (r - center) / length
+  for (int dim = 0; dim < QMCTraits::DIM; ++dim)
+  {
+    const RealType x   = xpos[dim];
+    RealType previousH = 0.0; // H_{n-1}; multiplied by n = 0 on the first pass
+    for (int n = 0; n < qn_max[dim]; ++n)
+    {
+      // Per-dimension ratio (-x + 2 n H_{n-1} / H_n) / length; multiplied by psi further down.
+      const RealType currentH = hermite(dim, n);
+      bvalues(dim, n)         = (-x + 2 * n * previousH / currentH) * inv_length;
+      previousH               = currentH;
+    }
+  }
+  for (int s = 0; s < state_info.size(); ++s)
+  {
+    TinyVector<T, QMCTraits::DIM> gradient;
+    for (int dim = 0; dim < QMCTraits::DIM; ++dim)
+      gradient[dim] = bvalues(dim, state_info[s].quantum_number[dim]);
+    gradient *= psi[s];
+    dpsi[s] = gradient;
+  }
+}
+
+template <typename T>
+void SHOSetT<T>::evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& d2psi)
+{
+  const RealType inv_length2 = 1.0 / (length * length);
+  // Per-dimension ratio (x^2 - 2 n - 1) / length^2 goes into bvalues.
+  for (int dim = 0; dim < QMCTraits::DIM; ++dim)
+  {
+    const RealType x  = xpos[dim];
+    const RealType x2 = x * x;
+    for (int n = 0; n < qn_max[dim]; ++n)
+      bvalues(dim, n) = (-1.0 + x2 - 2 * n) * inv_length2;
+  }
+  // Each output entry is psi[s] times the sum of that state's per-dimension ratios.
+  for (int s = 0; s < state_info.size(); ++s)
+  {
+    const SHOState& state = state_info[s];
+    T laplacian           = 0.0;
+    for (int dim = 0; dim < QMCTraits::DIM; ++dim)
+      laplacian += bvalues(dim, state.quantum_number[dim]);
+    laplacian *= psi[s];
+    d2psi[s] = laplacian;
+  }
+}
+
+template <typename T>
+void SHOSetT<T>::evaluate_check(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+{
+  using std::exp;
+  using std::sqrt;
+  // Debug aid: logs closed-form 1D reference values next to the values built from this class's tables.
+  evaluate_vgl(r, psi, dpsi, d2psi);
+  // The closed-form reference below only covers Hermite orders 0 .. N - 1.
+  const int N = 6;
+  RealType H[N], dH[N], d2H[N], pre[N];
+  RealType p[N], dp[N], d2p[N];
+  pre[0] = 1.0 / (sqrt(sqrt(M_PI) * length));
+  for (int n = 1; n < N; ++n)
+    pre[n] = pre[n - 1] / sqrt(2. * n);
+
+  for (int d = 0; d < QMCTraits::DIM; ++d)
+  {
+    const int ncheck = std::min(N, qn_max[d]); // never index the N-entry reference arrays past their end
+    RealType x  = (r[d] - center[d]) / length;
+    RealType x2 = x * x, x3 = x * x * x, x4 = x * x * x * x, x5 = x * x * x * x * x;
+    H[0]       = 1;
+    dH[0]      = 0;
+    d2H[0]     = 0;
+    H[1]       = 2 * x;
+    dH[1]      = 2;
+    d2H[1]     = 0;
+    H[2]       = 4 * x2 - 2;
+    dH[2]      = 8 * x;
+    d2H[2]     = 8;
+    H[3]       = 8 * x3 - 12 * x;
+    dH[3]      = 24 * x2 - 12;
+    d2H[3]     = 48 * x;
+    H[4]       = 16 * x4 - 48 * x2 + 12;
+    dH[4]      = 64 * x3 - 96 * x;
+    d2H[4]     = 192 * x2 - 96;
+    H[5]       = 32 * x5 - 160 * x3 + 120 * x;
+    dH[5]      = 160 * x4 - 480 * x2 + 120;
+    d2H[5]     = 640 * x3 - 960 * x;
+    RealType g = exp(-x2 / 2);
+    for (int n = 0; n < N; ++n)
+    {
+      p[n]   = pre[n] * g * H[n];
+      dp[n]  = pre[n] * g * (-x * H[n] + dH[n]);
+      d2p[n] = pre[n] * g * ((x2 - 1) * H[n] - 2 * x * dH[n] + d2H[n]);
+    }
+    app_log() << "eval check dim = " << d << "  x = " << x << std::endl;
+    app_log() << "  hermite check" << std::endl;
+    for (int n = 0; n < ncheck; ++n)
+    {
+      app_log() << "    " << n << " " << H[n] << std::endl;
+      app_log() << "    " << n << " " << hermite(d, n) << std::endl;
+    }
+    app_log() << "  phi d0 check" << std::endl; // second line: value rebuilt from prefactors and hermite
+    for (int n = 0; n < ncheck; ++n)
+    {
+      app_log() << "    " << n << " " << p[n] << std::endl;
+      app_log() << "    " << n << " " << prefactors[n] * g * hermite(d, n) << std::endl;
+    }
+    app_log() << "  phi d1 check" << std::endl; // ratios w.r.t. the dimensionless x (no 1/length factor)
+    for (int n = 0; n < ncheck; ++n)
+    {
+      app_log() << "    " << n << " " << dp[n] / p[n] << std::endl;
+      app_log() << "    " << n << " " << (-x + (n > 0 ? 2 * n * hermite(d, n - 1) / hermite(d, n) : 0)) << std::endl;
+    }
+    app_log() << "  phi d2 check" << std::endl; // ratios w.r.t. the dimensionless x (no 1/length^2 factor)
+    for (int n = 0; n < ncheck; ++n)
+    {
+      app_log() << "    " << n << " " << d2p[n] / p[n] << std::endl;
+      app_log() << "    " << n << " " << (-1.0 + x2 - 2 * n) << std::endl;
+    }
+  }
+}
+
+template <typename T>
+void SHOSetT<T>::test_derivatives() // debug: logs analytic vs. central-finite-difference derivatives on a small grid
+{
+  int n       = 3;        // grid points per dimension
+  PosType c   = 5.123;    // grid origin
+  PosType L   = 1.0;      // grid extent
+  PosType drg = L / n;    // grid spacing
+  PosType dr  = L / 1000; // finite-difference step
+  int nphi    = state_info.size();
+
+  PosType o2dr, odr2;
+
+  ValueVector vpsi, vpsitmp;
+  GradVector vdpsi, vdpsin;
+  ValueVector vd2psi, vd2psin;
+
+  // Storage: analytic results (vpsi, vdpsi, vd2psi) and finite-difference results (vpsitmp, vdpsin, vd2psin).
+  vpsi.resize(nphi);
+  vdpsi.resize(nphi);
+  vd2psi.resize(nphi);
+
+  vpsitmp.resize(nphi);
+  vdpsin.resize(nphi);
+  vd2psin.resize(nphi);
+
+  // Wrappers built from the storage pointers above; these are what the evaluate_* calls receive.
+  ValueVector psi(&vpsi[0], this->size());
+  GradVector dpsi(&vdpsi[0], this->size());
+  ValueVector d2psi(&vd2psi[0], this->size());
+
+  ValueVector psitmp(&vpsitmp[0], this->size());
+  GradVector dpsin(&vdpsin[0], this->size());
+  ValueVector d2psin(&vd2psin[0], this->size());
+
+
+  app_log() << " loading dr" << std::endl;
+
+  RealType odr2sum = 0.0;
+  for (int d = 0; d < QMCTraits::DIM; ++d)
+  {
+    RealType odr = 1.0 / dr[d];
+    o2dr[d]      = .5 * odr;  // 1 / (2 dr): central first-difference weight
+    odr2[d]      = odr * odr; // 1 / dr^2: second-difference weight
+    odr2sum += odr2[d];
+  }
+
+  app_log() << "SHOSet::test_derivatives" << std::endl;
+
+  const SimulationCell simulation_cell;
+  ParticleSet Ps(simulation_cell); // NOTE(review): Ps is not used anywhere below - confirm it can be removed
+
+  int p = 0;
+  PosType r, rtmp;
+  for (int i = 0; i < n; ++i) // NOTE(review): components 0..2 are hard-coded, so this assumes DIM == 3
+  {
+    r[0] = c[0] + i * drg[0];
+    for (int j = 0; j < n; ++j)
+    {
+      r[1] = c[1] + j * drg[1];
+      for (int k = 0; k < n; ++k)
+      {
+        r[2] = c[2] + k * drg[2];
+
+        evaluate_vgl(r, psi, dpsi, d2psi);
+
+        for (int m = 0; m < nphi; ++m)
+          d2psin[m] = -2 * odr2sum * psi[m]; // central term of the second difference, summed over dimensions
+        for (int d = 0; d < QMCTraits::DIM; ++d)
+        {
+          rtmp = r;
+          rtmp[d] += dr[d]; // forward-shifted point
+          evaluate_v(rtmp, psitmp);
+          for (int m = 0; m < nphi; ++m)
+          {
+            T phi = psitmp[m];
+            dpsin[m][d]   = phi * o2dr[d];
+            d2psin[m] += phi * odr2[d];
+          }
+          rtmp = r;
+          rtmp[d] -= dr[d]; // backward-shifted point
+          evaluate_v(rtmp, psitmp);
+          for (int m = 0; m < nphi; ++m)
+          {
+            T phi = psitmp[m];
+            dpsin[m][d] -= phi * o2dr[d];
+            d2psin[m] += phi * odr2[d];
+          }
+        }
+
+        RealType dphi_diff  = 0.0; // largest relative gradient mismatch at this grid point
+        RealType d2phi_diff = 0.0; // largest relative laplacian mismatch at this grid point
+        for (int m = 0; m < nphi; ++m)
+          for (int d = 0; d < QMCTraits::DIM; ++d)
+            dphi_diff = std::max<RealType>(dphi_diff, std::abs(dpsi[m][d] - dpsin[m][d]) / std::abs(dpsin[m][d]));
+        for (int m = 0; m < nphi; ++m)
+          d2phi_diff = std::max<RealType>(d2phi_diff, std::abs(d2psi[m] - d2psin[m]) / std::abs(d2psin[m]));
+        app_log() << "  " << p << " " << dphi_diff << " " << d2phi_diff << std::endl;
+        app_log() << "    derivatives" << std::endl;
+        for (int m = 0; m < nphi; ++m)
+        {
+          std::string qn = "";
+          for (int d = 0; d < QMCTraits::DIM; ++d)
+            qn += int2string(state_info[m].quantum_number[d]) + " ";
+          app_log() << "    " << qn;
+          for (int d = 0; d < QMCTraits::DIM; ++d)
+            app_log() << real(dpsi[m][d]) << " ";
+          app_log() << std::endl;
+          app_log() << "    " << qn;
+          for (int d = 0; d < QMCTraits::DIM; ++d)
+            app_log() << real(dpsin[m][d]) << " ";
+          app_log() << std::endl;
+        }
+        app_log() << "    laplacians" << std::endl;
+        PosType x = r / length; // NOTE(review): x is not read afterwards
+        for (int m = 0; m < nphi; ++m)
+        {
+          std::string qn = "";
+          for (int d = 0; d < QMCTraits::DIM; ++d)
+            qn += int2string(state_info[m].quantum_number[d]) + " ";
+          app_log() << "    " << qn << real(d2psi[m] / psi[m]) << std::endl;
+          app_log() << "    " << qn << real(d2psin[m] / psi[m]) << std::endl;
+        }
+        p++;
+      }
+    }
+  }
+
+  app_log() << "end SHOSet::test_derivatives" << std::endl;
+}
+
+template <typename T>
+void SHOSetT<T>::test_overlap() // debug: logs grid-summed overlap matrices, first along one dimension, then over a 3D grid
+{
+  app_log() << "SHOSet::test_overlap" << std::endl;
+
+
+  //linear
+  int d = 0; // the 1D overlap is computed along dimension 0
+
+  app_log() << "  length = " << length << std::endl;
+  app_log() << "  prefactors" << std::endl;
+  for (int n = 0; n < qn_max[d]; ++n)
+    app_log() << "    " << n << " " << prefactors[n] << std::endl;
+
+  app_log() << "  1d overlap" << std::endl;
+
+  ValueVector vpsi;
+  vpsi.resize(this->size());
+  ValueVector psi(&vpsi[0], this->size());
+
+  double xmax = 4.0;         // grid covers [-xmax, xmax) in the dimensionless coordinate
+  double dx   = .1;          // grid step in the dimensionless coordinate
+  double dr   = length * dx; // the same step multiplied by length
+
+  int nphi = qn_max[d];
+  Array<double, 2> omat;
+  omat.resize(nphi, nphi);
+  for (int i = 0; i < nphi; ++i)
+    for (int j = 0; j < nphi; ++j)
+      omat(i, j) = 0.0;
+
+  PosType xp = 0.0;
+  for (double x = -xmax; x < xmax; x += dx)
+  {
+    xp[d] = x;
+    evaluate_hermite(xp);
+    evaluate_d0(xp, psi); // leaves the 1D factors in bvalues, which the sum below reads
+
+    for (int i = 0; i < nphi; ++i)
+      for (int j = 0; j < nphi; ++j)
+        omat(i, j) += bvalues(d, i) * bvalues(d, j) * dr;
+  }
+
+  for (int i = 0; i < nphi; ++i)
+  {
+    app_log() << std::endl;
+    for (int j = 0; j < nphi; ++j)
+      app_log() << omat(i, j) << " ";
+  }
+  app_log() << std::endl;
+
+
+  //volumetric
+  app_log() << "  3d overlap" << std::endl;
+  double dV = dr * dr * dr;
+  nphi      = this->size();
+  omat.resize(nphi, nphi);
+  for (int i = 0; i < nphi; ++i)
+    for (int j = 0; j < nphi; ++j)
+      omat(i, j) = 0.0;
+  for (double x = -xmax; x < xmax; x += dx) // NOTE(review): components 0..2 are hard-coded, so this assumes DIM == 3
+    for (double y = -xmax; y < xmax; y += dx)
+      for (double z = -xmax; z < xmax; z += dx)
+      {
+        xp[0] = x;
+        xp[1] = y;
+        xp[2] = z;
+        evaluate_hermite(xp);
+        evaluate_d0(xp, psi);
+
+        for (int i = 0; i < nphi; ++i)
+          for (int j = 0; j < nphi; ++j)
+            omat(i, j) += std::abs(psi[i] * psi[j]) * dV; // accumulates |psi_i psi_j|, not the signed product
+      }
+  for (int i = 0; i < nphi; ++i)
+  {
+    app_log() << std::endl;
+    for (int j = 0; j < nphi; ++j)
+      app_log() << omat(i, j) << " ";
+  }
+  app_log() << std::endl;
+
+
+  app_log() << "end SHOSet::test_overlap" << std::endl;
+}
+
+template <typename T>
+void SHOSetT<T>::evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& grad_grad_grad_logdet)
+{
+  not_implemented("evaluateThirdDeriv(P,first,last,dddlogdet)"); // unimplemented: arguments are ignored
+}
+
+template <typename T>
+void SHOSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                  int first,
+                                  int last,
+                                  ValueMatrix& logdet,
+                                  GradMatrix& dlogdet,
+                                  HessMatrix& grad_grad_logdet)
+{
+  not_implemented("evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet)"); // Hessian overload is unimplemented
+}
+
+template <typename T>
+void SHOSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                  int first,
+                                  int last,
+                                  ValueMatrix& logdet,
+                                  GradMatrix& dlogdet,
+                                  HessMatrix& grad_grad_logdet,
+                                  GGGMatrix& grad_grad_grad_logdet)
+{
+  not_implemented("evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet,dddlogdet)"); // unimplemented overload
+}
+
+template <typename T>
+void SHOSetT<T>::evaluateGradSource(const ParticleSet& P,
+                                int first,
+                                int last,
+                                const ParticleSet& source,
+                                int iat_src,
+                                GradMatrix& gradphi)
+{
+  not_implemented("evaluateGradSource(P,first,last,source,iat,dphi)"); // unimplemented: arguments are ignored
+}
+
+template <typename T>
+void SHOSetT<T>::evaluateGradSource(const ParticleSet& P,
+                                int first,
+                                int last,
+                                const ParticleSet& source,
+                                int iat_src,
+                                GradMatrix& grad_phi,
+                                HessMatrix& grad_grad_phi,
+                                GradMatrix& grad_lapl_phi)
+{
+  not_implemented("evaluateGradSource(P,first,last,source,iat,dphi,ddphi,dd2phi)"); // unimplemented overload
+}
+
+// Explicit instantiations: the member definitions live in this .cpp, so each supported value type is listed here.
+template class SHOSetT<double>;
+template class SHOSetT<float>;
+template class SHOSetT<std::complex<double>>;
+template class SHOSetT<std::complex<float>>;
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h
new file mode 100644
index 0000000000..bd4870a63c
--- /dev/null
+++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h
@@ -0,0 +1,158 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef QMCPLUSPLUS_SHOSET_H
+#define QMCPLUSPLUS_SHOSET_H
+
+#include "QMCWaveFunctions/SPOSetT.h"
+#include "QMCWaveFunctions/SPOInfo.h"
+
+namespace qmcplusplus
+{
+struct SHOState : public SPOInfo
+{
+  /// quantum number along each spatial dimension
+  TinyVector<int, QMCTraits::DIM> quantum_number;
+  /// a fresh state carries -1 in every quantum number and zero energy
+  SHOState()
+  {
+    energy         = 0.0;
+    quantum_number = -1;
+  }
+  ~SHOState() override {}
+  /// overwrite the quantum numbers and the energy together
+  void set(TinyVector<int, QMCTraits::DIM> qn, RealType e)
+  {
+    energy         = e;
+    quantum_number = qn;
+  }
+  /// write "qn=<quantum_number>  e=<energy>" to the application log, prefixed with pad
+  void sho_report(const std::string& pad = "") const
+  {
+    app_log() << pad << "qn=" << quantum_number << "  e=" << energy << std::endl;
+  }
+};
+
+template<typename T>
+class SHOSetT : public SPOSetT<T> // orbital set whose orbitals are products of 1D Hermite-Gaussian factors
+{
+public:
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using value_type  = typename ValueMatrix::value_type;
+  using grad_type   = typename GradMatrix::value_type;
+  using RealType    = typename SPOSetT<T>::RealType;
+  using PosType     = TinyVector<RealType, QMCTraits::DIM>;
+  using HessType    = typename OrbitalSetTraits<T>::HessType;
+  using HessMatrix  = typename OrbitalSetTraits<T>::HessMatrix;
+  using GGGType     = TinyVector<HessType, OHMMS_DIM>;
+  using GGGVector   = Vector<GGGType>;
+  using GGGMatrix   = Matrix<GGGType>;
+
+  RealType length; // length scale: positions are used as (r - center) / length
+  PosType center;  // origin subtracted from every position
+
+  int nmax;                               // largest entry of qn_max; sizes the tables below
+  TinyVector<int, QMCTraits::DIM> qn_max; // per dimension: largest requested quantum number + 1
+  std::vector<SHOState> state_info;       // one entry per orbital
+  std::vector<RealType> prefactors;       // per-order prefactor, filled by initialize()
+  Array<RealType, 2> hermite;             // hermite(d, n): Hermite polynomial table, filled by evaluate_hermite()
+  Array<RealType, 2> bvalues;             // scratch table reused by evaluate_d0 / evaluate_d1 / evaluate_d2
+  Array<RealType, 2> d0_values;           // NOTE(review): only read in evaluate_check(); never resized or
+  Array<RealType, 2> d1_values;           // written in SHOSetT.cpp - confirm whether these three members
+  Array<RealType, 2> d2_values;           // are still needed
+
+  //construction/destruction
+  SHOSetT(const std::string& my_name, RealType l, PosType c, const std::vector<SHOState*>& sho_states);
+
+  ~SHOSetT() override;
+
+  std::string getClassName() const override { return "SHOSet"; }
+
+  void initialize(); // derives OrbitalSetSize, qn_max, nmax and the prefactors from state_info
+
+  //SPOSet interface methods
+  std::unique_ptr<SPOSetT<T>> makeClone() const override;
+
+  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override;
+
+  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override;
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override;
+
+
+  //local functions
+  void evaluate_v(PosType r, ValueVector& psi);                                         // values only
+  void evaluate_vgl(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); // values, gradients, laplacians
+  void evaluate_hermite(const PosType& xpos);                                           // fills the hermite table
+  void evaluate_d0(const PosType& xpos, ValueVector& psi);
+  void evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi);
+  void evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& d2psi);
+  void report(const std::string& pad = "") const override;
+  void test_derivatives(); // debug logging only
+  void test_overlap();     // debug logging only
+  void evaluate_check(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); // debug logging only
+
+  //empty methods
+  /// number of orbitals is determined only by initial request
+  inline void setOrbitalSetSize(int norbs) override {}
+
+  ///unimplemented functions call this to abort
+  inline void not_implemented(const std::string& method)
+  {
+    APP_ABORT("SHOSet::" + method + " has not been implemented.");
+  }
+
+
+  //methods to be implemented in the future (possibly)
+  void evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& dddlogdet) override;
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& ddlogdet) override;
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& ddlogdet,
+                            GGGMatrix& dddlogdet) override;
+  void evaluateGradSource(const ParticleSet& P,
+                          int first,
+                          int last,
+                          const ParticleSet& source,
+                          int iat_src,
+                          GradMatrix& gradphi) override;
+  void evaluateGradSource(const ParticleSet& P,
+                          int first,
+                          int last,
+                          const ParticleSet& source,
+                          int iat_src,
+                          GradMatrix& dphi,
+                          HessMatrix& ddphi,
+                          GradMatrix& dlapl_phi) override;
+};
+
+} // namespace qmcplusplus
+
+
+#endif
diff --git a/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.cpp b/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.cpp
new file mode 100644
index 0000000000..3286624090
--- /dev/null
+++ b/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.cpp
@@ -0,0 +1,165 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+/** @file PWRealOrbitalSetT.cpp
+ * @brief definition of the member functions of PWRealOrbitalSetT
+ *
+ * Not the most optimized method to use wavefunctions in a plane-wave basis.
+ */
+#include "Message/Communicate.h"
+#include "PWRealOrbitalSetT.h"
+#include "Numerics/MatrixOperators.h"
+#include "type_traits/ConvertToReal.h"
+
+namespace qmcplusplus
+{
+template<class T>
+PWRealOrbitalSetT<T>::~PWRealOrbitalSetT()
+{
+  if (OwnBasisSet) // a basis this object does not own is left alone; deleting a null pointer is a no-op
+    delete myBasisSet;
+}
+template<class T>
+std::unique_ptr<SPOSetT<T>> PWRealOrbitalSetT<T>::makeClone() const
+{ // Clone with a private copy of the plane-wave basis.
+  auto myclone         = std::make_unique<PWRealOrbitalSetT<T>>(*this);
+  myclone->myBasisSet  = new PWBasis(*(this->myBasisSet));
+  myclone->OwnBasisSet = true; // the flag was copied from *this; the clone must free the copy it just allocated
+  return myclone;
+}
+
+template<class T>
+void PWRealOrbitalSetT<T>::setOrbitalSetSize(int norbs)
+{} // intentionally empty: norbs is ignored; in this file OrbitalSetSize is assigned in resize()
+
+template<class T>
+void PWRealOrbitalSetT<T>::resize(PWBasisPtr bset, int nbands, bool cleanup)
+{
+  myBasisSet           = bset;
+  OwnBasisSet          = cleanup; // when set, the destructor deletes bset
+  BasisSetSize         = bset->NumPlaneWaves;
+  this->OrbitalSetSize = nbands;
+  CC.resize(nbands, BasisSetSize);  // one coefficient row per orbital
+  Temp.resize(nbands, PW_MAXINDEX); // one row of PW_MAXINDEX entries per orbital
+  tempPsi.resize(nbands);
+  app_log() << "  PWRealOrbitalSetT::resize OrbitalSetSize =" << this->OrbitalSetSize
+            << " BasisSetSize = " << BasisSetSize << std::endl;
+}
+
+template<class T>
+void PWRealOrbitalSetT<T>::addVector(const std::vector<RealType>& coefs, int jorb)
+{
+  // Store the real coefficients of orbital jorb in row jorb of CC; coefs has one entry per input G vector.
+  const std::vector<int>& gmap(myBasisSet->inputmap);
+  const int ng = gmap.size();
+  if (ng != coefs.size())
+  {
+    app_error() << "  Input G map does not match the basis size of wave functions " << std::endl;
+    OHMMS::Controller->abort();
+  }
+  // G points dropped for the given TwistAngle carry a negative map entry;
+  // their coefficients are skipped.
+  for (int ig = 0; ig < ng; ++ig)
+    if (gmap[ig] > -1)
+      CC[jorb][gmap[ig]] = coefs[ig];
+}
+
+template<class T>
+void PWRealOrbitalSetT<T>::addVector(const std::vector<ComplexType>& coefs, int jorb)
+{
+  // Store the complex coefficients of orbital jorb in row jorb of CC; coefs has one entry per input G vector.
+  const std::vector<int>& gmap(myBasisSet->inputmap);
+  const int ng = gmap.size();
+  if (ng != coefs.size())
+  {
+    app_error() << "  Input G map does not match the basis size of wave functions " << std::endl;
+    OHMMS::Controller->abort();
+  }
+  // G points dropped for the given TwistAngle carry a negative map entry;
+  // their coefficients are skipped.
+  for (int ig = 0; ig < ng; ++ig)
+    if (gmap[ig] > -1)
+      CC[jorb][gmap[ig]] = coefs[ig];
+}
+
+template<class T>
+void PWRealOrbitalSetT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+{
+  myBasisSet->evaluate(P.activeR(iat));                  // evaluate the basis at the particle's active position
+  MatrixOperators::product(CC, myBasisSet->Zv, tempPsi); // product of CC and Zv goes into the scratch vector
+  for (int orb = 0; orb < this->OrbitalSetSize; ++orb)
+    psi[orb] = tempPsi[orb].real(); // only the real part is kept
+}
+
+template<class T>
+void PWRealOrbitalSetT<T>::evaluateVGL(const ParticleSet& P,
+                                       int iat,
+                                       ValueVector& psi,
+                                       GradVector& dpsi,
+                                       ValueVector& d2psi)
+{
+  myBasisSet->evaluateAll(P, iat);
+  MatrixOperators::product(CC, myBasisSet->Z, Temp); // presumably Temp = CC * Z - confirm against MatrixOperators
+  const ComplexType* restrict tptr = Temp.data();
+  for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX) // tptr advances one PW_MAXINDEX-wide row per orbital
+  {
+    psi[j]   = tptr[PW_VALUE].real(); // only real parts are stored
+    d2psi[j] = tptr[PW_LAP].real();
+#if OHMMS_DIM == 3
+    dpsi[j] = GradType(tptr[PW_GRADX].real(), tptr[PW_GRADY].real(), tptr[PW_GRADZ].real());
+#elif OHMMS_DIM == 2
+    dpsi[j] = GradType(tptr[PW_GRADX].real(), tptr[PW_GRADY].real());
+#elif OHMMS_DIM == 1
+    dpsi[j] = GradType(tptr[PW_GRADX].real());
+#else
+#error "Only physical dimensions 1/2/3 are supported."
+#endif
+  }
+}
+
+template<class T>
+void PWRealOrbitalSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                                int first,
+                                                int last,
+                                                ValueMatrix& logdet,
+                                                GradMatrix& dlogdet,
+                                                ValueMatrix& d2logdet)
+{
+  for (int iat = first, i = 0; iat < last; iat++, i++) // output row i corresponds to particle first + i
+  {
+    myBasisSet->evaluateAll(P, iat);
+    MatrixOperators::product(CC, myBasisSet->Z, Temp); // presumably Temp = CC * Z - confirm against MatrixOperators
+    const ComplexType* restrict tptr = Temp.data();
+    for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX) // one PW_MAXINDEX-wide row per orbital
+    {
+      convertToReal(tptr[PW_VALUE], logdet(i, j));
+      convertToReal(tptr[PW_LAP], d2logdet(i, j));
+#if OHMMS_DIM == 3
+      dlogdet(i, j) = GradType(tptr[PW_GRADX].real(), tptr[PW_GRADY].real(), tptr[PW_GRADZ].real());
+#elif OHMMS_DIM == 2
+      dlogdet(i, j) = GradType(tptr[PW_GRADX].real(), tptr[PW_GRADY].real());
+#elif OHMMS_DIM == 1
+      dlogdet(i, j) = GradType(tptr[PW_GRADX].real());
+#else
+#error "Only physical dimensions 1/2/3 are supported."
+#endif
+    }
+  }
+}
+
+template class PWRealOrbitalSetT<double>; // instantiate this file's class; its members are defined only in this .cpp
+template class PWRealOrbitalSetT<float>;
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.h b/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.h
new file mode 100644
index 0000000000..29e484f3ff
--- /dev/null
+++ b/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.h
@@ -0,0 +1,143 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+/** @file PWRealOrbitalSetT.h
+ * @brief Define PWRealOrbitalSetT derived from SPOSetT
+ *
+ * This is a specialized single-particle orbital set for real trial
+ * wavefunctions and enabled with QMC_COMPLEX=0
+ */
+// The guard carries the trailing "T" of the file name so that it is unique to this templated
+// header (presumably the previous name is already used by the non-templated PWRealOrbitalSet
+// header, in which case including both would silently drop one of them).
+#ifndef QMCPLUSPLUS_PLANEWAVE_REALORBITALSETT_BLAS_H
+#define QMCPLUSPLUS_PLANEWAVE_REALORBITALSETT_BLAS_H
+
+#include "QMCWaveFunctions/PlaneWave/PWBasis.h"
+#include "QMCWaveFunctions/SPOSetT.h"
+#include "CPU/BLAS.hpp"
+
+namespace qmcplusplus
+{
+/** Plane-wave single-particle orbital set that exposes real-valued orbitals.
+ *
+ * Complex plane-wave coefficients are stored in CC; evaluation goes through a PWBasis
+ * (myBasisSet) and results are handed back through the SPOSetT<T> interface.
+ */
+template<class T>
+class PWRealOrbitalSetT : public SPOSetT<T>
+{
+public:
+  using BasisSet_t = PWBasis;
+  using PWBasisPtr = PWBasis*;
+
+  // types re-exported from the SPOSetT<T> base
+  using IndexType   = typename SPOSetT<T>::IndexType;
+  using RealType    = typename SPOSetT<T>::RealType;
+  using ComplexType = typename SPOSetT<T>::ComplexType;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using HessMatrix  = typename SPOSetT<T>::HessMatrix;
+  using PosType     = typename SPOSetT<T>::PosType;
+
+  /** inherit the enum of BasisSet_t
+   *
+   * These are the offsets used to index the packed per-orbital entries.
+   */
+  enum
+  {
+    PW_VALUE    = BasisSet_t::PW_VALUE,
+    PW_LAP      = BasisSet_t::PW_LAP,
+    PW_GRADX    = BasisSet_t::PW_GRADX,
+    PW_GRADY    = BasisSet_t::PW_GRADY,
+    PW_GRADZ    = BasisSet_t::PW_GRADZ,
+    PW_MAXINDEX = BasisSet_t::PW_MAXINDEX
+  };
+
+  /** constructor: creates a set with no basis attached (null basis pointer, size 0, not owning)
+   * @param my_name name forwarded to the SPOSetT<T> base
+   */
+  PWRealOrbitalSetT(const std::string& my_name)
+      : SPOSetT<T>(my_name), OwnBasisSet(false), myBasisSet(nullptr), BasisSetSize(0)
+  {}
+
+  /// name of this class, "PWRealOrbitalSetT"
+  std::string getClassName() const override { return "PWRealOrbitalSetT"; }
+
+  /** delete the BasisSet only if this object owns it
+   *
+   * Builder takes care of who owns what
+   */
+  ~PWRealOrbitalSetT() override;
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const override;
+
+  /** resize the orbital base
+   * @param bset PWBasis
+   * @param nbands number of bands
+   * @param cleanup if true, owns PWBasis. Will clean up.
+   */
+  void resize(PWBasisPtr bset, int nbands, bool cleanup = false);
+
+  /** add eigenstate for jorb-th orbital
+   * @param coefs real input data
+   * @param jorb orbital index
+   */
+  void addVector(const std::vector<RealType>& coefs, int jorb);
+
+  /** add eigenstate for jorb-th orbital
+   * @param coefs complex input data
+   * @param jorb orbital index
+   */
+  void addVector(const std::vector<ComplexType>& coefs, int jorb);
+
+  void setOrbitalSetSize(int norbs) override;
+
+  /** evaluate a single orbital at a position
+   * @param ib  row of CC to use (orbital/band index)
+   * @param pos position handed to the basis set
+   * @return real part of the dot product of CC[ib] with the basis values Zv
+   */
+  inline T evaluate(int ib, const PosType& pos)
+  {
+    myBasisSet->evaluate(pos);
+    return real(BLAS::dot(BasisSetSize, CC[ib], myBasisSet->Zv.data()));
+  }
+
+  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override;
+
+  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override;
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override;
+
+  /// Hessian variant is not implemented for this orbital set: calling it aborts.
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& grad_grad_logdet) override
+  {
+    APP_ABORT("Need specialization of evaluate_notranspose() for grad_grad_logdet. \n");
+  }
+
+
+  /** boolean
+   *
+   * If true, this has to delete the BasisSet
+   */
+  bool OwnBasisSet;
+  ///TwistAngle of this PWRealOrbitalSet
+  PosType TwistAngle;
+  ///My basis set (raw pointer; ownership is indicated by OwnBasisSet)
+  PWBasisPtr myBasisSet;
+  ///number of basis
+  IndexType BasisSetSize;
+  ///Plane-wave coefficients of complex: (iband,g-vector)
+  Matrix<ComplexType> CC;
+  /// temporary array to perform gemm operation
+  Matrix<ComplexType> Temp;
+  ///temporary complex vector before assigning to a real psi
+  Vector<ComplexType> tempPsi;
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/RotatedSPOsT.cpp b/src/QMCWaveFunctions/RotatedSPOsT.cpp
new file mode 100644
index 0000000000..5a992ebce8
--- /dev/null
+++ b/src/QMCWaveFunctions/RotatedSPOsT.cpp
@@ -0,0 +1,1690 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) QMCPACK developers.
+//
+// File developed by: Sergio D. Pineda Flores, sergio_pinedaflores@berkeley.edu, University of California, Berkeley
+//                    Eric Neuscamman, eneuscamman@berkeley.edu, University of California, Berkeley
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//
+// File created by: Sergio D. Pineda Flores, sergio_pinedaflores@berkeley.edu, University of California, Berkeley
+//////////////////////////////////////////////////////////////////////////////////////
+#include "RotatedSPOsT.h"
+
+#include "CPU/BLAS.hpp"
+#include "Numerics/DeterminantOperators.h"
+#include "Numerics/MatrixOperators.h"
+#include "io/hdf/hdf_archive.h"
+
+namespace qmcplusplus
+{
+/** Wrap an existing orbital set so that rotations can be applied to it.
+ * @param my_name name passed to both the SPOSetT<T> and OptimizableObject bases
+ * @param spos    orbital set moved into Phi
+ *
+ * Starts with nel_major_ = 0 and no user supplied parameters; the orbital set size is
+ * copied from the wrapped set.
+ */
+template<typename T>
+RotatedSPOsT<T>::RotatedSPOsT(const std::string& my_name, std::unique_ptr<SPOSetT<T>>&& spos)
+    : SPOSetT<T>(my_name), OptimizableObject(my_name), Phi(std::move(spos)), nel_major_(0), params_supplied(false)
+{
+  this->OrbitalSetSize = Phi->getOrbitalSetSize();
+}
+
+/// Out-of-line destructor; it performs no work of its own.
+template<typename T>
+RotatedSPOsT<T>::~RotatedSPOsT() = default;
+
+/** Store user supplied rotation parameters.
+ * @param param_list values copied into params
+ *
+ * Also raises params_supplied, which buildOptVariables checks when it initialises the
+ * optimizable variables.
+ */
+template<typename T>
+void RotatedSPOsT<T>::setRotationParameters(const std::vector<RealType>& param_list)
+{
+  params          = param_list;
+  params_supplied = true;
+}
+
+/** Append every (i, j) pair with 0 <= i < nel and nel <= j < nmo to rot_indices.
+ * @param nel         number of leading orbitals (first index of each pair)
+ * @param nmo         total number of orbitals
+ * @param rot_indices list the pairs are appended to; existing entries are kept
+ */
+template<typename T>
+void RotatedSPOsT<T>::createRotationIndices(int nel, int nmo, RotationIndices& rot_indices)
+{
+  // nel * (nmo - nel) pairs are appended below; grow the storage once instead of repeatedly.
+  // The guard keeps the product non-negative so it can never turn into a huge unsigned request.
+  if (nel > 0 && nmo > nel)
+    rot_indices.reserve(rot_indices.size() + static_cast<size_t>(nel) * static_cast<size_t>(nmo - nel));
+
+  for (int i = 0; i < nel; i++)
+    for (int j = nel; j < nmo; j++)
+      rot_indices.emplace_back(i, j);
+}
+
+/** Append all nmo * (nmo - 1) / 2 index pairs (i, j), i < j, to rot_indices.
+ *
+ * Ordering: core-active pairs first (same order as createRotationIndices), then
+ * core-core pairs, then active-active pairs.
+ * @param nel         number of core orbitals
+ * @param nmo         total number of orbitals
+ * @param rot_indices list the pairs are appended to
+ */
+template<typename T>
+void RotatedSPOsT<T>::createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices)
+{
+  rot_indices.reserve(nmo * (nmo - 1) / 2);
+
+  // core-active rotations go to the front of the list so that the leading
+  // entries match the other list of rotation indices
+  for (int core = 0; core < nel; ++core)
+    for (int act = nel; act < nmo; ++act)
+      rot_indices.emplace_back(core, act);
+
+  // appends every pair (i, j) with begin <= i < j < end
+  const auto append_pairs_within = [&rot_indices](int begin, int end) {
+    for (int i = begin; i < end; ++i)
+      for (int j = i + 1; j < end; ++j)
+        rot_indices.emplace_back(i, j);
+  };
+
+  append_pairs_within(0, nel);   // core-core rotations
+  append_pairs_within(nel, nmo); // active-active rotations, at the very end
+}
+
+/** Build an antisymmetric matrix from a list of index pairs and their parameters.
+ *
+ * rot_mat is zeroed, then for the k-th pair (p, q) the entry [q][p] is set to param[k]
+ * and [p][q] to -param[k].
+ * @param rot_indices index pairs (p, q)
+ * @param param       one value per pair
+ * @param rot_mat     output; must already be large enough for every index used
+ */
+template<typename T>
+void RotatedSPOsT<T>::constructAntiSymmetricMatrix(const RotationIndices& rot_indices,
+                                                   const std::vector<RealType>& param,
+                                                   ValueMatrix& rot_mat)
+{
+  assert(rot_indices.size() == param.size());
+
+  // rot_mat is not resized here
+  rot_mat = 0.0;
+
+  for (size_t k = 0; k < rot_indices.size(); ++k)
+  {
+    const auto& pair_pq  = rot_indices[k];
+    const int p          = pair_pq.first;
+    const int q          = pair_pq.second;
+    const RealType value = param[k];
+
+    rot_mat[q][p] = value;
+    rot_mat[p][q] = -value;
+  }
+}
+
+/** Read the parameters back out of an antisymmetric matrix.
+ *
+ * For the k-th pair (p, q) in rot_indices, param[k] receives rot_mat[q][p].
+ * @param rot_indices index pairs (p, q)
+ * @param rot_mat     matrix to read from; must cover every index used
+ * @param param       output, already sized to rot_indices.size()
+ */
+template<typename T>
+void RotatedSPOsT<T>::extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices,
+                                                           const ValueMatrix& rot_mat,
+                                                           std::vector<RealType>& param)
+{
+  assert(rot_indices.size() == param.size());
+
+  for (size_t k = 0; k < rot_indices.size(); ++k)
+  {
+    const auto& pair_pq = rot_indices[k];
+    const int p         = pair_pq.first;
+    const int q         = pair_pq.second;
+    param[k]            = rot_mat[q][p];
+  }
+}
+
+/** Take over new values for the active rotation parameters and rotate the orbitals.
+ *
+ * The change of every active parameter relative to myVars is computed and myVars is
+ * updated. With use_global_rot_ the change is combined with the stored full parameter
+ * set (myVarsFull) through applyDeltaRotation; otherwise it is applied directly and
+ * appended to history_params_.
+ * @param active variable set holding the new values
+ */
+template<typename T>
+void RotatedSPOsT<T>::resetParametersExclusive(const opt_variables_type& active)
+{
+  const size_t num_active = m_act_rot_inds.size();
+  std::vector<RealType> delta_param(num_active);
+
+  for (int i = 0; i < num_active; i++)
+  {
+    const int loc   = this->myVars.where(i);
+    delta_param[i]  = active[loc] - this->myVars[i];
+    this->myVars[i] = active[loc];
+  }
+
+  if (!use_global_rot_)
+  {
+    apply_rotation(delta_param, false);
+
+    // Save the parameters in the history list
+    history_params_.push_back(delta_param);
+    return;
+  }
+
+  const size_t num_full = m_full_rot_inds.size();
+  assert(num_full >= num_active);
+
+  std::vector<RealType> old_param(num_full);
+  std::vector<RealType> new_param(num_full);
+
+  for (int i = 0; i < num_full; i++)
+    old_param[i] = myVarsFull[i];
+
+  applyDeltaRotation(delta_param, old_param, new_param);
+
+  // Save the updated full parameter set
+  for (int i = 0; i < num_full; i++)
+    myVarsFull[i] = new_param[i];
+}
+
+/** Write the rotation state to the "RotatedSPOsT" group of an HDF archive.
+ *
+ * Depending on use_global_rot_ either the full parameter set ("rotation_global") or the
+ * list of applied rotations ("rotation_history") is written, followed by the current
+ * values of myVars ("rotation_params"). Dataset names carry the SPO set name as suffix.
+ * @param hout archive to write to
+ */
+template<typename T>
+void RotatedSPOsT<T>::writeVariationalParameters(hdf_archive& hout)
+{
+  hout.push("RotatedSPOsT");
+
+  if (!use_global_rot_)
+  {
+    hout.push("rotation_history");
+
+    // pack the history into a (steps x parameters) matrix; the width is taken from the first entry
+    const size_t num_steps  = history_params_.size();
+    const size_t num_values = (num_steps > 0) ? history_params_[0].size() : 0;
+
+    Matrix<RealType> packed(num_steps, num_values);
+    for (size_t step = 0; step < num_steps; step++)
+      for (size_t k = 0; k < num_values; k++)
+        packed(step, k) = history_params_[step][k];
+
+    std::string rot_hist_name = std::string("rotation_history_") + SPOSetT<T>::getName();
+    hout.write(packed, rot_hist_name);
+    hout.pop();
+  }
+  else
+  {
+    hout.push("rotation_global");
+    std::string rot_global_name = std::string("rotation_global_") + SPOSetT<T>::getName();
+
+    int num_full = myVarsFull.size();
+    std::vector<RealType> full_values(num_full);
+    for (int k = 0; k < num_full; k++)
+      full_values[k] = myVarsFull[k];
+
+    hout.write(full_values, rot_global_name);
+    hout.pop();
+  }
+
+  // Save myVars in order to restore object state exactly
+  //  The values aren't meaningful, but they need to match those saved in
+  //  VariableSet
+  hout.push("rotation_params");
+  std::string rot_params_name = std::string("rotation_params_") + SPOSetT<T>::getName();
+
+  int num_vars = this->myVars.size();
+  std::vector<RealType> var_values(num_vars);
+  for (int k = 0; k < num_vars; k++)
+    var_values[k] = this->myVars[k];
+
+  hout.write(var_values, rot_params_name);
+  hout.pop();
+
+  hout.pop();
+}
+
+/** Restore the rotation state from the "RotatedSPOsT" group of an HDF archive.
+ *
+ * If a "rotation_global" group exists its parameters are loaded into myVarsFull and
+ * applied as a full rotation; otherwise, if "rotation_history" exists, the stored
+ * rotations are loaded into history_params_ and re-applied. A warning is logged when
+ * neither group is present. Finally the values of myVars are read from "rotation_params".
+ * @param hin archive to read from
+ * @throws std::runtime_error when a dataset shape cannot be read or a parameter count
+ *         does not match the current object
+ */
+template<typename T>
+void RotatedSPOsT<T>::readVariationalParameters(hdf_archive& hin)
+{
+  hin.push("RotatedSPOsT", false);
+
+  const bool has_history = hin.is_group("rotation_history");
+  const bool has_global  = hin.is_group("rotation_global");
+  if (!(has_history || has_global))
+    app_warning() << "Rotation parameters not found in VP file";
+
+  // the global group is preferred when both groups are present
+  if (has_global)
+  {
+    hin.push("rotation_global", false);
+    std::string rot_global_name = std::string("rotation_global_") + SPOSetT<T>::getName();
+
+    std::vector<int> shape(1);
+    if (!hin.getShape<RealType>(rot_global_name, shape))
+      throw std::runtime_error("Failed to read rotation_global in VP file");
+
+    int num_in_file  = shape[0];
+    int num_expected = myVarsFull.size();
+
+    if (num_expected != num_in_file)
+    {
+      std::ostringstream message;
+      message << "Expected number of full rotation parameters (" << num_expected
+              << ") does not match number in file (" << num_in_file << ")";
+      throw std::runtime_error(message.str());
+    }
+
+    std::vector<RealType> full_values(num_expected);
+    hin.read(full_values, rot_global_name);
+    for (int k = 0; k < num_expected; k++)
+      myVarsFull[k] = full_values[k];
+
+    hin.pop();
+
+    applyFullRotation(full_values, true);
+  }
+  else if (has_history)
+  {
+    hin.push("rotation_history", false);
+    std::string rot_hist_name = std::string("rotation_history_") + SPOSetT<T>::getName();
+
+    std::vector<int> shape(2);
+    if (!hin.getShape<RealType>(rot_hist_name, shape))
+      throw std::runtime_error("Failed to read rotation history in VP file");
+
+    int num_steps  = shape[0];
+    int num_values = shape[1];
+    history_params_.resize(num_steps);
+
+    Matrix<RealType> packed(num_steps, num_values);
+    hin.read(packed, rot_hist_name);
+
+    // unpack one matrix row per stored rotation
+    for (size_t step = 0; step < num_steps; step++)
+    {
+      auto& step_values = history_params_[step];
+      step_values.resize(num_values);
+      for (size_t k = 0; k < num_values; k++)
+        step_values[k] = packed(step, k);
+    }
+
+    hin.pop();
+
+    applyRotationHistory();
+  }
+
+  hin.push("rotation_params", false);
+  std::string rot_param_name = std::string("rotation_params_") + SPOSetT<T>::getName();
+
+  std::vector<int> shape(1);
+  if (!hin.getShape<RealType>(rot_param_name, shape))
+    throw std::runtime_error("Failed to read rotation_params in VP file");
+
+  int num_in_file  = shape[0];
+  int num_expected = this->myVars.size();
+  if (num_expected != num_in_file)
+  {
+    std::ostringstream message;
+    message << "Expected number of rotation parameters (" << num_expected << ") does not match number in file ("
+            << num_in_file << ")";
+    throw std::runtime_error(message.str());
+  }
+
+  std::vector<RealType> var_values(num_expected);
+  hin.read(var_values, rot_param_name);
+  for (int k = 0; k < num_expected; k++)
+    this->myVars[k] = var_values[k];
+
+  hin.pop();
+
+  hin.pop();
+}
+
+/** Create the rotation index lists for nel electrons and build the optimizable variables.
+ *
+ * Does nothing when QMC_COMPLEX is defined.
+ * @param nel number of electrons using this orbital set
+ */
+template<typename T>
+void RotatedSPOsT<T>::buildOptVariables(const size_t nel)
+{
+#if !defined(QMC_COMPLEX)
+  /* Only rebuild the optimized variables if more after-rotation orbitals are needed.
+   * Consider ROHF: there is only one set of SPO for both spin up and down with
+   * Nup > Ndown, so nel_major_ will be set to Nup.
+   *
+   * The size of myVars is used as a flag to avoid building the rotation parameters
+   * again when a clone is made (the DiracDeterminant constructor calls
+   * buildOptVariables).
+   */
+  if (!(nel > nel_major_) || this->myVars.size() != 0)
+    return;
+
+  nel_major_ = nel;
+
+  const size_t nmo = Phi->getOrbitalSetSize();
+
+  // full list is only populated for global rotations; otherwise it stays empty
+  RotationIndices full_pairs;
+  if (use_global_rot_)
+    createRotationIndicesFull(nel, nmo, full_pairs);
+
+  // active rotation parameter indices
+  RotationIndices active_pairs;
+  createRotationIndices(nel, nmo, active_pairs);
+
+  buildOptVariables(active_pairs, full_pairs);
+#endif
+}
+
+/** Register the optimizable rotation variables for the given index lists.
+ *
+ * Stores the active (and, for global rotations, the full) index lists, fills myVars
+ * (and myVarsFull) with one named variable per index pair, prints the variables and,
+ * when parameters were supplied by the user, applies them as an initial rotation.
+ * Does nothing when QMC_COMPLEX is defined.
+ * @param rotations      active rotation index pairs
+ * @param full_rotations full list of index pairs; only used with use_global_rot_
+ * @throws std::runtime_error if user supplied parameters do not match the number of
+ *         active rotations
+ */
+template<typename T>
+void RotatedSPOsT<T>::buildOptVariables(const RotationIndices& rotations, const RotationIndices& full_rotations)
+{
+#if !defined(QMC_COMPLEX)
+  // create active rotations
+  m_act_rot_inds = rotations;
+
+  if (use_global_rot_)
+  {
+    m_full_rot_inds = full_rotations;
+    app_log() << "Orbital rotation using global rotation" << std::endl;
+  }
+  else
+    app_log() << "Orbital rotation using history" << std::endl;
+
+  const size_t nparams_active = m_act_rot_inds.size();
+
+  app_log() << "nparams_active: " << nparams_active << " params2.size(): " << params.size() << std::endl;
+  if (params_supplied && nparams_active != params.size())
+    throw std::runtime_error("The number of supplied orbital rotation parameters does not "
+                             "match number prdouced by the slater "
+                             "expansion. \n");
+
+  // Variable names have the form "<spo name>_orb_rot_PPPP_QQQQ", both indices being
+  // left-padded with zeros to four characters. One helper serves both variable sets.
+  const std::string name_prefix = std::string(SPOSetT<T>::getName()) + "_orb_rot_";
+  const auto make_var_name      = [&name_prefix](int p, int q) {
+    const auto zero_pad = [](int v) { return v < 10 ? "000" : v < 100 ? "00" : v < 1000 ? "0" : ""; };
+    std::stringstream sstr;
+    sstr << name_prefix << zero_pad(p) << p << "_" << zero_pad(q) << q;
+    return sstr.str();
+  };
+
+  // This will add the orbital rotation parameters to myVars
+  // and will also read in initial parameter values supplied in input file
+  this->myVars.clear();
+  for (size_t i = 0; i < nparams_active; i++)
+  {
+    const std::string var_name = make_var_name(m_act_rot_inds[i].first, m_act_rot_inds[i].second);
+
+    // If the user input parameters, use those. Otherwise, initialize the
+    // parameters to zero
+    if (params_supplied)
+      this->myVars.insert(var_name, params[i]);
+    else
+      this->myVars.insert(var_name, 0.0);
+  }
+
+  if (use_global_rot_)
+  {
+    myVarsFull.clear();
+    for (size_t i = 0; i < m_full_rot_inds.size(); i++)
+    {
+      const std::string var_name = make_var_name(m_full_rot_inds[i].first, m_full_rot_inds[i].second);
+
+      // the leading entries of the full list correspond to the active rotations
+      if (params_supplied && i < m_act_rot_inds.size())
+        myVarsFull.insert(var_name, params[i]);
+      else
+        myVarsFull.insert(var_name, 0.0);
+    }
+  }
+
+  // Printing the parameters
+  app_log() << std::string(16, ' ') << "Parameter name" << std::string(15, ' ') << "Value\n";
+  this->myVars.print(app_log());
+
+  if (params_supplied)
+  {
+    std::vector<RealType> param(m_act_rot_inds.size());
+    for (size_t i = 0; i < m_act_rot_inds.size(); i++)
+      param[i] = this->myVars[i];
+    apply_rotation(param, false);
+  }
+#endif
+}
+
+/** Rotate the wrapped orbitals using the active rotation parameters.
+ * @param param           one value per active rotation index pair
+ * @param use_stored_copy forwarded unchanged to Phi->applyRotation
+ */
+template<typename T>
+void RotatedSPOsT<T>::apply_rotation(const std::vector<RealType>& param, bool use_stored_copy)
+{
+  assert(param.size() == m_act_rot_inds.size());
+
+  const size_t num_orb = Phi->getOrbitalSetSize();
+  ValueMatrix rotation(num_orb, num_orb);
+
+  // step 1: antisymmetric generator built from the parameters
+  constructAntiSymmetricMatrix(m_act_rot_inds, param, rotation);
+
+  /*
+    rotation is now an anti-hermitian matrix. Now we convert
+    it into a unitary matrix via rot_mat = exp(-rot_mat).
+    Finally, apply unitary matrix to orbs.
+  */
+  exponentiate_antisym_matrix(rotation);
+  Phi->applyRotation(rotation, use_stored_copy);
+}
+
+/** Combine a parameter change with the stored full rotation and apply the result.
+ * @param delta_param change of the active parameters
+ * @param old_param   current full parameter set
+ * @param new_param   receives the full parameter set after the change
+ *
+ * The combined rotation matrix is applied with the stored-copy flag set to true.
+ */
+template<typename T>
+void RotatedSPOsT<T>::applyDeltaRotation(const std::vector<RealType>& delta_param,
+                                         const std::vector<RealType>& old_param,
+                                         std::vector<RealType>& new_param)
+{
+  const size_t num_orb = Phi->getOrbitalSetSize();
+
+  ValueMatrix combined_rotation(num_orb, num_orb);
+  constructDeltaRotation(delta_param, old_param, m_act_rot_inds, m_full_rot_inds, new_param, combined_rotation);
+
+  Phi->applyRotation(combined_rotation, true);
+}
+
+/** Compose a rotation change with an existing full rotation.
+ *
+ * Both parameter sets are turned into rotation matrices (antisymmetric matrix followed by
+ * exponentiate_antisym_matrix), multiplied with gemm, and the parameters of the product
+ * are recovered through log_antisym_matrix.
+ * @param delta_param   change of the active parameters (one per entry of act_rot_inds)
+ * @param old_param     current full parameters (one per entry of full_rot_inds)
+ * @param act_rot_inds  active rotation index pairs
+ * @param full_rot_inds full rotation index pairs
+ * @param new_param     output: full parameters of the combined rotation; must already
+ *                      have full_rot_inds.size() entries
+ * @param new_rot_mat   output: combined rotation matrix; must be square, its size defines
+ *                      the number of orbitals
+ */
+template<typename T>
+void RotatedSPOsT<T>::constructDeltaRotation(const std::vector<RealType>& delta_param,
+                                             const std::vector<RealType>& old_param,
+                                             const RotationIndices& act_rot_inds,
+                                             const RotationIndices& full_rot_inds,
+                                             std::vector<RealType>& new_param,
+                                             ValueMatrix& new_rot_mat)
+{
+  assert(delta_param.size() == act_rot_inds.size());
+  assert(old_param.size() == full_rot_inds.size());
+  assert(new_param.size() == full_rot_inds.size());
+
+  const size_t nmo = new_rot_mat.rows();
+  assert(new_rot_mat.rows() == new_rot_mat.cols());
+
+  ValueMatrix old_rot_mat(nmo, nmo);
+
+  constructAntiSymmetricMatrix(full_rot_inds, old_param, old_rot_mat);
+  exponentiate_antisym_matrix(old_rot_mat);
+
+  ValueMatrix delta_rot_mat(nmo, nmo);
+
+  constructAntiSymmetricMatrix(act_rot_inds, delta_param, delta_rot_mat);
+  exponentiate_antisym_matrix(delta_rot_mat);
+
+  // Apply delta rotation to old rotation.
+  // The scalars are spelled T(...) so that they always have the element type of the
+  // matrices, as in the other gemm calls of this file.
+  BLAS::gemm('N', 'N', nmo, nmo, nmo, T(1.0), delta_rot_mat.data(), nmo, old_rot_mat.data(), nmo, T(0.0),
+             new_rot_mat.data(), nmo);
+
+  ValueMatrix log_rot_mat(nmo, nmo);
+  log_antisym_matrix(new_rot_mat, log_rot_mat);
+  extractParamsFromAntiSymmetricMatrix(full_rot_inds, log_rot_mat, new_param);
+}
+
+/** Rotate the wrapped orbitals using a full parameter set.
+ * @param full_param      one value per entry of the full rotation index list
+ * @param use_stored_copy forwarded unchanged to Phi->applyRotation
+ */
+template<typename T>
+void RotatedSPOsT<T>::applyFullRotation(const std::vector<RealType>& full_param, bool use_stored_copy)
+{
+  assert(full_param.size() == m_full_rot_inds.size());
+
+  const size_t num_orb = Phi->getOrbitalSetSize();
+  ValueMatrix rotation(num_orb, num_orb);
+  rotation = T(0);
+
+  // antisymmetric generator built from the full parameter set
+  constructAntiSymmetricMatrix(m_full_rot_inds, full_param, rotation);
+
+  /*
+    rotation is now an anti-hermitian matrix. Now we convert
+    it into a unitary matrix via rot_mat = exp(-rot_mat).
+    Finally, apply unitary matrix to orbs.
+  */
+  exponentiate_antisym_matrix(rotation);
+  Phi->applyRotation(rotation, use_stored_copy);
+}
+
+/** Re-apply every rotation stored in history_params_, in the order they were recorded.
+ *
+ * Each entry is applied with the stored-copy flag set to false.
+ */
+template<typename T>
+void RotatedSPOsT<T>::applyRotationHistory()
+{
+  // iterate by const reference: the entries are only read, so there is no need to copy each vector
+  for (const auto& delta_param : history_params_)
+  {
+    apply_rotation(delta_param, false);
+  }
+}
+
+/** Replace a real, antisymmetric matrix by its matrix exponential.
+ *
+ * The exponential is computed by diagonalizing the hermitian matrix -i*mat with
+ * LAPACK::heev and exponentiating the eigenvalues.
+ * @param mat square matrix; overwritten with the real part of the result. An empty matrix
+ *            is left untouched.
+ * @throws std::runtime_error if heev reports a failure
+ *
+ * A warning is logged for every element of the result whose imaginary part exceeds
+ * 1e-12 in magnitude.
+ */
+template<typename T>
+void RotatedSPOsT<T>::exponentiate_antisym_matrix(ValueMatrix& mat)
+{
+  const int n = mat.rows();
+  // nothing to do for an empty matrix (and the work arrays below could not be addressed)
+  if (n == 0)
+    return;
+
+  std::vector<std::complex<RealType>> mat_h(n * n, 0);
+  std::vector<RealType> eval(n, 0);
+  std::vector<std::complex<RealType>> work(2 * n, 0);
+  std::vector<RealType> rwork(3 * n, 0);
+  std::vector<std::complex<RealType>> mat_d(n * n, 0);
+  std::vector<std::complex<RealType>> mat_t(n * n, 0);
+  // exponentiating e^X = e^iY (Y hermitian)
+  // i(-iX) = X, so -iX is hermitian
+  // diagonalize -iX = UDU^T, exponentiate e^iD, and return U e^iD U^T
+  // construct hermitian analogue of mat by multiplying by -i
+  for (int i = 0; i < n; ++i)
+  {
+    for (int j = i; j < n; ++j)
+    {
+      mat_h[i + n * j] = std::complex<RealType>(0, -1.0 * mat[j][i]);
+      mat_h[j + n * i] = std::complex<RealType>(0, 1.0 * mat[j][i]);
+    }
+  }
+  // diagonalize the matrix
+  char JOBZ('V');
+  char UPLO('U');
+  int N(n);
+  int LDA(n);
+  int LWORK(2 * n);
+  int info = 0;
+  LAPACK::heev(JOBZ, UPLO, N, &mat_h.at(0), LDA, &eval.at(0), &work.at(0), LWORK, &rwork.at(0), info);
+  if (info != 0)
+  {
+    std::ostringstream msg;
+    msg << "heev failed with info = " << info << " in RotatedSPOsT::exponentiate_antisym_matrix";
+    throw std::runtime_error(msg.str());
+  }
+  // iterate through diagonal matrix, exponentiate terms
+  for (int i = 0; i < n; ++i)
+  {
+    for (int j = 0; j < n; ++j)
+    {
+      mat_d[i + j * n] = (i == j) ? std::exp(std::complex<RealType>(0.0, eval[i])) : std::complex<RealType>(0.0, 0.0);
+    }
+  }
+  // perform matrix multiplication
+  // assume row major
+  BLAS::gemm('N', 'C', n, n, n, std::complex<RealType>(1.0, 0), &mat_d.at(0), n, &mat_h.at(0), n,
+             std::complex<RealType>(0.0, 0.0), &mat_t.at(0), n);
+  BLAS::gemm('N', 'N', n, n, n, std::complex<RealType>(1.0, 0), &mat_h.at(0), n, &mat_t.at(0), n,
+             std::complex<RealType>(0.0, 0.0), &mat_d.at(0), n);
+  for (int i = 0; i < n; ++i)
+    for (int j = 0; j < n; ++j)
+    {
+      // the residue can have either sign, so test its magnitude
+      const RealType im = mat_d[i + n * j].imag();
+      if (im > 1e-12 || im < -1e-12)
+      {
+        app_log() << "warning: large imaginary value in orbital "
+                     "rotation matrix: (i,j) = ("
+                  << i << "," << j << "), im = " << im << std::endl;
+      }
+      mat[j][i] = mat_d[i + n * j].real();
+    }
+}
+
+/** Compute the matrix logarithm of a real matrix through its eigen-decomposition.
+ *
+ * The eigenvalues and left eigenvectors are obtained with LAPACK::geev, the complex
+ * logarithm of every eigenvalue is taken, and the result is assembled with two gemm calls.
+ * @param mat    square input matrix
+ * @param output receives the real part of the logarithm; must already have the size of
+ *               mat. Left untouched when mat is empty.
+ * @throws std::runtime_error if geev reports a failure
+ *
+ * A warning is logged for every element of the result whose imaginary part exceeds
+ * 1e-12 in magnitude.
+ */
+template<typename T>
+void RotatedSPOsT<T>::log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output)
+{
+  const int n = mat.rows();
+  // nothing to do for an empty matrix (and the work arrays below could not be addressed)
+  if (n == 0)
+    return;
+
+  std::vector<RealType> mat_h(n * n, 0);
+  std::vector<RealType> eval_r(n, 0);
+  std::vector<RealType> eval_i(n, 0);
+  std::vector<RealType> mat_l(n * n, 0);
+  std::vector<RealType> work(4 * n, 0);
+
+  std::vector<std::complex<RealType>> mat_cd(n * n, 0);
+  std::vector<std::complex<RealType>> mat_cl(n * n, 0);
+  std::vector<std::complex<RealType>> mat_ch(n * n, 0);
+
+  for (int i = 0; i < n; ++i)
+    for (int j = 0; j < n; ++j)
+      mat_h[i + n * j] = mat[i][j];
+
+  // diagonalize the matrix
+  char JOBL('V');
+  char JOBR('N');
+  int N(n);
+  int LDA(n);
+  int LWORK(4 * n);
+  int info = 0;
+  LAPACK::geev(&JOBL, &JOBR, &N, &mat_h.at(0), &LDA, &eval_r.at(0), &eval_i.at(0), &mat_l.at(0), &LDA, nullptr, &LDA,
+               &work.at(0), &LWORK, &info);
+  if (info != 0)
+  {
+    // report the routine that was actually called
+    std::ostringstream msg;
+    msg << "geev failed with info = " << info << " in RotatedSPOsT::log_antisym_matrix";
+    throw std::runtime_error(msg.str());
+  }
+
+  // iterate through diagonal matrix, take log
+  for (int i = 0; i < n; ++i)
+  {
+    for (int j = 0; j < n; ++j)
+    {
+      auto tmp = (i == j) ? std::log(std::complex<RealType>(eval_r[i], eval_i[i])) : std::complex<RealType>(0.0, 0.0);
+      mat_cd[i + j * n] = tmp;
+
+      // A positive imaginary part marks the first member of a complex-conjugate pair:
+      // columns j and j+1 of mat_l hold the real and imaginary parts of its eigenvector.
+      // The second member (negative imaginary part) was already filled in at column j-1.
+      if (eval_i[j] > 0.0)
+      {
+        mat_cl[i + j * n]       = std::complex<RealType>(mat_l[i + j * n], mat_l[i + (j + 1) * n]);
+        mat_cl[i + (j + 1) * n] = std::complex<RealType>(mat_l[i + j * n], -mat_l[i + (j + 1) * n]);
+      }
+      else if (!(eval_i[j] < 0.0))
+      {
+        mat_cl[i + j * n] = std::complex<RealType>(mat_l[i + j * n], 0.0);
+      }
+    }
+  }
+
+  RealType one(1.0);
+  RealType zero(0.0);
+  BLAS::gemm('N', 'N', n, n, n, one, &mat_cl.at(0), n, &mat_cd.at(0), n, zero, &mat_ch.at(0), n);
+  BLAS::gemm('N', 'C', n, n, n, one, &mat_ch.at(0), n, &mat_cl.at(0), n, zero, &mat_cd.at(0), n);
+
+  for (int i = 0; i < n; ++i)
+    for (int j = 0; j < n; ++j)
+    {
+      // the residue can have either sign, so test its magnitude
+      const RealType im = mat_cd[i + n * j].imag();
+      if (im > 1e-12 || im < -1e-12)
+      {
+        app_log() << "warning: large imaginary value in antisymmetric "
+                     "matrix: (i,j) = ("
+                  << i << "," << j << "), im = " << im << std::endl;
+      }
+      output[i][j] = mat_cd[i + n * j].real();
+    }
+}
+
+/** Evaluate determinant ratios and their parameter derivatives for virtual particle moves.
+ *
+ * The ratios come from Phi->evaluateDetRatios. For the derivatives a table is built once
+ * for the reference configuration (gemm of the orbital matrix with the inverse of its
+ * square part) and once per virtual position, where the row of the moved particle is
+ * replaced by the orbital values at that position; dratios receives the difference of the
+ * two tables at each active rotation pair (p, q).
+ * @param VP         virtual particle set
+ * @param optvars    not used by this function
+ * @param psi        passed on to evaluateDetRatios
+ * @param psiinv     passed on to evaluateDetRatios
+ * @param ratios     filled by evaluateDetRatios
+ * @param dratios    output of size (number of virtual positions, number of variables)
+ * @param FirstIndex index of the first particle of this determinant
+ * @param LastIndex  one past the index of the last particle
+ */
+template<typename T>
+void RotatedSPOsT<T>::evaluateDerivRatios(const VirtualParticleSet& VP,
+                                          const opt_variables_type& optvars,
+                                          ValueVector& psi,
+                                          const ValueVector& psiinv,
+                                          std::vector<T>& ratios,
+                                          Matrix<T>& dratios,
+                                          int FirstIndex,
+                                          int LastIndex)
+{
+  Phi->evaluateDetRatios(VP, psi, psiinv, ratios);
+
+  const size_t nel = LastIndex - FirstIndex;
+  const size_t nmo = Phi->getOrbitalSetSize();
+
+  // size and clear the work matrices
+  psiM_inv.resize(nel, nel);
+  psiM_inv = 0;
+  psiM_all.resize(nel, nmo);
+  psiM_all = 0;
+  dpsiM_all.resize(nel, nmo);
+  dpsiM_all = 0;
+  d2psiM_all.resize(nel, nmo);
+  d2psiM_all = 0;
+
+  const ParticleSet& ref_pset = VP.getRefPS();
+  int moved_ptcl              = VP.refPtcl;
+
+  Phi->evaluate_notranspose(ref_pset, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
+
+  // copies the square (nel x nel) part of psiM_all into psiM_inv and inverts it in place
+  const auto invert_square_part = [this, nel]() {
+    for (int row = 0; row < nel; row++)
+      for (int col = 0; col < nel; col++)
+        psiM_inv(row, col) = psiM_all(row, col);
+
+    Invert(psiM_inv.data(), nel, nel);
+  };
+
+  // The matrix psiM_all is rectangular. psiM_inv is the inverse of the square part
+  // of the matrix. The multiply of psiM_inv and the square part of psiM_all is just
+  // the identity. This multiply could be reduced to psiM_inv and the
+  // non-square part of psiM_all.
+  const auto fill_table = [this, nel, nmo](ValueMatrix& table) {
+    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), psiM_all.data(), nmo, psiM_inv.data(), nel, T(0.0), table.data(),
+               nmo);
+  };
+
+  // table for the reference configuration
+  invert_square_part();
+  ValueMatrix T_orig;
+  T_orig.resize(nel, nmo);
+  fill_table(T_orig);
+
+  ValueMatrix T_mat;
+  T_mat.resize(nel, nmo);
+
+  ValueVector tmp_psi;
+  tmp_psi.resize(nmo);
+
+  for (int iat = 0; iat < VP.getTotalNum(); iat++)
+  {
+    Phi->evaluateValue(VP, iat, tmp_psi);
+
+    // overwrite the row of the moved particle with the orbital values at the virtual position
+    for (int orb = 0; orb < nmo; orb++)
+      psiM_all(moved_ptcl - FirstIndex, orb) = tmp_psi[orb];
+
+    invert_square_part();
+    fill_table(T_mat);
+
+    for (int rot = 0; rot < m_act_rot_inds.size(); rot++)
+    {
+      const int kk = this->myVars.where(rot);
+      if (kk < 0)
+        continue;
+
+      const int p      = m_act_rot_inds.at(rot).first;
+      const int q      = m_act_rot_inds.at(rot).second;
+      dratios(iat, kk) = T_mat(p, q) - T_orig(p, q); // dratio size is (nknot, num_vars)
+    }
+  }
+}
+
+/** Evaluate the derivatives of log(psi) with respect to the active rotation parameters.
+ *
+ * The orbital matrix for all orbitals is evaluated, its square part inverted, and the
+ * gemm product of the two is read at every active rotation pair (p, q).
+ * @param P          particle set
+ * @param optvars    not used by this function
+ * @param dlogpsi    output; entry myVars.where(i) is written for each active rotation i
+ *                   whose location is non-negative
+ * @param FirstIndex index of the first particle of this determinant
+ * @param LastIndex  one past the index of the last particle
+ */
+template<typename T>
+void RotatedSPOsT<T>::evaluateDerivativesWF(ParticleSet& P,
+                                            const opt_variables_type& optvars,
+                                            Vector<T>& dlogpsi,
+                                            int FirstIndex,
+                                            int LastIndex)
+{
+  const size_t nel = LastIndex - FirstIndex;
+  const size_t nmo = Phi->getOrbitalSetSize();
+
+  // --- part 1: orbital matrix and inverse of its square part
+  psiM_inv.resize(nel, nel);
+  psiM_inv = 0;
+  psiM_all.resize(nel, nmo);
+  psiM_all = 0;
+  dpsiM_all.resize(nel, nmo);
+  dpsiM_all = 0;
+  d2psiM_all.resize(nel, nmo);
+  d2psiM_all = 0;
+
+  Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
+
+  for (int row = 0; row < nel; row++)
+    for (int col = 0; col < nel; col++)
+      psiM_inv(row, col) = psiM_all(row, col);
+
+  Invert(psiM_inv.data(), nel, nel);
+
+  // --- part 2: table of derivatives
+  ValueMatrix T_mat;
+  T_mat.resize(nel, nmo);
+
+  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), psiM_all.data(), nmo, psiM_inv.data(), nel, T(0.0), T_mat.data(), nmo);
+
+  for (int rot = 0; rot < m_act_rot_inds.size(); rot++)
+  {
+    const int kk = this->myVars.where(rot);
+    if (kk < 0)
+      continue;
+
+    const int p = m_act_rot_inds.at(rot).first;
+    const int q = m_act_rot_inds.at(rot).second;
+    dlogpsi[kk] = T_mat(p, q);
+  }
+}
+
+template<typename T>
+void RotatedSPOsT<T>::evaluateDerivatives(ParticleSet& P,
+                                          const opt_variables_type& optvars,
+                                          Vector<T>& dlogpsi,
+                                          Vector<T>& dhpsioverpsi,
+                                          const int& FirstIndex,
+                                          const int& LastIndex)
+{
+  // Single-determinant derivatives: adds to dlogpsi[kk] and dhpsioverpsi[kk]
+  // for each entry of m_act_rot_inds whose myVars.where() index kk is >= 0.
+  const size_t nel = LastIndex - FirstIndex;
+  const size_t nmo = Phi->getOrbitalSetSize();
+
+  // ---------------- PART 1: size and zero the work arrays ----------------
+  myG_temp.resize(nel);
+  myG_temp = 0;
+  myG_J.resize(nel);
+  myG_J = 0;
+  myL_temp.resize(nel);
+  myL_temp = 0;
+  myL_J.resize(nel);
+  myL_J = 0;
+
+  Bbar.resize(nel, nmo);
+  Bbar = 0;
+  psiM_inv.resize(nel, nel);
+  psiM_inv = 0;
+  psiM_all.resize(nel, nmo);
+  psiM_all = 0;
+  dpsiM_all.resize(nel, nmo);
+  dpsiM_all = 0;
+  d2psiM_all.resize(nel, nmo);
+  d2psiM_all = 0;
+
+  // Values, gradients and laplacians of all nmo orbitals for the nel electrons.
+  Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
+
+  // psiM_inv <- inverse of the leading nel x nel block of psiM_all.
+  for (int row = 0; row < nel; ++row)
+    for (int col = 0; col < nel; ++col)
+      psiM_inv(row, col) = psiM_all(row, col);
+  Invert(psiM_inv.data(), nel, nel);
+
+  // Determinant-only gradient (myG_temp) and laplacian (myL_temp) per electron;
+  // both are sums over the first nel orbitals.
+  for (int iel = 0; iel < nel; ++iel)
+    for (int iorb = 0; iorb < nel; ++iorb)
+    {
+      for (int dim = 0; dim < 3; ++dim)
+        myG_temp[iel][dim] += psiM_inv(iorb, iel) * dpsiM_all(iel, iorb)[dim];
+      myL_temp[iel] += psiM_inv(iorb, iel) * d2psiM_all(iel, iorb);
+    }
+
+  // myG_J represents \frac{\nabla\psi_{J}}{\psi_{J}} and myL_J represents
+  // \frac{\nabla^2\psi_{J}}{\psi_{J}}. IMPORTANT NOTE: P.L holds
+  // \nabla^2 ln[\psi], but \frac{\nabla^2 \psi}{\psi} is needed, hence the
+  // extra dot(P.G, P.G) term.
+  int iat = FirstIndex;
+  for (int iel = 0; iel < nel; ++iel, ++iat)
+  {
+    myG_J[iel] = (P.G[iat] - myG_temp[iel]);
+    myL_J[iel] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[iel]);
+  }
+
+  // Bbar = laplacian + 2 * myG_J . gradient + myL_J * value, for every orbital.
+  // possibly replace with BLAS calls
+  for (int iel = 0; iel < nel; ++iel)
+    for (int iorb = 0; iorb < nmo; ++iorb)
+      Bbar(iel, iorb) =
+          d2psiM_all(iel, iorb) + 2 * dot(myG_J[iel], dpsiM_all(iel, iorb)) + myL_J[iel] * psiM_all(iel, iorb);
+
+  // ---------------- PART 2: matrix products and accumulation ----------------
+  const T* const A_ptr(psiM_all.data());
+  const T* const Ainv_ptr(psiM_inv.data());
+  const T* const B_ptr(Bbar.data());
+  const T one(1.0);
+  const T zero(0.0);
+
+  ValueMatrix T_mat, Y1, Y2, Y3, Y4;
+  T_mat.resize(nel, nmo);
+  Y1.resize(nel, nel);
+  Y2.resize(nel, nmo);
+  Y3.resize(nel, nmo);
+  Y4.resize(nel, nmo);
+
+  BLAS::gemm('N', 'N', nmo, nel, nel, one, A_ptr, nmo, Ainv_ptr, nel, zero, T_mat.data(), nmo);
+  BLAS::gemm('N', 'N', nel, nel, nel, one, B_ptr, nmo, Ainv_ptr, nel, zero, Y1.data(), nel);
+  BLAS::gemm('N', 'N', nmo, nel, nel, one, T_mat.data(), nmo, Y1.data(), nel, zero, Y2.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nel, one, B_ptr, nmo, Ainv_ptr, nel, zero, Y3.data(), nmo);
+
+  // possibly replace with BLAS call
+  Y4 = Y3 - Y2;
+
+  // Accumulate the (p, q) entries at the positions given by myVars.where().
+  for (int irot = 0; irot < m_act_rot_inds.size(); ++irot)
+  {
+    const int loc = this->myVars.where(irot);
+    if (loc < 0)
+      continue;
+    const int p = m_act_rot_inds.at(irot).first;
+    const int q = m_act_rot_inds.at(irot).second;
+    dlogpsi[loc] += T_mat(p, q);
+    dhpsioverpsi[loc] += T(-0.5) * Y4(p, q);
+  }
+}
+
+template<typename T>
+void RotatedSPOsT<T>::evaluateDerivatives(ParticleSet& P,
+                                          const opt_variables_type& optvars,
+                                          Vector<T>& dlogpsi,
+                                          Vector<T>& dhpsioverpsi,
+                                          const T& psiCurrent,
+                                          const std::vector<T>& Coeff,
+                                          const std::vector<size_t>& C2node_up,
+                                          const std::vector<size_t>& C2node_dn,
+                                          const ValueVector& detValues_up,
+                                          const ValueVector& detValues_dn,
+                                          const GradMatrix& grads_up,
+                                          const GradMatrix& grads_dn,
+                                          const ValueMatrix& lapls_up,
+                                          const ValueMatrix& lapls_dn,
+                                          const ValueMatrix& M_up,
+                                          const ValueMatrix& M_dn,
+                                          const ValueMatrix& Minv_up,
+                                          const ValueMatrix& Minv_dn,
+                                          const GradMatrix& B_grad,
+                                          const ValueMatrix& B_lapl,
+                                          const std::vector<int>& detData_up,
+                                          const size_t N1,
+                                          const size_t N2,
+                                          const size_t NP1,
+                                          const size_t NP2,
+                                          const std::vector<std::vector<int>>& lookup_tbl)
+{
+  // Multi-determinant entry point. Nothing is computed unless optvars asks for
+  // a recompute of at least one variable that myVars maps to an index >= 0.
+  bool needs_update = false;
+  for (int ivar = 0; ivar < this->myVars.size(); ++ivar)
+  {
+    const int loc = this->myVars.where(ivar);
+    if (loc >= 0 && optvars.recompute(loc))
+      needs_update = true;
+  }
+  if (!needs_update)
+    return;
+
+  // Per-particle scratch arrays sized to P.getTotalNum(): the *_msd pair collects
+  // the determinant-expansion part, the *_J pair what remains of P.G / P.L.
+  const int NP = P.getTotalNum();
+  ParticleSet::ParticleGradient grad_msd, grad_J;
+  ParticleSet::ParticleLaplacian lapl_msd, lapl_J;
+  grad_msd.resize(NP);
+  grad_msd = 0.0;
+  lapl_msd.resize(NP);
+  lapl_msd = 0.0;
+  grad_J.resize(NP);
+  grad_J = 0.0;
+  lapl_J.resize(NP);
+  lapl_J = 0.0;
+
+  const size_t nmo = Phi->getOrbitalSetSize();
+  const size_t nel = P.last(0) - P.first(0);
+
+  // Coefficient-weighted sum over the CI expansion: up-determinant derivatives
+  // are scaled by the matching down-determinant value and vice versa.
+  for (int ici = 0; ici < Coeff.size(); ++ici)
+  {
+    const size_t up_id = C2node_up[ici];
+    const size_t dn_id = C2node_dn[ici];
+    const T up_weight  = Coeff[ici] * detValues_dn[dn_id];
+    const T dn_weight  = Coeff[ici] * detValues_up[up_id];
+    for (size_t k = 0; k < NP1; ++k)
+    {
+      grad_msd[N1 + k] += up_weight * grads_up(up_id, k);
+      lapl_msd[N1 + k] += up_weight * lapls_up(up_id, k);
+    }
+    for (size_t k = 0; k < NP2; ++k)
+    {
+      grad_msd[N2 + k] += dn_weight * grads_dn(dn_id, k);
+      lapl_msd[N2 + k] += dn_weight * lapls_dn(dn_id, k);
+    }
+  }
+  grad_msd *= (1 / psiCurrent);
+  lapl_msd *= (1 / psiCurrent);
+
+  // IMPORTANT NOTE: P.L holds \nabla^2 ln[\psi] but \frac{\nabla^2 \psi}{\psi}
+  // is needed, which is why dot(P.G, P.G) is added before subtracting.
+  for (int iat = 0; iat < lapl_msd.size(); ++iat)
+  {
+    grad_J[iat] = (P.G[iat] - grad_msd[iat]);
+    lapl_J[iat] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - lapl_msd[iat]);
+  }
+
+  table_method_eval(dlogpsi, dhpsioverpsi, lapl_J, grad_J, nel, nmo, psiCurrent, Coeff, C2node_up, C2node_dn,
+                    detValues_up, detValues_dn, grads_up, grads_dn, lapls_up, lapls_dn, M_up, M_dn, Minv_up, Minv_dn,
+                    B_grad, B_lapl, detData_up, N1, N2, NP1, NP2, lookup_tbl);
+}
+
+template<typename T>
+void RotatedSPOsT<T>::evaluateDerivativesWF(ParticleSet& P,
+                                            const opt_variables_type& optvars,
+                                            Vector<T>& dlogpsi,
+                                            const FullRealType& psiCurrent,
+                                            const std::vector<T>& Coeff,
+                                            const std::vector<size_t>& C2node_up,
+                                            const std::vector<size_t>& C2node_dn,
+                                            const ValueVector& detValues_up,
+                                            const ValueVector& detValues_dn,
+                                            const ValueMatrix& M_up,
+                                            const ValueMatrix& M_dn,
+                                            const ValueMatrix& Minv_up,
+                                            const ValueMatrix& Minv_dn,
+                                            const std::vector<int>& detData_up,
+                                            const std::vector<std::vector<int>>& lookup_tbl)
+{
+  // Only evaluate when optvars requests a recompute for at least one variable
+  // that myVars maps to a non-negative index.
+  bool needs_update = false;
+  for (int ivar = 0; ivar < this->myVars.size(); ++ivar)
+  {
+    const int loc = this->myVars.where(ivar);
+    if (loc >= 0 && optvars.recompute(loc))
+      needs_update = true;
+  }
+  if (!needs_update)
+    return;
+
+  // Orbital count comes from Phi; electron count from P.last(0) - P.first(0).
+  const size_t num_orbs = Phi->getOrbitalSetSize();
+  const size_t num_els  = P.last(0) - P.first(0);
+  table_method_evalWF(dlogpsi, num_els, num_orbs, psiCurrent, Coeff, C2node_up, C2node_dn, detValues_up, detValues_dn,
+                      M_up, M_dn, Minv_up, Minv_dn, detData_up, lookup_tbl);
+}
+
+template<typename T>
+void RotatedSPOsT<T>::table_method_eval(Vector<T>& dlogpsi,
+                                        Vector<T>& dhpsioverpsi,
+                                        const ParticleSet::ParticleLaplacian& myL_J,
+                                        const ParticleSet::ParticleGradient& myG_J,
+                                        const size_t nel,
+                                        const size_t nmo,
+                                        const T& psiCurrent,
+                                        const std::vector<T>& Coeff,
+                                        const std::vector<size_t>& C2node_up,
+                                        const std::vector<size_t>& C2node_dn,
+                                        const ValueVector& detValues_up,
+                                        const ValueVector& detValues_dn,
+                                        const GradMatrix& grads_up,
+                                        const GradMatrix& grads_dn,
+                                        const ValueMatrix& lapls_up,
+                                        const ValueMatrix& lapls_dn,
+                                        const ValueMatrix& M_up,
+                                        const ValueMatrix& M_dn,
+                                        const ValueMatrix& Minv_up,
+                                        const ValueMatrix& Minv_dn,
+                                        const GradMatrix& B_grad,
+                                        const ValueMatrix& B_lapl,
+                                        const std::vector<int>& detData_up,
+                                        const size_t N1,
+                                        const size_t N2,
+                                        const size_t NP1,
+                                        const size_t NP2,
+                                        const std::vector<std::vector<int>>& lookup_tbl)
+/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+GUIDE TO THE MATRICES BEING BUILT
+----------------------------------------------
+The idea here is that there is a loop over all unique determinants. For each
+determinant the table method is employed to calculate the contributions to the
+parameter derivatives (dhpsioverpsi/dlogpsi)
+
+  loop through unique determinants
+    loop through parameters
+      evaluate contribution to dlogpsi and dhpsioverpsi
+\noindent
+
+  BLAS GUIDE  for matrix multiplication of  [  alpha * A.B + beta * C = C ]
+  Matrix A is of dimensions a1,a2 and Matrix B is b1,b2   in which a2=b1
+  The BLAS command is as follows...
+
+ BLAS::gemm('N','N', b2, a1, a2 ,alpha, B, b2, A, a2, beta, C, b2);
+
+Below is a human readable format for the matrix multiplications performed
+below...
+
+This notation is inspired by http://dx.doi.org/10.1063/1.4948778
+\newline
+\hfill\break
+$
+    A_{i,j}=\phi_j(r_{i}) \\
+    T = A^{-1} \widetilde{A} \\
+    B_{i,j} =\nabla^2 \phi_{j}(r_i) + 2 \frac{\nabla_{i}J}{J} \cdot \nabla
+\phi_{j}(r_{i})  + \frac{\nabla^2_i J}{J} \phi_{j}(r_{i}) \\
+    \hat{O_{I}} = \hat{O}D_{I} \\
+    D_{I}=det(A_{I}) \newline
+    \psi_{MS} = \sum_{I=0} C_{I} D_{I\uparrow}D_{I\downarrow} \\
+    \Psi_{total} = \psi_{J}\psi_{MS} \\
+    \alpha_{I} = P^{T}_{I}TQ_{I} \\
+    M_{I} = P^{T}_{I} \widetilde{M} Q_{I} = P^{T}_{I} (A^{-1}\widetilde{B} -
+A^{-1} B A^{-1}\widetilde{A} )Q_{I} \\
+$
+\newline
+There are three constants I use in the expressions for dhpsioverpsi and dlogpsi
+\newline
+\hfill\break
+$
+  const0 = C_{0}*det(A_{0\downarrow})+\sum_{I=1} C_{I}*det(A_{I\downarrow})*
+det(\alpha_{I\uparrow}) \\
+  const1 = C_{0}*\hat{O} det(A_{0\downarrow})+\sum_{I=1}
+C_{I}*\hat{O}det(A_{I\downarrow})* det(\alpha_{I\uparrow}) \\
+  const2 = \sum_{I=1} C_{I}*det(A_{I\downarrow})*
+Tr[\alpha_{I}^{-1}M_{I}]*det(\alpha_{I}) \\
+$
+\newline
+Below is a translation of the shorthand I use to represent matrices independent
+of ``excitation matrix". \newline \hfill\break
+$
+    Y_{1} =  A^{-1}B   \\
+    Y_{2} = A^{-1}BA^{-1}\widetilde{A} \\
+    Y_{3} = A^{-1}\widetilde{B} \\
+    Y_{4} = \widetilde{M} = (A^{-1}\widetilde{B} - A^{-1} B A^{-1}\widetilde{A}
+)\\
+$
+\newline
+Below is a translation of the shorthand I use to represent matrices dependent on
+``excitation" with respect to the reference Matrix and sums of matrices. Above
+this line I have represented these excitation matrices with a subscript ``I" but
+from this point on the subscript will be omitted and it is clear that whenever a
+matrix depends on $P^{T}_I$ and $Q_{I}$ that this is an excitation matrix. The
+reference matrix is always $A_{0}$ and is always the Hartree Fock Matrix.
+\newline
+\hfill\break
+$
+    Y_{5} = TQ \\
+    Y_{6} = (P^{T}TQ)^{-1} = \alpha_{I}^{-1}\\
+    Y_{7} = \alpha_{I}^{-1} P^{T} \\
+    Y_{11} = \widetilde{M}Q \\
+    Y_{23} = P^{T}\widetilde{M}Q \\
+    Y_{24} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q \\
+    Y_{25} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q\alpha_{I}^{-1} \\
+    Y_{26} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q\alpha_{I}^{-1}P^{T}\\
+$
+\newline
+So far you will notice that I have not included up or down arrows to specify
+what spin the matrices are of. This is because we are calculating the derivative
+of all up or all down spin orbital rotation parameters at a time. If we are
+finding the up spin derivatives then any term that is down spin will be
+constant. The following assumes that we are taking up-spin MO rotation parameter
+derivatives. Of course the down spin expression can be retrieved by swapping the
+up and down arrows. I have dubbed any expression with lowercase p prefix as a
+"precursor" to an expression actually used... \newline \hfill\break
+$
+    \dot{C_{I}} = C_{I}*det(A_{I\downarrow})\\
+    \ddot{C_{I}} = C_{I}*\hat{O}det(A_{I\downarrow}) \\
+    pK1 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) Tr[\alpha_{I}^{-1}M_{I}]
+(Q\alpha_{I}^{-1}P^{T}) \\
+    pK2 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}) \\
+    pK3 = \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}) \\
+    pK4 = \sum_{I=1} \dot{C_{I}} det(A_{I}) (Q\alpha_{I}^{-1}P^{T}) \\
+    pK5 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1} M_{I}
+\alpha_{I}^{-1}P^{T}) \\
+$
+\newline
+Now these p matrices will be used to make various expressions via BLAS commands.
+\newline
+\hfill\break
+$
+    K1T = const0^{-1}*pK1.T =const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+Tr[\alpha_{I}^{-1}M_{I}] (Q\alpha_{I}^{-1}P^{T}T) \\
+    TK1T = T.K1T = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+Tr[\alpha_{I}^{-1}M_{I}] (TQ\alpha_{I}^{-1}P^{T}T)\\ \\
+    K2AiB = const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+(Q\alpha_{I}^{-1}P^{T}A^{-1}\widetilde{B})\\
+    TK2AiB = T.K2AiB = const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+(TQ\alpha_{I}^{-1}P^{T}A^{-1}\widetilde{B})\\
+    K2XA =  const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+(Q\alpha_{I}^{-1}P^{T}X\widetilde{A})\\
+    TK2XA = T.K2XA = const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+(TQ\alpha_{I}^{-1}P^{T}X\widetilde{A})\\ \\
+    K2T = \frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+(Q\alpha_{I}^{-1}P^{T}T) \\
+    TK2T = T.K2T =\frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}}
+det(\alpha_{I}) (TQ\alpha_{I}^{-1}P^{T}T) \\
+    MK2T = \frac{const0}{const1} Y_{4}.K2T= const0^{-1}  \sum_{I=1} \dot{C_{I}}
+det(\alpha_{I}) (\widetilde{M}Q\alpha_{I}^{-1}P^{T}T)\\ \\
+    K3T = const0^{-1}  \sum_{I=1} \ddot{C_{I}} det(\alpha_{I})
+(Q\alpha_{I}^{-1}P^{T}T) \\
+    TK3T = T.K3T  = const0^{-1}  \sum_{I=1} \ddot{C_{I}} det(\alpha_{I})
+(TQ\alpha_{I}^{-1}P^{T}T)\\ \\
+    K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (Q\alpha_{I}^{-1}P^{T}T) \\
+    TK4T = T.K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (TQ\alpha_{I}^{-1}P^{T}T)
+\\ \\
+    K5T =  const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}
+M_{I} \alpha_{I}^{-1}P^{T} T)  \\
+    TK5T = T.K5T  = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (T Q\alpha_{I}^{-1}
+M_{I} \alpha_{I}^{-1}P^{T} T)  \\
+$
+\newline
+Now with all these matrices and constants the expressions of dhpsioverpsi and
+dlogpsi can be created.
+
+As written, this routine only accumulates into dhpsioverpsi; the arguments
+dlogpsi, psiCurrent, grads_up, lapls_up, M_dn, Minv_dn and NP1 are not
+referenced in the body (NOTE(review): confirm that this is intended).
+
+In addition I will be using a special generalization of the kinetic operator
+which I will denote as O. Our Slater matrix with the special O operator applied
+to each element will be called B_bar
+
+$
+``Bbar"_{i,j} =\nabla^2 \phi_{j}(r_i) + 2 \frac{\nabla_{i}J}{J} \cdot \nabla
+\phi_{j}(r_{i})  + \frac{\nabla^2_i J}{J} \phi_{j}(r_{i})
+$
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+{
+  ValueMatrix Table; // holds T = A^{-1} \widetilde{A}; written through the raw pointer T_ below
+  ValueMatrix Bbar; // local; likely shadows the class-level Bbar that evaluateDerivatives resizes
+  ValueMatrix Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y11, Y23, Y24, Y25, Y26;
+  ValueMatrix pK1, K1T, TK1T, pK2, K2AiB, TK2AiB, K2XA, TK2XA, K2T, TK2T, MK2T, pK3, K3T, TK3T, pK5, K5T, TK5T;
+
+  Table.resize(nel, nmo);
+
+  Bbar.resize(nel, nmo);
+
+  Y1.resize(nel, nel);
+  Y2.resize(nel, nmo);
+  Y3.resize(nel, nmo);
+  Y4.resize(nel, nmo);
+
+  pK1.resize(nmo, nel);
+  K1T.resize(nmo, nmo);
+  TK1T.resize(nel, nmo);
+
+  pK2.resize(nmo, nel);
+  K2AiB.resize(nmo, nmo);
+  TK2AiB.resize(nel, nmo);
+  K2XA.resize(nmo, nmo);
+  TK2XA.resize(nel, nmo);
+  K2T.resize(nmo, nmo);
+  TK2T.resize(nel, nmo);
+  MK2T.resize(nel, nmo);
+
+  pK3.resize(nmo, nel);
+  K3T.resize(nmo, nmo);
+  TK3T.resize(nel, nmo);
+
+  pK5.resize(nmo, nel);
+  K5T.resize(nmo, nmo);
+  TK5T.resize(nel, nmo);
+
+  const int parameters_size(m_act_rot_inds.size());
+  const int parameter_start_index(0);
+
+  const size_t num_unique_up_dets(detValues_up.size());
+  const size_t num_unique_dn_dets(detValues_dn.size());
+
+  const T* restrict cptr = Coeff.data();
+  const size_t nc        = Coeff.size();
+  const size_t* restrict upC(C2node_up.data());
+  const size_t* restrict dnC(C2node_dn.data());
+  // B_grad holds the gradient operator
+  // B_lapl holds the laplacian operator
+  // B_bar will hold our special O operator
+  // myG_J / myL_J are read at offset N1 for the up-spin rows and at offset N2 for the other spin.
+  const int offset1(N1);
+  const int offset2(N2);
+  const int NPother(NP2);
+
+  T* T_(Table.data());
+
+  // possibly replace with BLAS calls
+  for (int i = 0; i < nel; i++)
+    for (int j = 0; j < nmo; j++)
+      Bbar(i, j) = B_lapl(i, j) + 2 * dot(myG_J[i + offset1], B_grad(i, j)) + myL_J[i + offset1] * M_up(i, j);
+
+  const T* restrict B(Bbar.data());
+  const T* restrict A(M_up.data());
+  const T* restrict Ainv(Minv_up.data());
+  // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF
+  // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR
+  // THIS CASE
+  //  The T matrix should be calculated and stored for use
+  //  T = A^{-1} \widetilde A
+  // REMINDER: that the ValueMatrix "matrix" stores data in a row major order
+  // and that BLAS commands assume column major
+  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, RealType(0.0), T_, nmo);
+  // NOTE(review): alpha is T(1.0) but beta is RealType(0.0) here; evaluateDerivatives uses T for both - confirm.
+  BLAS::gemm('N', 'N', nel, nel, nel, T(1.0), B, nmo, Ainv, nel, RealType(0.0), Y1.data(), nel);
+  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), T_, nmo, Y1.data(), nel, RealType(0.0), Y2.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), B, nmo, Ainv, nel, RealType(0.0), Y3.data(), nmo);
+
+  // possibly replace with BLAS call
+  Y4 = Y3 - Y2;
+
+  // Need to create the constants: (Oi, const0, const1, const2)to take
+  // advantage of minimal BLAS commands; Oi is the special operator applied to
+  // the slater matrix "A subscript i" from the total CI expansion \hat{O_{i}}
+  //= \hat{O}D_{i} with D_{i}=det(A_{i}) and Multi-Slater component defined as
+  //\sum_{i=0} C_{i} D_{i\uparrow}D_{i\downarrow}
+  std::vector<RealType> Oi(num_unique_dn_dets);
+  // Oi[index] sums, over the NP2 particles starting at N2, the O operator applied to down determinant "index".
+  for (int index = 0; index < num_unique_dn_dets; index++)
+    for (int iat = 0; iat < NPother; iat++)
+      Oi[index] += lapls_dn(index, iat) + 2 * dot(grads_dn(index, iat), myG_J[offset2 + iat]) +
+          myL_J[offset2 + iat] * detValues_dn[index];
+
+  // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1}
+  // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow}) const1 =
+  // C_{0}*\hat{O} det(A_{0\downarrow})+\sum_{i=1}
+  // C_{i}*\hat{O}det(A_{i\downarrow})* det(\alpha_{i\uparrow}) const2 =
+  // \sum_{i=1} C_{i}*det(A_{i\downarrow})*
+  // Tr[\alpha_{i}^{-1}M_{i}]*det(\alpha_{i})
+  RealType const0(0.0), const1(0.0), const2(0.0);
+  for (size_t i = 0; i < nc; ++i)
+  {
+    const RealType c  = cptr[i];
+    const size_t up   = upC[i];
+    const size_t down = dnC[i];
+
+    const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]);
+    const1 += c * Oi[down] * (detValues_up[up] / detValues_up[0]);
+  }
+  // NOTE(review): const0 and const1 are used as divisors further down (1.0 / const0, const0 / const1, const2 / const1).
+  std::fill(pK1.begin(), pK1.end(), 0.0);
+  std::fill(pK2.begin(), pK2.end(), 0.0);
+  std::fill(pK3.begin(), pK3.end(), 0.0);
+  std::fill(pK5.begin(), pK5.end(), 0.0);
+
+  // Now we are going to loop through all unique determinants.
+  // The few lines above are for the reference matrix contribution.
+  // Although I start the loop below from index 0, the loop only performs
+  // actions when the index is >= 1 the detData object contains all the
+  // information about the P^T and Q matrices (projection matrices) needed in
+  // the table method
+  const int* restrict data_it = detData_up.data();
+  for (int index = 0, datum = 0; index < num_unique_up_dets; index++)
+  {
+    const int k = data_it[datum];
+    // k is the leading entry of this determinant's record in detData_up; each record spans 3 * k + 1 ints.
+    if (k == 0)
+    {
+      datum += 3 * k + 1;
+    }
+
+    else
+    {
+      // Number of rows and cols of P^T (NOTE(review): prows/pcols are not used below)
+      const int prows = k;
+      const int pcols = nel;
+      // Number of rows and cols of Q (NOTE(review): qrows/qcols are not used below)
+      const int qrows = nmo;
+      const int qcols = k;
+
+      Y5.resize(nel, k);
+      Y6.resize(k, k);
+
+      // Any matrix multiplication of P^T or Q is simply a projection
+      // Explicit matrix multiplication can be avoided; instead column or
+      // row copying can be done BLAS::copy(size of col/row being copied,
+      //            Matrix pointer + place to begin copying,
+      //            storage spacing (number of elements btw next row/col
+      //            element), Pointer to resultant matrix + place to begin
+      //            pasting, storage spacing of resultant matrix)
+      // For example the next 4 lines are the matrix multiplication of T*Q
+      // = Y5
+      std::fill(Y5.begin(), Y5.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo, Y5.data() + i, k);
+      }
+
+      std::fill(Y6.begin(), Y6.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, (Y6.data() + i * k), 1);
+      }
+      // Y6 = P^T T Q is handed to InvertWithLog; per the guide above it then holds \alpha_{I}^{-1}.
+      Vector<T> WS;
+      Vector<IndexType> Piv;
+      WS.resize(k);
+      Piv.resize(k);
+      std::complex<RealType> logdet = 0.0;
+      InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet);
+      // NOTE(review): logdet is not read after this call.
+      Y11.resize(nel, k);
+      Y23.resize(k, k);
+      Y24.resize(k, k);
+      Y25.resize(k, k);
+      Y26.resize(k, nel);
+      // Y11 = \widetilde{M} Q: strided copies pick the selected columns out of Y4.
+      std::fill(Y11.begin(), Y11.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(nel, Y4.data() + (data_it[datum + 1 + k + i]), nmo, Y11.data() + i, k);
+      }
+      // Y23 = P^T \widetilde{M} Q: contiguous copies pick the selected rows out of Y11.
+      std::fill(Y23.begin(), Y23.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(k, Y11.data() + (data_it[datum + 1 + i]) * k, 1, (Y23.data() + i * k), 1);
+      }
+      // Y24 and Y25 as defined in the guide above (products of Y23 with Y6).
+      BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y23.data(), k, Y6.data(), k, RealType(0.0), Y24.data(), k);
+      BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y6.data(), k, Y24.data(), k, RealType(0.0), Y25.data(), k);
+      // NOTE(review): Y26 was already resized to (k, nel) a few lines up; this resize repeats it.
+      Y26.resize(k, nel);
+
+      std::fill(Y26.begin(), Y26.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(k, Y25.data() + i, k, Y26.data() + (data_it[datum + 1 + i]), nel);
+      }
+      // Y7 = \alpha_{I}^{-1} P^{T}: the entries of Y6 are spread into a zeroed k x nel matrix.
+      Y7.resize(k, nel);
+
+      std::fill(Y7.begin(), Y7.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(k, Y6.data() + i, k, Y7.data() + (data_it[datum + 1 + i]), nel);
+      }
+
+      // c_Tr_AlphaI_MI is a constant contributing to constant const2
+      // c_Tr_AlphaI_MI = Tr[\alpha_{I}^{-1}(P^{T}\widetilde{M} Q)]
+      RealType c_Tr_AlphaI_MI = 0.0;
+      for (int i = 0; i < k; i++)
+      {
+        c_Tr_AlphaI_MI += Y24(i, i);
+      }
+      // Every CI term listed in lookup_tbl[index] adds its weighted Y7 / Y26 rows to pK1, pK2, pK3 and pK5.
+      for (int p = 0; p < lookup_tbl[index].size(); p++)
+      {
+        // el_p is the element position that contains information about
+        // the CI coefficient, and det up/dn values associated with the
+        // current unique determinant
+        const int el_p(lookup_tbl[index][p]);
+        const RealType c  = cptr[el_p];
+        const size_t up   = upC[el_p];
+        const size_t down = dnC[el_p];
+
+        const RealType alpha_1(c * detValues_dn[down] * detValues_up[up] / detValues_up[0] * c_Tr_AlphaI_MI);
+        const RealType alpha_2(c * detValues_dn[down] * detValues_up[up] / detValues_up[0]);
+        const RealType alpha_3(c * Oi[down] * detValues_up[up] / detValues_up[0]);
+
+        const2 += alpha_1;
+
+        for (int i = 0; i < k; i++)
+        {
+          BLAS::axpy(nel, alpha_1, Y7.data() + i * nel, 1, pK1.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+          BLAS::axpy(nel, alpha_2, Y7.data() + i * nel, 1, pK2.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+          BLAS::axpy(nel, alpha_3, Y7.data() + i * nel, 1, pK3.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+          BLAS::axpy(nel, alpha_2, Y26.data() + i * nel, 1, pK5.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+        }
+      }
+      datum += 3 * k + 1;
+    }
+  }
+  // Contract the accumulated pK matrices with T (and Y2, Y3, Y4) to form the K* / TK* matrices of the guide.
+  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK1.data(), nel, RealType(0.0), K1T.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K1T.data(), nmo, T_, nmo, RealType(0.0), TK1T.data(), nmo);
+
+  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y3.data(), nmo, pK2.data(), nel, RealType(0.0), K2AiB.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2AiB.data(), nmo, T_, nmo, RealType(0.0), TK2AiB.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y2.data(), nmo, pK2.data(), nel, RealType(0.0), K2XA.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2XA.data(), nmo, T_, nmo, RealType(0.0), TK2XA.data(), nmo);
+
+  BLAS::gemm('N', 'N', nmo, nmo, nel, const1 / (const0 * const0), T_, nmo, pK2.data(), nel, RealType(0.0), K2T.data(),
+             nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2T.data(), nmo, T_, nmo, RealType(0.0), TK2T.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, const0 / const1, K2T.data(), nmo, Y4.data(), nmo, RealType(0.0), MK2T.data(),
+             nmo);
+
+  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK3.data(), nel, RealType(0.0), K3T.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K3T.data(), nmo, T_, nmo, RealType(0.0), TK3T.data(), nmo);
+
+  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK5.data(), nel, RealType(0.0), K5T.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K5T.data(), nmo, T_, nmo, RealType(0.0), TK5T.data(), nmo);
+  // Accumulate dhpsioverpsi per rotation (i, j); the formula used depends on whether i and j are below nel.
+  for (int mu = 0, k = parameter_start_index; k < (parameter_start_index + parameters_size); k++, mu++)
+  {
+    int kk = this->myVars.where(k);
+    if (kk >= 0)
+    {
+      const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second);
+      if (i <= nel - 1 && j > nel - 1)
+      {
+        dhpsioverpsi[kk] +=
+            T(-0.5 * Y4(i, j) -
+              0.5 *
+                  (-K5T(i, j) + K5T(j, i) + TK5T(i, j) + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) - K2XA(i, j) +
+                   K2XA(j, i) + TK2XA(i, j) - MK2T(i, j) + K1T(i, j) - K1T(j, i) - TK1T(i, j) -
+                   const2 / const1 * K2T(i, j) + const2 / const1 * K2T(j, i) + const2 / const1 * TK2T(i, j) +
+                   K3T(i, j) - K3T(j, i) - TK3T(i, j) - K2T(i, j) + K2T(j, i) + TK2T(i, j)));
+      }
+      else if (i <= nel - 1 && j <= nel - 1)
+      {
+        dhpsioverpsi[kk] +=
+            T(-0.5 * (Y4(i, j) - Y4(j, i)) -
+              0.5 *
+                  (-K5T(i, j) + K5T(j, i) + TK5T(i, j) - TK5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) +
+                   TK2AiB(j, i) - K2XA(i, j) + K2XA(j, i) + TK2XA(i, j) - TK2XA(j, i) - MK2T(i, j) + MK2T(j, i) +
+                   K1T(i, j) - K1T(j, i) - TK1T(i, j) + TK1T(j, i) - const2 / const1 * K2T(i, j) +
+                   const2 / const1 * K2T(j, i) + const2 / const1 * TK2T(i, j) - const2 / const1 * TK2T(j, i) +
+                   K3T(i, j) - K3T(j, i) - TK3T(i, j) + TK3T(j, i) - K2T(i, j) + K2T(j, i) + TK2T(i, j) - TK2T(j, i)));
+      }
+      else
+      {
+        dhpsioverpsi[kk] += T(-0.5 *
+                              (-K5T(i, j) + K5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - K2XA(i, j) + K2XA(j, i)
+
+                               + K1T(i, j) - K1T(j, i) - const2 / const1 * K2T(i, j) + const2 / const1 * K2T(j, i) +
+                               K3T(i, j) - K3T(j, i) - K2T(i, j) + K2T(j, i)));
+      }
+    }
+  }
+}
+
+template<typename T>
+void RotatedSPOsT<T>::table_method_evalWF(Vector<T>& dlogpsi,
+                                          const size_t nel,
+                                          const size_t nmo,
+                                          const T& psiCurrent,
+                                          const std::vector<T>& Coeff,
+                                          const std::vector<size_t>& C2node_up,
+                                          const std::vector<size_t>& C2node_dn,
+                                          const ValueVector& detValues_up,
+                                          const ValueVector& detValues_dn,
+                                          const ValueMatrix& M_up,
+                                          const ValueMatrix& M_dn,
+                                          const ValueMatrix& Minv_up,
+                                          const ValueMatrix& Minv_dn,
+                                          const std::vector<int>& detData_up,
+                                          const std::vector<std::vector<int>>& lookup_tbl)
+{
+  ValueMatrix Table;
+  ValueMatrix Y5, Y6, Y7;
+  ValueMatrix pK4, K4T, TK4T;
+
+  Table.resize(nel, nmo);
+
+  Bbar.resize(nel, nmo);
+
+  pK4.resize(nmo, nel);
+  K4T.resize(nmo, nmo);
+  TK4T.resize(nel, nmo);
+
+  const int parameters_size(m_act_rot_inds.size());
+  const int parameter_start_index(0);
+
+  const size_t num_unique_up_dets(detValues_up.size());
+  const size_t num_unique_dn_dets(detValues_dn.size());
+
+  const T* restrict cptr = Coeff.data();
+  const size_t nc        = Coeff.size();
+  const size_t* restrict upC(C2node_up.data());
+  const size_t* restrict dnC(C2node_dn.data());
+
+  T* T_(Table.data());
+
+  const T* restrict A(M_up.data());
+  const T* restrict Ainv(Minv_up.data());
+  // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF
+  // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR
+  // THIS CASE
+  //  The T matrix should be calculated and stored for use
+  //  T = A^{-1} \widetilde A
+  // REMINDER: that the ValueMatrix "matrix" stores data in a row major order
+  // and that BLAS commands assume column major
+  BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), A, nmo, Ainv, nel, RealType(0.0), T_, nmo);
+
+  // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1}
+  // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow})
+  RealType const0(0.0), const1(0.0), const2(0.0);
+  for (size_t i = 0; i < nc; ++i)
+  {
+    const RealType c  = cptr[i];
+    const size_t up   = upC[i];
+    const size_t down = dnC[i];
+
+    const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]);
+  }
+
+  std::fill(pK4.begin(), pK4.end(), 0.0);
+
+  // Now we are going to loop through all unique determinants.
+  // The few lines above are for the reference matrix contribution.
+  // Although I start the loop below from index 0, the loop only performs
+  // actions when the index is >= 1. The detData object contains all the
+  // information about the P^T and Q matrices (projection matrices) needed in
+  // the table method.
+  const int* restrict data_it = detData_up.data();
+  for (int index = 0, datum = 0; index < num_unique_up_dets; index++)
+  {
+    const int k = data_it[datum];
+
+    if (k == 0)
+    {
+      datum += 3 * k + 1;
+    }
+
+    else
+    {
+      // Number of rows and cols of P^T
+      const int prows = k;
+      const int pcols = nel;
+      // Number of rows and cols of Q
+      const int qrows = nmo;
+      const int qcols = k;
+
+      Y5.resize(nel, k);
+      Y6.resize(k, k);
+
+      // Any matrix multiplication of P^T or Q is simply a projection.
+      // Explicit matrix multiplication can be avoided; instead column or
+      // row copying can be done with
+      //   BLAS::copy(size of col/row being copied,
+      //              Matrix pointer + place to begin copying,
+      //              storage spacing (number of elements btw next row/col element),
+      //              Pointer to resultant matrix + place to begin pasting,
+      //              storage spacing of resultant matrix)
+      // For example, the loop below is the matrix multiplication T*Q = Y5
+      std::fill(Y5.begin(), Y5.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo, Y5.data() + i, k);
+      }
+
+      std::fill(Y6.begin(), Y6.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, (Y6.data() + i * k), 1);
+      }
+
+      Vector<T> WS;
+      Vector<IndexType> Piv;
+      WS.resize(k);
+      Piv.resize(k);
+      std::complex<RealType> logdet = 0.0;
+      InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet);
+
+      Y7.resize(k, nel);
+
+      std::fill(Y7.begin(), Y7.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(k, Y6.data() + i, k, Y7.data() + (data_it[datum + 1 + i]), nel);
+      }
+
+      for (int p = 0; p < lookup_tbl[index].size(); p++)
+      {
+        // el_p is the element position that contains information about
+        // the CI coefficient, and det up/dn values associated with the
+        // current unique determinant
+        const int el_p(lookup_tbl[index][p]);
+        const RealType c  = cptr[el_p];
+        const size_t up   = upC[el_p];
+        const size_t down = dnC[el_p];
+
+        const RealType alpha_4(c * detValues_dn[down] * detValues_up[up] * (1 / psiCurrent));
+
+        for (int i = 0; i < k; i++)
+        {
+          BLAS::axpy(nel, alpha_4, Y7.data() + i * nel, 1, pK4.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+        }
+      }
+      datum += 3 * k + 1;
+    }
+  }
+
+  BLAS::gemm('N', 'N', nmo, nmo, nel, RealType(1.0), T_, nmo, pK4.data(), nel, RealType(0.0), K4T.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K4T.data(), nmo, T_, nmo, RealType(0.0), TK4T.data(), nmo);
+
+  for (int mu = 0, k = parameter_start_index; k < (parameter_start_index + parameters_size); k++, mu++)
+  {
+    int kk = this->myVars.where(k);
+    if (kk >= 0)
+    {
+      const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second);
+      if (i <= nel - 1 && j > nel - 1)
+      {
+        dlogpsi[kk] +=
+            T(detValues_up[0] * (Table(i, j)) * const0 * (1 / psiCurrent) + (K4T(i, j) - K4T(j, i) - TK4T(i, j)));
+      }
+      else if (i <= nel - 1 && j <= nel - 1)
+      {
+        dlogpsi[kk] += T(detValues_up[0] * (Table(i, j) - Table(j, i)) * const0 * (1 / psiCurrent) +
+                         (K4T(i, j) - TK4T(i, j) - K4T(j, i) + TK4T(j, i)));
+      }
+      else
+      {
+        dlogpsi[kk] += T((K4T(i, j) - K4T(j, i)));
+      }
+    }
+  }
+}
+
+template<typename T>
+std::unique_ptr<SPOSetT<T>> RotatedSPOsT<T>::makeClone() const
+{
+  // Clone the wrapped orbitals first, then mirror the rotation state onto the copy.
+  auto copy = std::make_unique<RotatedSPOsT>(SPOSetT<T>::getName(), Phi->makeClone());
+  copy->m_act_rot_inds  = m_act_rot_inds;
+  copy->m_full_rot_inds = m_full_rot_inds;
+  copy->params_supplied = params_supplied;
+  copy->params          = params;
+  copy->history_params_ = history_params_;
+  copy->use_global_rot_ = use_global_rot_;
+  copy->myVars          = this->myVars;
+  copy->myVarsFull      = myVarsFull;
+  return copy;
+}
+
+// Explicit instantiations of RotatedSPOsT for the double and float value types
+template class RotatedSPOsT<double>;
+template class RotatedSPOsT<float>;
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/RotatedSPOsT.h b/src/QMCWaveFunctions/RotatedSPOsT.h
new file mode 100644
index 0000000000..3273681455
--- /dev/null
+++ b/src/QMCWaveFunctions/RotatedSPOsT.h
@@ -0,0 +1,420 @@
+//////////////////////////////////////////////////////////////////////////////////////
+//// This file is distributed under the University of Illinois/NCSA Open Source License.
+//// See LICENSE file in top directory for details.
+////
+//// Copyright (c) QMCPACK developers.
+////
+//// File developed by: Sergio D. Pineda Flores, sergio_pinedaflores@berkeley.edu,
+////                      University of California, Berkeley
+////                    Eric Neuscamman, eneuscamman@berkeley.edu, University of California, Berkeley
+////                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+////
+//// File created by: Sergio D. Pineda Flores, sergio_pinedaflores@berkeley.edu,
+////                    University of California, Berkeley
+////////////////////////////////////////////////////////////////////////////////////////
+#ifndef QMCPLUSPLUS_ROTATEDSPOST_H
+#define QMCPLUSPLUS_ROTATEDSPOST_H
+
+#include "QMCWaveFunctions/SPOSetT.h"
+
+namespace qmcplusplus
+{
+template<typename T>
+class RotatedSPOsT;
+namespace testing // accessors declared as friends of RotatedSPOsT below
+{
+opt_variables_type& getMyVarsFull(RotatedSPOsT<double>& rot);
+opt_variables_type& getMyVarsFull(RotatedSPOsT<float>& rot);
+std::vector<std::vector<double>>& getHistoryParams(RotatedSPOsT<double>& rot);
+std::vector<std::vector<float>>& getHistoryParams(RotatedSPOsT<float>& rot);
+} // namespace testing
+
+template<class T>
+class RotatedSPOsT : public SPOSetT<T>, public OptimizableObject
+{
+public:
+  using IndexType    = typename SPOSetT<T>::IndexType;
+  using RealType     = typename SPOSetT<T>::RealType;
+  using FullRealType = typename SPOSetT<T>::FullRealType;
+  using ValueVector  = typename SPOSetT<T>::ValueVector;
+  using ValueMatrix  = typename SPOSetT<T>::ValueMatrix;
+  using GradVector   = typename SPOSetT<T>::GradVector;
+  using GradMatrix   = typename SPOSetT<T>::GradMatrix;
+  using HessVector   = typename SPOSetT<T>::HessVector;
+  using HessMatrix   = typename SPOSetT<T>::HessMatrix;
+  using GGGVector    = typename SPOSetT<T>::GGGVector;
+  using GGGMatrix    = typename SPOSetT<T>::GGGMatrix;
+
+  // constructor
+  RotatedSPOsT(const std::string& my_name, std::unique_ptr<SPOSetT<T>>&& spos);
+  // destructor
+  ~RotatedSPOsT() override;
+
+  std::string getClassName() const override { return "RotatedSPOsT"; }
+  bool isOptimizable() const override { return true; }
+  bool isOMPoffload() const override { return Phi->isOMPoffload(); }
+  bool hasIonDerivs() const override { return Phi->hasIonDerivs(); }
+
+  // Vector of rotation matrix indices, stored as (first, second) index pairs
+  using RotationIndices = std::vector<std::pair<int, int>>;
+
+  // Active orbital rotation parameter indices
+  RotationIndices m_act_rot_inds;
+
+  // Full set of rotation values for global rotation
+  RotationIndices m_full_rot_inds;
+
+  // Construct a list of the matrix indices for non-zero rotation parameters.
+  // (The structure for a sparse representation of the matrix)
+  // Only core->active rotations are created.
+  static void createRotationIndices(int nel, int nmo, RotationIndices& rot_indices);
+
+  // Construct a list for all the matrix indices, including core->active,
+  // core->core and active->active
+  static void createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices);
+
+  // Fill in antisymmetric matrix from the list of rotation parameter indices
+  // and a list of parameter values.
+  // This function assumes rot_mat is properly sized upon input and is set to
+  // zero.
+  static void constructAntiSymmetricMatrix(const RotationIndices& rot_indices,
+                                           const std::vector<RealType>& param,
+                                           ValueMatrix& rot_mat);
+
+  // Extract the list of rotation parameters from the entries in an
+  // antisymmetric matrix. This function expects rot_indices and param are the
+  // same length.
+  static void extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices,
+                                                   const ValueMatrix& rot_mat,
+                                                   std::vector<RealType>& param);
+
+  // function to perform orbital rotations
+  void apply_rotation(const std::vector<RealType>& param, bool use_stored_copy);
+
+  // For global rotation, inputs are the old parameters and the delta
+  // parameters. The corresponding rotation matrices are constructed,
+  // multiplied together, and the new parameters extracted. The new rotation
+  // is applied to the underlying SPO coefficients
+  void applyDeltaRotation(const std::vector<RealType>& delta_param,
+                          const std::vector<RealType>& old_param,
+                          std::vector<RealType>& new_param);
+
+  // Perform the construction of matrices and extraction of parameters for a
+  // delta rotation. Split out and made static for testing.
+  static void constructDeltaRotation(const std::vector<RealType>& delta_param,
+                                     const std::vector<RealType>& old_param,
+                                     const RotationIndices& act_rot_inds,
+                                     const RotationIndices& full_rot_inds,
+                                     std::vector<RealType>& new_param,
+                                     ValueMatrix& new_rot_mat);
+
+  // When initializing the rotation from VP files
+  // This function applies the rotation history
+  void applyRotationHistory();
+
+  // This function applies the global rotation (similar to apply_rotation, but
+  // for the full set of rotation parameters)
+  void applyFullRotation(const std::vector<RealType>& full_param, bool use_stored_copy);
+
+  // Compute matrix exponential of an antisymmetric matrix (result is rotation
+  // matrix)
+  static void exponentiate_antisym_matrix(ValueMatrix& mat);
+
+  // Compute matrix log of rotation matrix to produce antisymmetric matrix
+  static void log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output);
+
+  // A particular SPOSet used for Orbitals
+  std::unique_ptr<SPOSetT<T>> Phi;
+
+  /// Set the rotation parameters (usually from input file)
+  void setRotationParameters(const std::vector<RealType>& param_list);
+
+  /// the number of electrons of the majority spin
+  size_t nel_major_;
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const override;
+
+  // myG_temp (myL_temp) is the Gradient (Laplacian) value of the
+  // Determinant part of the wfn.
+  // myG_J is the Gradient of all the other parts of the wavefunction
+  // (typically just the Jastrow). It represents \frac{\nabla\psi_{J}}{\psi_{J}}
+  // myL_J will be used to represent \frac{\nabla^2\psi_{J}}{\psi_{J}}, the
+  // Laplacian portion.
+  // IMPORTANT NOTE: The value of P.L holds \nabla^2 ln[\psi] but we need
+  // \frac{\nabla^2 \psi}{\psi} and this is what myL_J will hold
+  ParticleSet::ParticleGradient myG_temp, myG_J;
+  ParticleSet::ParticleLaplacian myL_temp, myL_J;
+
+  ValueMatrix Bbar;
+  ValueMatrix psiM_inv;
+  ValueMatrix psiM_all;
+  GradMatrix dpsiM_all;
+  ValueMatrix d2psiM_all;
+
+  // Single Slater creation
+  void buildOptVariables(size_t nel);
+
+  // For the MSD case rotations must be created in MultiSlaterDetTableMethod
+  // class
+  void buildOptVariables(const RotationIndices& rotations, const RotationIndices& full_rotations);
+
+  void evaluateDerivatives(ParticleSet& P,
+                           const opt_variables_type& optvars,
+                           Vector<T>& dlogpsi,
+                           Vector<T>& dhpsioverpsi,
+                           const int& FirstIndex,
+                           const int& LastIndex) override;
+
+  void evaluateDerivativesWF(ParticleSet& P,
+                             const opt_variables_type& optvars,
+                             Vector<T>& dlogpsi,
+                             int FirstIndex,
+                             int LastIndex) override;
+
+  void evaluateDerivatives(ParticleSet& P,
+                           const opt_variables_type& optvars,
+                           Vector<T>& dlogpsi,
+                           Vector<T>& dhpsioverpsi,
+                           const T& psiCurrent,
+                           const std::vector<T>& Coeff,
+                           const std::vector<size_t>& C2node_up,
+                           const std::vector<size_t>& C2node_dn,
+                           const ValueVector& detValues_up,
+                           const ValueVector& detValues_dn,
+                           const GradMatrix& grads_up,
+                           const GradMatrix& grads_dn,
+                           const ValueMatrix& lapls_up,
+                           const ValueMatrix& lapls_dn,
+                           const ValueMatrix& M_up,
+                           const ValueMatrix& M_dn,
+                           const ValueMatrix& Minv_up,
+                           const ValueMatrix& Minv_dn,
+                           const GradMatrix& B_grad,
+                           const ValueMatrix& B_lapl,
+                           const std::vector<int>& detData_up,
+                           const size_t N1,
+                           const size_t N2,
+                           const size_t NP1,
+                           const size_t NP2,
+                           const std::vector<std::vector<int>>& lookup_tbl) override;
+
+  void evaluateDerivativesWF(ParticleSet& P,
+                             const opt_variables_type& optvars,
+                             Vector<T>& dlogpsi,
+                             const FullRealType& psiCurrent,
+                             const std::vector<T>& Coeff,
+                             const std::vector<size_t>& C2node_up,
+                             const std::vector<size_t>& C2node_dn,
+                             const ValueVector& detValues_up,
+                             const ValueVector& detValues_dn,
+                             const ValueMatrix& M_up,
+                             const ValueMatrix& M_dn,
+                             const ValueMatrix& Minv_up,
+                             const ValueMatrix& Minv_dn,
+                             const std::vector<int>& detData_up,
+                             const std::vector<std::vector<int>>& lookup_tbl) override;
+
+  // helper function to evaluateDerivatives; evaluate orbital rotation
+  // parameter derivative using table method
+  void table_method_eval(Vector<T>& dlogpsi,
+                         Vector<T>& dhpsioverpsi,
+                         const ParticleSet::ParticleLaplacian& myL_J,
+                         const ParticleSet::ParticleGradient& myG_J,
+                         const size_t nel,
+                         const size_t nmo,
+                         const T& psiCurrent,
+                         const std::vector<T>& Coeff,
+                         const std::vector<size_t>& C2node_up,
+                         const std::vector<size_t>& C2node_dn,
+                         const ValueVector& detValues_up,
+                         const ValueVector& detValues_dn,
+                         const GradMatrix& grads_up,
+                         const GradMatrix& grads_dn,
+                         const ValueMatrix& lapls_up,
+                         const ValueMatrix& lapls_dn,
+                         const ValueMatrix& M_up,
+                         const ValueMatrix& M_dn,
+                         const ValueMatrix& Minv_up,
+                         const ValueMatrix& Minv_dn,
+                         const GradMatrix& B_grad,
+                         const ValueMatrix& B_lapl,
+                         const std::vector<int>& detData_up,
+                         const size_t N1,
+                         const size_t N2,
+                         const size_t NP1,
+                         const size_t NP2,
+                         const std::vector<std::vector<int>>& lookup_tbl);
+
+  void table_method_evalWF(Vector<T>& dlogpsi,
+                           const size_t nel,
+                           const size_t nmo,
+                           const T& psiCurrent,
+                           const std::vector<T>& Coeff,
+                           const std::vector<size_t>& C2node_up,
+                           const std::vector<size_t>& C2node_dn,
+                           const ValueVector& detValues_up,
+                           const ValueVector& detValues_dn,
+                           const ValueMatrix& M_up,
+                           const ValueMatrix& M_dn,
+                           const ValueMatrix& Minv_up,
+                           const ValueMatrix& Minv_dn,
+                           const std::vector<int>& detData_up,
+                           const std::vector<std::vector<int>>& lookup_tbl);
+
+  void extractOptimizableObjectRefs(UniqueOptObjRefs& opt_obj_refs) override { opt_obj_refs.push_back(*this); }
+
+  void checkInVariablesExclusive(opt_variables_type& active) override
+  {
+    if (this->myVars.size())
+      active.insertFrom(this->myVars);
+  }
+
+  void checkOutVariables(const opt_variables_type& active) override { this->myVars.getIndex(active); }
+
+  /// reset
+  void resetParametersExclusive(const opt_variables_type& active) override;
+
+  void writeVariationalParameters(hdf_archive& hout) override;
+
+  void readVariationalParameters(hdf_archive& hin) override;
+
+  //*********************************************************************************
+  // the following functions simply call Phi's corresponding functions
+  void setOrbitalSetSize(int norbs) override { Phi->setOrbitalSetSize(norbs); }
+
+  void checkObject() const override { Phi->checkObject(); }
+
+  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override
+  {
+    assert(psi.size() <= this->OrbitalSetSize);
+    Phi->evaluateValue(P, iat, psi);
+  }
+
+  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override
+  {
+    assert(psi.size() <= this->OrbitalSetSize);
+    Phi->evaluateVGL(P, iat, psi, dpsi, d2psi);
+  }
+
+  void evaluateDetRatios(const VirtualParticleSet& VP,
+                         ValueVector& psi,
+                         const ValueVector& psiinv,
+                         std::vector<T>& ratios) override
+  {
+    Phi->evaluateDetRatios(VP, psi, psiinv, ratios);
+  }
+
+  void evaluateDerivRatios(const VirtualParticleSet& VP,
+                           const opt_variables_type& optvars,
+                           ValueVector& psi,
+                           const ValueVector& psiinv,
+                           std::vector<T>& ratios,
+                           Matrix<T>& dratios,
+                           int FirstIndex,
+                           int LastIndex) override;
+
+  void evaluateVGH(const ParticleSet& P,
+                   int iat,
+                   ValueVector& psi,
+                   GradVector& dpsi,
+                   HessVector& grad_grad_psi) override
+  {
+    assert(psi.size() <= this->OrbitalSetSize);
+    Phi->evaluateVGH(P, iat, psi, dpsi, grad_grad_psi);
+  }
+
+  void evaluateVGHGH(const ParticleSet& P,
+                     int iat,
+                     ValueVector& psi,
+                     GradVector& dpsi,
+                     HessVector& grad_grad_psi,
+                     GGGVector& grad_grad_grad_psi) override
+  {
+    Phi->evaluateVGHGH(P, iat, psi, dpsi, grad_grad_psi, grad_grad_grad_psi);
+  }
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override
+  {
+    Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet);
+  }
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& grad_grad_logdet) override
+  {
+    Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, grad_grad_logdet);
+  }
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& grad_grad_logdet,
+                            GGGMatrix& grad_grad_grad_logdet) override
+  {
+    Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet);
+  }
+
+  void evaluateGradSource(const ParticleSet& P,
+                          int first,
+                          int last,
+                          const ParticleSet& source,
+                          int iat_src,
+                          GradMatrix& grad_phi) override
+  {
+    Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi);
+  }
+
+  void evaluateGradSource(const ParticleSet& P,
+                          int first,
+                          int last,
+                          const ParticleSet& source,
+                          int iat_src,
+                          GradMatrix& grad_phi,
+                          HessMatrix& grad_grad_phi,
+                          GradMatrix& grad_lapl_phi) override
+  {
+    Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi, grad_grad_phi, grad_lapl_phi);
+  }
+
+  //  void evaluateThirdDeriv(const ParticleSet& P, int first, int last,
+  //  GGGMatrix& grad_grad_grad_logdet) {Phi->evaluateThirdDeriv(P, first,
+  //  last, grad_grad_grad_logdet); }
+
+  /// Use history list (false) or global rotation (true)
+  void set_use_global_rotation(bool use_global_rotation) { use_global_rot_ = use_global_rotation; }
+
+private:
+  /// true if SPO parameters (orbital rotation parameters) have been supplied
+  /// by input
+  bool params_supplied;
+  /// list of supplied orbital rotation parameters
+  std::vector<RealType> params;
+
+  /// Full set of rotation matrix parameters for use in global rotation method
+  opt_variables_type myVarsFull;
+
+  /// List of previously applied parameters
+  std::vector<std::vector<RealType>> history_params_;
+
+  /// Use global rotation or history list
+  bool use_global_rot_ = true;
+
+  friend opt_variables_type& testing::getMyVarsFull(RotatedSPOsT<double>& rot);
+  friend opt_variables_type& testing::getMyVarsFull(RotatedSPOsT<float>& rot);
+  friend std::vector<std::vector<double>>& testing::getHistoryParams(RotatedSPOsT<double>& rot);
+  friend std::vector<std::vector<float>>& testing::getHistoryParams(RotatedSPOsT<float>& rot);
+};
+
+} // namespace qmcplusplus
+
+#endif
diff --git a/src/QMCWaveFunctions/SPOSetT.cpp b/src/QMCWaveFunctions/SPOSetT.cpp
index e61f4cace1..34c76bad82 100644
--- a/src/QMCWaveFunctions/SPOSetT.cpp
+++ b/src/QMCWaveFunctions/SPOSetT.cpp
@@ -134,7 +134,7 @@ void SPOSetT<T>::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSet
                                                 std::vector<GradType>& grads) const
 {
   assert(this == &spo_list.getLeader());
-  assert(phi_vgl_v.size(0) == DIM_VGL);
+  assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL);
   assert(phi_vgl_v.size(1) == spo_list.size());
   const size_t nw             = spo_list.size();
   const size_t norb_requested = phi_vgl_v.size(2);
@@ -360,7 +360,7 @@ template<class T>
 void SPOSetT<T>::evaluateDerivativesWF(ParticleSet& P,
                                        const opt_variables_type& optvars,
                                        Vector<T>& dlogpsi,
-                                       const T& psiCurrent,
+                                       const typename QTFull::ValueType& psiCurrent,
                                        const std::vector<T>& Coeff,
                                        const std::vector<size_t>& C2node_up,
                                        const std::vector<size_t>& C2node_dn,
@@ -435,4 +435,4 @@ template class SPOSetT<float>;
 template class SPOSetT<std::complex<double>>;
 template class SPOSetT<std::complex<float>>;
 
-} // namespace qmcplusplus
\ No newline at end of file
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/SPOSetT.h b/src/QMCWaveFunctions/SPOSetT.h
index 95643fd3c5..ddc14c6593 100644
--- a/src/QMCWaveFunctions/SPOSetT.h
+++ b/src/QMCWaveFunctions/SPOSetT.h
@@ -65,6 +65,10 @@ class SPOSetT : public QMCTraits
   using SPOMap            = std::map<std::string, const std::unique_ptr<const SPOSetT<T>>>;
   using OffloadMWVGLArray = Array<T, 3, OffloadPinnedAllocator<T>>; // [VGL, walker, Orbs]
   using OffloadMWVArray   = Array<T, 2, OffloadPinnedAllocator<T>>; // [walker, Orbs]
+  using PosType           = typename OrbitalSetTraits<T>::PosType;
+  using RealType          = typename OrbitalSetTraits<T>::RealType;
+  using ValueType         = typename OrbitalSetTraits<T>::ValueType;
+  using FullRealType      = typename OrbitalSetTraits<double>::RealType;
   template<typename DT>
   using OffloadMatrix = Matrix<DT, OffloadPinnedAllocator<DT>>;
 
@@ -176,7 +180,7 @@ class SPOSetT : public QMCTraits
   virtual void evaluateDerivativesWF(ParticleSet& P,
                                      const opt_variables_type& optvars,
                                      Vector<T>& dlogpsi,
-                                     const T& psiCurrent,
+                                     const typename QTFull::ValueType& psiCurrent,
                                      const std::vector<T>& Coeff,
                                      const std::vector<size_t>& C2node_up,
                                      const std::vector<size_t>& C2node_dn,
diff --git a/src/QMCWaveFunctions/SpinorSetT.cpp b/src/QMCWaveFunctions/SpinorSetT.cpp
new file mode 100644
index 0000000000..64d7d3d6b1
--- /dev/null
+++ b/src/QMCWaveFunctions/SpinorSetT.cpp
@@ -0,0 +1,586 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2022 QMCPACK developers
+//
+// File developed by: Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories
+//                    Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories
+//
+// File created by:  Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "SpinorSetT.h"
+#include "Utilities/ResourceCollection.h"
+#include "Platforms/OMPTarget/OMPTargetMath.hpp"
+
+namespace qmcplusplus
+{
+template<class T>
+struct SpinorSetT<T>::SpinorSetMultiWalkerResource : public Resource
+{ // Scratch buffers that the multi-walker spin evaluation reaches through the leader's mw_res_handle_.
+  SpinorSetMultiWalkerResource() : Resource("SpinorSet") {}
+  SpinorSetMultiWalkerResource(const SpinorSetMultiWalkerResource&) : SpinorSetMultiWalkerResource() {} // copy delegates to the default ctor: buffers are not copied
+  std::unique_ptr<Resource> makeClone() const override { return std::make_unique<SpinorSetMultiWalkerResource>(*this); }
+  OffloadMWVGLArray up_phi_vgl_v, dn_phi_vgl_v; // per-channel [VGL, walker, orbital] arrays
+  std::vector<T> up_ratios, dn_ratios;          // per-walker ratios of each channel
+  std::vector<GradType> up_grads, dn_grads;     // per-walker gradients of each channel
+  std::vector<RealType> spins;                  // per-walker spin of the active particle
+};
+
+template<class T>
+SpinorSetT<T>::SpinorSetT(const std::string& my_name) : SPOSetT<T>(my_name) // spo_up / spo_dn start out null
+{}
+
+template<class T>
+SpinorSetT<T>::~SpinorSetT() = default; // defined here, where SpinorSetMultiWalkerResource is a complete type
+
+template<class T>
+void SpinorSetT<T>::set_spos(std::unique_ptr<SPOSetT<T>>&& up, std::unique_ptr<SPOSetT<T>>&& dn)
+{
+  // Sanity check for the input SPOs: both spin channels must hold the same number of orbitals.
+  const IndexType norb_up = up->getOrbitalSetSize();
+  const IndexType norb_dn = dn->getOrbitalSetSize();
+
+  if (norb_up != norb_dn)
+    throw std::runtime_error("SpinorSet::set_spos(...):  up and down SPO components have different sizes.");
+
+  // Adopt the common size and take ownership of both channels.
+  setOrbitalSetSize(norb_up);
+  spo_up = std::move(up);
+  spo_dn = std::move(dn);
+
+  // Size the per-particle scratch vectors (value, gradient, laplacian) of each channel.
+  const auto norb = this->OrbitalSetSize;
+  psi_work_up.resize(norb);
+  psi_work_down.resize(norb);
+  dpsi_work_up.resize(norb);
+  dpsi_work_down.resize(norb);
+  d2psi_work_up.resize(norb);
+  d2psi_work_down.resize(norb);
+}
+
+template<class T>
+void SpinorSetT<T>::setOrbitalSetSize(int norbs)
+{
+  this->OrbitalSetSize = norbs; // only records the count; the work vectors are resized in set_spos()
+}
+
+template<class T>
+void SpinorSetT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+{
+  // Start from clean scratch buffers, then evaluate each spin channel at particle iat.
+  psi_work_up   = 0.0;
+  psi_work_down = 0.0;
+  spo_up->evaluateValue(P, iat, psi_work_up);
+  spo_dn->evaluateValue(P, iat, psi_work_down);
+
+  // Spin coordinate of the active particle and its cosine / sine.
+  const ParticleSet::Scalar_t spin = P.activeSpin(iat);
+  const RealType cos_s             = std::cos(spin);
+  const RealType sin_s             = std::sin(spin);
+
+  // Spin phase factors e^{+is} and e^{-is}.
+  //This is only supported in the complex build, so T is some complex number depending on the precision.
+  const T phase_up(cos_s, sin_s);
+  const T phase_dn(cos_s, -sin_s);
+
+  // Spinor value: e^{is} * phi_up + e^{-is} * phi_dn.
+  psi = phase_up * psi_work_up + phase_dn * psi_work_down;
+}
+
+template<class T>
+void SpinorSetT<T>::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+{
+  // Reset the per-channel scratch for values, gradients and laplacians.
+  psi_work_up     = 0.0;
+  psi_work_down   = 0.0;
+  dpsi_work_up    = 0.0;
+  dpsi_work_down  = 0.0;
+  d2psi_work_up   = 0.0;
+  d2psi_work_down = 0.0;
+
+  // Evaluate the up and down spatial orbitals at particle iat.
+  spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up);
+  spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down);
+
+  // Spin phases e^{+is} and e^{-is} for the active particle's spin s.
+  const ParticleSet::Scalar_t spin = P.activeSpin(iat);
+  const RealType cos_s             = std::cos(spin);
+  const RealType sin_s             = std::sin(spin);
+  const T phase_up(cos_s, sin_s);
+  const T phase_dn(cos_s, -sin_s);
+
+  // The same linear combination applies to value, gradient and laplacian.
+  psi   = phase_up * psi_work_up + phase_dn * psi_work_down;
+  dpsi  = phase_up * dpsi_work_up + phase_dn * dpsi_work_down;
+  d2psi = phase_up * d2psi_work_up + phase_dn * d2psi_work_down;
+}
+
+template<class T>
+void SpinorSetT<T>::evaluateVGL_spin(const ParticleSet& P,
+                                     int iat,
+                                     ValueVector& psi,
+                                     GradVector& dpsi,
+                                     ValueVector& d2psi,
+                                     ValueVector& dspin)
+{
+  // Reset the per-channel scratch for values, gradients and laplacians.
+  psi_work_up     = 0.0;
+  psi_work_down   = 0.0;
+  dpsi_work_up    = 0.0;
+  dpsi_work_down  = 0.0;
+  d2psi_work_up   = 0.0;
+  d2psi_work_down = 0.0;
+
+  // Evaluate the up and down spatial orbitals at particle iat.
+  spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up);
+  spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down);
+
+  // Spin phases e^{+is}, e^{-is} and the imaginary unit used by the spin derivative.
+  const ParticleSet::Scalar_t spin = P.activeSpin(iat);
+  const RealType cos_s             = std::cos(spin);
+  const RealType sin_s             = std::sin(spin);
+  const T phase_up(cos_s, sin_s);
+  const T phase_dn(cos_s, -sin_s);
+  const T imag_unit(0, 1.0);
+
+  // Value, gradient and laplacian share one combination; dspin is i * (e^{is} phi_up - e^{-is} phi_dn).
+  psi   = phase_up * psi_work_up + phase_dn * psi_work_down;
+  dpsi  = phase_up * dpsi_work_up + phase_dn * dpsi_work_down;
+  d2psi = phase_up * d2psi_work_up + phase_dn * d2psi_work_down;
+  dspin = imag_unit * (phase_up * psi_work_up - phase_dn * psi_work_down);
+}
+
+template<class T>
+void SpinorSetT<T>::mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                           const RefVectorWithLeader<ParticleSet>& P_list,
+                                           int iat,
+                                           const RefVector<ValueVector>& psi_v_list,
+                                           const RefVector<GradVector>& dpsi_v_list,
+                                           const RefVector<ValueVector>& d2psi_v_list,
+                                           OffloadMatrix<QMCTraits::ComplexType>& mw_dspin) const
+{
+  // The leader is only inspected by the assert, so mark it to stay warning-free when asserts are compiled out.
+  [[maybe_unused]] auto& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
+  assert(this == &spo_leader);
+
+  IndexType nw                    = spo_list.size();
+  auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
+  auto& up_spo_leader             = up_spo_list.getLeader();
+  auto& dn_spo_leader             = dn_spo_list.getLeader();
+  // Each walker's own per-channel scratch vectors are the output targets of the component SPO sets.
+  RefVector<ValueVector> up_psi_v_list, dn_psi_v_list;
+  RefVector<GradVector> up_dpsi_v_list, dn_dpsi_v_list;
+  RefVector<ValueVector> up_d2psi_v_list, dn_d2psi_v_list;
+  for (int iw = 0; iw < nw; iw++)
+  {
+    auto& spo = spo_list.template getCastedElement<SpinorSetT<T>>(iw);
+    up_psi_v_list.push_back(spo.psi_work_up);
+    dn_psi_v_list.push_back(spo.psi_work_down);
+    up_dpsi_v_list.push_back(spo.dpsi_work_up);
+    dn_dpsi_v_list.push_back(spo.dpsi_work_down);
+    up_d2psi_v_list.push_back(spo.d2psi_work_up);
+    dn_d2psi_v_list.push_back(spo.d2psi_work_down);
+  }
+
+  up_spo_leader.mw_evaluateVGL(up_spo_list, P_list, iat, up_psi_v_list, up_dpsi_v_list, up_d2psi_v_list);
+  dn_spo_leader.mw_evaluateVGL(dn_spo_list, P_list, iat, dn_psi_v_list, dn_dpsi_v_list, dn_d2psi_v_list);
+  // Combine the channels per walker with the spin phases e^{+is} and e^{-is} of particle iat.
+  for (int iw = 0; iw < nw; iw++)
+  {
+    ParticleSet::Scalar_t s = P_list[iw].activeSpin(iat);
+    RealType coss           = std::cos(s);
+    RealType sins           = std::sin(s);
+
+    T eis(coss, sins);
+    T emis(coss, -sins);
+    T eye(0, 1.0);
+
+    psi_v_list[iw].get()   = eis * up_psi_v_list[iw].get() + emis * dn_psi_v_list[iw].get();
+    dpsi_v_list[iw].get()  = eis * up_dpsi_v_list[iw].get() + emis * dn_dpsi_v_list[iw].get();
+    d2psi_v_list[iw].get() = eis * up_d2psi_v_list[iw].get() + emis * dn_d2psi_v_list[iw].get();
+    for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++)
+      mw_dspin(iw, iorb) = eye * (eis * (up_psi_v_list[iw].get())[iorb] - emis * (dn_psi_v_list[iw].get())[iorb]);
+  }
+  //Data above is all on host, but since mw_dspin is an OffloadMatrix we need to sync the host and device
+  mw_dspin.updateTo();
+}
+
+template<class T>
+void SpinorSetT<T>::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                                           const RefVectorWithLeader<ParticleSet>& P_list,
+                                                           int iat,
+                                                           const std::vector<const T*>& invRow_ptr_list,
+                                                           OffloadMWVGLArray& phi_vgl_v,
+                                                           std::vector<T>& ratios,
+                                                           std::vector<GradType>& grads,
+                                                           std::vector<T>& spingrads) const
+{
+  auto& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
+  assert(this == &spo_leader);
+  // DIM_VGL is a member of the (dependent) base hierarchy, so it has to be qualified inside this template.
+  assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL);
+  assert(phi_vgl_v.size(1) == spo_list.size());
+  const size_t nw             = spo_list.size();
+  const size_t norb_requested = phi_vgl_v.size(2);
+  // Per-channel scratch lives in the multi-walker resource held by the leader.
+  auto& mw_res       = spo_leader.mw_res_handle_.getResource();
+  auto& up_phi_vgl_v = mw_res.up_phi_vgl_v;
+  auto& dn_phi_vgl_v = mw_res.dn_phi_vgl_v;
+  auto& up_ratios    = mw_res.up_ratios;
+  auto& dn_ratios    = mw_res.dn_ratios;
+  auto& up_grads     = mw_res.up_grads;
+  auto& dn_grads     = mw_res.dn_grads;
+  auto& spins        = mw_res.spins;
+
+  up_phi_vgl_v.resize(QMCTraits::DIM_VGL, nw, norb_requested);
+  dn_phi_vgl_v.resize(QMCTraits::DIM_VGL, nw, norb_requested);
+  up_ratios.resize(nw);
+  dn_ratios.resize(nw);
+  up_grads.resize(nw);
+  dn_grads.resize(nw);
+  spins.resize(nw);
+  // Evaluate both spin channels for the whole walker batch.
+  auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
+  auto& up_spo_leader             = up_spo_list.getLeader();
+  auto& dn_spo_leader             = dn_spo_list.getLeader();
+
+  up_spo_leader.mw_evaluateVGLandDetRatioGrads(up_spo_list, P_list, iat, invRow_ptr_list, up_phi_vgl_v, up_ratios,
+                                               up_grads);
+  dn_spo_leader.mw_evaluateVGLandDetRatioGrads(dn_spo_list, P_list, iat, invRow_ptr_list, dn_phi_vgl_v, dn_ratios,
+                                               dn_grads);
+  for (size_t iw = 0; iw < nw; iw++)
+  {
+    ParticleSet::Scalar_t s = P_list[iw].activeSpin(iat);
+    spins[iw]               = s;
+    RealType coss           = std::cos(s);
+    RealType sins           = std::sin(s);
+
+    T eis(coss, sins);
+    T emis(coss, -sins);
+    T eye(0, 1.0);
+    // Channel gradients are weighted by their ratios and renormalized by the combined ratio.
+    ratios[iw]    = eis * up_ratios[iw] + emis * dn_ratios[iw];
+    grads[iw]     = (eis * up_grads[iw] * up_ratios[iw] + emis * dn_grads[iw] * dn_ratios[iw]) / ratios[iw];
+    spingrads[iw] = eye * (eis * up_ratios[iw] - emis * dn_ratios[iw]) / ratios[iw];
+  }
+
+  auto* spins_ptr = spins.data();
+  //This data lives on the device
+  auto* phi_vgl_ptr    = phi_vgl_v.data();
+  auto* up_phi_vgl_ptr = up_phi_vgl_v.data();
+  auto* dn_phi_vgl_ptr = dn_phi_vgl_v.data();
+  PRAGMA_OFFLOAD("omp target teams distribute map(to:spins_ptr[0:nw])")
+  for (int iw = 0; iw < nw; iw++)
+  {
+    RealType c, s;
+    omptarget::sincos(spins_ptr[iw], &s, &c);
+    T eis(c, s), emis(c, -s);
+    PRAGMA_OFFLOAD("omp parallel for collapse(2)")
+    for (int idim = 0; idim < QMCTraits::DIM_VGL; idim++)
+      for (int iorb = 0; iorb < norb_requested; iorb++)
+      {
+        auto offset         = idim * nw * norb_requested + iw * norb_requested + iorb;
+        phi_vgl_ptr[offset] = eis * up_phi_vgl_ptr[offset] + emis * dn_phi_vgl_ptr[offset];
+      }
+  }
+}
+
+template<class T>
+void SpinorSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                         int first,
+                                         int last,
+                                         ValueMatrix& logdet,
+                                         GradMatrix& dlogdet,
+                                         ValueMatrix& d2logdet)
+{
+  // Per-channel scratch matrices are shaped [number of particles] x [number of orbitals].
+  const IndexType num_ptcl = P.getTotalNum();
+  const auto num_orb       = this->OrbitalSetSize;
+
+  logpsi_work_up.resize(num_ptcl, num_orb);
+  logpsi_work_down.resize(num_ptcl, num_orb);
+  dlogpsi_work_up.resize(num_ptcl, num_orb);
+  dlogpsi_work_down.resize(num_ptcl, num_orb);
+  d2logpsi_work_up.resize(num_ptcl, num_orb);
+  d2logpsi_work_down.resize(num_ptcl, num_orb);
+
+  // Evaluate the up and down spatial orbitals, forwarding the [first, last) particle range.
+  spo_up->evaluate_notranspose(P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up);
+  spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down, dlogpsi_work_down, d2logpsi_work_down);
+
+  // NOTE(review): the combination below walks every particle row [0, num_ptcl),
+  // not only [first, last) - confirm this is intended.
+  for (int ip = 0; ip < num_ptcl; ip++)
+  {
+    // Spin phases e^{+is} and e^{-is} for particle ip.
+    const ParticleSet::Scalar_t spin = P.activeSpin(ip);
+    const RealType cos_s             = std::cos(spin);
+    const RealType sin_s             = std::sin(spin);
+    const T phase_up(cos_s, sin_s);
+    const T phase_dn(cos_s, -sin_s);
+
+    // Same linear combination for value, gradient and laplacian of every orbital.
+    for (int iorb = 0; iorb < num_orb; iorb++)
+    {
+      logdet(ip, iorb)   = phase_up * logpsi_work_up(ip, iorb) + phase_dn * logpsi_work_down(ip, iorb);
+      dlogdet(ip, iorb)  = phase_up * dlogpsi_work_up(ip, iorb) + phase_dn * dlogpsi_work_down(ip, iorb);
+      d2logdet(ip, iorb) = phase_up * d2logpsi_work_up(ip, iorb) + phase_dn * d2logpsi_work_down(ip, iorb);
+    }
+  }
+}
+
+template<class T>
+void SpinorSetT<T>::mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                            const RefVectorWithLeader<ParticleSet>& P_list,
+                                            int first,
+                                            int last,
+                                            const RefVector<ValueMatrix>& logdet_list,
+                                            const RefVector<GradMatrix>& dlogdet_list,
+                                            const RefVector<ValueMatrix>& d2logdet_list) const
+{
+  auto& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
+  auto& P_leader   = P_list.getLeader();
+  assert(this == &spo_leader);
+
+  IndexType nw    = spo_list.size();
+  IndexType nelec = P_leader.getTotalNum();
+  // Per-channel views of the walker batch.
+  auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
+  auto& up_spo_leader             = up_spo_list.getLeader();
+  auto& dn_spo_leader             = dn_spo_list.getLeader();
+  // Owning temporaries for each channel's results, one matrix per walker.
+  std::vector<ValueMatrix> mw_up_logdet, mw_dn_logdet;
+  std::vector<GradMatrix> mw_up_dlogdet, mw_dn_dlogdet;
+  std::vector<ValueMatrix> mw_up_d2logdet, mw_dn_d2logdet;
+  mw_up_logdet.reserve(nw);
+  mw_dn_logdet.reserve(nw);
+  mw_up_dlogdet.reserve(nw);
+  mw_dn_dlogdet.reserve(nw);
+  mw_up_d2logdet.reserve(nw);
+  mw_dn_d2logdet.reserve(nw);
+  // Reference lists handed to the component SPO sets; they refer to the temporaries above.
+  RefVector<ValueMatrix> up_logdet_list, dn_logdet_list;
+  RefVector<GradMatrix> up_dlogdet_list, dn_dlogdet_list;
+  RefVector<ValueMatrix> up_d2logdet_list, dn_d2logdet_list;
+  up_logdet_list.reserve(nw);
+  dn_logdet_list.reserve(nw);
+  up_dlogdet_list.reserve(nw);
+  dn_dlogdet_list.reserve(nw);
+  up_d2logdet_list.reserve(nw);
+  dn_d2logdet_list.reserve(nw);
+  // The reserve() calls above keep the owning vectors from reallocating, so references taken via back() stay valid.
+  ValueMatrix tmp_val_mat(nelec, this->OrbitalSetSize);
+  GradMatrix tmp_grad_mat(nelec, this->OrbitalSetSize);
+  for (int iw = 0; iw < nw; iw++)
+  {
+    mw_up_logdet.emplace_back(tmp_val_mat);
+    up_logdet_list.emplace_back(mw_up_logdet.back());
+    mw_dn_logdet.emplace_back(tmp_val_mat);
+    dn_logdet_list.emplace_back(mw_dn_logdet.back());
+
+    mw_up_dlogdet.emplace_back(tmp_grad_mat);
+    up_dlogdet_list.emplace_back(mw_up_dlogdet.back());
+    mw_dn_dlogdet.emplace_back(tmp_grad_mat);
+    dn_dlogdet_list.emplace_back(mw_dn_dlogdet.back());
+
+    mw_up_d2logdet.emplace_back(tmp_val_mat);
+    up_d2logdet_list.emplace_back(mw_up_d2logdet.back());
+    mw_dn_d2logdet.emplace_back(tmp_val_mat);
+    dn_d2logdet_list.emplace_back(mw_dn_d2logdet.back());
+  }
+  // Evaluate both channels for the whole batch.
+  up_spo_leader.mw_evaluate_notranspose(up_spo_list, P_list, first, last, up_logdet_list, up_dlogdet_list,
+                                        up_d2logdet_list);
+  dn_spo_leader.mw_evaluate_notranspose(dn_spo_list, P_list, first, last, dn_logdet_list, dn_dlogdet_list,
+                                        dn_d2logdet_list);
+  // Combine the channels with the spin phases e^{+is}, e^{-is} of every particle of every walker.
+  for (int iw = 0; iw < nw; iw++)
+    for (int iat = 0; iat < nelec; iat++)
+    {
+      ParticleSet::Scalar_t s = P_list[iw].activeSpin(iat);
+      RealType coss           = std::cos(s);
+      RealType sins           = std::sin(s);
+      T eis(coss, sins);
+      T emis(coss, -sins);
+
+      for (int no = 0; no < this->OrbitalSetSize; no++)
+      {
+        logdet_list[iw].get()(iat, no) =
+            eis * up_logdet_list[iw].get()(iat, no) + emis * dn_logdet_list[iw].get()(iat, no);
+        dlogdet_list[iw].get()(iat, no) =
+            eis * up_dlogdet_list[iw].get()(iat, no) + emis * dn_dlogdet_list[iw].get()(iat, no);
+        d2logdet_list[iw].get()(iat, no) =
+            eis * up_d2logdet_list[iw].get()(iat, no) + emis * dn_d2logdet_list[iw].get()(iat, no);
+      }
+    }
+}
+
+template<class T>
+void SpinorSetT<T>::evaluate_notranspose_spin(const ParticleSet& P,
+                                              int first,
+                                              int last,
+                                              ValueMatrix& logdet,
+                                              GradMatrix& dlogdet,
+                                              ValueMatrix& d2logdet,
+                                              ValueMatrix& dspinlogdet)
+{
+  // Per-channel scratch matrices are shaped [number of particles] x [number of orbitals].
+  const IndexType num_ptcl = P.getTotalNum();
+  const auto num_orb       = this->OrbitalSetSize;
+
+  logpsi_work_up.resize(num_ptcl, num_orb);
+  logpsi_work_down.resize(num_ptcl, num_orb);
+  dlogpsi_work_up.resize(num_ptcl, num_orb);
+  dlogpsi_work_down.resize(num_ptcl, num_orb);
+  d2logpsi_work_up.resize(num_ptcl, num_orb);
+  d2logpsi_work_down.resize(num_ptcl, num_orb);
+
+  // Evaluate the up and down spatial orbitals, forwarding the [first, last) particle range.
+  spo_up->evaluate_notranspose(P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up);
+  spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down, dlogpsi_work_down, d2logpsi_work_down);
+
+  // NOTE(review): the combination below walks every particle row [0, num_ptcl),
+  // not only [first, last) - confirm this is intended.
+  for (int ip = 0; ip < num_ptcl; ip++)
+  {
+    // Spin phases e^{+is}, e^{-is} for particle ip, plus the imaginary unit for the spin derivative.
+    const ParticleSet::Scalar_t spin = P.activeSpin(ip);
+    const RealType cos_s             = std::cos(spin);
+    const RealType sin_s             = std::sin(spin);
+    const T phase_up(cos_s, sin_s);
+    const T phase_dn(cos_s, -sin_s);
+    const T imag_unit(0, 1.0);
+
+    // Value, gradient and laplacian share one combination; the spin derivative flips the down sign.
+    for (int iorb = 0; iorb < num_orb; iorb++)
+    {
+      logdet(ip, iorb)      = phase_up * logpsi_work_up(ip, iorb) + phase_dn * logpsi_work_down(ip, iorb);
+      dlogdet(ip, iorb)     = phase_up * dlogpsi_work_up(ip, iorb) + phase_dn * dlogpsi_work_down(ip, iorb);
+      d2logdet(ip, iorb)    = phase_up * d2logpsi_work_up(ip, iorb) + phase_dn * d2logpsi_work_down(ip, iorb);
+      dspinlogdet(ip, iorb) = imag_unit * (phase_up * logpsi_work_up(ip, iorb) - phase_dn * logpsi_work_down(ip, iorb));
+    }
+  }
+}
+
+template<class T>
+void SpinorSetT<T>::evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi)
+{
+  // Start from clean scratch buffers, then evaluate each spin channel at particle iat.
+  psi_work_up   = 0.0;
+  psi_work_down = 0.0;
+  spo_up->evaluateValue(P, iat, psi_work_up);
+  spo_dn->evaluateValue(P, iat, psi_work_down);
+
+  // Spin phases e^{+is}, e^{-is} and the imaginary unit for the active particle's spin s.
+  const ParticleSet::Scalar_t spin = P.activeSpin(iat);
+  const RealType cos_s             = std::cos(spin);
+  const RealType sin_s             = std::sin(spin);
+  const T phase_up(cos_s, sin_s);
+  const T phase_dn(cos_s, -sin_s);
+  const T imag_unit(0, 1.0);
+
+  // Spinor value.
+  psi = phase_up * psi_work_up + phase_dn * psi_work_down;
+
+  // Spin derivative, returned in dpsi: i * (e^{is} phi_up - e^{-is} phi_dn).
+  dpsi = imag_unit * (phase_up * psi_work_up - phase_dn * psi_work_down);
+}
+
+template<class T>
+void SpinorSetT<T>::evaluateGradSource(const ParticleSet& P,
+                                       int first,
+                                       int last,
+                                       const ParticleSet& source,
+                                       int iat_src,
+                                       GradMatrix& gradphi)
+{
+  // Source-gradients of each spin channel go into local [particle x orbital] temporaries.
+  const IndexType num_ptcl = P.getTotalNum();
+  GradMatrix grad_up(num_ptcl, this->OrbitalSetSize);
+  GradMatrix grad_dn(num_ptcl, this->OrbitalSetSize);
+  spo_up->evaluateGradSource(P, first, last, source, iat_src, grad_up);
+  spo_dn->evaluateGradSource(P, first, last, source, iat_src, grad_dn);
+  // Combine the channels with each particle's spin phases e^{+is} and e^{-is}.
+  for (int ip = 0; ip < num_ptcl; ip++)
+  {
+    const ParticleSet::Scalar_t spin = P.activeSpin(ip);
+    const RealType cos_s             = std::cos(spin);
+    const RealType sin_s             = std::sin(spin);
+    const T phase_up(cos_s, sin_s);
+    const T phase_dn(cos_s, -sin_s);
+    for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++)
+      gradphi(ip, iorb) = grad_up(ip, iorb) * phase_up + grad_dn(ip, iorb) * phase_dn;
+  }
+}
+
+template<class T>
+std::unique_ptr<SPOSetT<T>> SpinorSetT<T>::makeClone() const
+{
+  auto spinor_copy = std::make_unique<SpinorSetT<T>>(this->my_name_); // same name, fresh object
+  auto up_copy     = spo_up->makeClone();
+  auto dn_copy     = spo_dn->makeClone();
+  spinor_copy->set_spos(std::move(up_copy), std::move(dn_copy)); // the copy owns clones of both channels
+  return spinor_copy;
+}
+
+template<class T>
+void SpinorSetT<T>::createResource(ResourceCollection& collection) const
+{
+  spo_up->createResource(collection); // order mirrors acquireResource(): up channel, down channel, own resource
+  spo_dn->createResource(collection);
+  collection.addResource(std::make_unique<SpinorSetMultiWalkerResource>()); // the returned index is not needed
+}
+
+template<class T>
+void SpinorSetT<T>::acquireResource(ResourceCollection& collection,
+                                    const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+{
+  // Split the walker batch per spin channel; each component SPO set acquires its own resource first.
+  auto [up_list, dn_list] = extractSpinComponentRefList(spo_list);
+  up_list.getLeader().acquireResource(collection, up_list);
+  dn_list.getLeader().acquireResource(collection, dn_list);
+  // Then the batch leader borrows this set's multi-walker scratch.
+  auto& leader          = spo_list.template getCastedLeader<SpinorSetT<T>>();
+  leader.mw_res_handle_ = collection.lendResource<SpinorSetMultiWalkerResource>();
+}
+
+template<class T>
+void SpinorSetT<T>::releaseResource(ResourceCollection& collection,
+                                    const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+{
+  // Same order as acquireResource(): up channel, down channel, then this set's own resource.
+  auto [up_list, dn_list] = extractSpinComponentRefList(spo_list);
+  up_list.getLeader().releaseResource(collection, up_list);
+  dn_list.getLeader().releaseResource(collection, dn_list);
+  // Finally return the leader's multi-walker scratch to the collection.
+  auto& leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
+  collection.takebackResource(leader.mw_res_handle_);
+}
+
+template<class T>
+std::pair<RefVectorWithLeader<SPOSetT<T>>, RefVectorWithLeader<SPOSetT<T>>> SpinorSetT<T>::extractSpinComponentRefList(
+    const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+{
+  // The per-channel lists are led by the up / down components of the batch leader.
+  SpinorSetT<T>& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
+  RefVectorWithLeader<SPOSetT<T>> up_spo_list(*(spo_leader.spo_up));
+  RefVectorWithLeader<SPOSetT<T>> dn_spo_list(*(spo_leader.spo_dn));
+  const IndexType nw = spo_list.size();
+  up_spo_list.reserve(nw);
+  dn_spo_list.reserve(nw);
+  for (int iw = 0; iw < nw; iw++)
+  {
+    SpinorSetT<T>& spinor = spo_list.template getCastedElement<SpinorSetT<T>>(iw);
+    up_spo_list.emplace_back(*(spinor.spo_up));
+    dn_spo_list.emplace_back(*(spinor.spo_dn));
+  }
+  // Move the lists into the returned pair instead of copying them.
+  return std::make_pair(std::move(up_spo_list), std::move(dn_spo_list));
+}
+
+template class SpinorSetT<std::complex<double>>; // complex-only: the implementation builds T(cos s, sin s)
+template class SpinorSetT<std::complex<float>>;
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/SpinorSetT.h b/src/QMCWaveFunctions/SpinorSetT.h
new file mode 100644
index 0000000000..bc59e610aa
--- /dev/null
+++ b/src/QMCWaveFunctions/SpinorSetT.h
@@ -0,0 +1,229 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2022 QMCPACK developers
+//
+// File developed by: Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories
+//                    Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories
+//
+// File created by:  Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_SPINORSETT_H // must not collide with the guard of the non-template SpinorSet.h
+#define QMCPLUSPLUS_SPINORSETT_H
+
+#include "QMCWaveFunctions/SPOSetT.h"
+#include "ResourceHandle.h"
+
+namespace qmcplusplus
+{
+/** Class for Melton & Mitas style Spinors.
+ *  Combines an up and a down SPO set as e^{is} * phi_up + e^{-is} * phi_dn, with s the particle spin.
+ */
+template<class T>
+class SpinorSetT : public SPOSetT<T>
+{
+public:
+  using ValueMatrix       = typename SPOSetT<T>::ValueMatrix;
+  using ValueVector       = typename SPOSetT<T>::ValueVector;
+  using GradMatrix        = typename SPOSetT<T>::GradMatrix;
+  using GradType          = typename SPOSetT<T>::GradType;
+  using GradVector        = typename SPOSetT<T>::GradVector;
+  using OffloadMWVGLArray = Array<T, 3, OffloadPinnedAllocator<T>>; // [VGL, walker, Orbs]
+  // (spelled out here; it is the same type as SPOSetT<T>::OffloadMWVGLArray)
+  template<typename DT>
+  using OffloadMatrix = typename SPOSetT<T>::template OffloadMatrix<DT>;
+  using RealType      = typename SPOSetT<T>::RealType;
+  using IndexType     = OHMMS_INDEXTYPE;
+
+  /** constructor */
+  SpinorSetT(const std::string& my_name);
+  ~SpinorSetT() override;
+
+  std::string getClassName() const override { return "SpinorSet"; }
+  bool isOptimizable() const override { return spo_up->isOptimizable() || spo_dn->isOptimizable(); }
+  bool isOMPoffload() const override { return spo_up->isOMPoffload() || spo_dn->isOMPoffload(); }
+  bool hasIonDerivs() const override { return spo_up->hasIonDerivs() || spo_dn->hasIonDerivs(); }
+
+  //This class is initialized by separately building the up and down channels of the spinor set and
+  //then registering them.
+  void set_spos(std::unique_ptr<SPOSetT<T>>&& up, std::unique_ptr<SPOSetT<T>>&& dn);
+
+  /** set the OrbitalSetSize
+   * @param norbs number of single-particle orbitals
+   */
+  void setOrbitalSetSize(int norbs) override;
+
+  /** evaluate the values of this spinor set
+   * @param P current ParticleSet
+   * @param iat active particle
+   * @param psi values of the SPO
+   */
+  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override;
+
+  /** evaluate the values, gradients and laplacians of this single-particle orbital set
+   * @param P current ParticleSet
+   * @param iat active particle
+   * @param psi values of the SPO
+   * @param dpsi gradients of the SPO
+   * @param d2psi laplacians of the SPO
+   */
+  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override;
+
+  /** evaluate the values, gradients, laplacians and spin gradients of this single-particle orbital set
+   * @param P current ParticleSet
+   * @param iat active particle
+   * @param psi values of the SPO
+   * @param dpsi gradients of the SPO
+   * @param d2psi laplacians of the SPO
+   * @param dspin spin gradient of the SPO
+   */
+  void evaluateVGL_spin(const ParticleSet& P,
+                        int iat,
+                        ValueVector& psi,
+                        GradVector& dpsi,
+                        ValueVector& d2psi,
+                        ValueVector& dspin) override;
+
+  /** evaluate the values, gradients, laplacians and spin gradients of this single-particle orbital set for multiple walkers
+   * @param spo_list the list of SPOSet references in a walker batch
+   * @param P_list the list of ParticleSet references in a walker batch
+   * @param iat active particle
+   * @param psi_v_list the list of value vector references in a walker batch
+   * @param dpsi_v_list the list of gradient vector references in a walker batch
+   * @param d2psi_v_list the list of laplacian vector references in a walker batch
+   * @param mw_dspin offload matrix of spin gradients. nw x num_orbitals
+   */
+  void mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                              const RefVectorWithLeader<ParticleSet>& P_list,
+                              int iat,
+                              const RefVector<ValueVector>& psi_v_list,
+                              const RefVector<GradVector>& dpsi_v_list,
+                              const RefVector<ValueVector>& d2psi_v_list,
+                              OffloadMatrix<QMCTraits::ComplexType>& mw_dspin) const override;
+
+
+  /** evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio
+   *  and grads (including spin gradients) of multiple walkers. Device data of phi_vgl_v must be up-to-date upon return.
+   * @param spo_list the list of SPOSet references in a walker batch
+   * @param P_list the list of ParticleSet references in a walker batch
+   * @param iat active particle
+   * @param invRow_ptr_list per-walker pointers forwarded unchanged to both spin channels
+   * @param phi_vgl_v orbital values, gradients and laplacians of all the walkers
+   * @param ratios, ratios of all walkers
+   * @param grads, spatial gradients of all walkers
+   * @param spingrads, spin gradients of all walkers
+   */
+  void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                              const RefVectorWithLeader<ParticleSet>& P_list,
+                                              int iat,
+                                              const std::vector<const T*>& invRow_ptr_list,
+                                              OffloadMWVGLArray& phi_vgl_v,
+                                              std::vector<T>& ratios,
+                                              std::vector<GradType>& grads,
+                                              std::vector<T>& spingrads) const override;
+
+  /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles
+   * @param P current ParticleSet
+   * @param first starting index of the particles
+   * @param last ending index of the particles
+   * @param logdet determinant matrix to be inverted
+   * @param dlogdet gradients
+   * @param d2logdet laplacians
+   *
+   */
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override;
+
+  void mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                               const RefVectorWithLeader<ParticleSet>& P_list,
+                               int first,
+                               int last,
+                               const RefVector<ValueMatrix>& logdet_list,
+                               const RefVector<GradMatrix>& dlogdet_list,
+                               const RefVector<ValueMatrix>& d2logdet_list) const override;
+
+  void evaluate_notranspose_spin(const ParticleSet& P,
+                                 int first,
+                                 int last,
+                                 ValueMatrix& logdet,
+                                 GradMatrix& dlogdet,
+                                 ValueMatrix& d2logdet,
+                                 ValueMatrix& dspinlogdet) override;
+  /** Evaluate the values and spin gradients of single particle spinors corresponding to electron iat.
+   *  @param P current particle set.
+   *  @param iat electron index.
+   *  @param psi spinor values.
+   *  @param dpsi spin gradient values. d/ds phi(r,s).
+   *
+   */
+  void evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi) override;
+
+  /** evaluate the gradients of this single-particle orbital
+   *  for [first,last) target particles with respect to the given source particle
+   * @param P current ParticleSet
+   * @param first starting index of the particles
+   * @param last ending index of the particles
+   * @param source ParticleSet that contains the source particle
+   * @param iat_src source particle index
+   * @param gradphi gradients
+   */
+  virtual void evaluateGradSource(const ParticleSet& P,
+                                  int first,
+                                  int last,
+                                  const ParticleSet& source,
+                                  int iat_src,
+                                  GradMatrix& gradphi) override;
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const override;
+
+  void createResource(ResourceCollection& collection) const override;
+
+  void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const override;
+
+  void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const override;
+
+  /// check if the multi walker resource is owned. For testing only.
+  bool isResourceOwned() const { return bool(mw_res_handle_); }
+
+private:
+  struct SpinorSetMultiWalkerResource;
+  ResourceHandle<SpinorSetMultiWalkerResource> mw_res_handle_;
+
+  std::pair<RefVectorWithLeader<SPOSetT<T>>, RefVectorWithLeader<SPOSetT<T>>> extractSpinComponentRefList(
+      const RefVectorWithLeader<SPOSetT<T>>& spo_list) const;
+
+  //Sposet for the up and down channels of our spinors.
+  std::unique_ptr<SPOSetT<T>> spo_up;
+  std::unique_ptr<SPOSetT<T>> spo_dn;
+
+  //temporary arrays for holding the values of the up and down channels respectively.
+  ValueVector psi_work_up;
+  ValueVector psi_work_down;
+
+  //temporary arrays for holding the gradients of the up and down channels respectively.
+  GradVector dpsi_work_up;
+  GradVector dpsi_work_down;
+
+  //temporary arrays for holding the laplacians of the up and down channels respectively.
+  ValueVector d2psi_work_up;
+  ValueVector d2psi_work_down;
+
+  //Same as above, but these are the full matrices containing all spinor/particle combinations.
+  ValueMatrix logpsi_work_up;
+  ValueMatrix logpsi_work_down;
+
+  GradMatrix dlogpsi_work_up;
+  GradMatrix dlogpsi_work_down;
+
+  ValueMatrix d2logpsi_work_up;
+  ValueMatrix d2logpsi_work_down;
+};
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/tests/CMakeLists.txt b/src/QMCWaveFunctions/tests/CMakeLists.txt
index ec066f8735..5c9484da4c 100644
--- a/src/QMCWaveFunctions/tests/CMakeLists.txt
+++ b/src/QMCWaveFunctions/tests/CMakeLists.txt
@@ -139,7 +139,7 @@ set(DETERMINANT_SRC
     test_ci_configuration.cpp
     test_multi_slater_determinant.cpp)
 
-add_library(sposets_for_testing FakeSPO.cpp ConstantSPOSet.cpp)
+add_library(sposets_for_testing FakeSPOT.cpp FakeSPO.cpp ConstantSPOSet.cpp)
 target_include_directories(sposets_for_testing PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
 target_link_libraries(sposets_for_testing PUBLIC qmcwfs)
 
diff --git a/src/QMCWaveFunctions/tests/FakeSPOT.cpp b/src/QMCWaveFunctions/tests/FakeSPOT.cpp
new file mode 100644
index 0000000000..fcf1637682
--- /dev/null
+++ b/src/QMCWaveFunctions/tests/FakeSPOT.cpp
@@ -0,0 +1,160 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2020 QMCPACK developers.
+//
+// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
+//
+// File created by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "FakeSPOT.h"
+
+namespace qmcplusplus
+{
+template<class T>
+FakeSPOT<T>::FakeSPOT() : SPOSetT<T>("one_FakeSPO")
+{
+  a.resize(3, 3); // table read by evaluateValue / evaluate_notranspose when OrbitalSetSize == 3
+
+  a(0, 0) = 2.3;
+  a(0, 1) = 4.5;
+  a(0, 2) = 2.6;
+  a(1, 0) = 0.5;
+  a(1, 1) = 8.5;
+  a(1, 2) = 3.3;
+  a(2, 0) = 1.8;
+  a(2, 1) = 4.4;
+  a(2, 2) = 4.9;
+
+  v.resize(3); // values returned by evaluateVGL when OrbitalSetSize == 3
+  v[0] = 1.9;
+  v[1] = 2.0;
+  v[2] = 3.1;
+
+  // Tables used when OrbitalSetSize == 4: a2 plays the role of a, v2 (indexed by iat, orbital) the role of v.
+  a2.resize(4, 4);
+  a2(0, 0) = 2.3;
+  a2(0, 1) = 4.5;
+  a2(0, 2) = 2.6;
+  a2(0, 3) = 1.2;
+  a2(1, 0) = 0.5;
+  a2(1, 1) = 8.5;
+  a2(1, 2) = 3.3;
+  a2(1, 3) = 0.3;
+  a2(2, 0) = 1.8;
+  a2(2, 1) = 4.4;
+  a2(2, 2) = 4.9;
+  a2(2, 3) = 2.8;
+  a2(3, 0) = 0.8;
+  a2(3, 1) = 4.1;
+  a2(3, 2) = 3.2;
+  a2(3, 3) = 1.1;
+
+  v2.resize(4, 4); // NOTE(review): v2(3, 0) is never assigned below -- confirm this is intended
+
+  v2(0, 0) = 3.2;
+  v2(0, 1) = 0.5;
+  v2(0, 2) = 5.9;
+  v2(0, 3) = 3.7;
+  v2(1, 0) = 0.3;
+  v2(1, 1) = 1.4;
+  v2(1, 2) = 3.9;
+  v2(1, 3) = 8.2;
+  v2(2, 0) = 3.3;
+  v2(2, 1) = 5.4;
+  v2(2, 2) = 4.9;
+  v2(2, 3) = 2.2;
+  v2(3, 1) = 5.4;
+  v2(3, 2) = 4.9;
+  v2(3, 3) = 2.2;
+
+  gv.resize(4); // gradients shared by the 3- and 4-orbital code paths
+  gv[0] = GradType(1.0, 0.0, 0.1);
+  gv[1] = GradType(1.0, 2.0, 0.1);
+  gv[2] = GradType(2.0, 1.0, 0.1);
+  gv[3] = GradType(0.4, 0.3, 0.1);
+}
+template<class T>
+std::unique_ptr<SPOSetT<T>> FakeSPOT<T>::makeClone() const
+{
+  return std::make_unique<FakeSPOT<T>>(*this); // clone is copy-constructed from *this
+}
+
+template<class T>
+void FakeSPOT<T>::setOrbitalSetSize(int norbs)
+{
+  this->OrbitalSetSize = norbs; // only records the size; the hard-coded tables are not resized
+}
+
+template<class T>
+void FakeSPOT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+{
+  // Negative iat: every entry of psi gets the closed form 1.2*k - k*k.
+  // Otherwise copy row iat of a (3 orbitals) or a2 (4 orbitals); other sizes leave psi untouched.
+  const int norb = this->OrbitalSetSize;
+  if (iat < 0)
+    for (int k = 0; k < psi.size(); ++k)
+      psi[k] = 1.2 * k - k * k;
+  else if (norb == 3 || norb == 4)
+    for (int k = 0; k < norb; ++k)
+      psi[k] = (norb == 3) ? a(iat, k) : a2(iat, k);
+}
+
+template<class T>
+void FakeSPOT<T>::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+{
+  // Canned value/gradient lookup for orbital-set sizes 3 and 4 only.
+  //  * size 3: values come from v and do not depend on iat;
+  //  * size 4: values come from row iat of v2.
+  // Gradients always come from gv. P is unused and d2psi is never written.
+  const int norb   = this->OrbitalSetSize;
+  const bool three = (norb == 3);
+
+  // Any other size leaves every output untouched.
+  if (!three && norb != 4)
+    return;
+
+  for (int k = 0; k < norb; ++k)
+  {
+    psi[k]  = three ? v[k] : v2(iat, k);
+    dpsi[k] = gv[k];
+  }
+}
+
+template<class T>
+void FakeSPOT<T>::evaluate_notranspose(const ParticleSet& P,
+                                       int first,
+                                       int last,
+                                       ValueMatrix& logdet,
+                                       GradMatrix& dlogdet,
+                                       ValueMatrix& d2logdet)
+{
+  // Fills logdet with the transpose of the canned table (a for 3 orbitals, a2 for 4)
+  // and dlogdet[row][col] with gv[col] + GradType(row).
+  // P, first, last and d2logdet are not used; other orbital-set sizes write nothing.
+  const int norb   = this->OrbitalSetSize;
+  const bool three = (norb == 3);
+
+  if (!three && norb != 4)
+    return;
+
+  for (int row = 0; row < norb; ++row)
+  {
+    for (int col = 0; col < norb; ++col)
+    {
+      logdet(col, row)  = three ? a(row, col) : a2(row, col);
+      dlogdet[row][col] = gv[col] + GradType(row);
+    }
+  }
+}
+
+// Explicit instantiations: the member definitions above live in this .cpp, one per supported value type.
+template class FakeSPOT<double>;
+template class FakeSPOT<float>;
+template class FakeSPOT<std::complex<double>>;
+template class FakeSPOT<std::complex<float>>;
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/tests/FakeSPOT.h b/src/QMCWaveFunctions/tests/FakeSPOT.h
new file mode 100644
index 0000000000..ee452842f5
--- /dev/null
+++ b/src/QMCWaveFunctions/tests/FakeSPOT.h
@@ -0,0 +1,62 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2020 QMCPACK developers.
+//
+// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
+//
+// File created by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef QMCPLUSPLUS_FAKESPOT_H
+#define QMCPLUSPLUS_FAKESPOT_H
+
+#include "QMCWaveFunctions/SPOSetT.h"
+
+namespace qmcplusplus
+{
+template<class T>
+class FakeSPOT : public SPOSetT<T> // test double that serves hard-coded orbital values and gradients
+{
+public:
+  Matrix<T> a;  // 3x3 table used when OrbitalSetSize == 3
+  Matrix<T> a2; // 4x4 table used when OrbitalSetSize == 4
+  Vector<T> v;  // 3 values returned by evaluateVGL when OrbitalSetSize == 3
+  Matrix<T> v2; // 4x4 values returned by evaluateVGL when OrbitalSetSize == 4
+
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using GradType    = typename SPOSetT<T>::GradType;
+
+  typename SPOSetT<T>::GradVector gv; // 4 gradients used by evaluateVGL and evaluate_notranspose
+
+  FakeSPOT(); // fills all tables above with fixed numbers
+
+  ~FakeSPOT() override = default;
+
+  std::string getClassName() const override { return "FakeSPO"; }
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const override; // returns a copy of this object
+
+  virtual void report() {} // intentionally empty
+
+  void setOrbitalSetSize(int norbs) override; // only sizes 3 and 4 are handled by the evaluate* methods
+
+  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override;
+
+  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override;
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override;
+};
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp b/src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp
index c162b9985c..b126cfec86 100644
--- a/src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp
+++ b/src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp
@@ -20,6 +20,7 @@
 #include "QMCWaveFunctions/WaveFunctionComponent.h"
 #include "QMCWaveFunctions/EinsplineSetBuilder.h"
 #include "QMCWaveFunctions/RotatedSPOs.h"
+#include "QMCWaveFunctions/RotatedSPOsT.h"
 #include "QMCWaveFunctions/SPOSetT.h"
 #include "checkMatrix.hpp"
 #include "FakeSPO.h"
@@ -651,7 +652,17 @@ opt_variables_type& getMyVars(SPOSetT<double>& rot) { return rot.myVars; }
 opt_variables_type& getMyVars(SPOSetT<std::complex<float>>& rot) { return rot.myVars; }
 opt_variables_type& getMyVars(SPOSetT<std::complex<double>>& rot) { return rot.myVars; }
 opt_variables_type& getMyVarsFull(RotatedSPOs& rot) { return rot.myVarsFull; }
+opt_variables_type& getMyVarsFull(RotatedSPOsT<double>& rot) { return rot.myVarsFull; }
+opt_variables_type& getMyVarsFull(RotatedSPOsT<float>& rot) { return rot.myVarsFull; }
 std::vector<std::vector<QMCTraits::RealType>>& getHistoryParams(RotatedSPOs& rot) { return rot.history_params_; }
+std::vector<std::vector<QMCTraits::RealType>>& getHistoryParams(RotatedSPOsT<double>& rot)
+{
+  return rot.history_params_;
+}
+std::vector<std::vector<QMCTraits::RealType>>& getHistoryParams(RotatedSPOsT<float>& rot)
+{
+  return rot.history_params_;
+}
 } // namespace testing
 
 // Test using global rotation
@@ -706,6 +717,58 @@ TEST_CASE("RotatedSPOs read and write parameters", "[wavefunction]")
   CHECK(full_var[5] == Approx(0.0));
 }
 
+// Test using global rotation
+TEMPLATE_TEST_CASE("RotatedSPOs read and write parameters", "[wavefunction][template]", double, float)
+{
+  auto fake_spo = std::make_unique<FakeSPOT<TestType>>();
+  fake_spo->setOrbitalSetSize(4);
+  RotatedSPOsT<TestType> rot("fake_rot", std::move(fake_spo));
+  int nel = 2;
+  rot.buildOptVariables(nel);
+
+  optimize::VariableSet vs;
+  rot.checkInVariablesExclusive(vs);
+  vs[0] = 0.1;
+  vs[1] = 0.15;
+  vs[2] = 0.2;
+  vs[3] = 0.25;
+  rot.resetParametersExclusive(vs);
+
+  {
+    hdf_archive hout;
+    vs.writeToHDF("rot_vp.h5", hout);
+
+    rot.writeVariationalParameters(hout);
+  }
+
+  auto fake_spo2 = std::make_unique<FakeSPOT<TestType>>();
+  fake_spo2->setOrbitalSetSize(4);
+
+  RotatedSPOsT<TestType> rot2("fake_rot", std::move(fake_spo2));
+  rot2.buildOptVariables(nel);
+
+  optimize::VariableSet vs2;
+  rot2.checkInVariablesExclusive(vs2);
+
+  hdf_archive hin;
+  vs2.readFromHDF("rot_vp.h5", hin);
+  rot2.readVariationalParameters(hin);
+
+  opt_variables_type& var = testing::getMyVars(rot2);
+  CHECK(var[0] == Approx(vs[0]));
+  CHECK(var[1] == Approx(vs[1]));
+  CHECK(var[2] == Approx(vs[2]));
+  CHECK(var[3] == Approx(vs[3]));
+
+  opt_variables_type& full_var = testing::getMyVarsFull(rot2);
+  CHECK(full_var[0] == Approx(vs[0]));
+  CHECK(full_var[1] == Approx(vs[1]));
+  CHECK(full_var[2] == Approx(vs[2]));
+  CHECK(full_var[3] == Approx(vs[3]));
+  CHECK(full_var[4] == Approx(0.0));
+  CHECK(full_var[5] == Approx(0.0));
+}
+
 // Test using history list.
 TEST_CASE("RotatedSPOs read and write parameters history", "[wavefunction]")
 {

From 1368143f605823e148268878b0d393c1b7932eeb Mon Sep 17 00:00:00 2001
From: William F Godoy <williamfgc@yahoo.com>
Date: Fri, 18 Aug 2023 13:21:14 -0400
Subject: [PATCH 04/17] Revert test_RotatedSPOs.cpp

---
 .../tests/test_RotatedSPOs.cpp                | 68 -------------------
 1 file changed, 68 deletions(-)

diff --git a/src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp b/src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp
index b126cfec86..39e35c9c70 100644
--- a/src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp
+++ b/src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp
@@ -20,8 +20,6 @@
 #include "QMCWaveFunctions/WaveFunctionComponent.h"
 #include "QMCWaveFunctions/EinsplineSetBuilder.h"
 #include "QMCWaveFunctions/RotatedSPOs.h"
-#include "QMCWaveFunctions/RotatedSPOsT.h"
-#include "QMCWaveFunctions/SPOSetT.h"
 #include "checkMatrix.hpp"
 #include "FakeSPO.h"
 #include <ResourceCollection.h>
@@ -647,22 +645,8 @@ TEST_CASE("RotatedSPOs construct delta matrix", "[wavefunction]")
 namespace testing
 {
 opt_variables_type& getMyVars(SPOSet& rot) { return rot.myVars; }
-opt_variables_type& getMyVars(SPOSetT<float>& rot) { return rot.myVars; }
-opt_variables_type& getMyVars(SPOSetT<double>& rot) { return rot.myVars; }
-opt_variables_type& getMyVars(SPOSetT<std::complex<float>>& rot) { return rot.myVars; }
-opt_variables_type& getMyVars(SPOSetT<std::complex<double>>& rot) { return rot.myVars; }
 opt_variables_type& getMyVarsFull(RotatedSPOs& rot) { return rot.myVarsFull; }
-opt_variables_type& getMyVarsFull(RotatedSPOsT<double>& rot) { return rot.myVarsFull; }
-opt_variables_type& getMyVarsFull(RotatedSPOsT<float>& rot) { return rot.myVarsFull; }
 std::vector<std::vector<QMCTraits::RealType>>& getHistoryParams(RotatedSPOs& rot) { return rot.history_params_; }
-std::vector<std::vector<QMCTraits::RealType>>& getHistoryParams(RotatedSPOsT<double>& rot)
-{
-  return rot.history_params_;
-}
-std::vector<std::vector<QMCTraits::RealType>>& getHistoryParams(RotatedSPOsT<float>& rot)
-{
-  return rot.history_params_;
-}
 } // namespace testing
 
 // Test using global rotation
@@ -717,58 +701,6 @@ TEST_CASE("RotatedSPOs read and write parameters", "[wavefunction]")
   CHECK(full_var[5] == Approx(0.0));
 }
 
-// Test using global rotation
-TEMPLATE_TEST_CASE("RotatedSPOs read and write parameters", "[wavefunction][template]", double, float)
-{
-  auto fake_spo = std::make_unique<FakeSPOT<TestType>>();
-  fake_spo->setOrbitalSetSize(4);
-  RotatedSPOsT<TestType> rot("fake_rot", std::move(fake_spo));
-  int nel = 2;
-  rot.buildOptVariables(nel);
-
-  optimize::VariableSet vs;
-  rot.checkInVariablesExclusive(vs);
-  vs[0] = 0.1;
-  vs[1] = 0.15;
-  vs[2] = 0.2;
-  vs[3] = 0.25;
-  rot.resetParametersExclusive(vs);
-
-  {
-    hdf_archive hout;
-    vs.writeToHDF("rot_vp.h5", hout);
-
-    rot.writeVariationalParameters(hout);
-  }
-
-  auto fake_spo2 = std::make_unique<FakeSPOT<TestType>>();
-  fake_spo2->setOrbitalSetSize(4);
-
-  RotatedSPOsT<TestType> rot2("fake_rot", std::move(fake_spo2));
-  rot2.buildOptVariables(nel);
-
-  optimize::VariableSet vs2;
-  rot2.checkInVariablesExclusive(vs2);
-
-  hdf_archive hin;
-  vs2.readFromHDF("rot_vp.h5", hin);
-  rot2.readVariationalParameters(hin);
-
-  opt_variables_type& var = testing::getMyVars(rot2);
-  CHECK(var[0] == Approx(vs[0]));
-  CHECK(var[1] == Approx(vs[1]));
-  CHECK(var[2] == Approx(vs[2]));
-  CHECK(var[3] == Approx(vs[3]));
-
-  opt_variables_type& full_var = testing::getMyVarsFull(rot2);
-  CHECK(full_var[0] == Approx(vs[0]));
-  CHECK(full_var[1] == Approx(vs[1]));
-  CHECK(full_var[2] == Approx(vs[2]));
-  CHECK(full_var[3] == Approx(vs[3]));
-  CHECK(full_var[4] == Approx(0.0));
-  CHECK(full_var[5] == Approx(0.0));
-}
-
 // Test using history list.
 TEST_CASE("RotatedSPOs read and write parameters history", "[wavefunction]")
 {

From 3721d46cd0a9b10293a41fc4c43a9b87d4cbe3b9 Mon Sep 17 00:00:00 2001
From: Philip Fackler <facklerpw@ornl.gov>
Date: Fri, 18 Aug 2023 11:17:08 -0400
Subject: [PATCH 05/17] Reorder specialized function definitions to appease OMP
 target compilation

---
 .../ElectronGas/FreeOrbitalT.cpp              | 110 +++++++++---------
 1 file changed, 53 insertions(+), 57 deletions(-)

diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp
index 81bc37cc79..82428ebfe1 100644
--- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp
+++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp
@@ -20,63 +20,6 @@
 namespace qmcplusplus
 {
 
-template<class T>
-FreeOrbitalT<T>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart) : SPOSetT<T>(my_name)
-{}
-
-//Explicit template specialization
-template<>
-FreeOrbitalT<float>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
-    : SPOSetT<float>(my_name),
-      kvecs(kpts_cart),
-      mink(1), // treat k=0 as special case
-      maxk(kpts_cart.size()),
-      k2neg(maxk)
-{
-  this->OrbitalSetSize = 2 * maxk - 1; // k=0 has no (cos, sin) split, SPOSet member
-  for (int ik = 0; ik < maxk; ik++)
-    k2neg[ik] = -dot(kvecs[ik], kvecs[ik]);
-}
-
-template<>
-FreeOrbitalT<double>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
-    : SPOSetT<double>(my_name),
-      kvecs(kpts_cart),
-      mink(1), // treat k=0 as special case
-      maxk(kpts_cart.size()),
-      k2neg(maxk)
-{
-  this->OrbitalSetSize = 2 * maxk - 1; // k=0 has no (cos, sin) split, SPOSet member
-  for (int ik = 0; ik < maxk; ik++)
-    k2neg[ik] = -dot(kvecs[ik], kvecs[ik]);
-}
-
-template<>
-FreeOrbitalT<std::complex<float>>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
-    : SPOSetT<std::complex<float>>(my_name),
-      kvecs(kpts_cart),
-      mink(0), // treat k=0 as special case
-      maxk(kpts_cart.size()),
-      k2neg(maxk)
-{
-  this->OrbitalSetSize = maxk; // SPOSet member
-  for (int ik = 0; ik < maxk; ik++)
-    k2neg[ik] = -dot(kvecs[ik], kvecs[ik]);
-}
-
-template<>
-FreeOrbitalT<std::complex<double>>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
-    : SPOSetT<std::complex<double>>(my_name),
-      kvecs(kpts_cart),
-      mink(0), // treat k=0 as special case
-      maxk(kpts_cart.size()),
-      k2neg(maxk)
-{
-  this->OrbitalSetSize = maxk; // SPOSet member
-  for (int ik = 0; ik < maxk; ik++)
-    k2neg[ik] = -dot(kvecs[ik], kvecs[ik]);
-}
-
 
 template<class T>
 void FreeOrbitalT<T>::evaluateVGL(const ParticleSet& P,
@@ -696,6 +639,59 @@ void FreeOrbitalT<T>::evaluate_notranspose(const ParticleSet& P,
   }
 }
 
+//Explicit template specializations of the constructor, one per supported value type.
+template<>
+FreeOrbitalT<float>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
+    : SPOSetT<float>(my_name),
+      kvecs(kpts_cart),
+      mink(1), // treat k=0 as special case
+      maxk(kpts_cart.size()),
+      k2neg(maxk)
+{
+  this->OrbitalSetSize = 2 * maxk - 1; // k=0 has no (cos, sin) split, SPOSet member
+  for (int ik = 0; ik < maxk; ik++)
+    k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); // k2neg[ik] = -|k_ik|^2
+}
+
+template<>
+FreeOrbitalT<double>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
+    : SPOSetT<double>(my_name),
+      kvecs(kpts_cart),
+      mink(1), // treat k=0 as special case
+      maxk(kpts_cart.size()),
+      k2neg(maxk)
+{
+  this->OrbitalSetSize = 2 * maxk - 1; // k=0 has no (cos, sin) split, SPOSet member
+  for (int ik = 0; ik < maxk; ik++)
+    k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); // k2neg[ik] = -|k_ik|^2
+}
+
+template<>
+FreeOrbitalT<std::complex<float>>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
+    : SPOSetT<std::complex<float>>(my_name),
+      kvecs(kpts_cart),
+      mink(0), // NOTE(review): starts at 0, unlike the real specializations (mink = 1); "k=0 special case" may not apply -- confirm
+      maxk(kpts_cart.size()),
+      k2neg(maxk)
+{
+  this->OrbitalSetSize = maxk; // SPOSet member; one orbital per k-vector
+  for (int ik = 0; ik < maxk; ik++)
+    k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); // k2neg[ik] = -|k_ik|^2
+}
+
+template<>
+FreeOrbitalT<std::complex<double>>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
+    : SPOSetT<std::complex<double>>(my_name),
+      kvecs(kpts_cart),
+      mink(0), // NOTE(review): starts at 0, unlike the real specializations (mink = 1); "k=0 special case" may not apply -- confirm
+      maxk(kpts_cart.size()),
+      k2neg(maxk)
+{
+  this->OrbitalSetSize = maxk; // SPOSet member; one orbital per k-vector
+  for (int ik = 0; ik < maxk; ik++)
+    k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); // k2neg[ik] = -|k_ik|^2
+}
+
 
 template<class T>
 void FreeOrbitalT<T>::report(const std::string& pad) const

From 321dc079990a4fe1382779c70b34cfb466f44ddf Mon Sep 17 00:00:00 2001
From: William F Godoy <williamfgc@yahoo.com>
Date: Fri, 18 Aug 2023 16:35:43 -0400
Subject: [PATCH 06/17] Add ConstantSPOSetT

---
 src/QMCWaveFunctions/tests/CMakeLists.txt     |   3 +-
 .../tests/ConstantSPOSetT.cpp                 | 124 ++++++++++++++++
 src/QMCWaveFunctions/tests/ConstantSPOSetT.h  |  93 ++++++++++++
 .../tests/test_ConstantSPOSetT.cpp            | 136 ++++++++++++++++++
 4 files changed, 355 insertions(+), 1 deletion(-)
 create mode 100644 src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp
 create mode 100644 src/QMCWaveFunctions/tests/ConstantSPOSetT.h
 create mode 100644 src/QMCWaveFunctions/tests/test_ConstantSPOSetT.cpp

diff --git a/src/QMCWaveFunctions/tests/CMakeLists.txt b/src/QMCWaveFunctions/tests/CMakeLists.txt
index 5c9484da4c..b414f0158b 100644
--- a/src/QMCWaveFunctions/tests/CMakeLists.txt
+++ b/src/QMCWaveFunctions/tests/CMakeLists.txt
@@ -111,6 +111,7 @@ set(SPOSET_SRC
     test_hybridrep.cpp
     test_pw.cpp
     test_ConstantSPOSet.cpp
+    test_ConstantSPOSetT.cpp
     ${MO_SRCS})
 if(NiO_a16_H5_FOUND)
   set(SPOSET_SRC ${SPOSET_SRC} test_einset_NiO_a16.cpp)
@@ -139,7 +140,7 @@ set(DETERMINANT_SRC
     test_ci_configuration.cpp
     test_multi_slater_determinant.cpp)
 
-add_library(sposets_for_testing FakeSPOT.cpp FakeSPO.cpp ConstantSPOSet.cpp)
+add_library(sposets_for_testing FakeSPOT.cpp FakeSPO.cpp ConstantSPOSet.cpp ConstantSPOSetT.cpp)
 target_include_directories(sposets_for_testing PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
 target_link_libraries(sposets_for_testing PUBLIC qmcwfs)
 
diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp b/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp
new file mode 100644
index 0000000000..49e5070241
--- /dev/null
+++ b/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp
@@ -0,0 +1,124 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2023 Raymond Clay and QMCPACK developers.
+//
+// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories
+//
+// File created by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "ConstantSPOSetT.h"
+
+namespace qmcplusplus
+{
+
+template<class T>
+ConstantSPOSetT<T>::ConstantSPOSetT(const std::string& my_name, const int nparticles, const int norbitals)
+    : SPOSetT<T>(my_name), numparticles_(nparticles)
+{
+  this->OrbitalSetSize = norbitals; // fixed here; setOrbitalSetSize() aborts on this class
+  ref_psi_.resize(numparticles_, this->OrbitalSetSize);
+  ref_egrad_.resize(numparticles_, this->OrbitalSetSize);
+  ref_elapl_.resize(numparticles_, this->OrbitalSetSize);
+  // Start from all-zero reference data; callers load real data through the setRef* methods.
+  ref_psi_   = 0.0;
+  ref_egrad_ = 0.0;
+  ref_elapl_ = 0.0;
+}
+
+template<class T>
+std::unique_ptr<SPOSetT<T>> ConstantSPOSetT<T>::makeClone() const
+{
+  auto myclone = std::make_unique<ConstantSPOSetT<T>>(this->my_name_, numparticles_, this->OrbitalSetSize); // same name and dimensions
+  myclone->setRefVals(ref_psi_);     // then copy the three reference tables into the clone
+  myclone->setRefEGrads(ref_egrad_);
+  myclone->setRefELapls(ref_elapl_);
+  return myclone;
+}
+
+template<class T>
+void ConstantSPOSetT<T>::checkOutVariables(const opt_variables_type& active)
+{
+  APP_ABORT("ConstantSPOSet should not call checkOutVariables"); // unsupported on this test class: always aborts
+}
+
+template<class T>
+void ConstantSPOSetT<T>::setOrbitalSetSize(int norbs)
+{
+  APP_ABORT("ConstantSPOSet should not call setOrbitalSetSize()"); // size is set once by the constructor; norbs is ignored
+}
+
+template<class T>
+void ConstantSPOSetT<T>::setRefVals(const ValueMatrix& vals)
+{
+  assert(vals.cols() == this->OrbitalSetSize); // expected shape: numparticles_ x OrbitalSetSize
+  assert(vals.rows() == numparticles_);        // (shape is verified only when assert is active)
+  ref_psi_ = vals;
+}
+
+template<class T>
+void ConstantSPOSetT<T>::setRefEGrads(const GradMatrix& grads)
+{
+  assert(grads.cols() == this->OrbitalSetSize); // expected shape: numparticles_ x OrbitalSetSize
+  assert(grads.rows() == numparticles_);        // (shape is verified only when assert is active)
+  ref_egrad_ = grads;
+}
+
+template<class T>
+void ConstantSPOSetT<T>::setRefELapls(const ValueMatrix& lapls)
+{
+  assert(lapls.cols() == this->OrbitalSetSize); // expected shape: numparticles_ x OrbitalSetSize
+  assert(lapls.rows() == numparticles_);        // (shape is verified only when assert is active)
+  ref_elapl_ = lapls;
+}
+
+template<class T>
+void ConstantSPOSetT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+{
+  const auto* vp = dynamic_cast<const VirtualParticleSet*>(&P); // null unless P is a VirtualParticleSet
+  int ptcl       = vp ? vp->refPtcl : iat; // row to read: the virtual set's refPtcl, otherwise iat
+  assert(psi.size() == this->OrbitalSetSize);
+  for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++)
+    psi[iorb] = ref_psi_(ptcl, iorb); // copy the stored reference values for that row
+}
+
+template<class T>
+void ConstantSPOSetT<T>::evaluateVGL(const ParticleSet& P,
+                                     int iat,
+                                     ValueVector& psi,
+                                     GradVector& dpsi,
+                                     ValueVector& d2psi)
+{
+  for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++) // row is iat directly: no VirtualParticleSet lookup, unlike evaluateValue
+  {
+    psi[iorb]   = ref_psi_(iat, iorb);   // value
+    dpsi[iorb]  = ref_egrad_(iat, iorb); // gradient
+    d2psi[iorb] = ref_elapl_(iat, iorb); // laplacian
+  }
+}
+
+template<class T>
+void ConstantSPOSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                              int first,
+                                              int last,
+                                              ValueMatrix& logdet,
+                                              GradMatrix& dlogdet,
+                                              ValueMatrix& d2logdet)
+{
+  for (int iat = first, i = 0; iat < last; ++iat, ++i) // iat: particle index in [first, last); i: output row, starting at 0
+  {
+    ValueVector v(logdet[i], logdet.cols()); // built over row i of each output matrix -- presumably a non-owning view,
+    GradVector g(dlogdet[i], dlogdet.cols()); // so that the evaluateVGL writes below land in that row (TODO confirm)
+    ValueVector l(d2logdet[i], d2logdet.cols());
+    evaluateVGL(P, iat, v, g, l);
+  }
+}
+
+template class ConstantSPOSetT<float>;
+template class ConstantSPOSetT<double>;
+template class ConstantSPOSetT<std::complex<float>>;
+template class ConstantSPOSetT<std::complex<double>>;
+
+} //namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSetT.h b/src/QMCWaveFunctions/tests/ConstantSPOSetT.h
new file mode 100644
index 0000000000..a6e16f8e3d
--- /dev/null
+++ b/src/QMCWaveFunctions/tests/ConstantSPOSetT.h
@@ -0,0 +1,93 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2023 Raymond Clay and QMCPACK developers.
+//
+// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories
+//
+// File created by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef QMCPLUSPLUS_CONSTANTSPOSETT_H // guard named after this header (ConstantSPOSetT.h) so it cannot clash with a non-template header's guard
+#define QMCPLUSPLUS_CONSTANTSPOSETT_H
+
+#include "QMCWaveFunctions/SPOSetT.h"
+
+namespace qmcplusplus
+{
+/** Constant SPOSet for testing purposes.  Stores fixed N_elec x N_orb matrices of values, gradients, and laplacians.
+   *  These values are accessed through standard SPOSet calls like evaluateValue, evaluateVGL, etc.
+   *  Exists to provide deterministic and known output to objects requiring SPOSet evaluations.
+   *
+   */
+template<class T>
+class ConstantSPOSetT : public SPOSetT<T>
+{
+public:
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+
+  ConstantSPOSetT(const std::string& my_name) = delete;
+
+  //Constructor needs number of particles and number of orbitals.  This is the minimum
+  //amount of information needed to sanely construct all data members and perform size
+  //checks later.
+  ConstantSPOSetT(const std::string& my_name, const int nparticles, const int norbitals);
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const final;
+
+  std::string getClassName() const final { return "ConstantSPOSet"; }
+
+  void checkOutVariables(const opt_variables_type& active) final; ///< not supported: aborts if called
+
+  void setOrbitalSetSize(int norbs) final; ///< not supported: aborts if called (size is fixed at construction)
+
+  /**
+  * @brief Setter method to set \phi_j(r_i). Stores input matrix in ref_psi_.
+  * @param vals Nelec x Norb ValueType matrix of \phi_j(r_i)
+  * @return void
+  */
+  void setRefVals(const ValueMatrix& vals);
+  /**
+  * @brief Setter method to set \nabla_i \phi_j(r_i). Stores input matrix in ref_egrad_.
+  * @param grads Nelec x Norb GradType matrix of \grad_i \phi_j(r_i)
+  * @return void
+  */
+  void setRefEGrads(const GradMatrix& grads);
+  /**
+  * @brief Setter method to set \nabla^2_i \phi_j(r_i). Stores input matrix in ref_elapl_.
+  * @param lapls Nelec x Norb ValueType matrix of \grad^2_i \phi_j(r_i)
+  * @return void
+  */
+  void setRefELapls(const ValueMatrix& lapls);
+
+  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) final;
+
+  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final;
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) final;
+
+private:
+  const int numparticles_; /// evaluate_notranspose arrays are nparticle x norb matrices.
+                           /// To ensure consistent array sizing and enforcement,
+                           /// we agree at construction how large these matrices will be.
+                           /// norb is stored in SPOSet::OrbitalSetSize.
+
+  //Value, electron gradient, and electron laplacian at "reference configuration".
+  //i.e. before any attempted moves.  Each is numparticles_ x OrbitalSetSize.
+
+  ValueMatrix ref_psi_;
+  GradMatrix ref_egrad_;
+  ValueMatrix ref_elapl_;
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/tests/test_ConstantSPOSetT.cpp b/src/QMCWaveFunctions/tests/test_ConstantSPOSetT.cpp
new file mode 100644
index 0000000000..87425bbb91
--- /dev/null
+++ b/src/QMCWaveFunctions/tests/test_ConstantSPOSetT.cpp
@@ -0,0 +1,136 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2023 QMCPACK developers.
+//
+// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories
+//
+// File created by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#include "catch.hpp"
+#include "Configuration.h"
+#include "QMCWaveFunctions/WaveFunctionTypes.hpp"
+#include "QMCWaveFunctions/tests/ConstantSPOSetT.h"
+#include "Utilities/for_testing/checkMatrix.hpp"
+namespace qmcplusplus
+{
+//TODO(Ray): template this test on the value type; it currently runs for QMCTraits::ValueType only.
+TEST_CASE("ConstantSPOSetT", "[wavefunction]")
+{
+  //For now, do a small square case: 2 electrons x 2 orbitals.
+  const int nelec   = 2;
+  const int norb    = 2;
+  using WF          = WaveFunctionTypes<QMCTraits::ValueType, QMCTraits::FullPrecValueType>;
+  using Real        = WF::Real; // NOTE(review): not referenced anywhere in this test case
+  using Value       = WF::Value;
+  using Grad        = WF::Grad;
+  using ValueVector = Vector<Value>;
+  using GradVector  = Vector<Grad>;
+  using ValueMatrix = Matrix<Value>;
+  using GradMatrix  = Matrix<Grad>;
+  //Reference data, one row per electron: values (row*), gradients (grow*) and laplacians (lrow*).
+  ValueVector row0{Value(0.92387953), Value(0.92387953)};
+  ValueVector row1{Value(0.29131988), Value(0.81078057)};
+
+  GradVector grow0{Grad({-2.22222, -1.11111, 0.33333}), Grad({8.795388, -0.816057, -0.9238793})};
+  GradVector grow1{Grad({2.22222, 1.11111, -0.33333}), Grad({-8.795388, 0.816057, 0.9238793})};
+
+  ValueVector lrow0{Value(-0.2234545), Value(0.72340234)};
+  ValueVector lrow1{Value(-12.291810), Value(6.879057)};
+
+  //Pack the reference rows into nelec x norb matrices.
+  ValueMatrix spomat;
+  GradMatrix gradspomat;
+  ValueMatrix laplspomat;
+
+  spomat.resize(nelec, norb);
+  gradspomat.resize(nelec, norb);
+  laplspomat.resize(nelec, norb);
+
+  for (int iorb = 0; iorb < norb; iorb++)
+  {
+    spomat(0, iorb) = row0[iorb];
+    spomat(1, iorb) = row1[iorb];
+
+    gradspomat(0, iorb) = grow0[iorb];
+    gradspomat(1, iorb) = grow1[iorb];
+
+    laplspomat(0, iorb) = lrow0[iorb];
+    laplspomat(1, iorb) = lrow1[iorb];
+  }
+
+  //Minimal particle set holding nelec particles; it is only passed through to the SPOSet calls.
+  const SimulationCell simulation_cell;
+  ParticleSet elec(simulation_cell);
+
+  elec.create({nelec});
+
+  ValueVector psiV = {0.0, 0.0};
+  ValueVector psiL = {0.0, 0.0};
+  GradVector psiG;
+  psiG.resize(norb);
+
+  //Build the SPOSet and load the reference value, gradient and laplacian tables.
+  auto sposet = std::make_unique<ConstantSPOSetT<Value>>("constant_spo", nelec, norb);
+  sposet->setRefVals(spomat);
+  sposet->setRefEGrads(gradspomat);
+  sposet->setRefELapls(laplspomat);
+
+  sposet->evaluateValue(elec, 0, psiV);
+
+  CHECK(psiV[0] == row0[0]);
+  CHECK(psiV[1] == row0[1]);
+
+  //Reset the output and repeat for the second electron.
+  psiV = 0.0;
+
+  sposet->evaluateValue(elec, 1, psiV);
+  CHECK(psiV[0] == row1[0]);
+  CHECK(psiV[1] == row1[1]);
+
+  psiV = 0.0;
+
+  sposet->evaluateVGL(elec, 1, psiV, psiG, psiL);
+
+  for (int iorb = 0; iorb < norb; iorb++)
+  {
+    CHECK(psiV[iorb] == row1[iorb]);
+    CHECK(psiL[iorb] == lrow1[iorb]);
+
+    for (int idim = 0; idim < OHMMS_DIM; idim++)
+      CHECK(psiG[iorb][idim] == grow1[iorb][idim]);
+  }
+  //Test of evaluate_notranspose.
+  ValueMatrix phimat, lphimat;
+  GradMatrix gphimat;
+  phimat.resize(nelec, norb);
+  gphimat.resize(nelec, norb);
+  lphimat.resize(nelec, norb);
+
+  const int first_index = 0; //Only 2 electrons in this case.
+  const int last_index  = 2;
+  sposet->evaluate_notranspose(elec, first_index, last_index, phimat, gphimat, lphimat);
+
+  checkMatrix(phimat, spomat);
+  checkMatrix(lphimat, laplspomat); // NOTE(review): gphimat is filled above but never compared against gradspomat
+
+  //Test of makeClone()
+  auto sposet_vgl2 = sposet->makeClone();
+  phimat           = 0.0;
+  gphimat          = 0.0;
+  lphimat          = 0.0;
+
+  sposet_vgl2->evaluate_notranspose(elec, first_index, last_index, phimat, gphimat, lphimat);
+
+  checkMatrix(phimat, spomat);
+  checkMatrix(lphimat, laplspomat); // NOTE(review): the clone's gradients (gphimat) are likewise unchecked
+
+  //Lastly, check if name is correct.
+  std::string myname = sposet_vgl2->getClassName();
+  std::string targetstring("ConstantSPOSet");
+  CHECK(myname == targetstring);
+}
+} // namespace qmcplusplus

From 9a31e5077d25ef124aa4a528c3a5b42d53d318f3 Mon Sep 17 00:00:00 2001
From: William F Godoy <williamfgc@yahoo.com>
Date: Tue, 22 Aug 2023 14:46:44 -0400
Subject: [PATCH 07/17] Refactor BsplineSet and SplineC2C

Follow existing pattern for SplineC2C allowing for std::complex<T>
---
 .../BsplineFactory/BsplineSetT.h              | 249 ++++++
 .../BsplineFactory/SplineC2CT.cpp             | 800 ++++++++++++++++++
 .../BsplineFactory/SplineC2CT.h               | 236 ++++++
 src/QMCWaveFunctions/CMakeLists.txt           |   2 +-
 4 files changed, 1286 insertions(+), 1 deletion(-)
 create mode 100644 src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h
 create mode 100644 src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp
 create mode 100644 src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h

diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h b/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h
new file mode 100644
index 0000000000..8ef12b8524
--- /dev/null
+++ b/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h
@@ -0,0 +1,249 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2019 QMCPACK developers.
+//
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+/** @file BsplineSetT.h
+ *
+ * BsplineSetT is a SPOSetT derived class template and serves as a base class for B-spline SPO C2C/C2R/R2R implementations
+ */
+#ifndef QMCPLUSPLUS_BSPLINESETT_H // distinct from the guard of the non-template BsplineSet.h
+#define QMCPLUSPLUS_BSPLINESETT_H
+
+#include "QMCWaveFunctions/SPOSetT.h"
+#include "spline/einspline_engine.hpp"
+#include "spline/einspline_util.hpp"
+
+namespace qmcplusplus
+{
+/** BsplineSetT is the SPOSetT<T>-derived base class for the B-spline SPO implementations (SplineC2C, SplineC2R, SplineR2R flavors).
+ * Its derived template classes manage the storage and evaluation at given precision.
+ * BsplineSetT also implements a few fallback routines in case optimized implementation is not necessary in the derived class.
+ */
+template<class T>
+class BsplineSetT : public SPOSetT<T>
+{
+public:
+  using PosType     = typename SPOSetT<T>::PosType;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using HessVector  = typename SPOSetT<T>::HessVector;
+  using GGGVector   = typename SPOSetT<T>::GGGVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using HessMatrix  = typename SPOSetT<T>::HessMatrix;
+  using GGGMatrix   = typename SPOSetT<T>::GGGMatrix;
+
+  using value_type = typename SPOSetT<T>::ValueMatrix::value_type;
+  using grad_type  = typename SPOSetT<T>::GradMatrix::value_type;
+
+  // used in derived classes
+  using RealType  = typename SPOSetT<T>::RealType;
+  using ValueType = typename SPOSetT<T>::ValueType;
+
+  /// forwards my_name to SPOSetT<T>; the three indices start at 0, all other members are default-constructed
+  BsplineSetT(const std::string& my_name) : SPOSetT<T>(my_name), MyIndex(0), first_spo(0), last_spo(0) {}
+
+  virtual bool isComplex() const         = 0;
+  virtual std::string getKeyword() const = 0;
+
+  auto& getHalfG() const { return HalfG; } // read-only access to the HalfG member
+
+  /// size the per-orbital tables for n entries and reset BandIndexMap to the identity mapping
+  inline void init_base(int n)
+  {
+    kPoints.resize(n);
+    MakeTwoCopies.resize(n);
+    BandIndexMap.resize(n);
+    for (int i = 0; i < n; i++)
+      BandIndexMap[i] = i;
+  }
+
+  ///reorder kPoints/BandIndexMap so entries flagged in MakeTwoCopies come first; MakeTwoCopies itself is not reordered
+  int remap_kpoints()
+  {
+    std::vector<PosType> k_copy(kPoints); // untouched copy to read from while kPoints is overwritten
+    const int nk = kPoints.size();
+    int nCB      = 0;
+    //two pass: first the entries with MakeTwoCopies set, then the remaining ones
+    for (int i = 0; i < nk; ++i)
+    {
+      if (MakeTwoCopies[i])
+      {
+        kPoints[nCB]        = k_copy[i];
+        BandIndexMap[nCB++] = i; // new slot -> original index
+      }
+    }
+    int nRealBands = nCB; // second pass continues filling right after the flagged entries
+    for (int i = 0; i < nk; ++i)
+    {
+      if (!MakeTwoCopies[i])
+      {
+        kPoints[nRealBands]        = k_copy[i];
+        BandIndexMap[nRealBands++] = i;
+      }
+    }
+    return nCB; //return the number of complex bands
+  }
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const override = 0;
+
+  void setOrbitalSetSize(int norbs) override { this->OrbitalSetSize = norbs; }
+
+  /// fallback: one evaluateVGL call per particle in [first, last); results go to rows 0, 1, ... of the matrices
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override
+  {
+    for (int iat = first, i = 0; iat < last; ++iat, ++i) // iat: particle index, i: matrix row (starts at 0)
+    {
+      ValueVector v(logdet[i], logdet.cols()); // row i wrapped as a vector (presumably a non-owning view - confirm)
+      GradVector g(dlogdet[i], dlogdet.cols());
+      ValueVector l(d2logdet[i], d2logdet.cols());
+      this->evaluateVGL(P, iat, v, g, l);
+    }
+  }
+
+  /// multi-walker fallback: for each particle, wrap row i of every walker's matrices and call mw_evaluateVGL once
+  void mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                               const RefVectorWithLeader<ParticleSet>& P_list,
+                               int first,
+                               int last,
+                               const RefVector<ValueMatrix>& logdet_list,
+                               const RefVector<GradMatrix>& dlogdet_list,
+                               const RefVector<ValueMatrix>& d2logdet_list) const override
+  {
+    assert(this == &spo_list.getLeader());
+    const size_t nw = spo_list.size();
+    std::vector<ValueVector> mw_psi_v;
+    std::vector<GradVector> mw_dpsi_v;
+    std::vector<ValueVector> mw_d2psi_v;
+    RefVector<ValueVector> psi_v_list;
+    RefVector<GradVector> dpsi_v_list;
+    RefVector<ValueVector> d2psi_v_list;
+    mw_psi_v.reserve(nw); // reserved up front: the *_list containers refer to elements of these vectors,
+    mw_dpsi_v.reserve(nw); // so emplace_back below must not reallocate (assumes RefVector stores references)
+    mw_d2psi_v.reserve(nw);
+    psi_v_list.reserve(nw);
+    dpsi_v_list.reserve(nw);
+    d2psi_v_list.reserve(nw);
+
+    for (int iat = first, i = 0; iat < last; ++iat, ++i)
+    {
+      mw_psi_v.clear(); // containers are reused for every particle
+      mw_dpsi_v.clear();
+      mw_d2psi_v.clear();
+      psi_v_list.clear();
+      dpsi_v_list.clear();
+      d2psi_v_list.clear();
+
+      for (int iw = 0; iw < nw; iw++)
+      {
+        mw_psi_v.emplace_back(logdet_list[iw].get()[i], logdet_list[iw].get().cols());
+        mw_dpsi_v.emplace_back(dlogdet_list[iw].get()[i], dlogdet_list[iw].get().cols());
+        mw_d2psi_v.emplace_back(d2logdet_list[iw].get()[i], d2logdet_list[iw].get().cols());
+        psi_v_list.push_back(mw_psi_v.back());
+        dpsi_v_list.push_back(mw_dpsi_v.back());
+        d2psi_v_list.push_back(mw_d2psi_v.back());
+      }
+
+      this->mw_evaluateVGL(spo_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list);
+    }
+  }
+
+  /// fallback: one evaluateVGH call per particle in [first, last), filling rows 0, 1, ... of the matrices
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& grad_grad_logdet) override
+  {
+    for (int iat = first, i = 0; iat < last; ++iat, ++i)
+    {
+      ValueVector v(logdet[i], logdet.cols());
+      GradVector g(dlogdet[i], dlogdet.cols());
+      HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols());
+      this->evaluateVGH(P, iat, v, g, h);
+    }
+  }
+
+  /// fallback: one evaluateVGHGH call per particle in [first, last), filling rows 0, 1, ... of the matrices
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& grad_grad_logdet,
+                            GGGMatrix& grad_grad_grad_logdet) override
+  {
+    for (int iat = first, i = 0; iat < last; ++iat, ++i)
+    {
+      ValueVector v(logdet[i], logdet.cols());
+      GradVector g(dlogdet[i], dlogdet.cols());
+      HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols());
+      GGGVector gh(grad_grad_grad_logdet[i], grad_grad_grad_logdet.cols());
+      this->evaluateVGHGH(P, iat, v, g, h, gh);
+    }
+  }
+
+  /// intentionally empty: gradphi is left exactly as the caller passed it
+  void evaluateGradSource(const ParticleSet& P,
+                          int first,
+                          int last,
+                          const ParticleSet& source,
+                          int iat_src,
+                          GradMatrix& gradphi) override
+  {
+    //Do nothing, since Einsplines don't explicitly depend on ion positions.
+  }
+
+  /// intentionally empty: none of the three output matrices is written
+  void evaluateGradSource(const ParticleSet& P,
+                          int first,
+                          int last,
+                          const ParticleSet& source,
+                          int iat_src,
+                          GradMatrix& grad_phi,
+                          HessMatrix& grad_grad_phi,
+                          GradMatrix& grad_lapl_phi) override
+  {
+    //Do nothing, since Einsplines don't explicitly depend on ion positions.
+  }
+
+  template<class BSPLINESPO>
+  friend struct SplineSetReader;
+  friend struct BsplineReaderBase;
+
+
+protected:
+  static const int D = QMCTraits::DIM;
+  ///Index of this adoptor, when multiple adoptors are used for NUMA or distributed cases
+  size_t MyIndex;
+  ///first index of the SPOs this Spline handles
+  size_t first_spo;
+  ///last index of the SPOs this Spline handles
+  size_t last_spo;
+  ///sign bits at the G/2 boundaries
+  TinyVector<int, D> HalfG;
+  ///flags to unpack sin/cos
+  std::vector<bool> MakeTwoCopies;
+  /** kpoints for each unique orbitals.
+   * Note: for historic reason, this sign is opposite to what was used in DFT when orbitals were generated.
+   * Changing the sign requires updating all the evaluation code.
+   */
+  std::vector<PosType> kPoints;
+  ///remap splines to orbitals
+  aligned_vector<int> BandIndexMap;
+  ///band offsets used for communication
+  std::vector<int> offset;
+};
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp
new file mode 100644
index 0000000000..155dd8a220
--- /dev/null
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp
@@ -0,0 +1,800 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2019 QMCPACK developers.
+//
+// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#include <complex>
+#include "Concurrency/OpenMP.h"
+#include "SplineC2CT.h"
+#include "spline2/MultiBsplineEval.hpp"
+#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp"
+#include "CPU/math.hpp"
+
+namespace qmcplusplus
+{
+template<class T>
+SplineC2CT<T>::SplineC2CT(const SplineC2CT& in) = default; // compiler-generated member-wise copy, defined out of line
+
+template<class T>
+inline void SplineC2CT<T>::set_spline(SingleSplineType* spline_r,
+                                      SingleSplineType* spline_i,
+                                      int twist, // not used by this implementation
+                                      int ispline,
+                                      int level) // not used by this implementation
+{
+  SplineInst->copy_spline(spline_r, 2 * ispline);     // real part goes to the even slot
+  SplineInst->copy_spline(spline_i, 2 * ispline + 1); // imaginary part goes to the adjacent odd slot
+}
+
+template<class T>
+bool SplineC2CT<T>::read_splines(hdf_archive& h5f)
+{
+  std::ostringstream o;
+  o << "spline_" << this->MyIndex; // entry name, e.g. "spline_0" when MyIndex is 0
+  einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
+  return h5f.readEntry(bigtable, o.str().c_str()); // hands the readEntry result back to the caller
+}
+
+template<class T>
+bool SplineC2CT<T>::write_splines(hdf_archive& h5f)
+{
+  std::ostringstream o;
+  o << "spline_" << this->MyIndex; // same entry name as read_splines, e.g. "spline_0" when MyIndex is 0
+  einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
+  return h5f.writeEntry(bigtable, o.str().c_str()); // hands the writeEntry result back to the caller
+}
+
+template<class T>
+void SplineC2CT<T>::storeParamsBeforeRotation()
+{
+  const auto spline_ptr     = SplineInst->getSplinePtr();
+  const auto coefs_tot_size = spline_ptr->coefs_size;
+  coef_copy_                = std::make_shared<std::vector<RealType>>(coefs_tot_size); // fresh buffer, replaces any previous copy
+  // Snapshot all coefs_size coefficients; applyRotation() reads them back through coef_copy_.
+  std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin());
+}
+
+/*
+  ~~ Notes for rotation ~~
+  spl_coefs      = Raw pointer to spline coefficients
+  basis_set_size = Number of spline coefs per orbital
+  OrbitalSetSize = Number of orbitals (excluding padding)
+
+  spl_coefs has a complicated layout depending on dimensionality of splines.
+  Luckily, for our purposes, we can think of spl_coefs as pointing to a
+  matrix of size basis_set_size x Nsplines (orbital columns + padding; two
+  columns per orbital in the complex case), with the spline index adjacent in
+  memory. The orbital index is SIMD aligned and therefore may include padding.
+
+  As a result, due to SIMD alignment, Nsplines may be larger than the
+  actual number of splined orbitals. This means that in practice rot_mat
+  may be smaller than the number of 'columns' in the coefs array!
+
+      SplineR2R spl_coef layout:
+             ^         | sp1 | ... | spN | pad |
+             |         |=====|=====|=====|=====|
+             |         | c11 | ... | c1N | 0   |
+      basis_set_size   | c21 | ... | c2N | 0   |
+             |         | ... | ... | ... | 0   |
+             |         | cM1 | ... | cMN | 0   |
+             v         |=====|=====|=====|=====|
+                       <------ Nsplines ------>
+
+      SplineC2C spl_coef layout:
+             ^         | sp1_r | sp1_i |  ...  | spN_r | spN_i |  pad  |
+             |         |=======|=======|=======|=======|=======|=======|
+             |         | c11_r | c11_i |  ...  | c1N_r | c1N_i |   0   |
+      basis_set_size   | c21_r | c21_i |  ...  | c2N_r | c2N_i |   0   |
+             |         |  ...  |  ...  |  ...  |  ...  |  ...  |  ...  |
+             |         | cM1_r | cM1_i |  ...  | cMN_r | cMN_i |   0   |
+             v         |=======|=======|=======|=======|=======|=======|
+                       <------------------ Nsplines ------------------>
+
+  NB: For splines (typically) BasisSetSize >> OrbitalSetSize, so the spl_coefs
+  "matrix" is very tall and skinny.
+*/
+template<class T>
+void SplineC2CT<T>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy)
+{
+  // Replaces every coefficient row by (saved row) x rot_mat in complex arithmetic. SplineInst is a MultiBspline, see src/spline2/MultiBspline.hpp
+  const auto spline_ptr = SplineInst->getSplinePtr();
+  assert(spline_ptr != nullptr);
+  const auto spl_coefs      = spline_ptr->coefs;
+  const auto Nsplines       = spline_ptr->num_splines; // May include padding
+  const auto coefs_tot_size = spline_ptr->coefs_size;
+  const auto basis_set_size = coefs_tot_size / Nsplines;
+  assert(this->OrbitalSetSize == rot_mat.rows());
+  assert(this->OrbitalSetSize == rot_mat.cols());
+
+  // coef_copy_ is read below in both modes, so it must exist even when use_stored_copy is true
+  // (storeParamsBeforeRotation() is what allocates it).
+  assert(coef_copy_ != nullptr);
+  if (!use_stored_copy) // refresh the snapshot from the current coefficients before rotating
+    std::copy_n(spl_coefs, coefs_tot_size, coef_copy_->begin());
+
+  for (size_t i = 0; i < basis_set_size; i++) // size_t row index: Nsplines * i is then evaluated in size_t
+    for (int j = 0; j < this->OrbitalSetSize; j++)
+    {
+      // cur_elem points to the real component of the coefficient.
+      // Imag component is adjacent in memory.
+      const auto cur_elem = Nsplines * i + 2 * j;
+      RealType newval_r{0.};
+      RealType newval_i{0.};
+      for (auto k = 0; k < this->OrbitalSetSize; k++)
+      {
+        const auto index = Nsplines * i + 2 * k;
+        RealType zr      = (*coef_copy_)[index]; // z = saved coefficient of orbital k in row i
+        RealType zi      = (*coef_copy_)[index + 1];
+        RealType wr      = rot_mat[k][j].real(); // w = rot_mat(k, j)
+        RealType wi      = rot_mat[k][j].imag();
+        newval_r += zr * wr - zi * wi; // Re(z * w)
+        newval_i += zr * wi + zi * wr; // Im(z * w)
+      }
+      spl_coefs[cur_elem]     = newval_r;
+      spl_coefs[cur_elem + 1] = newval_i;
+    }
+}
+
+template<class T>
+inline void SplineC2CT<T>::assign_v(const PointType& r,
+                                    const vContainer_type& myV,
+                                    ValueVector& psi,
+                                    int first,
+                                    int last) const
+{
+  const auto kPointsSize = this->kPoints.size();
+  // protect last: clamp it to kPoints.size() so the loop never indexes past the k-point tables
+  last = last > kPointsSize ? kPointsSize : last;
+  // first/last count complex orbitals; myV holds (real, imag) pairs, hence the 2 * j indexing below
+  const RealType x = r[0], y = r[1], z = r[2];
+  const RealType* restrict kx = myKcart.data(0);
+  const RealType* restrict ky = myKcart.data(1);
+  const RealType* restrict kz = myKcart.data(2);
+#pragma omp simd
+  for (size_t j = first; j < last; ++j)
+  {
+    RealType s, c;
+    const RealType val_r = myV[2 * j];
+    const RealType val_i = myV[2 * j + 1];
+    qmcplusplus::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); // phase angle -k.r (assumes s = sin, c = cos)
+    psi[j + this->first_spo] = ComplexT(val_r * c - val_i * s, val_i * c + val_r * s); // (val_r + i val_i) * (c + i s)
+  }
+}
+
+template<class T>
+void SplineC2CT<T>::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi)
+{
+  const PointType& r = P.activeR(iat);
+  PointType ru(PrimLattice.toUnit_floor(r)); // r mapped through the lattice; ru feeds the spline, r feeds the phase
+
+#pragma omp parallel
+  {
+    int first, last; // outputs of FairDivideAligned: this thread's range in real (not complex) elements
+    // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type
+    FairDivideAligned(2 * psi.size(), getAlignment<T>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
+
+    spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last);
+    assign_v(r, myV, psi, first / 2, last / 2); // back to complex-orbital indices
+  }
+}
+
+template<class T>
+void SplineC2CT<T>::evaluateDetRatios(const VirtualParticleSet& VP,
+                                      ValueVector& psi,
+                                      const ValueVector& psiinv,
+                                      std::vector<ValueType>& ratios)
+{
+  const bool need_resize = ratios_private.rows() < VP.getTotalNum(); // NOTE(review): only rows are checked, not the thread-count columns
+
+#pragma omp parallel
+  {
+    int tid = omp_get_thread_num();
+    // initialize thread private ratios
+    if (need_resize)
+    {
+      if (tid == 0) // just like #pragma omp master, but one fewer call to the runtime
+        ratios_private.resize(VP.getTotalNum(), omp_get_num_threads());
+#pragma omp barrier
+    }
+    int first, last;
+    // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type
+    FairDivideAligned(2 * psi.size(), getAlignment<T>(), omp_get_num_threads(), tid, first, last);
+    const int first_cplx   = first / 2;
+    const auto kPointsSize = this->kPoints.size();
+    const int last_cplx    = kPointsSize < last / 2 ? kPointsSize : last / 2; // same clamp assign_v applies to its last
+
+    for (int iat = 0; iat < VP.getTotalNum(); ++iat)
+    {
+      const PointType& r = VP.activeR(iat);
+      PointType ru(PrimLattice.toUnit_floor(r));
+      // each thread fills its own slice of psi and stores one partial dot product per virtual particle
+      spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last);
+      assign_v(r, myV, psi, first_cplx, last_cplx); // NOTE(review): writes psi[j + first_spo]; the dot below starts at first_cplx - confirm first_spo == 0 here
+      ratios_private[iat][tid] = simd::dot(psi.data() + first_cplx, psiinv.data() + first_cplx, last_cplx - first_cplx);
+    }
+  }
+
+  // do the reduction manually
+  for (int iat = 0; iat < VP.getTotalNum(); ++iat)
+  {
+    ratios[iat] = ComplexT(0);
+    for (int tid = 0; tid < ratios_private.cols(); tid++) // sums every column of ratios_private
+      ratios[iat] += ratios_private[iat][tid];
+  }
+}
+
+/** assign_vgl: combine the spline buffers myV, myG, myH with the k-point phase and store
+   *  value, gradient and laplacian of orbitals [first, last) into psi, dpsi, d2psi */
+template<class T>
+inline void SplineC2CT<T>::assign_vgl(const PointType& r,
+                                      ValueVector& psi,
+                                      GradVector& dpsi,
+                                      ValueVector& d2psi,
+                                      int first,
+                                      int last) const
+{
+  // protect last
+  const auto kPointsSize = this->kPoints.size();
+  last                   = last > kPointsSize ? kPointsSize : last;
+
+  constexpr RealType zero(0);
+  constexpr RealType two(2);
+  const RealType g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
+                 g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
+                 g22 = PrimLattice.G(8);
+  const RealType x = r[0], y = r[1], z = r[2];
+  const RealType symGG[6] = {GGt[0], GGt[1] + GGt[3], GGt[2] + GGt[6], GGt[4], GGt[5] + GGt[7], GGt[8]}; // off-diagonal pairs of GGt summed
+
+  const RealType* restrict k0 = myKcart.data(0);
+  const RealType* restrict k1 = myKcart.data(1);
+  const RealType* restrict k2 = myKcart.data(2);
+
+  const RealType* restrict g0  = myG.data(0);
+  const RealType* restrict g1  = myG.data(1);
+  const RealType* restrict g2  = myG.data(2);
+  const RealType* restrict h00 = myH.data(0);
+  const RealType* restrict h01 = myH.data(1);
+  const RealType* restrict h02 = myH.data(2);
+  const RealType* restrict h11 = myH.data(3);
+  const RealType* restrict h12 = myH.data(4);
+  const RealType* restrict h22 = myH.data(5);
+
+#pragma omp simd
+  for (size_t j = first; j < last; ++j)
+  {
+    const size_t jr = j << 1; // index of the real part in the interleaved buffers
+    const size_t ji = jr + 1; // index of the imaginary part
+
+    const RealType kX    = k0[j];
+    const RealType kY    = k1[j];
+    const RealType kZ    = k2[j];
+    const RealType val_r = myV[jr];
+    const RealType val_i = myV[ji];
+
+    //phase
+    RealType s, c;
+    qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
+
+    //dot(PrimLattice.G,myG[j])
+    const RealType dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
+    const RealType dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
+    const RealType dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
+
+    const RealType dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
+    const RealType dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
+    const RealType dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
+
+    // \f$\nabla \psi_r + {\bf k}\psi_i\f$
+    const RealType gX_r = dX_r + val_i * kX;
+    const RealType gY_r = dY_r + val_i * kY;
+    const RealType gZ_r = dZ_r + val_i * kZ;
+    const RealType gX_i = dX_i - val_r * kX;
+    const RealType gY_i = dY_i - val_r * kY;
+    const RealType gZ_i = dZ_i - val_r * kZ;
+
+    const RealType lcart_r = SymTrace(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], symGG);
+    const RealType lcart_i = SymTrace(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], symGG);
+    const RealType lap_r   = lcart_r + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i);
+    const RealType lap_i   = lcart_i + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r);
+    const size_t psiIndex  = j + this->first_spo; // outputs are offset by first_spo
+    psi[psiIndex]          = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); // every output is (x_r + i x_i) * (c + i s)
+    dpsi[psiIndex][0]      = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r);
+    dpsi[psiIndex][1]      = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r);
+    dpsi[psiIndex][2]      = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r);
+    d2psi[psiIndex]        = ComplexT(c * lap_r - s * lap_i, c * lap_i + s * lap_r);
+  }
+}
+
+/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian
+   *  (loops over all last_spo - first_spo orbitals; takes no first/last range and does no clamping) */
+template<class T>
+inline void SplineC2CT<T>::assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+{
+  constexpr RealType two(2);
+  const RealType x = r[0], y = r[1], z = r[2];
+
+  const RealType* restrict k0 = myKcart.data(0);
+  const RealType* restrict k1 = myKcart.data(1);
+  const RealType* restrict k2 = myKcart.data(2);
+
+  const RealType* restrict g0 = myG.data(0);
+  const RealType* restrict g1 = myG.data(1);
+  const RealType* restrict g2 = myG.data(2);
+
+  const size_t N = this->last_spo - this->first_spo;
+#pragma omp simd
+  for (size_t j = 0; j < N; ++j)
+  {
+    const size_t jr = j << 1; // index of the real part in the interleaved buffers
+    const size_t ji = jr + 1; // index of the imaginary part
+
+    const RealType kX    = k0[j];
+    const RealType kY    = k1[j];
+    const RealType kZ    = k2[j];
+    const RealType val_r = myV[jr];
+    const RealType val_i = myV[ji];
+
+    //phase
+    RealType s, c;
+    qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
+
+    //myG is used as is here: unlike assign_vgl, no contraction with PrimLattice.G is applied
+    const RealType dX_r = g0[jr];
+    const RealType dY_r = g1[jr];
+    const RealType dZ_r = g2[jr];
+
+    const RealType dX_i = g0[ji];
+    const RealType dY_i = g1[ji];
+    const RealType dZ_i = g2[ji];
+
+    // \f$\nabla \psi_r + {\bf k}\psi_i\f$
+    const RealType gX_r = dX_r + val_i * kX;
+    const RealType gY_r = dY_r + val_i * kY;
+    const RealType gZ_r = dZ_r + val_i * kZ;
+    const RealType gX_i = dX_i - val_r * kX;
+    const RealType gY_i = dY_i - val_r * kY;
+    const RealType gZ_i = dZ_i - val_r * kZ;
+
+    const RealType lap_r = myL[jr] + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); // myL replaces the SymTrace of myH used in assign_vgl
+    const RealType lap_i = myL[ji] + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r);
+
+    const size_t psiIndex = j + this->first_spo;
+    psi[psiIndex]         = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r);
+    dpsi[psiIndex][0]     = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r);
+    dpsi[psiIndex][1]     = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r);
+    dpsi[psiIndex][2]     = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r);
+    d2psi[psiIndex]       = ComplexT(c * lap_r - s * lap_i, c * lap_i + s * lap_r);
+  }
+}
+
+template<class T>
+void SplineC2CT<T>::evaluateVGL(const ParticleSet& P,
+                                const int iat,
+                                ValueVector& psi,
+                                GradVector& dpsi,
+                                ValueVector& d2psi)
+{
+  const PointType& r = P.activeR(iat);
+  PointType ru(PrimLattice.toUnit_floor(r)); // ru feeds the spline evaluation, r feeds the phase in assign_vgl
+
+#pragma omp parallel
+  {
+    int first, last; // outputs of FairDivideAligned: this thread's range in real (not complex) elements
+    // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type
+    FairDivideAligned(2 * psi.size(), getAlignment<T>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
+
+    spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); // fills myV, myG, myH
+    assign_vgl(r, psi, dpsi, d2psi, first / 2, last / 2); // back to complex-orbital indices
+  }
+}
+
+template<class T>
+void SplineC2CT<T>::assign_vgh(const PointType& r,
+                               ValueVector& psi,
+                               GradVector& dpsi,
+                               HessVector& grad_grad_psi,
+                               int first,
+                               int last) const
+{
+  // Combines myV, myG, myH with the k-point phase into value, gradient and hessian of orbitals [first, last); protect last
+  const auto kPointsSize = this->kPoints.size();
+  last                   = last > kPointsSize ? kPointsSize : last;
+
+  const RealType g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
+                 g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
+                 g22 = PrimLattice.G(8);
+  const RealType x = r[0], y = r[1], z = r[2];
+
+  const RealType* restrict k0 = myKcart.data(0);
+  const RealType* restrict k1 = myKcart.data(1);
+  const RealType* restrict k2 = myKcart.data(2);
+
+  const RealType* restrict g0  = myG.data(0);
+  const RealType* restrict g1  = myG.data(1);
+  const RealType* restrict g2  = myG.data(2);
+  const RealType* restrict h00 = myH.data(0);
+  const RealType* restrict h01 = myH.data(1);
+  const RealType* restrict h02 = myH.data(2);
+  const RealType* restrict h11 = myH.data(3);
+  const RealType* restrict h12 = myH.data(4);
+  const RealType* restrict h22 = myH.data(5);
+
+#pragma omp simd
+  for (size_t j = first; j < last; ++j)
+  {
+    const size_t jr = j << 1; // real-part index; size_t like j, matching assign_vgl (no narrowing to int)
+    const size_t ji = jr + 1; // imaginary-part index
+
+    const RealType kX    = k0[j];
+    const RealType kY    = k1[j];
+    const RealType kZ    = k2[j];
+    const RealType val_r = myV[jr];
+    const RealType val_i = myV[ji];
+
+    //phase
+    RealType s, c;
+    qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
+
+    //dot(PrimLattice.G,myG[j])
+    const RealType dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
+    const RealType dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
+    const RealType dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
+
+    const RealType dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
+    const RealType dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
+    const RealType dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
+
+    // \f$\nabla \psi_r + {\bf k}\psi_i\f$
+    const RealType gX_r = dX_r + val_i * kX;
+    const RealType gY_r = dY_r + val_i * kY;
+    const RealType gZ_r = dZ_r + val_i * kZ;
+    const RealType gX_i = dX_i - val_r * kX;
+    const RealType gY_i = dY_i - val_r * kY;
+    const RealType gZ_i = dZ_i - val_r * kZ;
+
+    const size_t psiIndex = j + this->first_spo; // outputs are offset by first_spo
+    psi[psiIndex]         = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r);
+    dpsi[psiIndex][0]     = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r);
+    dpsi[psiIndex][1]     = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r);
+    dpsi[psiIndex][2]     = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r);
+    // Hessian, real parts: v_m_v term built from myH and two rows of PrimLattice.G, plus k_a * (g_b_i + d_b_i)
+    const RealType h_xx_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i);
+    const RealType h_xy_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i);
+    const RealType h_xz_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i);
+    const RealType h_yx_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i);
+    const RealType h_yy_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i);
+    const RealType h_yz_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i);
+    const RealType h_zx_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i);
+    const RealType h_zy_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i);
+    const RealType h_zz_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i);
+    // Hessian, imaginary parts: same v_m_v term on the imaginary buffers, minus k_a * (g_b_r + d_b_r)
+    const RealType h_xx_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r);
+    const RealType h_xy_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r);
+    const RealType h_xz_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r);
+    const RealType h_yx_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r);
+    const RealType h_yy_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r);
+    const RealType h_yz_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r);
+    const RealType h_zx_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r);
+    const RealType h_zy_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r);
+    const RealType h_zz_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r);
+    // all nine components are stored in the order xx, xy, xz, yx, yy, yz, zx, zy, zz, each times (c + i s)
+    grad_grad_psi[psiIndex][0] = ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r);
+    grad_grad_psi[psiIndex][1] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r);
+    grad_grad_psi[psiIndex][2] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r);
+    grad_grad_psi[psiIndex][3] = ComplexT(c * h_yx_r - s * h_yx_i, c * h_yx_i + s * h_yx_r);
+    grad_grad_psi[psiIndex][4] = ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r);
+    grad_grad_psi[psiIndex][5] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r);
+    grad_grad_psi[psiIndex][6] = ComplexT(c * h_zx_r - s * h_zx_i, c * h_zx_i + s * h_zx_r);
+    grad_grad_psi[psiIndex][7] = ComplexT(c * h_zy_r - s * h_zy_i, c * h_zy_i + s * h_zy_r);
+    grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r);
+  }
+}
+
+template<class T>
+void SplineC2CT<T>::evaluateVGH(const ParticleSet& P,
+                                const int iat,
+                                ValueVector& psi,
+                                GradVector& dpsi,
+                                HessVector& grad_grad_psi)
+{
+  const PointType& r = P.activeR(iat);
+  PointType ru(PrimLattice.toUnit_floor(r)); // ru feeds the spline evaluation, r feeds the phase in assign_vgh
+
+#pragma omp parallel
+  {
+    int first, last; // outputs of FairDivideAligned: this thread's range in real (not complex) elements
+    // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type
+    FairDivideAligned(2 * psi.size(), getAlignment<T>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
+
+    spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); // fills myV, myG, myH
+    assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2); // back to complex-orbital indices
+  }
+}
+
+template<class T>
+void SplineC2CT<T>::assign_vghgh(const PointType& r,
+                                 ValueVector& psi,
+                                 GradVector& dpsi,
+                                 HessVector& grad_grad_psi,
+                                 GGGVector& grad_grad_grad_psi,
+                                 int first,
+                                 int last) const
+{
+  // Combine spline outputs (myV, myG, myH, mygH) with the k-point phase and fill psi, dpsi, grad_grad_psi and
+  // grad_grad_grad_psi for k-point indices [first, last); a negative last means "all", larger values are clamped.
+  const auto kPointsSize = this->kPoints.size();
+  last                   = last < 0 ? kPointsSize : (last > kPointsSize ? kPointsSize : last);
+
+  const RealType g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
+                 g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
+                 g22 = PrimLattice.G(8);
+  const RealType x = r[0], y = r[1], z = r[2];
+
+  const RealType* restrict k0 = myKcart.data(0);
+  const RealType* restrict k1 = myKcart.data(1);
+  const RealType* restrict k2 = myKcart.data(2);
+
+  const RealType* restrict g0  = myG.data(0);
+  const RealType* restrict g1  = myG.data(1);
+  const RealType* restrict g2  = myG.data(2);
+  const RealType* restrict h00 = myH.data(0);
+  const RealType* restrict h01 = myH.data(1);
+  const RealType* restrict h02 = myH.data(2);
+  const RealType* restrict h11 = myH.data(3);
+  const RealType* restrict h12 = myH.data(4);
+  const RealType* restrict h22 = myH.data(5);
+
+  const RealType* restrict gh000 = mygH.data(0);
+  const RealType* restrict gh001 = mygH.data(1);
+  const RealType* restrict gh002 = mygH.data(2);
+  const RealType* restrict gh011 = mygH.data(3);
+  const RealType* restrict gh012 = mygH.data(4);
+  const RealType* restrict gh022 = mygH.data(5);
+  const RealType* restrict gh111 = mygH.data(6);
+  const RealType* restrict gh112 = mygH.data(7);
+  const RealType* restrict gh122 = mygH.data(8);
+  const RealType* restrict gh222 = mygH.data(9);
+
+// NOTE(review): an older note here said SIMD was not reliable yet and should be disabled, but the pragma is active.
+#pragma omp simd
+  for (size_t j = first; j < last; ++j)
+  {
+    int jr = j << 1;
+    int ji = jr + 1;
+
+    const RealType kX    = k0[j];
+    const RealType kY    = k1[j];
+    const RealType kZ    = k2[j];
+    const RealType val_r = myV[jr];
+    const RealType val_i = myV[ji];
+
+    // phase: sincos fills s and c for the angle -k.r (presumably sine and cosine)
+    RealType s, c;
+    qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
+
+    //dot(PrimLattice.G,myG[j])
+    const RealType dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
+    const RealType dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
+    const RealType dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
+
+    const RealType dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
+    const RealType dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
+    const RealType dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
+
+    // \f$\nabla \psi_r + {\bf k}\psi_i\f$
+    const RealType gX_r = dX_r + val_i * kX;
+    const RealType gY_r = dY_r + val_i * kY;
+    const RealType gZ_r = dZ_r + val_i * kZ;
+    const RealType gX_i = dX_i - val_r * kX;
+    const RealType gY_i = dY_i - val_r * kY;
+    const RealType gZ_i = dZ_i - val_r * kZ;
+
+    const size_t psiIndex = j + this->first_spo;
+    psi[psiIndex]         = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r);
+    dpsi[psiIndex][0]     = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r);
+    dpsi[psiIndex][1]     = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r);
+    dpsi[psiIndex][2]     = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r);
+
+    //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates.
+    const RealType f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02);
+    const RealType f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12);
+    const RealType f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22);
+    const RealType f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12);
+    const RealType f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22);
+    const RealType f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22);
+
+    const RealType f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02);
+    const RealType f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12);
+    const RealType f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22);
+    const RealType f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12);
+    const RealType f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22);
+    const RealType f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22);
+
+    const RealType h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r;
+    const RealType h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r;
+    const RealType h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r;
+    const RealType h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r;
+    const RealType h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r;
+    const RealType h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r;
+
+    const RealType h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i;
+    const RealType h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i;
+    const RealType h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i;
+    const RealType h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i;
+    const RealType h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i;
+    const RealType h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i;
+
+    grad_grad_psi[psiIndex][0] = ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r);
+    grad_grad_psi[psiIndex][1] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r);
+    grad_grad_psi[psiIndex][2] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r);
+    grad_grad_psi[psiIndex][3] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r);
+    grad_grad_psi[psiIndex][4] = ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r);
+    grad_grad_psi[psiIndex][5] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r);
+    grad_grad_psi[psiIndex][6] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r);
+    grad_grad_psi[psiIndex][7] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r);
+    grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r);
+
+    //These are the real and imaginary components of the third SPO derivative.  _xxx denotes
+    // third derivative w.r.t. x, _xyz, a derivative with respect to x,y, and z, and so on.
+
+    const RealType f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                          gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02);
+    const RealType f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                          gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12);
+    const RealType f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                          gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22);
+    const RealType f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                          gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12);
+    const RealType f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                          gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22);
+    const RealType f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                          gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22);
+    const RealType f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                          gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12);
+    const RealType f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                          gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22);
+    const RealType f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                          gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22);
+    const RealType f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                          gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22);
+
+    const RealType f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                          gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02);
+    const RealType f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                          gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12);
+    const RealType f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                          gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22);
+    const RealType f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                          gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12);
+    const RealType f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                          gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22);
+    const RealType f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                          gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22);
+    const RealType f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                          gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12);
+    const RealType f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                          gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22);
+    const RealType f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                          gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22);
+    const RealType f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                          gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22);
+
+    //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r))
+    const RealType gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i;
+    const RealType gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r;
+    const RealType gh_xxy_r =
+        f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i;
+    const RealType gh_xxy_i =
+        f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r;
+    const RealType gh_xxz_r =
+        f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i;
+    const RealType gh_xxz_i =
+        f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r;
+    const RealType gh_xyy_r =
+        f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i;
+    const RealType gh_xyy_i =
+        f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r;
+    const RealType gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) -
+        (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i;
+    const RealType gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) -
+        (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r;
+    const RealType gh_xzz_r =
+        f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i;
+    const RealType gh_xzz_i =
+        f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r;
+    const RealType gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i;
+    const RealType gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r;
+    const RealType gh_yyz_r =
+        f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i;
+    const RealType gh_yyz_i =
+        f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r;
+    const RealType gh_yzz_r =
+        f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i;
+    const RealType gh_yzz_i =
+        f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r;
+    const RealType gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i;
+    const RealType gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r;
+
+    grad_grad_grad_psi[psiIndex][0][0] = ComplexT(c * gh_xxx_r - s * gh_xxx_i, c * gh_xxx_i + s * gh_xxx_r);
+    grad_grad_grad_psi[psiIndex][0][1] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r);
+    grad_grad_grad_psi[psiIndex][0][2] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r);
+    grad_grad_grad_psi[psiIndex][0][3] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r);
+    grad_grad_grad_psi[psiIndex][0][4] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r);
+    grad_grad_grad_psi[psiIndex][0][5] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
+    grad_grad_grad_psi[psiIndex][0][6] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r);
+    grad_grad_grad_psi[psiIndex][0][7] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
+    grad_grad_grad_psi[psiIndex][0][8] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r);
+
+    grad_grad_grad_psi[psiIndex][1][0] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r);
+    grad_grad_grad_psi[psiIndex][1][1] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r);
+    grad_grad_grad_psi[psiIndex][1][2] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
+    grad_grad_grad_psi[psiIndex][1][3] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r);
+    grad_grad_grad_psi[psiIndex][1][4] = ComplexT(c * gh_yyy_r - s * gh_yyy_i, c * gh_yyy_i + s * gh_yyy_r);
+    grad_grad_grad_psi[psiIndex][1][5] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r);
+    grad_grad_grad_psi[psiIndex][1][6] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
+    grad_grad_grad_psi[psiIndex][1][7] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r);
+    grad_grad_grad_psi[psiIndex][1][8] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r);
+
+    grad_grad_grad_psi[psiIndex][2][0] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r);
+    grad_grad_grad_psi[psiIndex][2][1] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
+    grad_grad_grad_psi[psiIndex][2][2] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r);
+    grad_grad_grad_psi[psiIndex][2][3] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
+    grad_grad_grad_psi[psiIndex][2][4] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r);
+    grad_grad_grad_psi[psiIndex][2][5] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r);
+    grad_grad_grad_psi[psiIndex][2][6] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r);
+    grad_grad_grad_psi[psiIndex][2][7] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r);
+    grad_grad_grad_psi[psiIndex][2][8] = ComplexT(c * gh_zzz_r - s * gh_zzz_i, c * gh_zzz_i + s * gh_zzz_r);
+  }
+}
+
+template<class T>
+void SplineC2CT<T>::evaluateVGHGH(const ParticleSet& P,
+                                  const int iat,
+                                  ValueVector& psi,
+                                  GradVector& dpsi,
+                                  HessVector& grad_grad_psi,
+                                  GGGVector& grad_grad_grad_psi)
+{
+  const PointType& r = P.activeR(iat);
+  PointType ru(PrimLattice.toUnit_floor(r));
+#pragma omp parallel
+  {
+    int first, last;
+    // psi is complex but the buffers hold RealType (re, im) pairs: split 2x entries, aligned in RealType units
+    FairDivideAligned(2 * psi.size(), getAlignment<RealType>(), omp_get_num_threads(), omp_get_thread_num(), first,
+                      last);
+    spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last);
+    assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, last / 2);
+  }
+}
+
+template class SplineC2CT<std::complex<float>>;
+template class SplineC2CT<std::complex<double>>;
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h
new file mode 100644
index 0000000000..db93e72a43
--- /dev/null
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h
@@ -0,0 +1,236 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2019 QMCPACK developers.
+//
+// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+/** @file
+ *
+ * class to handle complex splines to complex orbitals with splines of arbitrary precision
+ */
+#ifndef QMCPLUSPLUS_SPLINE_C2CT_H
+#define QMCPLUSPLUS_SPLINE_C2CT_H
+
+#include <complex>
+#include <memory>
+#include "BsplineSetT.h"
+#include "OhmmsSoA/VectorSoaContainer.h"
+#include "spline2/MultiBspline.hpp"
+#include "Utilities/FairDivide.h"
+
+namespace qmcplusplus
+{
+/** class to match a complex spline with complex SPOs (BsplineSetT<T>::ValueType)
+ * @tparam T complex orbital value type (explicitly instantiated for std::complex<float> and std::complex<double>)
+ *
+ * Requires temporary storage and multiplication of phase vectors
+ * The internal storage of complex spline coefficients uses double sized real arrays of RealType, aligned and padded.
+ * All the output orbitals are complex.
+ */
+template<class T>
+class SplineC2CT : public BsplineSetT<T>
+{
+public:
+  using RealType         = typename BsplineSetT<T>::RealType;
+  using SplineType       = typename bspline_traits<RealType, 3>::SplineType;
+  using BCType           = typename bspline_traits<RealType, 3>::BCType;
+  using DataType         = RealType;
+  using PointType        = TinyVector<RealType, 3>;
+  using SingleSplineType = UBspline_3d_d;
+
+  // types for evaluation results
+  // only works for Complex
+  using ComplexT    = T;
+  using ValueType   = typename BsplineSetT<T>::ValueType;
+  using GGGVector   = typename BsplineSetT<T>::GGGVector;
+  using GradVector  = typename BsplineSetT<T>::GradVector;
+  using HessVector  = typename BsplineSetT<T>::HessVector;
+  using ValueVector = typename BsplineSetT<T>::ValueVector;
+  using ValueMatrix = typename BsplineSetT<T>::ValueMatrix;
+
+  using vContainer_type  = Vector<RealType, aligned_allocator<RealType>>;
+  using gContainer_type  = VectorSoaContainer<RealType, 3>;
+  using hContainer_type  = VectorSoaContainer<RealType, 6>;
+  using ghContainer_type = VectorSoaContainer<RealType, 10>;
+
+  /// construct with the object name, which is passed on to BsplineSetT<T>
+  SplineC2CT(const std::string& my_name) : BsplineSetT<T>(my_name) {}
+
+  SplineC2CT(const SplineC2CT& in);
+  virtual std::string getClassName() const final { return "SplineC2C"; }
+  virtual std::string getKeyword() const final { return "SplineC2C"; }
+  bool isComplex() const final { return true; }
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const final { return std::make_unique<SplineC2CT<T>>(*this); }
+
+  bool isRotationSupported() const final { return true; }
+
+  /// Store an original copy of the spline coefficients for orbital rotation
+  void storeParamsBeforeRotation() final;
+
+  /*
+    Implements orbital rotations via [1,2].
+    Should be called by RotatedSPOs::apply_rotation()
+    This implementation requires that NSPOs > Nelec. In other words,
+    if you want to run a orbopt wfn, you must include some virtual orbitals!
+    Some results (using older Berkeley branch) were published in [3].
+    [1] Filippi & Fahy, JCP 112, (2000)
+    [2] Toulouse & Umrigar, JCP 126, (2007)
+    [3] Townsend et al., PRB 102, (2020)
+  */
+  void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) final;
+
+  inline void resizeStorage(size_t n, size_t nvals) // nvals is not used in this body
+  {
+    this->init_base(n);
+    size_t npad = getAlignedSize<RealType>(2 * n); // buffers hold RealType (re, im) pairs, so pad in RealType units
+    myV.resize(npad);
+    myG.resize(npad);
+    myL.resize(npad);
+    myH.resize(npad);
+    mygH.resize(npad);
+  }
+
+  void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); }
+
+  void gather_tables(Communicate* comm)
+  {
+    if (comm->size() == 1)
+      return;
+    const int Nbands      = this->kPoints.size();
+    const int Nbandgroups = comm->size();
+
+    auto& offset = this->offset;
+    offset.resize(Nbandgroups + 1, 0);
+    FairDivideLow(Nbands, Nbandgroups, offset);
+    for (size_t ib = 0; ib < offset.size(); ib++)
+      offset[ib] *= 2; // band offsets are doubled before gatherv (two real entries per complex band)
+    gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, offset);
+  }
+
+  template<typename GT, typename BCT>
+  void create_spline(GT& xyz_g, BCT& xyz_bc)
+  {
+    resize_kpoints();
+    SplineInst = std::make_shared<MultiBspline<RealType>>(); // must match the declared type of SplineInst
+    SplineInst->create(xyz_g, xyz_bc, myV.size());
+    app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated "
+              << "for the coefficients in 3D spline orbital representation" << std::endl;
+  }
+
+  inline void flush_zero() { SplineInst->flush_zero(); }
+
+  /** remap kPoints to pack the double copy */
+  inline void resize_kpoints()
+  {
+    const auto& kPoints = this->kPoints;
+    const size_t nk     = kPoints.size();
+    mKK.resize(nk);
+    myKcart.resize(nk);
+    for (size_t i = 0; i < nk; ++i)
+    {
+      mKK[i]     = -dot(kPoints[i], kPoints[i]);
+      myKcart(i) = kPoints[i];
+    }
+  }
+
+  void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level);
+
+  bool read_splines(hdf_archive& h5f);
+
+  bool write_splines(hdf_archive& h5f);
+
+  void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const;
+
+  void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override;
+
+  void evaluateDetRatios(const VirtualParticleSet& VP,
+                         ValueVector& psi,
+                         const ValueVector& psiinv,
+                         std::vector<ValueType>& ratios) override;
+
+  /** assign_vgl
+   */
+  void assign_vgl(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last)
+      const;
+
+  /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian
+   */
+  void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
+
+  void evaluateVGL(const ParticleSet& P,
+                   const int iat,
+                   ValueVector& psi,
+                   GradVector& dpsi,
+                   ValueVector& d2psi) override;
+
+  void assign_vgh(const PointType& r,
+                  ValueVector& psi,
+                  GradVector& dpsi,
+                  HessVector& grad_grad_psi,
+                  int first,
+                  int last) const;
+
+  void evaluateVGH(const ParticleSet& P,
+                   const int iat,
+                   ValueVector& psi,
+                   GradVector& dpsi,
+                   HessVector& grad_grad_psi) override;
+
+  void assign_vghgh(const PointType& r,
+                    ValueVector& psi,
+                    GradVector& dpsi,
+                    HessVector& grad_grad_psi,
+                    GGGVector& grad_grad_grad_psi,
+                    int first = 0,
+                    int last  = -1) const;
+
+  void evaluateVGHGH(const ParticleSet& P,
+                     const int iat,
+                     ValueVector& psi,
+                     GradVector& dpsi,
+                     HessVector& grad_grad_psi,
+                     GGGVector& grad_grad_grad_psi) override;
+
+  template<class BSPLINESPO>
+  friend struct SplineSetReader;
+  friend struct BsplineReaderBase;
+
+protected:
+  /// intermediate result vectors
+  vContainer_type myV;
+  vContainer_type myL;
+  gContainer_type myG;
+  hContainer_type myH;
+  ghContainer_type mygH;
+
+private:
+  ///primitive cell
+  CrystalLattice<RealType, 3> PrimLattice;
+  ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian
+  Tensor<RealType, 3> GGt;
+  ///multi bspline set
+  std::shared_ptr<MultiBspline<RealType>> SplineInst;
+
+  ///Copy of original splines for orbital rotation
+  std::shared_ptr<std::vector<RealType>> coef_copy_;
+
+  vContainer_type mKK;                     ///< -dot(k, k) per k-point, filled by resize_kpoints()
+  VectorSoaContainer<RealType, 3> myKcart; ///< copy of kPoints, filled by resize_kpoints()
+
+  ///thread private ratios for reduction when using nested threading, numVP x numThread
+  Matrix<ComplexT> ratios_private;
+};
+
+extern template class SplineC2CT<std::complex<float>>;
+extern template class SplineC2CT<std::complex<double>>;
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt
index 959e7743ae..6ad7f54cb5 100644
--- a/src/QMCWaveFunctions/CMakeLists.txt
+++ b/src/QMCWaveFunctions/CMakeLists.txt
@@ -100,7 +100,7 @@ if(OHMMS_DIM MATCHES 3)
         BsplineFactory/BsplineReaderBase.cpp)
     set(FERMION_OMPTARGET_SRCS Fermion/DiracDeterminantBatched.cpp Fermion/MultiDiracDeterminant.2.cpp)
     if(QMC_COMPLEX)
-      set(FERMION_SRCS ${FERMION_SRCS} EinsplineSpinorSetBuilder.cpp BsplineFactory/SplineC2C.cpp)
+      set(FERMION_SRCS ${FERMION_SRCS} EinsplineSpinorSetBuilder.cpp BsplineFactory/SplineC2C.cpp BsplineFactory/SplineC2CT.cpp)
       set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS} BsplineFactory/SplineC2COMPTarget.cpp)
     else(QMC_COMPLEX)
       set(FERMION_SRCS ${FERMION_SRCS} BsplineFactory/createRealSingle.cpp BsplineFactory/createRealDouble.cpp

From ee065a887c244e69fbdb82e7b46e159abf66ff14 Mon Sep 17 00:00:00 2001
From: Philip Fackler <facklerpw@ornl.gov>
Date: Wed, 16 Aug 2023 12:08:51 -0400
Subject: [PATCH 08/17] PWOrbitalSetT and PWBasisT

Add FullRealType in SPOSet and RotatedSPOs

Move generic definition after specialization

add implicit implementations

Fix some errors

initial commit of templated PWOrbitalSetT that compiles

cleanup

templatize PWBasis as well, as it is a dependency

remove inaccurate comment

remove polluted commit
---
 src/QMCWaveFunctions/CMakeLists.txt           |   4 +-
 src/QMCWaveFunctions/PlaneWave/PWBasisT.cpp   | 197 ++++++++++
 src/QMCWaveFunctions/PlaneWave/PWBasisT.h     | 343 ++++++++++++++++++
 .../PlaneWave/PWOrbitalSetT.cpp               | 145 ++++++++
 .../PlaneWave/PWOrbitalSetT.h                 | 130 +++++++
 5 files changed, 817 insertions(+), 2 deletions(-)
 create mode 100644 src/QMCWaveFunctions/PlaneWave/PWBasisT.cpp
 create mode 100644 src/QMCWaveFunctions/PlaneWave/PWBasisT.h
 create mode 100644 src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp
 create mode 100644 src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h

diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt
index 6ad7f54cb5..1bbfc0520f 100644
--- a/src/QMCWaveFunctions/CMakeLists.txt
+++ b/src/QMCWaveFunctions/CMakeLists.txt
@@ -111,9 +111,9 @@ if(OHMMS_DIM MATCHES 3)
   endif(HAVE_EINSPLINE)
 
   # plane wave SPO
-  set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWBasis.cpp PlaneWave/PWParameterSet.cpp PlaneWave/PWOrbitalBuilder.cpp)
+  set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWBasis.cpp PlaneWave/PWBasisT.cpp PlaneWave/PWParameterSet.cpp PlaneWave/PWOrbitalBuilder.cpp)
   if(QMC_COMPLEX)
-    set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWOrbitalSet.cpp)
+    set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWOrbitalSet.cpp PlaneWave/PWOrbitalSetT.cpp)
   else()
     set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWRealOrbitalSet.cpp PlaneWave/PWRealOrbitalSetT.cpp)
   endif(QMC_COMPLEX)
diff --git a/src/QMCWaveFunctions/PlaneWave/PWBasisT.cpp b/src/QMCWaveFunctions/PlaneWave/PWBasisT.cpp
new file mode 100644
index 0000000000..fe00655309
--- /dev/null
+++ b/src/QMCWaveFunctions/PlaneWave/PWBasisT.cpp
@@ -0,0 +1,197 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+/** @file PWBasisT.cpp
+ * @brief Definition of member functions of Plane-wave basis set
+ */
+#include "PWBasisT.h"
+
+namespace qmcplusplus
+{
+/** Load the reduced G-vectors from an HDF5 archive and set up the basis for the lattice.
+ * pwname is accepted but not used; pwmultname only appears in the log message.
+ * @return the number of plane waves
+ */
+template<class T>
+int PWBasisT<T>::readbasis(hdf_archive& h5basisgroup,
+                           RealType ecutoff,
+                           const ParticleLayout& lat,
+                           const std::string& pwname,
+                           const std::string& pwmultname,
+                           bool resizeContainer)
+{
+  // Keep private copies of the cutoff and of the lattice used for the Cartesian conversion.
+  ecut    = ecutoff;
+  Lattice = lat;
+  app_log() << "  PWBasisT<T>::" << pwmultname << " is found " << std::endl;
+  // The reduced G-vectors are always read from this fixed dataset path.
+  h5basisgroup.read(gvecs, "/electrons/kpoint_0/gvectors");
+  NumPlaneWaves = std::max(gvecs.size(), kplusgvecs_cart.size());
+  if (NumPlaneWaves == 0)
+  {
+    app_error() << "  PWBasisT<T>::readbasis Basis is missing. Abort " << std::endl;
+    abort(); //FIX_ABORT
+  }
+  // Derive the Cartesian vectors from the reduced ones unless they are already present.
+  if (kplusgvecs_cart.empty())
+  {
+    kplusgvecs_cart.resize(NumPlaneWaves);
+    for (int ig = 0; ig < NumPlaneWaves; ++ig)
+      kplusgvecs_cart[ig] = Lattice.k_cart(gvecs[ig]);
+  }
+  // reset() runs trimforecut(), which shifts the vectors by the twist and records in
+  // inputmap how the input ordering maps onto the basis ordering (needed later to place
+  // the orbital coefficients), and then sizes the work arrays Z, Zv and phi.
+  if (resizeContainer)
+    reset();
+  // Report the basis size to the caller.
+  return NumPlaneWaves;
+}
+
+template<class T>
+void PWBasisT<T>::setTwistAngle(const PosType& tang)
+{
+  // Nothing to rebuild when the basis has been set up (maxmaxg != 0) for this very twist.
+  const PosType delta = twist - tang;
+  if (maxmaxg != 0 && dot(delta, delta) < std::numeric_limits<RealType>::epsilon())
+    return;
+  twist = tang;
+  reset();
+}
+
+template<class T>
+void PWBasisT<T>::reset()
+{
+  trimforecut(); // rebuilds the k+G lists and inputmap for the current twist
+  // Size the evaluation buffers: per plane wave, Z has 2 + DIM columns, Zv and phi one entry.
+  phi.resize(NumPlaneWaves);
+  Zv.resize(NumPlaneWaves);
+  Z.resize(NumPlaneWaves, 2 + DIM);
+}
+
+/** Rebuild the k+G lists for the current twist (no ecut trimming is applied at present).
+ *
+ * Fills gvecs, kplusgvecs_cart, minusModKplusG2 and inputmap (input index -> basis index).
+ */
+template<class T>
+void PWBasisT<T>::trimforecut()
+{
+  //Convert the twist angle to Cartesian coordinates.
+  twist_cart = Lattice.k_cart(twist);
+  inputmap.resize(NumPlaneWaves);
+  app_log() << "  PWBasisT<T>::TwistAngle (unit) =" << twist << std::endl;
+  app_log() << "  PWBasisT<T>::TwistAngle (cart) =" << twist_cart << std::endl;
+  app_log() << "  PWBasisT<T>::trimforecut NumPlaneWaves (before) =" << NumPlaneWaves << std::endl;
+  std::vector<GIndex_t> gvecCopy(gvecs);
+  std::vector<PosType> gcartCopy(kplusgvecs_cart);
+  gvecs.clear();
+  kplusgvecs_cart.clear();
+  minusModKplusG2.clear(); //refilled below; entries from an earlier call must not stay at the front
+  minusModKplusG2.reserve(NumPlaneWaves);
+  //  RealType kcutoff2 = 2.0*ecut; //std::sqrt(2.0*ecut);
+  int ngIn = NumPlaneWaves;
+  for (int ig = 0, newig = 0; ig < ngIn; ig++)
+  {
+    //NOTE(review): on a repeated call gcartCopy already holds vectors shifted by the earlier twist.
+    PosType tempvec = gcartCopy[ig] + twist_cart;
+    RealType mod2   = dot(tempvec, tempvec);
+
+    // Keep all the g-vectors
+    // The cutoff energy is not stored in the HDF file now.
+    // Is truncating the gvectors to a spherical shell necessary?
+    if (true)
+    {
+      gvecs.push_back(gvecCopy[ig]);
+      kplusgvecs_cart.push_back(tempvec);
+      minusModKplusG2.push_back(-mod2);
+      //Remember which position in the HDF5 file this came from...for coefficients
+      inputmap[ig] = newig++;
+    }
+#if 0
+    if(mod2<=kcutoff2)
+    {
+      gvecs.push_back(gvecCopy[ig]);
+      kplusgvecs_cart.push_back(tempvec);
+      minusModKplusG2.push_back(-mod2);
+      //Remember which position in the HDF5 file this came from...for coefficients
+      inputmap[ig] = newig++;
+    }
+    else
+    {
+      inputmap[ig] = -1; //Temporary value...need to know final NumPlaneWaves.
+      NumPlaneWaves--;
+    }
+#endif
+  }
+#if defined(PWBASIS_USE_RECURSIVE) // same switch that selects the recursive evaluate() in PWBasisT.h
+  //Store the maximum number of translations, within ecut, of any reciprocal cell vector.
+  for (int ig = 0; ig < NumPlaneWaves; ig++)
+    for (int i = 0; i < OHMMS_DIM; i++)
+      if (std::abs(gvecs[ig][i]) > maxg[i])
+        maxg[i] = std::abs(gvecs[ig][i]);
+  gvecs_shifted.resize(NumPlaneWaves);
+  for (int ig = 0; ig < NumPlaneWaves; ig++)
+    gvecs_shifted[ig] = gvecs[ig] + maxg;
+  maxmaxg = std::max(maxg[0], std::max(maxg[1], maxg[2]));
+  //changes the order???? ok
+  C.resize(3, 2 * maxmaxg + 2);
+#else
+  maxmaxg = 1;
+#endif
+  ////    for(int ig=0, newig=0; ig<ngIn; ig++) {
+  //      //Check size of this g-vector
+  //      PosType tempvec = Lattice.k_cart(gvecCopy[ig]+twist);
+  //      RealType mod2 = dot(tempvec,tempvec);
+  //      if(mod2<=kcutoff2){ //Keep this element
+  //        gvecs.push_back(gvecCopy[ig]);
+  //        kplusgvecs_cart.push_back(tempvec);
+  //        minusModKplusG2.push_back(-mod2);
+  //        //Remember which position in the HDF5 file this came from...for coefficients
+  //        inputmap[ig] = newig++;
+  ////#if !defined(QMC_COMPLEX)
+  ////        //Build the negative vector. See comment at declaration (above) for details.
+  ////        if(gvecCopy[ig][0] < 0)
+  ////          negative.push_back(0);
+  ////        else if(gvecCopy[ig][0] > 0)
+  ////          negative.push_back(1);
+  ////        else { //gx == 0, test gy
+  ////          if(gvecCopy[ig][1] < 0)
+  ////            negative.push_back(0);
+  ////          else if(gvecCopy[ig][1] > 0)
+  ////            negative.push_back(1);
+  ////          else { //gx == gy == 0; test gz. If gz==0 also, take negative=1 (arbitrary)
+  ////            if(gvecCopy[ig][2] < 0)
+  ////              negative.push_back(0);
+  ////            else
+  ////              negative.push_back(1);
+  ////          }
+  ////        }
+  ////#endif
+  //      } else {
+  //        inputmap[ig] = -1; //Temporary value...need to know final NumPlaneWaves.
+  //        NumPlaneWaves--;
+  //      }
+  //    }
+  //Finalize the basis. Fix temporary values of inputmap.
+  //for(int ig=0; ig<inputmap.size(); ig++)
+  //  if(inputmap[ig] == -1)
+  //    inputmap[ig] = NumPlaneWaves; //For dumping coefficients of PWs>ecut
+  app_log() << "                       NumPlaneWaves (after)  =" << NumPlaneWaves << std::endl;
+}
+// Explicit instantiations: only the complex value types are compiled here;
+// the real ones (PWBasisT<double>, PWBasisT<float>) are not instantiated.
+template class PWBasisT<std::complex<double>>;
+template class PWBasisT<std::complex<float>>;
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/PlaneWave/PWBasisT.h b/src/QMCWaveFunctions/PlaneWave/PWBasisT.h
new file mode 100644
index 0000000000..54592b9ba7
--- /dev/null
+++ b/src/QMCWaveFunctions/PlaneWave/PWBasisT.h
@@ -0,0 +1,343 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+/** @file PWBasisT.h
+ * @brief Declaration of Plane-wave basis set
+ */
+#ifndef QMCPLUSPLUS_PLANEWAVEBASIST_BLAS_H // kept distinct from the guard of the non-templated PWBasis.h
+#define QMCPLUSPLUS_PLANEWAVEBASIST_BLAS_H
+
+#include "Configuration.h"
+#include "Particle/ParticleSet.h"
+#include "Message/Communicate.h"
+#include "type_traits/complex_help.hpp"
+#include "CPU/e2iphi.h"
+#include "hdf/hdf_archive.h"
+
+/** If defined, use recursive method to build the basis set for each position
+ *
+ * performance improvement is questionable: load vs sin/cos
+ */
+//#define PWBASIS_USE_RECURSIVE
+
+namespace qmcplusplus
+{
+/** Plane-wave basis set templated on the (complex) value type T
+ *
+ * Holds the k+G vectors and evaluates the plane waves and their derivatives into Zv / Z.
+ * Rewrite of PlaneWaveBasis to utilize blas II or III; supports more general input tags.
+ */
+template<typename T>
+class PWBasisT : public QMCTraits
+{
+public:
+  using RealType = typename RealAlias_impl<T>::value_type;
+  using ComplexType = T;
+  using PosType = TinyVector<RealType, DIM>;
+  using IndexType = QMCTraits::IndexType;
+  using ParticleLayout = ParticleSet::ParticleLayout;
+  using GIndex_t       = TinyVector<IndexType, 3>;
+
+private:
+  ///max of maxg[i]; setTwistAngle() also treats a non-zero value as "basis already built"
+  int maxmaxg;
+  //Need to store the maximum translation in each dimension to use recursive PW generation.
+  GIndex_t maxg;
+  //The PlaneWave data - keep all of these strictly private to prevent inconsistencies.
+  RealType ecut;
+  ///twist angle in reduced
+  PosType twist;
+  ///twist angle in cartesian
+  PosType twist_cart; //set by trimforecut() as Lattice.k_cart(twist)
+
+  ///gvecs in reduced coordinates
+  std::vector<GIndex_t> gvecs;
+  ///Reduced coordinates with offset gvecs_shifted[][idim]=gvecs[][idim]+maxg[idim]
+  std::vector<GIndex_t> gvecs_shifted;
+
+  std::vector<RealType> minusModKplusG2; //-|k+G|^2 per plane wave, filled by trimforecut()
+  std::vector<PosType> kplusgvecs_cart; //Cartesian.
+
+  Matrix<ComplexType> C; //recursion factors used by the PWBASIS_USE_RECURSIVE code path
+  //Real wavefunctions here. Now the basis states are cos(Gr) or sin(Gr), not exp(iGr)
+  //We need a way of switching between them for G -> -G, otherwise the
+  //determinant will have multiple rows that are equal (to within a constant factor)
+  //of others, giving a zero determinant. For this, we build a vector (negative) which
+  //stores whether a vector is "+" or "-" (with some criterion, to be defined). We
+  //then switch from cos() to sin() based on the value of this input.
+  std::vector<int> negative;
+
+public:
+  //enumeration for the value, laplacian, gradients and size
+  enum
+  {
+    PW_VALUE,
+    PW_LAP,
+    PW_GRADX,
+    PW_GRADY,
+    PW_GRADZ,
+    PW_MAXINDEX
+  };
+
+  Matrix<ComplexType> Z;
+
+  Vector<ComplexType> Zv;
+  /* inputmap (declared below, after phi) is used for a memory efficient way of
+   *
+   * importing the basis-set and coefficients when the desired energy cutoff may be
+   * lower than that represented by all data in the wavefunction input file.
+   * The steps taken are:
+   *  - Read all basis data.
+   *  - Create map. inputmap[i] = j; j is correct PW index, i is input coef index.
+   *    For basis elements outside cutoff, inputmap[i] = gvecs.size();
+   *  - Coefficients are in same order as PWs in inputfile => simply file into
+   *    storage matrix using the map as the input. All excess coefficients are
+   *    put into [gvecs.size()] and not used. i.e. coefs need to be allocated 1 higher.
+   * Such an approach is not needed for Gamma-point only calculations because the
+   * basis is spherically ordered. However, when a twist-angle is used, the "sphere"
+   * of allowed planewaves is shifted.
+   */
+
+  Vector<RealType> phi;
+
+  std::vector<int> inputmap;
+
+  ///total number of basis functions
+  int NumPlaneWaves;
+
+  ///local copy of Lattice
+  ParticleLayout Lattice;
+
+  ///default constructor
+  PWBasisT() : maxmaxg(0), NumPlaneWaves(0) {}
+
+  ///constructor
+  PWBasisT(const PosType& twistangle) : maxmaxg(0), twist(twistangle), NumPlaneWaves(0) {}
+
+  ~PWBasisT() {}
+
+  ///set the twist angle
+  void setTwistAngle(const PosType& tang);
+
+  ///reset
+  void reset();
+
+  /** Read basisset from hdf5 file. Apply ecut.
+   * @param h5basisgroup h5 node where basis is located
+   * @param ecutoff cutoff energy
+   * @param lat CrystalLattice
+   * @param resizeContainer if true, resize internal storage.
+   * @return the number of plane waves
+   */
+  int readbasis(hdf_archive& h5basisgroup,
+                RealType ecutoff,
+                const ParticleLayout& lat,
+                const std::string& pwname     = "planewaves",
+                const std::string& pwmultname = "multipliers",
+                bool resizeContainer          = true);
+
+  /** Remove basis elements if kinetic energy > ecut.
+   *
+   * Keep an index map (inputmap) so we know how to match coefficients on read.
+   */
+  void trimforecut();
+
+#if defined(PWBASIS_USE_RECURSIVE)
+  /** Fill the recursion coefficients matrix.
+   *
+   * @todo Generalize to non-orthorhombic cells
+   */
+  inline void BuildRecursionCoefs(const PosType& pos)
+  {
+    PosType tau_red(Lattice.toUnit(pos));
+//      RealType phi=TWOPI*tau_red[0];
+//      RealType nphi=maxg0*phi;
+//      ComplexType ct0(std::cos(phi),std::sin(phi));
+//      ComplexType t(std::cos(nphi),-std::sin(nphi));
+//      C0[0]=t;
+//      for(int n=1; n<=2*maxg0; n++) C0[n] = (t *= ct0);
+//
+//      phi=TWOPI*tau_red[1];
+//      nphi=maxg1*phi;
+//      ct0=ComplexType(std::cos(phi),std::sin(phi));
+//      t=ComplexType(std::cos(nphi),-std::sin(nphi));
+//      C1[0]=t;
+//      for(int n=1; n<=2*maxg1; n++) C1[n] = (t *= ct0);
+//
+//      phi=TWOPI*tau_red[2];
+//      nphi=maxg2*phi;
+//      ct0=ComplexType(std::cos(phi),std::sin(phi));
+//      t=ComplexType(std::cos(nphi),-std::sin(nphi));
+//      C2[0]=t;
+//      for(int n=1; n<=2*maxg2; n++) C2[n] = (t *= ct0);
+#pragma ivdep
+    for (int idim = 0; idim < 3; idim++)
+    {
+      int ng        = maxg[idim];
+      RealType phi  = TWOPI * tau_red[idim];
+      RealType nphi = ng * phi;
+      ComplexType Ctemp(std::cos(phi), std::sin(phi));
+      ComplexType t(std::cos(nphi), -std::sin(nphi));
+      ComplexType* restrict cp_ptr = C[idim];
+      *cp_ptr++                    = t;
+      for (int n = 1; n <= 2 * ng; n++)
+      {
+        *cp_ptr++ = (t *= Ctemp);
+      }
+    }
+    //Base version
+    //#pragma ivdep
+    //      for(int idim=0; idim<3; idim++){
+    //        RealType phi=TWOPI*tau_red[idim];
+    //        ComplexType Ctemp(std::cos(phi),std::sin(phi));
+    //        int ng=maxg[idim];
+    //        ComplexType* restrict cp_ptr=C[idim]+ng;
+    //        ComplexType* restrict cn_ptr=C[idim]+ng-1;
+    //        *cp_ptr=1.0;
+    //        for(int n=1; n<=ng; n++,cn_ptr--){
+    //          ComplexType t(Ctemp*(*cp_ptr++));
+    //          *cp_ptr = t;
+    //          *cn_ptr = conj(t);
+    //        }
+    //      }
+    //Not valid for general supercell
+    //      // Cartesian of twist for 1,1,1 (reduced coordinates)
+    //      PosType G111(1.0,1.0,1.0);
+    //      G111 = Lattice.k_cart(G111);
+    //
+    //      //Precompute a small number of complex factors (PWs along b1,b2,b3 lines)
+    //      //using a fast recursion algorithm
+    //#pragma ivdep
+    //      for(int idim=0; idim<3; idim++){
+    //        //start the recursion with the 111 vector.
+    //        RealType phi = pos[idim] * G111[idim];
+    //        register ComplexType Ctemp(std::cos(phi), std::sin(phi));
+    //        int ng=maxg[idim];
+    //        ComplexType* restrict cp_ptr=C[idim]+ng;
+    //        ComplexType* restrict cn_ptr=C[idim]+ng-1;
+    //        *cp_ptr=1.0;
+    //        for(int n=1; n<=ng; n++,cn_ptr--){
+    //          ComplexType t(Ctemp*(*cp_ptr++));
+    //          *cp_ptr = t;
+    //          *cn_ptr = conj(t);
+    //        }
+    //      }
+  }
+
+  inline void evaluate(const PosType& pos)
+  {
+    BuildRecursionCoefs(pos);
+    RealType twistdotr = dot(twist_cart, pos);
+    ComplexType pw0(std::cos(twistdotr), std::sin(twistdotr));
+    //Evaluate the planewaves for particle iat.
+    for (int ig = 0; ig < NumPlaneWaves; ig++)
+    {
+      //PW is initialized as exp(i*twist.r) so that the final basis evaluations are for (twist+G).r
+      ComplexType pw(pw0); //std::cos(twistdotr),std::sin(twistdotr));
+      for (int idim = 0; idim < 3; idim++)
+        pw *= C(idim, gvecs_shifted[ig][idim]);
+      //pw *= C0[gvecs_shifted[ig][0]];
+      //pw *= C1[gvecs_shifted[ig][1]];
+      //pw *= C2[gvecs_shifted[ig][2]];
+      Zv[ig] = pw;
+    }
+  }
+  /** Evaluate all planewaves and derivatives for the iat-th particle
+   *
+   * The basis functions are evaluated for particles iat: first <= iat < last
+   * Evaluate the plane-waves at current particle coordinates using a fast
+   * recursion algorithm. Order of Y,dY and d2Y is kept correct.
+   * These can be "dotted" with coefficients later to complete orbital evaluations.
+   */
+  inline void evaluateAll(const ParticleSet& P, int iat)
+  {
+    const PosType& r(P.activeR(iat));
+    BuildRecursionCoefs(r);
+    RealType twistdotr = dot(twist_cart, r);
+    ComplexType pw0(std::cos(twistdotr), std::sin(twistdotr));
+    //Evaluate the planewaves and derivatives.
+    ComplexType* restrict zptr = Z.data();
+    for (int ig = 0; ig < NumPlaneWaves; ig++, zptr += 5)
+    {
+      //PW is initialized as exp(i*twist.r) so that the final basis evaluations
+      //are for (twist+G).r
+      ComplexType pw(pw0);
+      // THE INDEX ORDER OF C DOESN'T LOOK TOO GOOD: this could be fixed
+      for (int idim = 0; idim < 3; idim++)
+        pw *= C(idim, gvecs_shifted[ig][idim]);
+      //pw *= C0[gvecs_shifted[ig][0]];
+      //pw *= C1[gvecs_shifted[ig][1]];
+      //pw *= C2[gvecs_shifted[ig][2]];
+      zptr[0] = pw;
+      zptr[1] = minusModKplusG2[ig] * pw;
+      zptr[2] = kplusgvecs_cart[ig][0] * ComplexType(-pw.imag(), pw.real());
+      zptr[3] = kplusgvecs_cart[ig][1] * ComplexType(-pw.imag(), pw.real());
+      zptr[4] = kplusgvecs_cart[ig][2] * ComplexType(-pw.imag(), pw.real());
+    }
+  }
+#else
+  inline void evaluate(const PosType& pos)
+  {
+    //Evaluate the planewaves for particle iat.
+    for (int ig = 0; ig < NumPlaneWaves; ig++)
+      phi[ig] = dot(kplusgvecs_cart[ig], pos);
+    eval_e2iphi(NumPlaneWaves, phi.data(), Zv.data());
+  }
+  inline void evaluateAll(const ParticleSet& P, int iat)
+  {
+    const PosType& r(P.activeR(iat));
+    evaluate(r);
+    ComplexType* restrict zptr = Z.data();
+    for (int ig = 0; ig < NumPlaneWaves; ig++, zptr += 5)
+    {
+      //Zv[ig] was just filled by evaluate(r) above; the gradient terms multiply (k+G) by
+      //ComplexType(-Im(pw), Re(pw)), i.e. i*pw. The row stride 5 equals PW_MAXINDEX.
+      ComplexType& pw = Zv[ig];
+      zptr[0]         = pw;
+      zptr[1]         = minusModKplusG2[ig] * pw;
+      zptr[2]         = kplusgvecs_cart[ig][0] * ComplexType(-pw.imag(), pw.real());
+      zptr[3]         = kplusgvecs_cart[ig][1] * ComplexType(-pw.imag(), pw.real());
+      zptr[4]         = kplusgvecs_cart[ig][2] * ComplexType(-pw.imag(), pw.real());
+    }
+  }
+#endif
+  //    /** Fill the recursion coefficients matrix.
+  //     *
+  //     * @todo Generalize to non-orthorhombic cells
+  //     */
+  //    void BuildRecursionCoefsByAdd(const PosType& pos)
+  //    {
+  //      // Cartesian of twist for 1,1,1 (reduced coordinates)
+  //      PosType G111(1.0,1.0,1.0);
+  //      G111 = Lattice.k_cart(G111);
+  //      //PosType redP=P.Lattice.toUnit(P.R[iat]);
+  //      //Precompute a small number of complex factors (PWs along b1,b2,b3 lines)
+  //      for(int idim=0; idim<3; idim++){
+  //        //start the recursion with the 111 vector.
+  //        RealType phi = pos[idim] * G111[idim];
+  //        int ng(maxg[idim]);
+  //        RealType* restrict cp_ptr=logC[idim]+ng;
+  //        RealType* restrict cn_ptr=logC[idim]+ng-1;
+  //        *cp_ptr=0.0;
+  //        //add INTEL vectorization
+  //        for(int n=1; n<=ng; n++,cn_ptr--){
+  //          RealType t(phi+*cp_ptr++);
+  //          *cp_ptr = t;
+  //          *cn_ptr = -t;
+  //        }
+  //      }
+  //    }
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp
new file mode 100644
index 0000000000..a3b1e135ec
--- /dev/null
+++ b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp
@@ -0,0 +1,145 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#include "Message/Communicate.h"
+#include "PWOrbitalSetT.h"
+#include "Numerics/MatrixOperators.h"
+
+namespace qmcplusplus
+{
+template<class T>
+PWOrbitalSetT<T>::~PWOrbitalSetT()
+{
+  if (OwnBasisSet)
+    delete myBasisSet; // the basis is released only by its owner; deleting nullptr is a no-op
+  if (!IsCloned)
+    delete this->C; // clones share C with the set they were copied from, which frees it
+}
+
+template<class T>
+std::unique_ptr<SPOSetT<T>> PWOrbitalSetT<T>::makeClone() const
+{
+  auto myclone = std::make_unique<PWOrbitalSetT<T>>(*this); // member-wise copy: the C pointer is shared
+  myclone->myBasisSet  = new PWBasisT<T>(*myBasisSet);     // the clone gets a private copy of the basis
+  myclone->OwnBasisSet = myclone->IsCloned = true; // owns that copy (else it leaks) but never deletes C
+  return myclone;
+}
+
+template<class T>
+void PWOrbitalSetT<T>::setOrbitalSetSize(int /*norbs*/) { /* intentionally empty: resize() sets OrbitalSetSize */ }
+
+template<class T>
+void PWOrbitalSetT<T>::resize(PWBasisPtr bset, int nbands, bool cleanup)
+{
+  myBasisSet  = bset;
+  OwnBasisSet = cleanup; // when true the destructor deletes bset
+  // C gets one row per band and one column per plane wave; Temp holds one row per band.
+  this->OrbitalSetSize = nbands;
+  BasisSetSize         = bset->NumPlaneWaves;
+  this->C              = new ValueMatrix(this->OrbitalSetSize, BasisSetSize);
+  this->Temp.resize(this->OrbitalSetSize, PW_MAXINDEX);
+  app_log() << "  PWOrbitalSetT<T>::resize OrbitalSetSize =" << this->OrbitalSetSize << " BasisSetSize = " << BasisSetSize << std::endl;
+}
+
+template<class T>
+void PWOrbitalSetT<T>::addVector(const std::vector<ComplexType>& coefs, int jorb)
+{
+  // Store the complex input coefficients of orbital jorb in row jorb of C.
+  // inputmap[i] is the basis column of the i-th input coefficient; negative means "drop".
+  const std::vector<int>& gmap = myBasisSet->inputmap;
+  const int numInput           = gmap.size();
+  if (numInput != coefs.size())
+  {
+    app_error() << "  Input G map does not match the basis size of wave functions " << std::endl;
+    OHMMS::Controller->abort();
+  }
+  // Scatter, skipping the G points that are not part of the basis for this twist.
+  for (int ig = 0; ig < numInput; ++ig)
+    if (gmap[ig] >= 0)
+      (*(this->C))[jorb][gmap[ig]] = coefs[ig];
+}
+
+template<class T>
+void PWOrbitalSetT<T>::addVector(const std::vector<RealType>& coefs, int jorb)
+{
+  // Store the real input coefficients of orbital jorb in row jorb of C.
+  // inputmap[i] is the basis column of the i-th input coefficient; negative means "drop".
+  const std::vector<int>& gmap = myBasisSet->inputmap;
+  const int numInput           = gmap.size();
+  if (numInput != coefs.size())
+  {
+    app_error() << "  Input G map does not match the basis size of wave functions " << std::endl;
+    OHMMS::Controller->abort();
+  }
+  // Scatter, skipping the G points that are not part of the basis for this twist.
+  for (int ig = 0; ig < numInput; ++ig)
+    if (gmap[ig] >= 0)
+      (*(this->C))[jorb][gmap[ig]] = coefs[ig];
+}
+
+template<class T>
+void PWOrbitalSetT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+{
+  // Step 1: fill the basis values Zv at the active position of particle iat.
+  const auto& pos = P.activeR(iat);
+  myBasisSet->evaluate(pos);
+  // Step 2: combine them with the coefficient matrix C; the result goes into psi.
+  MatrixOperators::product<T>(*(this->C), myBasisSet->Zv, psi);
+}
+
+template<class T>
+void PWOrbitalSetT<T>::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+{
+  // Basis value/Laplacian/gradient at particle iat, combined with C into Temp (one row per orbital).
+  myBasisSet->evaluateAll(P, iat);
+  MatrixOperators::product<T>(*(this->C), myBasisSet->Z, this->Temp);
+  for (int orb = 0; orb < this->OrbitalSetSize; ++orb)
+  {
+    const T* restrict row = this->Temp.data() + orb * PW_MAXINDEX; // columns follow the PW_* enum
+    psi[orb]              = row[PW_VALUE];
+    d2psi[orb]            = row[PW_LAP];
+    dpsi[orb]             = GradType(row[PW_GRADX], row[PW_GRADY], row[PW_GRADZ]);
+  }
+}
+
+template<class T>
+void PWOrbitalSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                            int first,
+                                            int last,
+                                            ValueMatrix& logdet,
+                                            GradMatrix& dlogdet,
+                                            ValueMatrix& d2logdet)
+{
+  for (int iat = first; iat < last; ++iat) // particle iat fills output row (iat - first)
+  {
+    myBasisSet->evaluateAll(P, iat);
+    MatrixOperators::product<T>(*(this->C), myBasisSet->Z, this->Temp);
+    for (int orb = 0; orb < this->OrbitalSetSize; ++orb)
+    {
+      const T* restrict vgl        = this->Temp.data() + orb * PW_MAXINDEX; // one Temp row per orbital
+      logdet(iat - first, orb)   = vgl[PW_VALUE];
+      d2logdet(iat - first, orb) = vgl[PW_LAP];
+      dlogdet(iat - first, orb)  = GradType(vgl[PW_GRADX], vgl[PW_GRADY], vgl[PW_GRADZ]);
+    }
+  }
+}
+
+// Explicit instantiations of PWOrbitalSetT for the supported value types.
+// NOTE: this file is only added to the build when QMC_COMPLEX is set (see CMakeLists.txt),
+// so only the complex types are instantiated; the real ones stay disabled:
+// template class PWOrbitalSetT<double>; template class PWOrbitalSetT<float>;
+template class PWOrbitalSetT<std::complex<double>>;
+template class PWOrbitalSetT<std::complex<float>>;
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h
new file mode 100644
index 0000000000..39d67f70b1
--- /dev/null
+++ b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h
@@ -0,0 +1,130 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+/** @file PWOrbitalSetT.h
+ * @brief Declaration of PWOrbitalSetT, an SPOSetT built on a plane-wave basis set
+ */
+#ifndef QMCPLUSPLUS_PLANEWAVE_ORBITALSETT_BLAS_H // kept distinct from the guard of the non-templated PWOrbitalSet.h
+#define QMCPLUSPLUS_PLANEWAVE_ORBITALSETT_BLAS_H
+
+#include "QMCWaveFunctions/PlaneWave/PWBasisT.h"
+#include "type_traits/complex_help.hpp"
+#include "QMCWaveFunctions/SPOSetT.h"
+#include "CPU/BLAS.hpp"
+
+namespace qmcplusplus
+{
+
+/** SPOSetT implementation whose orbitals are linear combinations of plane waves.
+ *  The coefficients live in C (one row per orbital); the plane waves come from myBasisSet.
+ */
+template<class T>
+class PWOrbitalSetT : public SPOSetT<T>
+{
+public:
+  using RealType = typename RealAlias_impl<T>::value_type;
+  using ComplexType = T;
+  using PosType = QMCTraits::PosType;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using GradType = QMCTraits::GradType;
+  using IndexType = QMCTraits::IndexType;
+
+  using BasisSet_t = PWBasisT<T>;
+  using PWBasisPtr = PWBasisT<T>*;
+
+  /** inherit the enum of BasisSet_t */
+  enum
+  {
+    PW_VALUE    = BasisSet_t::PW_VALUE,
+    PW_LAP      = BasisSet_t::PW_LAP,
+    PW_GRADX    = BasisSet_t::PW_GRADX,
+    PW_GRADY    = BasisSet_t::PW_GRADY,
+    PW_GRADZ    = BasisSet_t::PW_GRADZ,
+    PW_MAXINDEX = BasisSet_t::PW_MAXINDEX
+  };
+
+  /** construct an empty orbital set named my_name; resize() attaches the basis and allocates C
+  */
+  PWOrbitalSetT(const std::string& my_name)
+      : SPOSetT<T>(my_name), OwnBasisSet(false), myBasisSet(nullptr), BasisSetSize(0), C(nullptr), IsCloned(false)
+  {}
+
+  std::string getClassName() const override { return "PWOrbitalSetT"; }
+
+  /** delete BasisSet only if it owns this
+   *
+   * Builder takes care of who owns what
+   */
+  ~PWOrbitalSetT() override;
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const override;
+  /** resize  the orbital base
+   * @param bset PWBasis
+   * @param nbands number of bands
+   * @param cleanup if true, owns PWBasis. Will clean up.
+   */
+  void resize(PWBasisPtr bset, int nbands, bool cleanup = false);
+
+  /** Builder class takes care of the assertion
+  */
+  void addVector(const std::vector<ComplexType>& coefs, int jorb);
+  void addVector(const std::vector<RealType>& coefs, int jorb);
+
+  void setOrbitalSetSize(int norbs) override;
+
+  /// value of orbital ib at pos: BLAS::dot of row ib of C with the basis values Zv
+  inline T evaluate(int ib, const PosType& pos)
+  {
+    myBasisSet->evaluate(pos);
+    return BLAS::dot(BasisSetSize, (*C)[ib], myBasisSet->Zv.data());
+  }
+
+  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override;
+
+  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override;
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override;
+
+  /** boolean
+   *
+   * If true, this has to delete the BasisSet
+   */
+  bool OwnBasisSet;
+  ///TwistAngle of this PWOrbitalSetT
+  PosType TwistAngle;
+  ///My basis set
+  PWBasisPtr myBasisSet;
+  ///number of basis
+  IndexType BasisSetSize;
+  /** pointer to matrix containing the coefficients
+   *
+   * makeClone makes a shallow copy and flag IsCloned
+   */
+  ValueMatrix* C;
+  ///if true, do not clean up
+  bool IsCloned;
+
+  /** temporary array to perform gemm operation */
+  Matrix<T> Temp;
+};
+} // namespace qmcplusplus
+#endif

From 13a6e73bc8f3d6d67a9b679057f175c60fdacb3d Mon Sep 17 00:00:00 2001
From: Steven Hahn <hahnse@ornl.gov>
Date: Thu, 17 Aug 2023 14:27:26 -0400
Subject: [PATCH 09/17] Add templated class LCAOrbitalSetT

Signed-off-by: Steven Hahn <hahnse@ornl.gov>
---
 src/QMCWaveFunctions/BasisSetBase.h          |   5 +-
 src/QMCWaveFunctions/CMakeLists.txt          |   2 +-
 src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp | 966 +++++++++++++++++++
 src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h   | 336 +++++++
 4 files changed, 1305 insertions(+), 4 deletions(-)
 create mode 100644 src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp
 create mode 100644 src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h

diff --git a/src/QMCWaveFunctions/BasisSetBase.h b/src/QMCWaveFunctions/BasisSetBase.h
index 7be77b13cb..8837e18832 100644
--- a/src/QMCWaveFunctions/BasisSetBase.h
+++ b/src/QMCWaveFunctions/BasisSetBase.h
@@ -134,9 +134,8 @@ struct SoaBasisSetBase
   using vgl_type          = VectorSoaContainer<T, OHMMS_DIM + 2>;
   using vgh_type          = VectorSoaContainer<T, 10>;
   using vghgh_type        = VectorSoaContainer<T, 20>;
-  using ValueType         = QMCTraits::ValueType;
-  using OffloadMWVGLArray = Array<ValueType, 3, OffloadPinnedAllocator<ValueType>>; // [VGL, walker, Orbs]
-  using OffloadMWVArray   = Array<ValueType, 2, OffloadPinnedAllocator<ValueType>>; // [walker, Orbs]
+  using OffloadMWVGLArray = Array<T, 3, OffloadPinnedAllocator<T>>; // [VGL, walker, Orbs]
+  using OffloadMWVArray   = Array<T, 2, OffloadPinnedAllocator<T>>; // [walker, Orbs]
 
   ///size of the basis set
   int BasisSetSize;
diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt
index 1bbfc0520f..2db2ed4f13 100644
--- a/src/QMCWaveFunctions/CMakeLists.txt
+++ b/src/QMCWaveFunctions/CMakeLists.txt
@@ -71,7 +71,7 @@ if(OHMMS_DIM MATCHES 3)
 
   set(JASTROW_SRCS ${JASTROW_SRCS} Jastrow/eeI_JastrowBuilder.cpp Jastrow/CountingJastrowBuilder.cpp)
 
-  set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOrbitalSet.cpp LCAO/LCAOrbitalBuilder.cpp LCAO/MultiQuinticSpline1D.cpp
+  set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOrbitalSet.cpp LCAO/LCAOrbitalSetT.cpp LCAO/LCAOrbitalBuilder.cpp LCAO/MultiQuinticSpline1D.cpp
                    LCAO/AOBasisBuilder.cpp LCAO/SoaLocalizedBasisSet.cpp)
   if(QMC_COMPLEX)
     set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOSpinorBuilder.cpp)
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp
new file mode 100644
index 0000000000..dba20478b7
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp
@@ -0,0 +1,966 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#include "LCAOrbitalSetT.h"
+#include "Numerics/MatrixOperators.h"
+#include "CPU/BLAS.hpp"
+#include <ResourceCollection.h>
+
+namespace qmcplusplus
+{
+
+template<class T>
+struct LCAOrbitalSetT<T>::LCAOMultiWalkerMem : public Resource
+{ // Multi-walker scratch arrays; registered in createResource and lent to the leader in acquireResource.
+  LCAOMultiWalkerMem() : Resource("LCAOrbitalSetT") {}
+  LCAOMultiWalkerMem(const LCAOMultiWalkerMem&) : LCAOMultiWalkerMem() {} // array contents are not copied
+
+  std::unique_ptr<Resource> makeClone() const override { return std::make_unique<LCAOMultiWalkerMem>(*this); }
+
+  OffloadMWVGLArray phi_vgl_v; // [5][NW][NumMO]
+  OffloadMWVGLArray basis_mw;  // [5][NW][NumAO]
+  OffloadMWVArray phi_v;       // [NW][NumMO]
+  OffloadMWVArray basis_v_mw;  // [NW][NumAO], resized to (nw, BasisSetSize) in mw_evaluateValueImplGEMM
+};
+
+template<class T>
+LCAOrbitalSetT<T>::LCAOrbitalSetT(const std::string& my_name, std::unique_ptr<basis_type>&& bs)
+    : SPOSetT<T>(my_name),
+      BasisSetSize(bs ? bs->getBasisSetSize() : 0), // null-safe here; the nullptr check itself is in the body
+      Identity(true),
+      basis_timer_(createGlobalTimer("LCAOrbitalSetT::Basis", timer_level_fine)),
+      mo_timer_(createGlobalTimer("LCAOrbitalSetT::MO", timer_level_fine))
+{ // Takes ownership of bs and starts in Identity mode (no C, OrbitalSetSize == BasisSetSize); throws if bs is null.
+  if (!bs)
+    throw std::runtime_error("LCAOrbitalSetT cannot take nullptr as its  basis set!");
+  myBasisSet = std::move(bs);
+  Temp.resize(BasisSetSize);
+  Temph.resize(BasisSetSize);
+  Tempgh.resize(BasisSetSize);
+  this->OrbitalSetSize = BasisSetSize;
+  LCAOrbitalSetT<T>::checkObject(); // validate the Identity invariants just established
+}
+
+template<class T>
+LCAOrbitalSetT<T>::LCAOrbitalSetT(const LCAOrbitalSetT<T>& in)
+    : SPOSetT<T>(in),
+      myBasisSet(in.myBasisSet->makeClone()), // the basis set is cloned, not shared
+      C(in.C), // pointer copy: both objects refer to the same coefficient matrix
+      BasisSetSize(in.BasisSetSize),
+      C_copy(in.C_copy),
+      Identity(in.Identity),
+      basis_timer_(in.basis_timer_),
+      mo_timer_(in.mo_timer_)
+{ // The scratch containers are not taken from `in`; they are sized afresh below.
+  Temp.resize(BasisSetSize);
+  Temph.resize(BasisSetSize);
+  Tempgh.resize(BasisSetSize);
+  if (!in.Identity)
+  { // MO-sized buffers are only sized when a C matrix exists (same set as in setOrbitalSetSize)
+    Tempv.resize(this->OrbitalSetSize);
+    Temphv.resize(this->OrbitalSetSize);
+    Tempghv.resize(this->OrbitalSetSize);
+  }
+  LCAOrbitalSetT<T>::checkObject();
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::setOrbitalSetSize(int norbs)
+{ // Leaves Identity mode: allocates C as (norbs x BasisSetSize) and sizes the MO scratch buffers.
+  if (C) // an existing coefficient matrix is never replaced
+    throw std::runtime_error("LCAOrbitalSetT::setOrbitalSetSize cannot reset existing MO coefficients");
+
+  Identity       = false;
+  this->OrbitalSetSize = norbs;
+  C              = std::make_shared<ValueMatrix>(this->OrbitalSetSize, BasisSetSize);
+  Tempv.resize(this->OrbitalSetSize);
+  Temphv.resize(this->OrbitalSetSize);
+  Tempghv.resize(this->OrbitalSetSize);
+  LCAOrbitalSetT<T>::checkObject(); // validate the non-Identity invariants just established
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::checkObject() const
+{ // Checks the invariants tying Identity, C, OrbitalSetSize and BasisSetSize; throws std::runtime_error on violation.
+  if (!Identity)
+  {
+    // a coefficient matrix is mandatory and must be OrbitalSetSize x BasisSetSize
+    if (!C)
+      throw std::runtime_error("LCAOrbitalSetT::checkObject C should not be nullptr if Identity = false!");
+    if (C->rows() != this->OrbitalSetSize)
+      throw std::runtime_error("LCAOrbitalSetT::checkObject C rows doesn't match OrbitalSetSize.");
+    if (C->cols() != BasisSetSize)
+      throw std::runtime_error("LCAOrbitalSetT::checkObject C columns doesn't match BasisSetSize.");
+    return;
+  }
+  // Identity: the two sizes must agree and no coefficient matrix may exist
+  if (BasisSetSize != this->OrbitalSetSize)
+    throw std::runtime_error(
+        "LCAOrbitalSetT::checkObject OrbitalSetSize and BasisSetSize must be equal if Identity = true!");
+  if (C)
+    throw std::runtime_error("LCAOrbitalSetT::checkObject C should be nullptr if Identity = true!");
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::createResource(ResourceCollection& collection) const
+{ // Registers a fresh LCAOMultiWalkerMem with the collection; the value returned by addResource is not needed here.
+  collection.addResource(std::make_unique<LCAOMultiWalkerMem>());
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+{ // Must be called on the leader of spo_list; the lent LCAOMultiWalkerMem handle is stored on the leader only.
+  assert(this == &spo_list.getLeader());
+  auto& spo_leader          = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+  spo_leader.mw_mem_handle_ = collection.lendResource<LCAOMultiWalkerMem>();
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+{ // Counterpart of acquireResource: hands the leader's LCAOMultiWalkerMem handle back to the collection.
+  assert(this == &spo_list.getLeader());
+  auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+  collection.takebackResource(spo_leader.mw_mem_handle_);
+}
+
+template<class T>
+std::unique_ptr<SPOSetT<T>> LCAOrbitalSetT<T>::makeClone() const { return std::make_unique<LCAOrbitalSetT<T>>(*this); } // copy-constructs
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+{
+  // Writes the orbital values for particle iat of P into psi.
+  if (Identity)
+  { //PAY ATTENTION TO COMPLEX
+    myBasisSet->evaluateV(P, iat, psi.data());
+    return;
+  }
+  // basis values go through row 0 of Temp and are then combined with the first psi.size() rows of C
+  Vector<T> basis_values(Temp.data(0), BasisSetSize);
+  this->myBasisSet->evaluateV(P, iat, basis_values.data());
+  assert(psi.size() <= this->OrbitalSetSize);
+  ValueMatrix C_rows(C->data(), psi.size(), BasisSetSize);
+  MatrixOperators::product(C_rows, basis_values, psi);
+}
+
+/** One gemm over all D rows of A, presumably C[d] = B * A[d] (assumes SoA row d starts at data() + d * capacity() - TODO confirm). Find a better place for other user classes, Matrix should be padded as well */
+template<typename T, unsigned D>
+static void Product_ABt(const VectorSoaContainer<T, D>& A, const Matrix<T>& B, VectorSoaContainer<T, D>& C)
+{
+  constexpr char transa = 't';
+  constexpr char transb = 'n';
+  constexpr T zone(1);
+  constexpr T zero(0);
+  BLAS::gemm(transa, transb, B.rows(), D, B.cols(), zone, B.data(), B.cols(), A.data(), A.capacity(), zero, C.data(),
+             C.capacity()); // gemm sizes: m = B.rows(), n = D, k = B.cols(); A and C pass capacity() as leading dimension
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_vgl_impl(const vgl_type& temp,
+                                             ValueVector& psi,
+                                             GradVector& dpsi,
+                                             ValueVector& d2psi) const
+{ // Unpacks the first psi.size() entries of temp: row 0 -> psi, rows 1..3 -> dpsi components, row 4 -> d2psi.
+  const size_t norb = psi.size();
+  std::copy_n(temp.data(0), norb, psi.data());
+  std::copy_n(temp.data(4), norb, d2psi.data());
+  const T* restrict grad_x = temp.data(1);
+  const T* restrict grad_y = temp.data(2);
+  const T* restrict grad_z = temp.data(3);
+  for (size_t orb = 0; orb != norb; ++orb)
+  {
+    dpsi[orb][0] = grad_x[orb];
+    dpsi[orb][1] = grad_y[orb];
+    dpsi[orb][2] = grad_z[orb];
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_vgh_impl(const vgh_type& temp,
+                                             ValueVector& psi,
+                                             GradVector& dpsi,
+                                             HessVector& d2psi) const
+{ // Unpacks temp rows into psi (row 0), dpsi (rows 1-3) and the 3x3 entries of d2psi (rows 4-9).
+  const size_t output_size = psi.size(); // only the first psi.size() entries of each row are consumed
+  std::copy_n(temp.data(0), output_size, psi.data());
+  const T* restrict gx  = temp.data(1);
+  const T* restrict gy  = temp.data(2);
+  const T* restrict gz  = temp.data(3);
+  const T* restrict hxx = temp.data(4);
+  const T* restrict hxy = temp.data(5);
+  const T* restrict hxz = temp.data(6);
+  const T* restrict hyy = temp.data(7);
+  const T* restrict hyz = temp.data(8);
+  const T* restrict hzz = temp.data(9);
+
+  for (size_t j = 0; j < output_size; j++)
+  {
+    dpsi[j][0] = gx[j];
+    dpsi[j][1] = gy[j];
+    dpsi[j][2] = gz[j];
+
+    d2psi[j](0, 0) = hxx[j];
+    d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j]; // each off-diagonal value is stored at both mirrored positions
+    d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j];
+    d2psi[j](1, 1)                  = hyy[j];
+    d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j];
+    d2psi[j](2, 2)                  = hzz[j];
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_vghgh_impl(const vghgh_type& temp,
+                                               int i,
+                                               ValueMatrix& psi,
+                                               GradMatrix& dpsi,
+                                               HessMatrix& d2psi,
+                                               GGGMatrix& dghpsi) const
+{ // Row-i variant: unpacks the 20 rows of temp into row i of psi, dpsi, d2psi and dghpsi.
+  const size_t output_size = psi.cols(); // one entry per column of the output matrices
+  std::copy_n(temp.data(0), output_size, psi[i]);
+  const T* restrict gx     = temp.data(1);
+  const T* restrict gy     = temp.data(2);
+  const T* restrict gz     = temp.data(3);
+  const T* restrict hxx    = temp.data(4);
+  const T* restrict hxy    = temp.data(5);
+  const T* restrict hxz    = temp.data(6);
+  const T* restrict hyy    = temp.data(7);
+  const T* restrict hyz    = temp.data(8);
+  const T* restrict hzz    = temp.data(9);
+  const T* restrict gh_xxx = temp.data(10);
+  const T* restrict gh_xxy = temp.data(11);
+  const T* restrict gh_xxz = temp.data(12);
+  const T* restrict gh_xyy = temp.data(13);
+  const T* restrict gh_xyz = temp.data(14);
+  const T* restrict gh_xzz = temp.data(15);
+  const T* restrict gh_yyy = temp.data(16);
+  const T* restrict gh_yyz = temp.data(17);
+  const T* restrict gh_yzz = temp.data(18);
+  const T* restrict gh_zzz = temp.data(19);
+
+  for (size_t j = 0; j < output_size; j++)
+  {
+    dpsi[i][j][0] = gx[j];
+    dpsi[i][j][1] = gy[j];
+    dpsi[i][j][2] = gz[j];
+
+    d2psi[i][j](0, 0) = hxx[j];
+    d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j];
+    d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j];
+    d2psi[i][j](1, 1)                     = hyy[j];
+    d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j];
+    d2psi[i][j](2, 2)                     = hzz[j];
+
+    // dghpsi[i][j][a](b, c) takes the row whose three letters are {a, b, c} in sorted order (10 unique rows fill 27 slots)
+    dghpsi[i][j][0](0, 0) = gh_xxx[j]; //x|xx
+    dghpsi[i][j][0](0, 1) = gh_xxy[j]; //x|xy
+    dghpsi[i][j][0](0, 2) = gh_xxz[j]; //x|xz
+    dghpsi[i][j][0](1, 0) = gh_xxy[j]; //x|yx = xxy
+    dghpsi[i][j][0](1, 1) = gh_xyy[j]; //x|yy
+    dghpsi[i][j][0](1, 2) = gh_xyz[j]; //x|yz
+    dghpsi[i][j][0](2, 0) = gh_xxz[j]; //x|zx = xxz
+    dghpsi[i][j][0](2, 1) = gh_xyz[j]; //x|zy = xyz
+    dghpsi[i][j][0](2, 2) = gh_xzz[j]; //x|zz
+
+    dghpsi[i][j][1](0, 0) = gh_xxy[j]; //y|xx = xxy
+    dghpsi[i][j][1](0, 1) = gh_xyy[j]; //y|xy = xyy
+    dghpsi[i][j][1](0, 2) = gh_xyz[j]; //y|xz = xyz
+    dghpsi[i][j][1](1, 0) = gh_xyy[j]; //y|yx = xyy
+    dghpsi[i][j][1](1, 1) = gh_yyy[j]; //y|yy
+    dghpsi[i][j][1](1, 2) = gh_yyz[j]; //y|yz
+    dghpsi[i][j][1](2, 0) = gh_xyz[j]; //y|zx = xyz
+    dghpsi[i][j][1](2, 1) = gh_yyz[j]; //y|zy = yyz
+    dghpsi[i][j][1](2, 2) = gh_yzz[j]; //y|zz
+
+    dghpsi[i][j][2](0, 0) = gh_xxz[j]; //z|xx = xxz
+    dghpsi[i][j][2](0, 1) = gh_xyz[j]; //z|xy = xyz
+    dghpsi[i][j][2](0, 2) = gh_xzz[j]; //z|xz = xzz
+    dghpsi[i][j][2](1, 0) = gh_xyz[j]; //z|yx = xyz
+    dghpsi[i][j][2](1, 1) = gh_yyz[j]; //z|yy = yyz
+    dghpsi[i][j][2](1, 2) = gh_yzz[j]; //z|yz = yzz
+    dghpsi[i][j][2](2, 0) = gh_xzz[j]; //z|zx = xzz
+    dghpsi[i][j][2](2, 1) = gh_yzz[j]; //z|zy = yzz
+    dghpsi[i][j][2](2, 2) = gh_zzz[j]; //z|zz
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_vghgh_impl(const vghgh_type& temp,
+                                               ValueVector& psi,
+                                               GradVector& dpsi,
+                                               HessVector& d2psi,
+                                               GGGVector& dghpsi) const
+{ // Vector variant: unpacks the 20 rows of temp into psi, dpsi, d2psi and dghpsi (first psi.size() entries of each row).
+  const size_t output_size = psi.size();
+  std::copy_n(temp.data(0), output_size, psi.data());
+  const T* restrict gx     = temp.data(1);
+  const T* restrict gy     = temp.data(2);
+  const T* restrict gz     = temp.data(3);
+  const T* restrict hxx    = temp.data(4);
+  const T* restrict hxy    = temp.data(5);
+  const T* restrict hxz    = temp.data(6);
+  const T* restrict hyy    = temp.data(7);
+  const T* restrict hyz    = temp.data(8);
+  const T* restrict hzz    = temp.data(9);
+  const T* restrict gh_xxx = temp.data(10);
+  const T* restrict gh_xxy = temp.data(11);
+  const T* restrict gh_xxz = temp.data(12);
+  const T* restrict gh_xyy = temp.data(13);
+  const T* restrict gh_xyz = temp.data(14);
+  const T* restrict gh_xzz = temp.data(15);
+  const T* restrict gh_yyy = temp.data(16);
+  const T* restrict gh_yyz = temp.data(17);
+  const T* restrict gh_yzz = temp.data(18);
+  const T* restrict gh_zzz = temp.data(19);
+
+  for (size_t j = 0; j < output_size; j++)
+  {
+    dpsi[j][0] = gx[j];
+    dpsi[j][1] = gy[j];
+    dpsi[j][2] = gz[j];
+
+    d2psi[j](0, 0) = hxx[j];
+    d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j];
+    d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j];
+    d2psi[j](1, 1)                  = hyy[j];
+    d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j];
+    d2psi[j](2, 2)                  = hzz[j];
+
+    // dghpsi[j][a](b, c) takes the row whose three letters are {a, b, c} in sorted order; must match the matrix overload
+    dghpsi[j][0](0, 0) = gh_xxx[j]; //x|xx
+    dghpsi[j][0](0, 1) = gh_xxy[j]; //x|xy
+    dghpsi[j][0](0, 2) = gh_xxz[j]; //x|xz
+    dghpsi[j][0](1, 0) = gh_xxy[j]; //x|yx = xxy
+    dghpsi[j][0](1, 1) = gh_xyy[j]; //x|yy
+    dghpsi[j][0](1, 2) = gh_xyz[j]; //x|yz
+    dghpsi[j][0](2, 0) = gh_xxz[j]; //x|zx = xxz
+    dghpsi[j][0](2, 1) = gh_xyz[j]; //x|zy = xyz
+    dghpsi[j][0](2, 2) = gh_xzz[j]; //x|zz
+
+    dghpsi[j][1](0, 0) = gh_xxy[j]; //y|xx = xxy
+    dghpsi[j][1](0, 1) = gh_xyy[j]; //y|xy = xyy
+    dghpsi[j][1](0, 2) = gh_xyz[j]; //y|xz = xyz
+    dghpsi[j][1](1, 0) = gh_xyy[j]; //y|yx = xyy
+    dghpsi[j][1](1, 1) = gh_yyy[j]; //y|yy
+    dghpsi[j][1](1, 2) = gh_yyz[j]; //y|yz
+    dghpsi[j][1](2, 0) = gh_xyz[j]; //y|zx = xyz
+    dghpsi[j][1](2, 1) = gh_yyz[j]; //y|zy = yyz
+    dghpsi[j][1](2, 2) = gh_yzz[j]; //y|zz
+
+    dghpsi[j][2](0, 0) = gh_xxz[j]; //z|xx = xxz
+    dghpsi[j][2](0, 1) = gh_xyz[j]; //z|xy = xyz
+    dghpsi[j][2](0, 2) = gh_xzz[j]; //z|xz = xzz
+    dghpsi[j][2](1, 0) = gh_xyz[j]; //z|yx = xyz
+    dghpsi[j][2](1, 1) = gh_yyz[j]; //z|yy = yyz
+    dghpsi[j][2](1, 2) = gh_yzz[j]; //z|yz = yzz
+    dghpsi[j][2](2, 0) = gh_xzz[j]; //z|zx = xzz
+    dghpsi[j][2](2, 1) = gh_yzz[j]; //z|zy = yzz
+    dghpsi[j][2](2, 2) = gh_zzz[j]; //z|zz
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_ionderiv_v_row_impl(const vgl_type& temp, GradVector& dpsi) const
+{
+  // Fills every entry of dpsi with the sign-flipped gradient rows (1..3) of temp.
+  // As mentioned in SoaLocalizedBasisSet, for an atomic center the ion gradient of an LCAO
+  // is the negative of the electron gradient, hence the minus signs.
+  const T* restrict grad_x = temp.data(1);
+  const T* restrict grad_y = temp.data(2);
+  const T* restrict grad_z = temp.data(3);
+  const size_t norb        = dpsi.size();
+  for (size_t orb = 0; orb != norb; ++orb)
+  {
+    auto& ion_grad = dpsi[orb];
+    ion_grad[0]    = -grad_x[orb];
+    ion_grad[1]    = -grad_y[orb];
+    ion_grad[2]    = -grad_z[orb];
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+{
+  // Step 1: basis-function VGL for particle iat goes into Temp (timed separately from the MO step).
+  {
+    ScopedTimer basis_scope(basis_timer_);
+    myBasisSet->evaluateVGL(P, iat, Temp);
+  }
+  if (Identity)
+  { // no coefficient matrix: unpack Temp directly
+    evaluate_vgl_impl(Temp, psi, dpsi, d2psi);
+    return;
+  }
+  // Step 2: combine with the first psi.size() rows of C into Tempv, then unpack Tempv.
+  assert(psi.size() <= this->OrbitalSetSize);
+  {
+    ScopedTimer mo_scope(mo_timer_);
+    ValueMatrix C_rows(C->data(), psi.size(), BasisSetSize);
+    Product_ABt(Temp, C_rows, Tempv);
+  }
+  evaluate_vgl_impl(Tempv, psi, dpsi, d2psi);
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                   const RefVectorWithLeader<ParticleSet>& P_list,
+                                   int iat,
+                                   const RefVector<ValueVector>& psi_v_list,
+                                   const RefVector<GradVector>& dpsi_v_list,
+                                   const RefVector<ValueVector>& d2psi_v_list) const
+{ // Multi-walker VGL: batch-evaluates into the leader's phi_vgl_v, then scatters into the per-walker outputs.
+  assert(this == &spo_list.getLeader());
+  auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+  auto& phi_vgl_v  = spo_leader.mw_mem_handle_.getResource().phi_vgl_v;
+
+  phi_vgl_v.resize(QMCTraits::DIM_VGL, spo_list.size(), this->OrbitalSetSize);
+  mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v);
+
+  const size_t nw = phi_vgl_v.size(1);
+
+  //TODO: make this cleaner?
+  for (int iw = 0; iw < nw; iw++)
+  {
+    const size_t output_size = psi_v_list[iw].get().size(); // copy count is set by each walker's own output vector
+    std::copy_n(phi_vgl_v.data_at(0, iw, 0), output_size, psi_v_list[iw].get().data());
+    std::copy_n(phi_vgl_v.data_at(4, iw, 0), output_size, d2psi_v_list[iw].get().data());
+    // grads are [dim, walker, orb] in phi_vgl_v
+    //           [walker][orb, dim] in dpsi_v_list
+    for (size_t idim = 0; idim < QMCTraits::DIM; idim++)
+      BLAS::copy(output_size, phi_vgl_v.data_at(idim + 1, iw, 0), 1, &dpsi_v_list[iw].get().data()[0][idim], QMCTraits::DIM);
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::mw_evaluateVGLImplGEMM(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                           const RefVectorWithLeader<ParticleSet>& P_list,
+                                           int iat,
+                                           OffloadMWVGLArray& phi_vgl_v) const
+{ // Fills phi_vgl_v for particle iat of every walker; phi_vgl_v is never resized here, the caller sizes it.
+  assert(this == &spo_list.getLeader());
+  auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+  auto& basis_mw   = spo_leader.mw_mem_handle_.getResource().basis_mw;
+  basis_mw.resize(QMCTraits::DIM_VGL, spo_list.size(), BasisSetSize);
+
+  {
+    ScopedTimer local(basis_timer_);
+    myBasisSet->mw_evaluateVGL(P_list, iat, basis_mw);
+  }
+
+  if (Identity)
+  {
+    // output_size can be smaller than BasisSetSize
+    const size_t output_size = phi_vgl_v.size(2);
+    const size_t nw          = phi_vgl_v.size(1);
+
+    for (size_t idim = 0; idim < QMCTraits::DIM_VGL; idim++)
+      for (int iw = 0; iw < nw; iw++)
+        std::copy_n(basis_mw.data_at(idim, iw, 0), output_size, phi_vgl_v.data_at(idim, iw, 0));
+  }
+  else
+  {
+    const size_t requested_orb_size = phi_vgl_v.size(2); // number of orbitals the caller sized phi_vgl_v for
+    assert(requested_orb_size <= this->OrbitalSetSize);
+    {
+      ScopedTimer local(mo_timer_);
+      ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize);
+      // TODO: make class for general blas interface in Platforms
+      // have instance of that class as member of LCAOrbitalSetT, call gemm through that
+      BLAS::gemm('T', 'N',
+                 requested_orb_size,        // MOs
+                 spo_list.size() * QMCTraits::DIM_VGL, // walkers * DIM_VGL
+                 BasisSetSize,              // AOs
+                 1, C_partial_view.data(), BasisSetSize, basis_mw.data(), BasisSetSize, 0, phi_vgl_v.data(),
+                 requested_orb_size);
+    }
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                     const RefVectorWithLeader<ParticleSet>& P_list,
+                                     int iat,
+                                     const RefVector<ValueVector>& psi_v_list) const
+{ // Multi-walker values: batch-evaluates into the leader's phi_v, then copies one row per walker into psi_v_list.
+  assert(this == &spo_list.getLeader());
+  auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+  auto& phi_v      = spo_leader.mw_mem_handle_.getResource().phi_v;
+  phi_v.resize(spo_list.size(), this->OrbitalSetSize);
+  mw_evaluateValueImplGEMM(spo_list, P_list, iat, phi_v);
+
+  const size_t nw = phi_v.size(0);
+  // An output vector may hold fewer than OrbitalSetSize entries (mw_evaluateVGL allows this); never copy past its end.
+  for (size_t iw = 0; iw < nw; iw++)
+    std::copy_n(phi_v.data_at(iw, 0), std::min<size_t>(phi_v.size(1), psi_v_list[iw].get().size()),
+                psi_v_list[iw].get().data());
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::mw_evaluateValueImplGEMM(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                             const RefVectorWithLeader<ParticleSet>& P_list,
+                                             int iat,
+                                             OffloadMWVArray& phi_v) const
+{ // Fills phi_v with orbital values for particle iat of every walker; phi_v is never resized here.
+  assert(this == &spo_list.getLeader());
+  auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+  const size_t nw  = spo_list.size();
+  auto& basis_v_mw = spo_leader.mw_mem_handle_.getResource().basis_v_mw;
+  basis_v_mw.resize(nw, BasisSetSize);
+
+  myBasisSet->mw_evaluateValue(P_list, iat, basis_v_mw);
+
+  if (Identity)
+  { // NOTE(review): one flat copy of OrbitalSetSize * nw values; assumes phi_v rows are OrbitalSetSize long - confirm
+    std::copy_n(basis_v_mw.data_at(0, 0), this->OrbitalSetSize * nw, phi_v.data_at(0, 0));
+  }
+  else
+  {
+    const size_t requested_orb_size = phi_v.size(1); // number of orbitals the caller sized phi_v for
+    assert(requested_orb_size <= this->OrbitalSetSize);
+    ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize);
+    BLAS::gemm('T', 'N',
+               requested_orb_size, // MOs
+               spo_list.size(),    // walkers
+               BasisSetSize,       // AOs
+               1, C_partial_view.data(), BasisSetSize, basis_v_mw.data(), BasisSetSize, 0, phi_v.data(),
+               requested_orb_size);
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                         const RefVectorWithLeader<const VirtualParticleSet>& vp_list,
+                                         const RefVector<ValueVector>& psi_list,
+                                         const std::vector<const T*>& invRow_ptr_list,
+                                         std::vector<std::vector<T>>& ratios_list) const
+{ // Walker by walker: evaluate the orbitals at each virtual position, then dot them with that walker's inverse row.
+  for (size_t iw = 0, nw = spo_list.size(); iw != nw; ++iw)
+  {
+    ValueVector& psi = psi_list[iw];
+    for (size_t ivp = 0; ivp < vp_list[iw].getTotalNum(); ++ivp)
+    {
+      spo_list[iw].evaluateValue(vp_list[iw], ivp, psi);
+      ratios_list[iw][ivp] = simd::dot(psi.data(), invRow_ptr_list[iw], psi.size());
+    }
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluateDetRatios(const VirtualParticleSet& VP,
+                                      ValueVector& psi,
+                                      const ValueVector& psiinv,
+                                      std::vector<T>& ratios)
+{ // ratios[j] = dot(psiinv, orbital values at position j of VP) for every j; the psi argument is not touched.
+  Vector<T> vTemp(Temp.data(0), BasisSetSize);
+  Vector<T> invTemp(Temp.data(1), BasisSetSize);
+  if (!Identity) // Identity implies C == nullptr (see checkObject): the basis values are dotted with psiinv directly
+  {
+    ScopedTimer local(mo_timer_);
+    // when only a subset of orbitals is used, extract limited rows of C.
+    Matrix<T> C_occupied(C->data(), psiinv.size(), BasisSetSize);
+    MatrixOperators::product_Atx(C_occupied, psiinv, invTemp);
+  }
+  for (size_t j = 0; j < VP.getTotalNum(); j++)
+  {
+    {
+      ScopedTimer local(basis_timer_);
+      myBasisSet->evaluateV(VP, j, vTemp.data());
+    }
+    ratios[j] = Identity ? simd::dot(vTemp.data(), psiinv.data(), psiinv.size())
+                         : simd::dot(vTemp.data(), invTemp.data(), BasisSetSize);
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                                   const RefVectorWithLeader<ParticleSet>& P_list,
+                                                   int iat,
+                                                   const std::vector<const T*>& invRow_ptr_list,
+                                                   OffloadMWVGLArray& phi_vgl_v,
+                                                   std::vector<T>& ratios,
+                                                   std::vector<GradType>& grads) const
+{ // Per walker: ratios[iw] = dot(invRow, value row); grads[iw][d] = dot(invRow, gradient row d) / ratios[iw].
+  assert(this == &spo_list.getLeader());
+  assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL);
+  assert(phi_vgl_v.size(1) == spo_list.size());
+
+  mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v);
+  // Device data of phi_vgl_v must be up-to-date upon return
+  phi_vgl_v.updateTo();
+
+  const size_t nw             = spo_list.size();
+  const size_t norb_requested = phi_vgl_v.size(2);
+  for (int iw = 0; iw < nw; iw++)
+  {
+    ratios[iw] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(0, iw, 0), norb_requested);
+    GradType dphi;
+    for (size_t idim = 0; idim < QMCTraits::DIM; idim++)
+      dphi[idim] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(idim + 1, iw, 0), norb_requested) / ratios[iw]; // NOTE(review): no guard for ratios[iw] == 0
+    grads[iw] = dphi;
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluateVGH(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, HessVector& dhpsi)
+{ // Fills psi, dpsi and dhpsi for particle iat of P.
+  myBasisSet->evaluateVGH(P, iat, Temph); // basis-function data lands in Temph
+  if (!Identity)
+  {
+    // combine with the first psi.size() rows of C into Temphv, then unpack Temphv
+    assert(psi.size() <= this->OrbitalSetSize);
+    ValueMatrix C_rows(C->data(), psi.size(), BasisSetSize);
+    Product_ABt(Temph, C_rows, Temphv);
+    evaluate_vgh_impl(Temphv, psi, dpsi, dhpsi);
+    return;
+  }
+  evaluate_vgh_impl(Temph, psi, dpsi, dhpsi); // Identity: no coefficient matrix, unpack Temph directly
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluateVGHGH(const ParticleSet& P,
+                                  int iat,
+                                  ValueVector& psi,
+                                  GradVector& dpsi,
+                                  HessVector& dhpsi,
+                                  GGGVector& dghpsi)
+{
+  // Fills psi, dpsi, dhpsi and dghpsi for particle iat of P.
+  myBasisSet->evaluateVGHGH(P, iat, Tempgh); // basis-function data lands in Tempgh
+  if (Identity)
+  { // no coefficient matrix: unpack Tempgh directly
+    evaluate_vghgh_impl(Tempgh, psi, dpsi, dhpsi, dghpsi);
+    return;
+  }
+
+  // otherwise combine with the first psi.size() rows of C into Tempghv, then unpack Tempghv
+  assert(psi.size() <= this->OrbitalSetSize);
+  ValueMatrix C_rows(C->data(), psi.size(), BasisSetSize);
+  Product_ABt(Tempgh, C_rows, Tempghv);
+  evaluate_vghgh_impl(Tempghv, psi, dpsi, dhpsi, dghpsi);
+}
+
+/* Row-wise unpacking used by evaluate_notranspose: fills row i of each output matrix from temp. */
+template<class T>
+inline void LCAOrbitalSetT<T>::evaluate_vgl_impl(const vgl_type& temp,
+                                             int i,
+                                             ValueMatrix& logdet,
+                                             GradMatrix& dlogdet,
+                                             ValueMatrix& d2logdet) const
+{ // temp row 0 -> logdet[i], rows 1..3 -> components of dlogdet[i][*], row 4 -> d2logdet[i]
+  const size_t norb = logdet.cols();
+  std::copy_n(temp.data(0), norb, logdet[i]);
+  std::copy_n(temp.data(4), norb, d2logdet[i]);
+  const T* restrict grad_x = temp.data(1);
+  const T* restrict grad_y = temp.data(2);
+  const T* restrict grad_z = temp.data(3);
+  for (size_t orb = 0; orb != norb; ++orb)
+  {
+    dlogdet[i][orb][0] = grad_x[orb];
+    dlogdet[i][orb][1] = grad_y[orb];
+    dlogdet[i][orb][2] = grad_z[orb];
+  }
+}
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_vgh_impl(const vgh_type& temp,
+                                             int i,
+                                             ValueMatrix& psi,
+                                             GradMatrix& dpsi,
+                                             HessMatrix& d2psi) const
+{ // Row-i variant: unpacks temp rows into row i of psi (row 0), dpsi (rows 1-3) and d2psi (rows 4-9).
+  const size_t output_size = psi.cols(); // one entry per column of the output matrices
+  std::copy_n(temp.data(0), output_size, psi[i]);
+  const T* restrict gx  = temp.data(1);
+  const T* restrict gy  = temp.data(2);
+  const T* restrict gz  = temp.data(3);
+  const T* restrict hxx = temp.data(4);
+  const T* restrict hxy = temp.data(5);
+  const T* restrict hxz = temp.data(6);
+  const T* restrict hyy = temp.data(7);
+  const T* restrict hyz = temp.data(8);
+  const T* restrict hzz = temp.data(9);
+
+  for (size_t j = 0; j < output_size; j++)
+  {
+    dpsi[i][j][0] = gx[j];
+    dpsi[i][j][1] = gy[j];
+    dpsi[i][j][2] = gz[j];
+
+    d2psi[i][j](0, 0) = hxx[j];
+    d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j]; // each off-diagonal value is stored at both mirrored positions
+    d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j];
+    d2psi[i][j](1, 1)                     = hyy[j];
+    d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j];
+    d2psi[i][j](2, 2)                     = hzz[j];
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_ionderiv_v_impl(const vgl_type& temp, int i, GradMatrix& dpsi) const
+{
+  // Fills row i of dpsi with the sign-flipped gradient rows (1..3) of temp, one entry per column.
+  // As mentioned in SoaLocalizedBasisSet, for an atomic center the ion gradient of an LCAO
+  // is the negative of the electron gradient, hence the minus signs.
+  const T* restrict grad_x = temp.data(1);
+  const T* restrict grad_y = temp.data(2);
+  const T* restrict grad_z = temp.data(3);
+  const size_t norb        = dpsi.cols();
+  for (size_t orb = 0; orb != norb; ++orb)
+  {
+    auto& ion_grad = dpsi[i][orb];
+    ion_grad[0]    = -grad_x[orb];
+    ion_grad[1]    = -grad_y[orb];
+    ion_grad[2]    = -grad_z[orb];
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_ionderiv_vgl_impl(const vghgh_type& temp,
+                                                      int i,
+                                                      GradMatrix& dpsi,
+                                                      HessMatrix& dgpsi,
+                                                      GradMatrix& dlpsi) const
+{ // Fills row i of dpsi, dgpsi and dlpsi with sign-flipped data from temp; temp row 0 is not read here.
+  const size_t output_size         = dpsi.cols();
+  const T* restrict gx     = temp.data(1);
+  const T* restrict gy     = temp.data(2);
+  const T* restrict gz     = temp.data(3);
+  const T* restrict hxx    = temp.data(4);
+  const T* restrict hxy    = temp.data(5);
+  const T* restrict hxz    = temp.data(6);
+  const T* restrict hyy    = temp.data(7);
+  const T* restrict hyz    = temp.data(8);
+  const T* restrict hzz    = temp.data(9);
+  const T* restrict gh_xxx = temp.data(10);
+  const T* restrict gh_xxy = temp.data(11);
+  const T* restrict gh_xxz = temp.data(12);
+  const T* restrict gh_xyy = temp.data(13);
+  const T* restrict gh_xzz = temp.data(15); // row 14 (xyz) is skipped: it does not enter the traces below
+  const T* restrict gh_yyy = temp.data(16);
+  const T* restrict gh_yyz = temp.data(17);
+  const T* restrict gh_yzz = temp.data(18);
+  const T* restrict gh_zzz = temp.data(19);
+
+  for (size_t j = 0; j < output_size; j++)
+  {
+    //As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property that
+    // for an atomic center, the ion gradient is the negative of the electron gradient.
+    // Hence minus signs for each of these.
+    dpsi[i][j][0] = -gx[j];
+    dpsi[i][j][1] = -gy[j];
+    dpsi[i][j][2] = -gz[j];
+
+    dgpsi[i][j](0, 0) = -hxx[j];
+    dgpsi[i][j](0, 1) = dgpsi[i][j](1, 0) = -hxy[j];
+    dgpsi[i][j](0, 2) = dgpsi[i][j](2, 0) = -hxz[j];
+    dgpsi[i][j](1, 1)                     = -hyy[j];
+    dgpsi[i][j](2, 1) = dgpsi[i][j](1, 2) = -hyz[j];
+    dgpsi[i][j](2, 2)                     = -hzz[j];
+
+    //Since this returns the ion gradient of the laplacian, we have to trace the grad hessian vector.
+    dlpsi[i][j][0] = -(gh_xxx[j] + gh_xyy[j] + gh_xzz[j]);
+    dlpsi[i][j][1] = -(gh_xxy[j] + gh_yyy[j] + gh_yzz[j]);
+    dlpsi[i][j][2] = -(gh_xxz[j] + gh_yyz[j] + gh_zzz[j]);
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                         int first,
+                                         int last,
+                                         ValueMatrix& logdet,
+                                         GradMatrix& dlogdet,
+                                         ValueMatrix& d2logdet)
+{ // Fills one row of each output matrix per particle iat in [first, last); rows are numbered from 0.
+  if (!Identity)
+  {
+    // only the first logdet.cols() rows of C take part
+    assert(logdet.cols() <= this->OrbitalSetSize);
+    ValueMatrix C_rows(C->data(), logdet.cols(), BasisSetSize);
+    for (size_t row = 0, iat = first; iat < last; ++row, ++iat)
+    {
+      myBasisSet->evaluateVGL(P, iat, Temp);
+      Product_ABt(Temp, C_rows, Tempv);
+      evaluate_vgl_impl(Tempv, row, logdet, dlogdet, d2logdet);
+    }
+    return;
+  }
+  // Identity: no coefficient matrix, Temp is unpacked directly
+  for (size_t row = 0, iat = first; iat < last; ++row, ++iat)
+  {
+    myBasisSet->evaluateVGL(P, iat, Temp);
+    evaluate_vgl_impl(Temp, row, logdet, dlogdet, d2logdet);
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                         int first,
+                                         int last,
+                                         ValueMatrix& logdet,
+                                         GradMatrix& dlogdet,
+                                         HessMatrix& grad_grad_logdet)
+{ // value/gradient/hessian: particle iat in [first, last) is unpacked with row index i = iat - first
+  if (Identity) // no C to apply: the basis-set output in Temph is unpacked as-is
+  {
+    for (size_t i = 0, iat = first; iat < last; i++, iat++)
+    {
+      myBasisSet->evaluateVGH(P, iat, Temph);
+      evaluate_vgh_impl(Temph, i, logdet, dlogdet, grad_grad_logdet);
+    }
+  }
+  else
+  {
+    assert(logdet.cols() <= this->OrbitalSetSize);
+    ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); // C cut down to logdet.cols() orbitals
+    for (size_t i = 0, iat = first; iat < last; i++, iat++)
+    {
+      myBasisSet->evaluateVGH(P, iat, Temph);
+      Product_ABt(Temph, C_partial_view, Temphv); // basis-function data -> orbital data (presumably Temph * C^T)
+      evaluate_vgh_impl(Temphv, i, logdet, dlogdet, grad_grad_logdet);
+    }
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                         int first,
+                                         int last,
+                                         ValueMatrix& logdet,
+                                         GradMatrix& dlogdet,
+                                         HessMatrix& grad_grad_logdet,
+                                         GGGMatrix& grad_grad_grad_logdet)
+{ // value/gradient/hessian/grad-hessian: particle iat in [first, last) is unpacked with row index i = iat - first
+  if (Identity) // no C to apply: the basis-set output in Tempgh is unpacked as-is
+  {
+    for (size_t i = 0, iat = first; iat < last; i++, iat++)
+    {
+      myBasisSet->evaluateVGHGH(P, iat, Tempgh);
+      evaluate_vghgh_impl(Tempgh, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet);
+    }
+  }
+  else
+  {
+    assert(logdet.cols() <= this->OrbitalSetSize);
+    ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); // C cut down to logdet.cols() orbitals
+    for (size_t i = 0, iat = first; iat < last; i++, iat++)
+    {
+      myBasisSet->evaluateVGHGH(P, iat, this->Tempgh);
+      Product_ABt(this->Tempgh, C_partial_view, this->Tempghv); // basis data -> orbital data (presumably Tempgh * C^T)
+      evaluate_vghgh_impl(this->Tempghv, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet);
+    }
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluateGradSource(const ParticleSet& P,
+                                       int first,
+                                       int last,
+                                       const ParticleSet& source,
+                                       int iat_src,
+                                       GradMatrix& gradphi)
+{ // ion derivative w.r.t. source ion iat_src; particle iat in [first, last) is unpacked with row index i = iat - first
+  if (Identity) // no C to apply: the basis-set output in Temp is unpacked as-is
+  {
+    for (size_t i = 0, iat = first; iat < last; i++, iat++)
+    {
+      myBasisSet->evaluateGradSourceV(P, iat, source, iat_src, this->Temp);
+      evaluate_ionderiv_v_impl(Temp, i, gradphi);
+    }
+  }
+  else
+  { // NOTE(review): the whole of *C is applied here, not a view limited to gradphi.cols() as in evaluate_notranspose
+    for (size_t i = 0, iat = first; iat < last; i++, iat++)
+    {
+      myBasisSet->evaluateGradSourceV(P, iat, source, iat_src, this->Temp);
+      Product_ABt(this->Temp, *C, this->Tempv); // basis-function data -> orbital data (presumably Temp * C^T)
+      evaluate_ionderiv_v_impl(this->Tempv, i, gradphi);
+    }
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluateGradSource(const ParticleSet& P,
+                                       int first,
+                                       int last,
+                                       const ParticleSet& source,
+                                       int iat_src,
+                                       GradMatrix& grad_phi,
+                                       HessMatrix& grad_grad_phi,
+                                       GradMatrix& grad_lapl_phi)
+{ // ion derivatives of value, electron gradient and laplacian; row index i = iat - first for iat in [first, last)
+  if (Identity) // no C to apply: the basis-set output in Tempgh is unpacked as-is
+  {
+    for (size_t i = 0, iat = first; iat < last; i++, iat++)
+    {
+      myBasisSet->evaluateGradSourceVGL(P, iat, source, iat_src, this->Tempgh);
+      evaluate_ionderiv_vgl_impl(this->Tempgh, i, grad_phi, grad_grad_phi, grad_lapl_phi);
+    }
+  }
+  else
+  { // NOTE(review): the whole of *C is applied here, not a view limited to grad_phi.cols() as in evaluate_notranspose
+    for (size_t i = 0, iat = first; iat < last; i++, iat++)
+    {
+      myBasisSet->evaluateGradSourceVGL(P, iat, source, iat_src, this->Tempgh);
+      Product_ABt(this->Tempgh, *C, this->Tempghv); // basis-function data -> orbital data (presumably Tempgh * C^T)
+      evaluate_ionderiv_vgl_impl(this->Tempghv, i, grad_phi, grad_grad_phi, grad_lapl_phi);
+    }
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluateGradSourceRow(const ParticleSet& P,
+                                          int iel,
+                                          const ParticleSet& source,
+                                          int iat_src,
+                                          GradVector& gradphi)
+{ // single-particle form of evaluateGradSource: ion derivative w.r.t. iat_src for particle iel only
+  if (Identity) // no C to apply: the basis-set output in Temp is unpacked as-is
+  {
+    myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp);
+    evaluate_ionderiv_v_row_impl(this->Temp, gradphi);
+  }
+  else
+  {
+    myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp); // same basis call as the Identity branch
+    Product_ABt(Temp, *C, this->Tempv); // basis-function data -> orbital data (presumably Temp * C^T)
+    evaluate_ionderiv_v_row_impl(this->Tempv, gradphi);
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy)
+{
+  if (!use_stored_copy) // refresh the saved coefficients from the current C before rotating
+    *C_copy = *C; // NOTE(review): needs C_copy non-null, presumably via storeParamsBeforeRotation() — confirm
+  //gemm is out-of-place: it reads C_copy and rot_mat ('T': second operand transposed) and overwrites C (beta = 0)
+  BLAS::gemm('N', 'T', BasisSetSize, this->OrbitalSetSize, this->OrbitalSetSize, RealType(1.0), C_copy->data(), BasisSetSize,
+             rot_mat.data(), this->OrbitalSetSize, RealType(0.0), C->data(), BasisSetSize);
+
+  /* debugging code
+  app_log() << "PRINTING MO COEFFICIENTS AFTER ROTATION " << objectName << std::endl;
+  for (int j = 0; j < OrbitalSetSize; j++)
+    for (int i = 0; i < BasisSetSize; i++)
+    {
+      app_log() << " " << std::right << std::fixed << std::setprecision(16) << std::setw(23) << std::scientific
+                << *(C->data() + j * BasisSetSize + i);
+
+      if ((j * BasisSetSize + i + 1) % 4 == 0)
+        app_log() << std::endl;
+    }
+  */
+}
+
+// Explicit instantiations for the supported value types; the member definitions above are only compiled for these.
+template class LCAOrbitalSetT<double>;
+template class LCAOrbitalSetT<float>;
+template class LCAOrbitalSetT<std::complex<double>>;
+template class LCAOrbitalSetT<std::complex<float>>;
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h
new file mode 100644
index 0000000000..d8070b58e5
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h
@@ -0,0 +1,336 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by:
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_TEMP_H
+#define QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_TEMP_H
+
+#include <memory>
+#include "QMCWaveFunctions/SPOSetT.h"
+#include "QMCWaveFunctions/BasisSetBase.h"
+
+#include "Numerics/MatrixOperators.h"
+#include "Numerics/DeterminantOperators.h"
+
+namespace qmcplusplus
+{
+/** class to handle linear combinations of basis orbitals used to evaluate the Dirac determinants.
+   *
+   * SoA version of LCOrbitalSet, templated on the value type T.
+   * Localized basis set is always real (NOTE(review): basis_type is SoaBasisSetBase<T>; confirm for complex T)
+   */
+template<class T>  
+class LCAOrbitalSetT : public SPOSetT<T>
+{
+public:
+  using basis_type = SoaBasisSetBase<T>;
+  using vgl_type   = typename basis_type::vgl_type;
+  using vgh_type   = typename basis_type::vgh_type;
+  using vghgh_type = typename basis_type::vghgh_type;
+
+  using IndexType   = typename SPOSetT<T>::IndexType;
+  using RealType    = typename SPOSetT<T>::RealType;
+  using ComplexType = typename SPOSetT<T>::ComplexType;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using HessMatrix  = typename SPOSetT<T>::HessMatrix;
+  using PosType     = typename SPOSetT<T>::PosType;
+  using HessVector  = typename SPOSetT<T>::HessVector;
+  using GGGMatrix  = typename SPOSetT<T>::GGGMatrix;
+  using GGGVector  = typename SPOSetT<T>::GGGVector;
+  using GradType = typename SPOSetT<T>::GradType;
+  using OffloadMWVGLArray = Array<T, 3, OffloadPinnedAllocator<T>>; // [VGL, walker, Orbs]
+  using OffloadMWVArray   = Array<T, 2, OffloadPinnedAllocator<T>>; // [walker, Orbs]
+
+  ///pointer to the basis set
+  std::unique_ptr<basis_type> myBasisSet;
+  /// pointer to matrix containing the coefficients
+  std::shared_ptr<ValueMatrix> C;
+
+  /** constructor
+     * @param bs pointer to the BasisSet
+     */
+  LCAOrbitalSetT(const std::string& my_name, std::unique_ptr<basis_type>&& bs);
+
+  LCAOrbitalSetT(const LCAOrbitalSetT& in);
+
+  std::string getClassName() const final { return "LCAOrbitalSetT"; }
+
+  bool isRotationSupported() const final { return true; }
+
+  bool hasIonDerivs() const final { return true; }
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const final;
+
+  void storeParamsBeforeRotation() final { C_copy = std::make_shared<ValueMatrix>(*C); }
+
+  void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) final;
+
+  /** set the OrbitalSetSize and Identity=false and initialize internal storages
+    */
+  void setOrbitalSetSize(int norbs) final;
+
+  /** return the size of the basis set, or 0 when myBasisSet is null
+    */
+  int getBasisSetSize() const { return (myBasisSet == nullptr) ? 0 : myBasisSet->getBasisSetSize(); }
+
+  bool isIdentity() const { return Identity; };
+
+  /** check consistency between Identity and C
+    *
+    */
+  void checkObject() const final;
+
+  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) final;
+
+  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final;
+
+  void mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                        const RefVectorWithLeader<ParticleSet>& P_list,
+                        int iat,
+                        const RefVector<ValueVector>& psi_v_list) const final;
+
+  void mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                      const RefVectorWithLeader<ParticleSet>& P_list,
+                      int iat,
+                      const RefVector<ValueVector>& psi_v_list,
+                      const RefVector<GradVector>& dpsi_v_list,
+                      const RefVector<ValueVector>& d2psi_v_list) const final;
+
+  void mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                            const RefVectorWithLeader<const VirtualParticleSet>& vp_list,
+                            const RefVector<ValueVector>& psi_list,
+                            const std::vector<const T*>& invRow_ptr_list,
+                            std::vector<std::vector<T>>& ratios_list) const final;
+
+  void evaluateDetRatios(const VirtualParticleSet& VP,
+                         ValueVector& psi,
+                         const ValueVector& psiinv,
+                         std::vector<T>& ratios) final;
+
+  void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                      const RefVectorWithLeader<ParticleSet>& P_list,
+                                      int iat,
+                                      const std::vector<const T*>& invRow_ptr_list,
+                                      OffloadMWVGLArray& phi_vgl_v,
+                                      std::vector<T>& ratios,
+                                      std::vector<GradType>& grads) const final;
+
+  void evaluateVGH(const ParticleSet& P,
+                   int iat,
+                   ValueVector& psi,
+                   GradVector& dpsi,
+                   HessVector& grad_grad_psi) final;
+
+  void evaluateVGHGH(const ParticleSet& P,
+                     int iat,
+                     ValueVector& psi,
+                     GradVector& dpsi,
+                     HessVector& grad_grad_psi,
+                     GGGVector& grad_grad_grad_psi) final;
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) final;
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& grad_grad_logdet) final;
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& grad_grad_logdet,
+                            GGGMatrix& grad_grad_grad_logdet) final;
+
+  //NOTE:  The data types get complicated here, so here's an overview of the
+  //       data types associated with ionic derivatives, and how to get their data.
+  //
+  //NOTE:  These data structures hold the data for one particular ion, and so the ID is implicit.
+  //       It's up to the user to keep track of which ion these derivatives refer to.
+  //
+  // 1.) GradMatrix grad_phi:  Holds the ionic derivatives of each SPO for each electron.
+  //            Example:  grad_phi[iel][iorb][idim].  iel  -- electron index.
+  //                                                iorb -- orbital index.
+  //                                                idim  -- cartesian index of ionic derivative.
+  //                                                        X=0, Y=1, Z=2.
+  //
+  // 2.) HessMatrix grad_grad_phi:  Holds the ionic derivatives of the electron gradient components
+  //                                   for each SPO and each electron.
+  //            Example:  grad_grad_phi[iel][iorb](idim,edim)  iel  -- electron index.
+  //                                                           iorb -- orbital index.
+  //                                                           idim -- ionic derivative's cartesian index.
+  //                                                              X=0, Y=1, Z=2
+  //                                                           edim -- electron derivative's cartesian index.
+  //                                                              x=0, y=1, z=2.
+  //
+  // 3.) GradMatrix grad_lapl_phi:  Holds the ionic derivatives of the electron laplacian for each SPO and each electron.
+  //            Example:  grad_lapl_phi[iel][iorb][idim].  iel  -- electron index.
+  //                                                       iorb -- orbital index.
+  //                                                       idim -- cartesian index of ionic derivative.
+  //                                                           X=0, Y=1, Z=2.
+
+  /**
+ * \brief Calculate ion derivatives of SPO's.
+ *
+ *  @param P Electron particle set.
+ *  @param first index of first electron
+ *  @param last one past the index of the last electron (electrons in [first, last) are evaluated)
+ *  @param source Ion particle set.
+ *  @param iat_src  Index of ion.
+ *  @param grad_phi Container storing ion gradients for all particles and all orbitals.
+ */
+  void evaluateGradSource(const ParticleSet& P,
+                          int first,
+                          int last,
+                          const ParticleSet& source,
+                          int iat_src,
+                          GradMatrix& grad_phi) final;
+
+  /**
+ * \brief Calculate ion derivatives of SPO's, their gradients, and their laplacians.
+ *
+ *  @param P Electron particle set.
+ *  @param first index of first electron.
+ *  @param last one past the index of the last electron (electrons in [first, last) are evaluated)
+ *  @param source Ion particle set.
+ *  @param iat_src  Index of ion.
+ *  @param grad_phi Container storing ion gradients for all particles and all orbitals.
+ *  @param grad_grad_phi Container storing ion gradients of electron gradients for all particles and all orbitals.
+ *  @param grad_lapl_phi Container storing ion gradients of SPO laplacians for all particles and all orbitals.
+ */
+  void evaluateGradSource(const ParticleSet& P,
+                          int first,
+                          int last,
+                          const ParticleSet& source,
+                          int iat_src,
+                          GradMatrix& grad_phi,
+                          HessMatrix& grad_grad_phi,
+                          GradMatrix& grad_lapl_phi) final;
+
+  void evaluateGradSourceRow(const ParticleSet& P,
+                             int iel,
+                             const ParticleSet& source,
+                             int iat_src,
+                             GradVector& grad_phi) final;
+
+  void createResource(ResourceCollection& collection) const final;
+  void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const final;
+  void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const final;
+
+protected:
+  ///number of basis functions (the number of orbitals is OrbitalSetSize in the base class)
+  const IndexType BasisSetSize;
+  /// a copy of the original C before orbital rotation is applied;
+  std::shared_ptr<ValueMatrix> C_copy;
+
+  ///true if C is an identity matrix
+  bool Identity;
+  ///Temp(BasisSetSize) : Row index=V,Gx,Gy,Gz,L
+  vgl_type Temp;
+  ///Tempv(OrbitalSetSize) Tempv=C*Temp
+  vgl_type Tempv;
+
+  ///These are temporary VectorSoAContainers to hold value, gradient, and hessian for
+  ///all basis or SPO functions evaluated at a given point.
+  ///Nbasis x [1(value)+3(gradient)+6(hessian)]
+  vgh_type Temph;
+  ///Norbitals x [1(value)+3(gradient)+6(hessian)]
+  vgh_type Temphv;
+
+  ///These are temporary VectorSoAContainers to hold value, gradient, hessian, and
+  /// gradient hessian for all basis or SPO functions evaluated at a given point.
+  ///Nbasis x [1(value)+3(gradient)+6(hessian)+10(grad_hessian)]
+  vghgh_type Tempgh;
+  ///Norbitals x [1(value)+3(gradient)+6(hessian)+10(grad_hessian)]
+  vghgh_type Tempghv;
+
+private:
+  ///helper functions to handle Identity
+  void evaluate_vgl_impl(const vgl_type& temp, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) const;
+
+  void evaluate_vgl_impl(const vgl_type& temp,
+                         int i,
+                         ValueMatrix& logdet,
+                         GradMatrix& dlogdet,
+                         ValueMatrix& d2logdet) const;
+  ///The next two functions unpack the data in vgh_type temp object into wavefunction friendly data structures.
+
+
+  ///This unpacks temp into vectors psi, dpsi, and d2psi.
+  void evaluate_vgh_impl(const vgh_type& temp, ValueVector& psi, GradVector& dpsi, HessVector& d2psi) const;
+
+  ///Unpacks temp into the ith row (or electron index) of logdet, dlogdet, dhlogdet.
+  void evaluate_vgh_impl(const vgh_type& temp,
+                         int i,
+                         ValueMatrix& logdet,
+                         GradMatrix& dlogdet,
+                         HessMatrix& dhlogdet) const;
+  ///Unpacks data in vghgh_type temp object into wavefunction friendly data structures for value, gradient, hessian
+  ///and gradient hessian.
+  void evaluate_vghgh_impl(const vghgh_type& temp,
+                           ValueVector& psi,
+                           GradVector& dpsi,
+                           HessVector& d2psi,
+                           GGGVector& dghpsi) const;
+
+  void evaluate_vghgh_impl(const vghgh_type& temp,
+                           int i,
+                           ValueMatrix& logdet,
+                           GradMatrix& dlogdet,
+                           HessMatrix& dhlogdet,
+                           GGGMatrix& dghlogdet) const;
+
+
+  ///Unpacks data in vgl object and calculates/places ionic gradient result into dlogdet.
+  void evaluate_ionderiv_v_impl(const vgl_type& temp, int i, GradMatrix& dlogdet) const;
+
+  ///Unpacks data in vghgh object and calculates/places ionic gradient of value,
+  ///  electron gradient, and electron laplacian result into dlogdet, dglogdet, and dllogdet respectively.
+  void evaluate_ionderiv_vgl_impl(const vghgh_type& temp,
+                                  int i,
+                                  GradMatrix& dlogdet,
+                                  HessMatrix& dglogdet,
+                                  GradMatrix& dllogdet) const;
+
+  ///Unpacks data in vgl object and calculates/places ionic gradient of a single row (phi_j(r)) into dlogdet.
+  void evaluate_ionderiv_v_row_impl(const vgl_type& temp, GradVector& dlogdet) const;
+
+  void mw_evaluateVGLImplGEMM(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                              const RefVectorWithLeader<ParticleSet>& P_list,
+                              int iat,
+                              OffloadMWVGLArray& phi_vgl_v) const;
+
+  /// packed walker GEMM implementation
+  void mw_evaluateValueImplGEMM(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                const RefVectorWithLeader<ParticleSet>& P_list,
+                                int iat,
+                                OffloadMWVArray& phi_v) const;
+
+  struct LCAOMultiWalkerMem;
+  ResourceHandle<LCAOMultiWalkerMem> mw_mem_handle_;
+  /// timer for basis set
+  NewTimer& basis_timer_;
+  /// timer for MO
+  NewTimer& mo_timer_;
+};
+} // namespace qmcplusplus
+#endif

From fda3f2b5f7ce587faea41dc4e37bacb65f5033ec Mon Sep 17 00:00:00 2001
From: Steven Hahn <hahnse@ornl.gov>
Date: Thu, 24 Aug 2023 13:39:50 -0400
Subject: [PATCH 10/17] Add templated class LCAOrbitalSetWithCorrectionT

Signed-off-by: Steven Hahn <hahnse@ornl.gov>
---
 src/QMCWaveFunctions/CMakeLists.txt           |  2 +-
 .../LCAO/CuspCorrectionConstruction.h         |  2 +-
 .../LCAO/LCAOrbitalSetWithCorrectionT.cpp     | 70 +++++++++++++++++
 .../LCAO/LCAOrbitalSetWithCorrectionT.h       | 75 +++++++++++++++++++
 4 files changed, 147 insertions(+), 2 deletions(-)
 create mode 100644 src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp
 create mode 100644 src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h

diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt
index 2db2ed4f13..14032563d5 100644
--- a/src/QMCWaveFunctions/CMakeLists.txt
+++ b/src/QMCWaveFunctions/CMakeLists.txt
@@ -77,7 +77,7 @@ if(OHMMS_DIM MATCHES 3)
     set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOSpinorBuilder.cpp)
   else(QMC_COMPLEX)
     #LCAO cusp correction is not ready for complex
-    set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOrbitalSetWithCorrection.cpp
+    set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOrbitalSetWithCorrection.cpp LCAO/LCAOrbitalSetWithCorrectionT.cpp
                      LCAO/CuspCorrectionConstruction.cpp LCAO/SoaCuspCorrection.cpp)
   endif(QMC_COMPLEX)
 
diff --git a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.h b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.h
index d4f3208b61..3d1854cea4 100644
--- a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.h
+++ b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.h
@@ -15,7 +15,7 @@
 #define QMCPLUSPLUS_CUSP_CORRECTION_CONSTRUCTOR_H
 
 #include "LCAOrbitalSet.h"
-#include "LCAOrbitalSetWithCorrection.h"
+#include "SoaCuspCorrection.h"
 #include "CuspCorrection.h"
 
 class Communicate;
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp
new file mode 100644
index 0000000000..e7ab5ed7ce
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp
@@ -0,0 +1,70 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2018 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#include "LCAOrbitalSetWithCorrectionT.h"
+
+namespace qmcplusplus
+{
+template<typename T> // ctor: my_name also names the wrapped lcao (my_name + "_modified"); ions and els go to cusp
+LCAOrbitalSetWithCorrectionT<T>::LCAOrbitalSetWithCorrectionT(const std::string& my_name,
+                                                              ParticleSet& ions,
+                                                              ParticleSet& els,
+                                                              std::unique_ptr<basis_type>&& bs)
+    : SPOSet(my_name), lcao(my_name + "_modified", std::move(bs)), cusp(ions, els)
+{}
+
+template<typename T>
+void LCAOrbitalSetWithCorrectionT<T>::setOrbitalSetSize(int norbs)
+{
+  assert(lcao.getOrbitalSetSize() == norbs && "norbs doesn't agree with lcao!"); // lcao is only checked, never resized here
+  this->OrbitalSetSize = norbs;
+  cusp.setOrbitalSetSize(norbs); // keep the cusp correction sized like this object
+}
+
+template<typename T>
+std::unique_ptr<SPOSetT<T>> LCAOrbitalSetWithCorrectionT<T>::makeClone() const
+{
+  return std::make_unique<LCAOrbitalSetWithCorrectionT<T>>(*this); // goes through the copy constructor declared in the header
+}
+
+template<typename T>
+void LCAOrbitalSetWithCorrectionT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+{
+  lcao.evaluateValue(P, iat, psi); // wrapped LCAO set fills psi first
+  cusp.addV(P, iat, psi);          // then cusp updates the same psi (presumably adds the correction, per the name)
+}
+
+template<typename T>
+void LCAOrbitalSetWithCorrectionT<T>::evaluateVGL(const ParticleSet& P,
+                                                  int iat,
+                                                  ValueVector& psi,
+                                                  GradVector& dpsi,
+                                                  ValueVector& d2psi)
+{
+  lcao.evaluateVGL(P, iat, psi, dpsi, d2psi);    // wrapped LCAO set fills value, gradient and laplacian first
+  cusp.add_vector_vgl(P, iat, psi, dpsi, d2psi); // then cusp updates the same three outputs (presumably additive)
+}
+
+template<typename T>
+void LCAOrbitalSetWithCorrectionT<T>::evaluate_notranspose(const ParticleSet& P,
+                                                           int first,
+                                                           int last,
+                                                           ValueMatrix& logdet,
+                                                           GradMatrix& dlogdet,
+                                                           ValueMatrix& d2logdet)
+{
+  lcao.evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet); // wrapped LCAO set fills the matrices first
+  for (size_t i = 0, iat = first; iat < last; i++, iat++) // particle iat in [first, last) pairs with row i = iat - first
+    cusp.add_vgl(P, iat, i, logdet, dlogdet, d2logdet);
+}
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h
new file mode 100644
index 0000000000..4346f578d2
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h
@@ -0,0 +1,75 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_WITH_CORRECTION_TEMP_H
+#define QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_WITH_CORRECTION_TEMP_H
+
+#include "QMCWaveFunctions/SPOSetT.h"
+#include "QMCWaveFunctions/BasisSetBase.h"
+#include "LCAOrbitalSetT.h"
+#include "SoaCuspCorrection.h"
+
+
+namespace qmcplusplus
+{
+/** class to add cusp correction to LCAOrbitalSetT.
+   * Holds an LCAOrbitalSetT<T> (lcao) and a cusp-correction object (cusp) as private members.
+   */
+
+template<typename T>
+class LCAOrbitalSetWithCorrectionT : public SPOSetT<T>
+{
+public:
+  using basis_type  = typename LCAOrbitalSetT<T>::basis_type;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  /** constructor
+     * @param my_name name of this object; the wrapped lcao is named my_name + "_modified"
+     * @param ions passed to the cusp correction
+     * @param els passed to the cusp correction
+     * @param bs pointer to the BasisSet
+     */
+  LCAOrbitalSetWithCorrectionT(const std::string& my_name,
+                               ParticleSet& ions,
+                               ParticleSet& els,
+                               std::unique_ptr<basis_type>&& bs);
+
+  LCAOrbitalSetWithCorrectionT(const LCAOrbitalSetWithCorrectionT& in) = default;
+
+  std::string getClassName() const final { return "LCAOrbitalSetWithCorrectionT"; }
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const final;
+
+  void setOrbitalSetSize(int norbs) final;
+
+  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) final;
+
+  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final;
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) final;
+
+  friend class LCAOrbitalBuilder;
+
+private:
+  LCAOrbitalSetT<T> lcao;
+
+  SoaCuspCorrection cusp;
+};
+} // namespace qmcplusplus
+#endif

From 40c8179b7f50c571719a8fc81fe461bb677958cc Mon Sep 17 00:00:00 2001
From: Steven Hahn <hahnse@ornl.gov>
Date: Thu, 24 Aug 2023 15:26:51 -0400
Subject: [PATCH 11/17] Fix LCAOrbitalSetWithCorrectionT. Add SPOSetBuilderT
 and SoaCuspCorrectionT.

Signed-off-by: Steven Hahn <hahnse@ornl.gov>
---
 src/QMCWaveFunctions/CMakeLists.txt           |   3 +-
 .../LCAO/LCAOrbitalSetWithCorrectionT.cpp     |   5 +-
 .../LCAO/LCAOrbitalSetWithCorrectionT.h       |   4 +-
 .../LCAO/SoaCuspCorrectionT.cpp               | 171 ++++++++++++++++
 .../LCAO/SoaCuspCorrectionT.h                 | 117 +++++++++++
 src/QMCWaveFunctions/SPOSetBuilderT.cpp       | 187 ++++++++++++++++++
 src/QMCWaveFunctions/SPOSetBuilderT.h         |  93 +++++++++
 src/QMCWaveFunctions/SPOSetInfo.h             |   2 +
 8 files changed, 578 insertions(+), 4 deletions(-)
 create mode 100644 src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp
 create mode 100644 src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h
 create mode 100644 src/QMCWaveFunctions/SPOSetBuilderT.cpp
 create mode 100644 src/QMCWaveFunctions/SPOSetBuilderT.h

diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt
index 14032563d5..b21a1a3e9c 100644
--- a/src/QMCWaveFunctions/CMakeLists.txt
+++ b/src/QMCWaveFunctions/CMakeLists.txt
@@ -28,6 +28,7 @@ set(WFBASE_SRCS
     WaveFunctionComponent.cpp
     WaveFunctionComponentBuilder.cpp
     SPOSetBuilder.cpp
+    SPOSetBuilderT.cpp
     SPOInfo.cpp
     SPOSetInfo.cpp
     SPOSetInputInfo.cpp
@@ -78,7 +79,7 @@ if(OHMMS_DIM MATCHES 3)
   else(QMC_COMPLEX)
     #LCAO cusp correction is not ready for complex
     set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOrbitalSetWithCorrection.cpp LCAO/LCAOrbitalSetWithCorrectionT.cpp
-                     LCAO/CuspCorrectionConstruction.cpp LCAO/SoaCuspCorrection.cpp)
+	    LCAO/CuspCorrectionConstruction.cpp LCAO/SoaCuspCorrection.cpp LCAO/SoaCuspCorrectionT.cpp)
   endif(QMC_COMPLEX)
 
   if(HAVE_EINSPLINE)
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp
index e7ab5ed7ce..f713646d82 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp
@@ -19,7 +19,7 @@ LCAOrbitalSetWithCorrectionT<T>::LCAOrbitalSetWithCorrectionT(const std::string&
                                                               ParticleSet& ions,
                                                               ParticleSet& els,
                                                               std::unique_ptr<basis_type>&& bs)
-    : SPOSet(my_name), lcao(my_name + "_modified", std::move(bs)), cusp(ions, els)
+    : SPOSetT<T>(my_name), lcao(my_name + "_modified", std::move(bs)), cusp(ions, els)
 {}
 
 template<typename T>
@@ -67,4 +67,7 @@ void LCAOrbitalSetWithCorrectionT<T>::evaluate_notranspose(const ParticleSet& P,
     cusp.add_vgl(P, iat, i, logdet, dlogdet, d2logdet);
 }
 
+template class LCAOrbitalSetWithCorrectionT<double>;
+template class LCAOrbitalSetWithCorrectionT<float>;
+
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h
index 4346f578d2..67ea645416 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h
@@ -16,7 +16,7 @@
 #include "QMCWaveFunctions/SPOSetT.h"
 #include "QMCWaveFunctions/BasisSetBase.h"
 #include "LCAOrbitalSetT.h"
-#include "SoaCuspCorrection.h"
+#include "SoaCuspCorrectionT.h"
 
 
 namespace qmcplusplus
@@ -69,7 +69,7 @@ class LCAOrbitalSetWithCorrectionT : public SPOSetT<T>
 private:
   LCAOrbitalSetT<T> lcao;
 
-  SoaCuspCorrection cusp;
+  SoaCuspCorrectionT<T> cusp;
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp
new file mode 100644
index 0000000000..57a1312447
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp
@@ -0,0 +1,171 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2021 QMCPACK developers.
+//
+// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+/** @file SoaCuspCorrectionT.cpp
+ */
+#include "SoaCuspCorrectionT.h"
+#include "SoaCuspCorrectionBasisSet.h"
+
+namespace qmcplusplus
+{
+template<class T>
+SoaCuspCorrectionT<T>::SoaCuspCorrectionT(ParticleSet& ions, ParticleSet& els) : myTableIndex(els.addTable(ions))
+{
+  NumCenters = ions.getTotalNum(); // one correction slot per particle in ions
+  NumTargets = els.getTotalNum();
+  LOBasisSet.resize(NumCenters); // slots start out null; add() fills them center by center
+}
+
+template<class T>
+SoaCuspCorrectionT<T>::SoaCuspCorrectionT(const SoaCuspCorrectionT<T>& a) = default;
+
+template<class T>
+void SoaCuspCorrectionT<T>::setOrbitalSetSize(int norbs)
+{
+  MaxOrbSize = norbs;
+  myVGL.resize(5, MaxOrbSize); // five scratch rows, read back as value, gradient x/y/z and laplacian
+}
+
+template<class T>
+void SoaCuspCorrectionT<T>::evaluateVGL(const ParticleSet& P, int iat, VGLVector& vgl)
+{ // adds the correction value/gradient/laplacian of particle iat to vgl (not inline: the class declares it non-inline)
+  assert(MaxOrbSize >= vgl.size());
+  myVGL = 0.0; // reset the scratch rows before the per-center evaluation
+
+  const auto& d_table = P.getDistTableAB(myTableIndex);
+  const auto& dist    = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
+  const auto& displ   = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
+  for (int c = 0; c < NumCenters; c++)
+    if (LOBasisSet[c]) // null entries are centers without a correction
+      LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], myVGL[2], myVGL[3], myVGL[4]);
+
+  { // add the scratch rows onto the caller's container; its existing content is kept
+    const auto v_in  = myVGL[0];
+    const auto gx_in = myVGL[1];
+    const auto gy_in = myVGL[2];
+    const auto gz_in = myVGL[3];
+    const auto l_in  = myVGL[4];
+    auto v_out       = vgl.data(0);
+    auto gx_out      = vgl.data(1);
+    auto gy_out      = vgl.data(2);
+    auto gz_out      = vgl.data(3);
+    auto l_out       = vgl.data(4);
+    for (size_t i = 0; i < vgl.size(); ++i)
+    {
+      v_out[i] += v_in[i];
+      gx_out[i] += gx_in[i];
+      gy_out[i] += gy_in[i];
+      gz_out[i] += gz_in[i];
+      l_out[i] += l_in[i];
+    }
+  }
+}
+
+template<class T>
+void SoaCuspCorrectionT<T>::evaluate_vgl(const ParticleSet& P,
+                                         int iat,
+                                         ValueVector& psi,
+                                         GradVector& dpsi,
+                                         ValueVector& d2psi)
+{ // adds the correction for particle iat to psi (values), dpsi (gradients) and d2psi (laplacians)
+  assert(MaxOrbSize >= psi.size());
+  myVGL = 0.0; // reset the scratch rows before the per-center evaluation
+
+  const auto& d_table = P.getDistTableAB(myTableIndex);
+  const auto& dist    = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
+  const auto& displ   = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
+  for (int c = 0; c < NumCenters; c++)
+    if (LOBasisSet[c]) // null entries are centers without a correction
+      LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], myVGL[2], myVGL[3], myVGL[4]);
+
+  const auto v_in  = myVGL[0];
+  const auto gx_in = myVGL[1];
+  const auto gy_in = myVGL[2];
+  const auto gz_in = myVGL[3];
+  const auto l_in  = myVGL[4];
+  for (size_t i = 0; i < psi.size(); ++i) // accumulate: the caller's entries are incremented, not overwritten
+  {
+    psi[i] += v_in[i];
+    dpsi[i][0] += gx_in[i];
+    dpsi[i][1] += gy_in[i];
+    dpsi[i][2] += gz_in[i];
+    d2psi[i] += l_in[i];
+  }
+}
+
+template<class T>
+void SoaCuspCorrectionT<T>::evaluate_vgl(const ParticleSet& P,
+                                         int iat,
+                                         int idx,
+                                         ValueMatrix& psi,
+                                         GradMatrix& dpsi,
+                                         ValueMatrix& d2psi)
+{ // adds the correction for particle iat to row idx of psi, dpsi and d2psi
+  assert(MaxOrbSize >= psi.cols());
+  myVGL = 0.0; // reset the scratch rows before the per-center evaluation
+
+  const auto& d_table = P.getDistTableAB(myTableIndex);
+  const auto& dist    = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
+  const auto& displ   = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
+  for (int c = 0; c < NumCenters; c++)
+    if (LOBasisSet[c]) // null entries are centers without a correction
+      LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], myVGL[2], myVGL[3], myVGL[4]);
+
+  const auto v_in  = myVGL[0];
+  const auto gx_in = myVGL[1];
+  const auto gy_in = myVGL[2];
+  const auto gz_in = myVGL[3];
+  const auto l_in  = myVGL[4];
+  for (size_t i = 0; i < psi.cols(); ++i) // accumulate into row idx, one column per orbital index i
+  {
+    psi[idx][i] += v_in[i];
+    dpsi[idx][i][0] += gx_in[i];
+    dpsi[idx][i][1] += gy_in[i];
+    dpsi[idx][i][2] += gz_in[i];
+    d2psi[idx][i] += l_in[i];
+  }
+}
+
+template<class T>
+void SoaCuspCorrectionT<T>::evaluateV(const ParticleSet& P, int iat, ValueVector& psi)
+{ // adds the correction values (no derivatives) for particle iat to psi
+  assert(MaxOrbSize >= psi.size());
+  T* tmp_vals = myVGL[0]; // the first scratch row doubles as the value buffer
+
+  std::fill_n(tmp_vals, myVGL.size(), 0.0); // NOTE(review): count is myVGL.size(), presumably the whole scratch matrix - confirm
+
+  const auto& d_table = P.getDistTableAB(myTableIndex);
+  const auto& dist    = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
+
+  //THIS IS SERIAL, only way to avoid this is to use myVGL
+  for (int c = 0; c < NumCenters; c++)
+    if (LOBasisSet[c]) // null entries are centers without a correction
+      LOBasisSet[c]->evaluate(dist[c], tmp_vals);
+
+  { //collect
+    const auto v_in = myVGL[0];
+    for (size_t i = 0; i < psi.size(); ++i)
+      psi[i] += v_in[i];
+  }
+}
+
+template<class T>
+void SoaCuspCorrectionT<T>::add(int icenter, std::unique_ptr<COT> aos)
+{ // stores aos as the correction of center icenter, replacing whatever the slot held before
+  assert(MaxOrbSize == aos->getNumOrbs() && "All the centers should support the same number of orbitals!");
+  LOBasisSet[icenter].reset(aos.release()); // ownership moves from the unique_ptr into the shared_ptr slot
+}
+
+template class SoaCuspCorrectionT<double>;
+template class SoaCuspCorrectionT<float>;
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h
new file mode 100644
index 0000000000..f20bfa5730
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h
@@ -0,0 +1,117 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2021 QMCPACK developers.
+//
+// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+/** @file SoaCuspCorrectionT.h
+ */
+#ifndef QMCPLUSPLUS_SOA_CUSPCORRECTIONT_H
+#define QMCPLUSPLUS_SOA_CUSPCORRECTIONT_H
+
+#include "Configuration.h"
+#include "QMCWaveFunctions/SPOSetT.h"
+
+namespace qmcplusplus
+{
+template<typename T>
+class CuspCorrectionAtomicBasis;
+
+/** Cusp correction terms that are added on top of already evaluated orbitals.
+ *
+ * Holds at most one CuspCorrectionAtomicBasis per center (ion). Every evaluate
+ * function runs the populated centers for particle iat into a scratch buffer
+ * and then adds (+=) that buffer to the caller-provided storage.
+ * The template parameter T is the value type forwarded to SPOSetT<T>.
+ */
+template<class T>
+class SoaCuspCorrectionT
+{
+  using RealType    = typename SPOSetT<T>::RealType;
+  using VGLVector   = VectorSoaContainer<T, 5>;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using PosType     = typename SPOSetT<T>::PosType;
+
+  ///number of centers, e.g., ions
+  size_t NumCenters;
+  ///number of quantum particles
+  size_t NumTargets;
+  ///index of the els-ions distance table, as returned by els.addTable(ions)
+  const int myTableIndex;
+  /** Maximal number of supported MOs
+   * this is not the AO basis because cusp correction is applied on the MO directly.
+   */
+  int MaxOrbSize = 0;
+
+  ///COMPLEX WON'T WORK
+  using COT = CuspCorrectionAtomicBasis<RealType>;
+
+  /** container of the unique pointers to the Atomic Orbitals
+   *
+   * size of LOBasisSet = number of centers (atoms)
+   * should use unique_ptr once COT is fixed for better performance
+   */
+  std::vector<std::shared_ptr<const COT>> LOBasisSet;
+
+  Matrix<RealType> myVGL; ///< scratch buffer, resized to (5, MaxOrbSize) by setOrbitalSetSize()
+
+public:
+  /** constructor
+   * @param ions ionic system
+   * @param els electronic system
+   */
+  SoaCuspCorrectionT(ParticleSet& ions, ParticleSet& els);
+
+  /** copy constructor */
+  SoaCuspCorrectionT(const SoaCuspCorrectionT& a);
+
+  /** set the number of orbitals this cusp correction may serve. call this before adding any correction centers.
+   */
+  void setOrbitalSetSize(int norbs);
+
+  /** compute VGL and add it to vgl
+   * @param P quantum particleset
+   * @param iat active particle
+   * @param vgl 5-component SoA container (value, gradient x/y/z, laplacian) that the correction is added to
+   * @note uses getTempDists()/getTempDispls() when iat is P.getActivePtcl(), the stored table rows otherwise
+   */
+  void evaluateVGL(const ParticleSet& P, int iat, VGLVector& vgl);
+
+  void evaluate_vgl(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
+
+  void evaluate_vgl(const ParticleSet& P, int iat, int idx, ValueMatrix& psi, GradMatrix& dpsi, ValueMatrix& d2psi);
+
+  /** compute values for the iat-particle move and add them to psi
+   *
+   * Uses getTempDists() when iat is P.getActivePtcl(), getDistRow(iat) otherwise
+   */
+  void evaluateV(const ParticleSet& P, int iat, ValueVector& psi);
+
+  /** add a new set of Centered Atomic Orbitals
+   * @param icenter the index of the center
+   * @param aos a set of Centered Atomic Orbitals
+   */
+  void add(int icenter, std::unique_ptr<COT> aos);
+
+  void addVGL(const ParticleSet& P, int iat, VGLVector& vgl) { evaluateVGL(P, iat, vgl); }
+  void addV(const ParticleSet& P, int iat, ValueVector& psi) { evaluateV(P, iat, psi); }
+  void add_vgl(const ParticleSet& P, int iat, int idx, ValueMatrix& vals, GradMatrix& dpsi, ValueMatrix& d2psi)
+  {
+    evaluate_vgl(P, iat, idx, vals, dpsi, d2psi);
+  }
+  void add_vector_vgl(const ParticleSet& P, int iat, ValueVector& vals, GradVector& dpsi, ValueVector& d2psi)
+  {
+    evaluate_vgl(P, iat, vals, dpsi, d2psi);
+  }
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/SPOSetBuilderT.cpp b/src/QMCWaveFunctions/SPOSetBuilderT.cpp
new file mode 100644
index 0000000000..c682d6a77a
--- /dev/null
+++ b/src/QMCWaveFunctions/SPOSetBuilderT.cpp
@@ -0,0 +1,187 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#include "SPOSetBuilderT.h"
+#include "OhmmsData/AttributeSet.h"
+#include <Message/UniformCommunicateError.h>
+
+#ifndef QMC_COMPLEX
+#include "QMCWaveFunctions/RotatedSPOsT.h"
+#endif
+
+namespace qmcplusplus
+{
+template<typename T>
+SPOSetBuilderT<T>::SPOSetBuilderT(const std::string& type_name, Communicate* comm)
+    : MPIObjectBase(comm), legacy(true), type_name_(type_name)
+{
+  reserve_states(); // called without arguments, i.e. with the default nsets of the declaration
+}
+
+template<typename T>
+void SPOSetBuilderT<T>::reserve_states(int nsets)
+{
+  // grow-only: append empty SPOSetInfo entries until at least nsets of them exist
+  const int missing = nsets - static_cast<int>(states.size());
+  for (int added = 0; added < missing; ++added)
+    states.push_back(std::make_unique<SPOSetInfo>());
+}
+
+template<typename T>
+std::unique_ptr<SPOSetT<T>> SPOSetBuilderT<T>::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input_info)
+{ // default for builders that do not override the general (non-legacy) request path
+  myComm->barrier_and_abort("SPOSetBuilderT::createSPOSet(cur,input_info) has not been implemented");
+  return nullptr; // presumably never reached; present so every path returns a value
+}
+
+template<typename T>
+std::unique_ptr<SPOSetT<T>> SPOSetBuilderT<T>::createSPOSet(xmlNodePtr cur)
+{ // builds the SPOSet described by cur and wraps it in RotatedSPOsT when optimize is "rotation" or "yes"
+  std::string spo_object_name;
+  std::string optimize("no");
+
+  OhmmsAttributeSet attrib;
+  attrib.add(spo_object_name, "id"); // "id" and "name" both bind to the same string
+  attrib.add(spo_object_name, "name");
+  attrib.add(optimize, "optimize");
+  attrib.put(cur);
+
+  app_summary() << std::endl;
+  app_summary() << "     Single particle orbitals (SPO)" << std::endl;
+  app_summary() << "     ------------------------------" << std::endl;
+  app_summary() << "      Name: " << spo_object_name << "   Type: " << type_name_
+                << "   Builder class name: " << ClassName << std::endl;
+  app_summary() << std::endl;
+
+  if (spo_object_name.empty())
+    myComm->barrier_and_abort("SPOSet object \"name\" attribute not given in the input!");
+
+  // read specialized sposet construction requests
+  //   and translate them into a set of orbital indices
+  SPOSetInputInfo input_info(cur);
+
+  // process general sposet construction requests
+  //   and preserve legacy interface
+  std::unique_ptr<SPOSetT<T>> sposet;
+
+  try
+  {
+    if (legacy && input_info.legacy_request)
+      sposet = createSPOSetFromXML(cur);
+    else
+      sposet = createSPOSet(cur, input_info);
+  }
+  catch (const UniformCommunicateError& ue)
+  {
+    myComm->barrier_and_abort(ue.what());
+  }
+
+  if (!sposet)
+    myComm->barrier_and_abort("SPOSetBuilderT::createSPOSet sposet creation failed");
+
+  if (optimize == "rotation" || optimize == "yes")
+  {
+#ifdef QMC_COMPLEX
+    // abort through the communicator, as createRotatedSPOSet does, instead of a bare abort()
+    myComm->barrier_and_abort("Orbital optimization via rotation doesn't support complex wavefunction yet.");
+#else
+    app_warning() << "Specifying orbital rotation via optimize tag is deprecated. Use the rotated_spo element instead"
+                  << std::endl;
+
+    sposet->storeParamsBeforeRotation();
+    // create sposet with rotation
+    auto& sposet_ref = *sposet;
+    app_log() << "  SPOSet " << sposet_ref.getName() << " is optimizable\n";
+    if (!sposet_ref.isRotationSupported())
+      myComm->barrier_and_abort("Orbital rotation not supported with '" + sposet_ref.getName() + "' of type '" +
+                                sposet_ref.getClassName() + "'.");
+    auto rot_spo    = std::make_unique<RotatedSPOsT<T>>(sposet_ref.getName(), std::move(sposet));
+    xmlNodePtr tcur = cur->xmlChildrenNode;
+    while (tcur != nullptr) // walk the direct children looking for stored rotation parameters
+    {
+      std::string cname(reinterpret_cast<const char*>(tcur->name));
+      if (cname == "opt_vars")
+      {
+        std::vector<RealType> params;
+        putContent(params, tcur);
+        rot_spo->setRotationParameters(params);
+      }
+      tcur = tcur->next;
+    }
+    sposet = std::move(rot_spo);
+#endif
+  }
+
+  if (sposet->getName().empty())
+    app_warning() << "SPOSet object doesn't have a name." << std::endl;
+  if (!spo_object_name.empty() && sposet->getName() != spo_object_name)
+    app_warning() << "SPOSet object name mismatched! input name: " << spo_object_name
+                  << "   object name: " << sposet->getName() << std::endl;
+
+  sposet->checkObject();
+  return sposet;
+}
+
+template<typename T>
+std::unique_ptr<SPOSetT<T>> SPOSetBuilderT<T>::createRotatedSPOSet(xmlNodePtr cur)
+{ // builds the child "sposet" element of cur and returns it wrapped in a RotatedSPOsT named after cur's "name"
+  std::string spo_object_name;
+  std::string method;
+  OhmmsAttributeSet attrib;
+  attrib.add(spo_object_name, "name");
+  attrib.add(method, "method", {"global", "history"});
+  attrib.put(cur);
+
+
+#ifdef QMC_COMPLEX
+  myComm->barrier_and_abort("Orbital optimization via rotation doesn't support complex wavefunctions yet.");
+  return nullptr;
+#else
+  std::unique_ptr<SPOSetT<T>> sposet;
+  processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) {
+    if (cname == "sposet") // if several "sposet" children exist, the last one built is kept
+    {
+      sposet = createSPOSet(element);
+    }
+  });
+
+  if (!sposet)
+    myComm->barrier_and_abort("Rotated SPO needs an SPOset");
+
+  if (!sposet->isRotationSupported())
+    myComm->barrier_and_abort("Orbital rotation not supported with '" + sposet->getName() + "' of type '" +
+                              sposet->getClassName() + "'.");
+
+  sposet->storeParamsBeforeRotation();
+  auto rot_spo = std::make_unique<RotatedSPOsT<T>>(spo_object_name, std::move(sposet));
+
+  if (method == "history") // any other method value leaves the RotatedSPOsT setting untouched
+    rot_spo->set_use_global_rotation(false);
+
+  processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) {
+    if (cname == "opt_vars") // previously stored rotation parameters, applied to the new wrapper
+    {
+      std::vector<RealType> params;
+      putContent(params, element);
+      rot_spo->setRotationParameters(params);
+    }
+  });
+  return rot_spo;
+#endif
+}
+template class SPOSetBuilderT<double>;
+template class SPOSetBuilderT<float>;
+template class SPOSetBuilderT<std::complex<double>>;
+template class SPOSetBuilderT<std::complex<float>>;
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/SPOSetBuilderT.h b/src/QMCWaveFunctions/SPOSetBuilderT.h
new file mode 100644
index 0000000000..fecae4abd7
--- /dev/null
+++ b/src/QMCWaveFunctions/SPOSetBuilderT.h
@@ -0,0 +1,93 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
+//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+/** @file SPOSetBuilderT.h
+ * @brief Declaration of a base class of SPOSet Builders
+ */
+#ifndef QMCPLUSPLUS_SPOSET_BUILDER_H
+#define QMCPLUSPLUS_SPOSET_BUILDER_H
+
+#include <memory>
+#include <vector>
+#include <string>
+#include "Message/MPIObjectBase.h"
+#include "QMCWaveFunctions/SPOSetInfo.h"
+#include "QMCWaveFunctions/SPOSetInputInfo.h"
+#include "QMCWaveFunctions/SPOSetT.h"
+#include "hdf/hdf_archive.h"
+
+namespace qmcplusplus
+{
+/** base class for the real SPOSet builder
+ *
+ * \warning {
+ * We have not quite figured out how to use real/complex efficiently.
+ * There are three cases we have to deal with
+ * - real basis functions and real coefficients
+ * - real basis functions and complex coefficients
+ * - complex basis functions and complex coefficients
+ * For now, we decide to keep both real and complex basis sets and expect
+ * the user classes {\bf KNOW} what they need to use.
+ * }
+ */
+template<typename T>
+class SPOSetBuilderT : public QMCTraits, public MPIObjectBase
+{
+public:
+  using RealType   = typename SPOSetT<T>::RealType;
+  using indices_t  = std::vector<int>;
+  using energies_t = std::vector<RealType>;
+
+  /// whether implementation conforms only to legacy standard; set to true by the constructor
+  bool legacy;
+
+  /// state info of all possible states available in the basis
+  std::vector<std::unique_ptr<SPOSetInfo>> states;
+
+  SPOSetBuilderT(const std::string& type_name, Communicate* comm);
+  virtual ~SPOSetBuilderT() {}
+
+  /// reserve space for states (usually only one set, multiple for e.g. spin dependent einspline)
+  void reserve_states(int nsets = 1);
+
+  /// allow modification of state information
+  inline void modify_states(int index = 0) { states[index]->modify(); }
+
+  /// clear state information
+  inline void clear_states(int index = 0) { states[index]->clear(); }
+
+  /// create an sposet from xml; ownership of the new SPOSet is returned to the caller
+  std::unique_ptr<SPOSetT<T>> createSPOSet(xmlNodePtr cur);
+
+  /// create an sposet wrapped in an orbital rotation from xml; ownership is returned to the caller
+  std::unique_ptr<SPOSetT<T>> createRotatedSPOSet(xmlNodePtr cur);
+
+  const std::string& getTypeName() const { return type_name_; }
+
+protected:
+  /// create an sposet from xml (legacy)
+  virtual std::unique_ptr<SPOSetT<T>> createSPOSetFromXML(xmlNodePtr cur) = 0;
+
+  /// create an sposet from a general xml request
+  virtual std::unique_ptr<SPOSetT<T>> createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input_info);
+
+  /// type name of the SPO objects built by this builder.
+  const std::string type_name_;
+};
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/SPOSetInfo.h b/src/QMCWaveFunctions/SPOSetInfo.h
index 9653dc73f8..8ee31c909d 100644
--- a/src/QMCWaveFunctions/SPOSetInfo.h
+++ b/src/QMCWaveFunctions/SPOSetInfo.h
@@ -130,6 +130,8 @@ class SPOSetInfo
   void clear();
 
   friend class SPOSetBuilder;
+  template<typename T>
+  friend class SPOSetBuilderT;
 };
 
 

From 4527547975460b58ee092c777100aeb2e8f60663 Mon Sep 17 00:00:00 2001
From: Steven Hahn <hahnse@ornl.gov>
Date: Thu, 24 Aug 2023 16:55:38 -0400
Subject: [PATCH 12/17] Add SHOSetBuilderT

Signed-off-by: Steven Hahn <hahnse@ornl.gov>
---
 .../BsplineFactory/BsplineSetT.h              |   4 +-
 .../BsplineFactory/SplineC2CT.h               |   4 +-
 src/QMCWaveFunctions/CMakeLists.txt           |   1 +
 .../ElectronGas/FreeOrbitalT.h                |   4 +-
 .../HarmonicOscillator/SHOSetBuilderT.cpp     | 209 ++++++++++++++++++
 .../HarmonicOscillator/SHOSetBuilderT.h       |  63 ++++++
 .../HarmonicOscillator/SHOSetT.h              |   4 +-
 src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h    |   4 +-
 .../LCAO/LCAOrbitalSetWithCorrectionT.h       |   4 +-
 src/QMCWaveFunctions/PlaneWave/PWBasisT.h     |   4 +-
 src/QMCWaveFunctions/PlaneWave/PWOrbitalSet.h |   4 +-
 .../PlaneWave/PWRealOrbitalSetT.h             |   4 +-
 src/QMCWaveFunctions/SPOSetBuilderT.h         |   4 +-
 src/QMCWaveFunctions/SpinorSetT.h             |   4 +-
 src/QMCWaveFunctions/tests/ConstantSPOSetT.h  |   4 +-
 src/QMCWaveFunctions/tests/FakeSPOT.h         |   4 +-
 16 files changed, 299 insertions(+), 26 deletions(-)
 create mode 100644 src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp
 create mode 100644 src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h

diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h b/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h
index 8ef12b8524..ba90502537 100644
--- a/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h
+++ b/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h
@@ -18,8 +18,8 @@
  *
  * BsplineSet is a SPOSet derived class and serves as a base class for B-spline SPO C2C/C2R/R2R implementation
  */
-#ifndef QMCPLUSPLUS_BSPLINESET_H
-#define QMCPLUSPLUS_BSPLINESET_H
+#ifndef QMCPLUSPLUS_BSPLINESETT_H
+#define QMCPLUSPLUS_BSPLINESETT_H
 
 #include "QMCWaveFunctions/SPOSetT.h"
 #include "spline/einspline_engine.hpp"
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h
index db93e72a43..fd55fcd9f2 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h
@@ -15,8 +15,8 @@
  *
  * class to handle complex splines to complex orbitals with splines of arbitrary precision
  */
-#ifndef QMCPLUSPLUS_SPLINE_C2C_H
-#define QMCPLUSPLUS_SPLINE_C2C_H
+#ifndef QMCPLUSPLUS_SPLINE_C2CT_H
+#define QMCPLUSPLUS_SPLINE_C2CT_H
 
 #include <memory>
 #include "BsplineSetT.h"
diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt
index b21a1a3e9c..70db8c580a 100644
--- a/src/QMCWaveFunctions/CMakeLists.txt
+++ b/src/QMCWaveFunctions/CMakeLists.txt
@@ -38,6 +38,7 @@ set(WFBASE_SRCS
     HarmonicOscillator/SHOSet.cpp
     HarmonicOscillator/SHOSetT.cpp
     HarmonicOscillator/SHOSetBuilder.cpp
+    HarmonicOscillator/SHOSetBuilderT.cpp
     ExampleHeBuilder.cpp
     ExampleHeComponent.cpp)
 
diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h
index c73ab26a2a..d2f2f450b8 100644
--- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h
+++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h
@@ -15,8 +15,8 @@
 // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
-#ifndef QMCPLUSPLUS_FREE_ORBITAL
-#define QMCPLUSPLUS_FREE_ORBITAL
+#ifndef QMCPLUSPLUS_FREE_ORBITALT_H
+#define QMCPLUSPLUS_FREE_ORBITALT_H
 
 #include "QMCWaveFunctions/SPOSetT.h"
 
diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp
new file mode 100644
index 0000000000..0e1638f765
--- /dev/null
+++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp
@@ -0,0 +1,209 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#include "SHOSetBuilderT.h"
+#include "QMCWaveFunctions/SPOSetInputInfo.h"
+#include "OhmmsData/AttributeSet.h"
+#include "Utilities/IteratorUtility.h"
+#include "Utilities/string_utils.h"
+
+
+namespace qmcplusplus
+{
+template<class T>
+SHOSetBuilderT<T>::SHOSetBuilderT(ParticleSet& P, Communicate* comm) : SPOSetBuilderT<T>("SHO", comm), Ps(P)
+{
+  this->ClassName = "SHOSetBuilderT";
+  this->legacy    = false; // overrides the base-class constructor's legacy(true)
+  app_log() << "Constructing SHOSetBuilderT" << std::endl;
+  reset();
+}
+
+template<class T>
+SHOSetBuilderT<T>::~SHOSetBuilderT() = default;
+
+template<class T>
+void SHOSetBuilderT<T>::reset()
+{ // negative mass/energy/length mean "not given"; createSPOSet replaces them with defaults after parsing
+  nstates = 0;
+  mass    = -1.0;
+  energy  = -1.0;
+  length  = -1.0;
+  center  = 0.0;
+}
+
+template<class T>
+std::unique_ptr<SPOSetT<T>> SHOSetBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
+{ // legacy entry point; this builder sets legacy = false, so it is not expected to be used
+  APP_ABORT("SHOSetBuilderT::createSPOSetFromXML  SHOSetBuilder should not use legacy interface");
+
+  app_log() << "SHOSetBuilderT::createSHOSet(xml) " << std::endl; // presumably unreachable after APP_ABORT
+
+  SPOSetInputInfo input(cur);
+
+  return createSPOSet(cur, input);
+}
+
+template<class T>
+std::unique_ptr<SPOSetT<T>> SHOSetBuilderT<T>::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input)
+{ // parses the oscillator parameters from cur, sizes the basis from input and returns a new SHOSetT
+  app_log() << "SHOSetBuilderT::createSHOSet(indices) " << std::endl;
+  reset();
+
+  // read parameters
+  std::string spo_name = "sho";
+  OhmmsAttributeSet attrib;
+  attrib.add(spo_name, "name");
+  attrib.add(spo_name, "id");
+  attrib.add(mass, "mass");
+  attrib.add(energy, "energy");
+  attrib.add(energy, "frequency"); // "energy" and "frequency" bind to the same member
+  attrib.add(length, "length");
+  attrib.add(center, "center");
+  attrib.add(nstates, "size"); // parsed here but not read again in this function
+  attrib.put(cur);
+
+  if (energy < 0.0) // still negative after parsing: fall back to defaults / derive the missing quantity
+    energy = 1.0;
+  if (mass < 0.0 && length < 0.0)
+    length = 1.0;
+  if (mass < 0.0)
+    mass = 1.0 / (energy * length * length);
+  else if (length < 0.0)
+    length = 1.0 / std::sqrt(mass * energy);
+
+  // initialize states and/or adjust basis
+  int smax = -1;
+  if (input.has_index_info)
+    smax = std::max(smax, input.max_index());
+  if (input.has_energy_info)
+  {
+    smax = std::max(smax, (int)std::ceil(input.max_energy() / energy));
+  }
+  if (smax < 0) // neither index nor energy information produced a usable basis size
+    APP_ABORT("SHOSetBuilderT::Initialize\n  invalid basis size");
+  update_basis_states(smax);
+
+  // create sho state request
+  indices_t& indices = input.get_indices(this->states);
+  std::vector<SHOState*> sho_states;
+  for (int i = 0; i < indices.size(); ++i)
+    sho_states.push_back(basis_states[indices[i]]); // non-owning pointers into basis_states
+
+  // make the sposet
+  auto sho = std::make_unique<SHOSetT<T>>(spo_name, length, center, sho_states);
+
+  sho->report("  ");
+  return sho;
+}
+
+template<class T>
+void SHOSetBuilderT<T>::update_basis_states(int smax)
+{ // makes sure basis states 0..smax exist, refreshes their energies and hands them to states[0] via finish()
+  int states_required = smax - basis_states.size() + 1;
+  if (states_required > 0)
+  {
+    RealType N = smax + 1;
+    if (QMCTraits::DIM == 1)
+      nmax = smax;
+    else if (QMCTraits::DIM == 2)
+      nmax = std::ceil(.5 * std::sqrt(8. * N + 1.) - 1.5);
+    else if (QMCTraits::DIM == 3)
+    {
+      RealType f = std::exp(1.0 / 3.0 * std::log(81. * N + 3. * std::sqrt(729. * N * N - 3.)));
+      nmax       = std::ceil(f / 3. + 1. / f - 2.);
+    }
+    else
+      APP_ABORT("SHOSetBuilderT::update_basis_states  dimensions other than 1, 2, or 3 are not supported");
+    int ndim                     = nmax + 1;
+    ind_dims[QMCTraits::DIM - 1] = 1; // strides for decoding a flat index m into DIM quantum numbers
+    for (int d = QMCTraits::DIM - 2; d > -1; --d)
+      ind_dims[d] = ind_dims[d + 1] * ndim;
+    int s    = 0;
+    int ntot = ind_dims[0] * ndim; // == ndim^DIM in exact integer arithmetic (ind_dims[0] is ndim^(DIM-1))
+    TinyVector<int, QMCTraits::DIM> qnumber;
+    for (int m = 0; m < ntot; ++m)
+    {
+      int n    = 0; // principal quantum number
+      int nrem = m;
+      for (int d = 0; d < QMCTraits::DIM; ++d)
+      {
+        int i = nrem / ind_dims[d];
+        nrem -= i * ind_dims[d];
+        qnumber[d] = i;
+        n += i;
+      }
+      if (n <= nmax) // keep only combinations whose quantum numbers sum to at most nmax
+      {
+        SHOState* st;
+        if (s < basis_states.size())
+          st = basis_states[s]; // reuse an existing entry
+        else
+        {
+          st = new SHOState();
+          basis_states.add(st);
+        }
+        RealType e = energy * (n + .5 * QMCTraits::DIM);
+        st->set(qnumber, e);
+        s++;
+      }
+    }
+    basis_states.energy_sort(1e-6, true);
+  }
+
+  // reset energy scale even if no states need to be added
+  for (int i = 0; i < basis_states.size(); ++i)
+  {
+    SHOState& state                                = *basis_states[i];
+    const TinyVector<int, QMCTraits::DIM>& qnumber = state.quantum_number;
+    int n                                          = 0;
+    for (int d = 0; d < QMCTraits::DIM; ++d)
+      n += qnumber[d];
+    state.energy = energy * (n + .5 * QMCTraits::DIM);
+  }
+
+  //somewhat redundant, but necessary
+  this->clear_states(0);
+  this->states[0]->finish(basis_states.states);
+
+  if (basis_states.size() <= smax)
+    APP_ABORT("SHOSetBuilderT::update_basis_states  failed to make enough states");
+}
+
+template<class T>
+void SHOSetBuilderT<T>::report(const std::string& pad)
+{ // writes the builder parameters and every basis state to app_log(), each line prefixed with pad
+  app_log() << pad << "SHOSetBuilderT report" << std::endl;
+  app_log() << pad << "  dimension = " << QMCTraits::DIM << std::endl;
+  app_log() << pad << "  mass      = " << mass << std::endl;
+  app_log() << pad << "  frequency = " << energy << std::endl; // same member as the next line; there is no separate frequency
+  app_log() << pad << "  energy    = " << energy << std::endl;
+  app_log() << pad << "  length    = " << length << std::endl;
+  app_log() << pad << "  center    = " << center << std::endl;
+  app_log() << pad << "  nstates   = " << nstates << std::endl;
+  app_log() << pad << "  nmax      = " << nmax << std::endl;
+  app_log() << pad << "  ind_dims  = " << ind_dims << std::endl;
+  app_log() << pad << "  # basis states = " << basis_states.size() << std::endl;
+  app_log() << pad << "  basis_states" << std::endl;
+  for (int s = 0; s < basis_states.size(); ++s)
+    basis_states[s]->report(pad + "  " + int2string(s) + " ");
+  app_log() << pad << "end SHOSetBuilderT report" << std::endl;
+  app_log().flush();
+}
+
+template class SHOSetBuilderT<double>;
+template class SHOSetBuilderT<float>;
+template class SHOSetBuilderT<std::complex<double>>;
+template class SHOSetBuilderT<std::complex<float>>;
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h
new file mode 100644
index 0000000000..7b3e9430d8
--- /dev/null
+++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h
@@ -0,0 +1,63 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef QMCPLUSPLUS_SHO_BASIS_BUILDERT_H
+#define QMCPLUSPLUS_SHO_BASIS_BUILDERT_H
+
+#include "QMCWaveFunctions/HarmonicOscillator/SHOSetT.h"
+#include "QMCWaveFunctions/SPOSetBuilderT.h"
+#include "QMCWaveFunctions/SPOSetInfo.h"
+
+namespace qmcplusplus
+{
+template<class T>
+class SHOSetBuilderT : public SPOSetBuilderT<T> // SPOSetBuilderT<T> subclass; holds the basis parameters and the SHOState list
+{
+public:
+  using RealType  = typename SPOSetT<T>::RealType;
+  using PosType   = typename SPOSetT<T>::PosType;
+  using indices_t = typename SPOSetBuilderT<T>::indices_t;
+
+  ParticleSet& Ps; // presumably the ParticleSet handed to the constructor - TODO confirm
+
+  RealType length;
+  RealType mass;
+  RealType energy; // update_basis_states sets each state's energy to energy * (n + DIM/2)
+  PosType center;
+
+  int nstates;
+  int nmax;
+  TinyVector<int, QMCTraits::DIM> ind_dims;
+
+  SPOSetInfoSimple<SHOState> basis_states; // states listed by report() and size-checked in update_basis_states()
+
+  //construction/destruction
+  SHOSetBuilderT(ParticleSet& P, Communicate* comm);
+
+  ~SHOSetBuilderT() override;
+
+  //reset parameters
+  void reset();
+
+  //SPOSetBuilder interface
+  std::unique_ptr<SPOSetT<T>> createSPOSetFromXML(xmlNodePtr cur) override;
+
+  std::unique_ptr<SPOSetT<T>> createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) override;
+
+  //local functions
+  void update_basis_states(int smax); // aborts when basis_states.size() <= smax after the update
+  void report(const std::string& pad = ""); // logs all fields and basis states, each line prefixed by pad
+};
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h
index bd4870a63c..6ef256df92 100644
--- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h
+++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h
@@ -11,8 +11,8 @@
 //////////////////////////////////////////////////////////////////////////////////////
 
 
-#ifndef QMCPLUSPLUS_SHOSET_H
-#define QMCPLUSPLUS_SHOSET_H
+#ifndef QMCPLUSPLUS_SHOSETT_H
+#define QMCPLUSPLUS_SHOSETT_H
 
 #include "QMCWaveFunctions/SPOSetT.h"
 #include "QMCWaveFunctions/SPOInfo.h"
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h
index d8070b58e5..6df0013bd5 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h
@@ -10,8 +10,8 @@
 //////////////////////////////////////////////////////////////////////////////////////
 
 
-#ifndef QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_TEMP_H
-#define QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_TEMP_H
+#ifndef QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSETT_H
+#define QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSETT_H
 
 #include <memory>
 #include "QMCWaveFunctions/SPOSetT.h"
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h
index 67ea645416..b1fc69cf6e 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h
@@ -10,8 +10,8 @@
 //////////////////////////////////////////////////////////////////////////////////////
 
 
-#ifndef QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_WITH_CORRECTION_TEMP_H
-#define QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_WITH_CORRECTION_TEMP_H
+#ifndef QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_WITH_CORRECTIONT_H
+#define QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_WITH_CORRECTIONT_H
 
 #include "QMCWaveFunctions/SPOSetT.h"
 #include "QMCWaveFunctions/BasisSetBase.h"
diff --git a/src/QMCWaveFunctions/PlaneWave/PWBasisT.h b/src/QMCWaveFunctions/PlaneWave/PWBasisT.h
index 54592b9ba7..a3acaf7aad 100644
--- a/src/QMCWaveFunctions/PlaneWave/PWBasisT.h
+++ b/src/QMCWaveFunctions/PlaneWave/PWBasisT.h
@@ -15,8 +15,8 @@
 /** @file PWBasis.h
  * @brief Declaration of Plane-wave basis set
  */
-#ifndef QMCPLUSPLUS_PLANEWAVEBASIS_BLAS_H
-#define QMCPLUSPLUS_PLANEWAVEBASIS_BLAS_H
+#ifndef QMCPLUSPLUS_PLANEWAVEBASIST_BLAS_H
+#define QMCPLUSPLUS_PLANEWAVEBASIST_BLAS_H
 
 #include "Configuration.h"
 #include "Particle/ParticleSet.h"
diff --git a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSet.h b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSet.h
index 5add827a86..225033214b 100644
--- a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSet.h
+++ b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSet.h
@@ -15,8 +15,8 @@
 /** @file PWOrbitalSet.h
  * @brief Definition of member functions of Plane-wave basis set
  */
-#ifndef QMCPLUSPLUS_PLANEWAVE_ORBITALSET_BLAS_H
-#define QMCPLUSPLUS_PLANEWAVE_ORBITALSET_BLAS_H
+#ifndef QMCPLUSPLUS_PLANEWAVE_ORBITALSETT_BLAS_H
+#define QMCPLUSPLUS_PLANEWAVE_ORBITALSETT_BLAS_H
 
 #include "QMCWaveFunctions/PlaneWave/PWBasis.h"
 #include "QMCWaveFunctions/SPOSet.h"
diff --git a/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.h b/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.h
index 29e484f3ff..8455b1e561 100644
--- a/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.h
+++ b/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.h
@@ -19,8 +19,8 @@
  * This is a specialized single-particle orbital set for real trial
  * wavefunctions and enabled with QMC_COMPLEX=0
  */
-#ifndef QMCPLUSPLUS_PLANEWAVE_REALORBITALSET_BLAS_H
-#define QMCPLUSPLUS_PLANEWAVE_REALORBITALSET_BLAS_H
+#ifndef QMCPLUSPLUS_PLANEWAVE_REALORBITALSETT_BLAS_H
+#define QMCPLUSPLUS_PLANEWAVE_REALORBITALSETT_BLAS_H
 
 #include "QMCWaveFunctions/PlaneWave/PWBasis.h"
 #include "QMCWaveFunctions/SPOSetT.h"
diff --git a/src/QMCWaveFunctions/SPOSetBuilderT.h b/src/QMCWaveFunctions/SPOSetBuilderT.h
index fecae4abd7..060451a94d 100644
--- a/src/QMCWaveFunctions/SPOSetBuilderT.h
+++ b/src/QMCWaveFunctions/SPOSetBuilderT.h
@@ -18,8 +18,8 @@
 /** @file SPOSetBuilderT.h
  * @brief Declaration of a base class of SPOSet Builders
  */
-#ifndef QMCPLUSPLUS_SPOSET_BUILDER_H
-#define QMCPLUSPLUS_SPOSET_BUILDER_H
+#ifndef QMCPLUSPLUS_SPOSET_BUILDERT_H
+#define QMCPLUSPLUS_SPOSET_BUILDERT_H
 
 #include <memory>
 #include <vector>
diff --git a/src/QMCWaveFunctions/SpinorSetT.h b/src/QMCWaveFunctions/SpinorSetT.h
index bc59e610aa..fe50a256fe 100644
--- a/src/QMCWaveFunctions/SpinorSetT.h
+++ b/src/QMCWaveFunctions/SpinorSetT.h
@@ -10,8 +10,8 @@
 // File created by:  Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories
 //////////////////////////////////////////////////////////////////////////////////////
 
-#ifndef QMCPLUSPLUS_SPINORSET_H
-#define QMCPLUSPLUS_SPINORSET_H
+#ifndef QMCPLUSPLUS_SPINORSETT_H
+#define QMCPLUSPLUS_SPINORSETT_H
 
 #include "QMCWaveFunctions/SPOSetT.h"
 #include "ResourceHandle.h"
diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSetT.h b/src/QMCWaveFunctions/tests/ConstantSPOSetT.h
index a6e16f8e3d..483136360a 100644
--- a/src/QMCWaveFunctions/tests/ConstantSPOSetT.h
+++ b/src/QMCWaveFunctions/tests/ConstantSPOSetT.h
@@ -10,8 +10,8 @@
 //////////////////////////////////////////////////////////////////////////////////////
 
 
-#ifndef QMCPLUSPLUS_CONSTANTSPOSET_H
-#define QMCPLUSPLUS_CONSTANTSPOSET_H
+#ifndef QMCPLUSPLUS_CONSTANTSPOSETT_H
+#define QMCPLUSPLUS_CONSTANTSPOSETT_H
 
 #include "QMCWaveFunctions/SPOSetT.h"
 
diff --git a/src/QMCWaveFunctions/tests/FakeSPOT.h b/src/QMCWaveFunctions/tests/FakeSPOT.h
index ee452842f5..dfa6689bd6 100644
--- a/src/QMCWaveFunctions/tests/FakeSPOT.h
+++ b/src/QMCWaveFunctions/tests/FakeSPOT.h
@@ -10,8 +10,8 @@
 //////////////////////////////////////////////////////////////////////////////////////
 
 
-#ifndef QMCPLUSPLUS_FAKESPOT_H
-#define QMCPLUSPLUS_FAKESPOT_H
+#ifndef QMCPLUSPLUS_FAKESPOTT_H
+#define QMCPLUSPLUS_FAKESPOTT_H
 
 #include "QMCWaveFunctions/SPOSetT.h"
 

From 5e16c5ae85a78988dc5923fa2755e9de039e8167 Mon Sep 17 00:00:00 2001
From: William F Godoy <williamfgc@yahoo.com>
Date: Fri, 25 Aug 2023 10:03:10 -0400
Subject: [PATCH 13/17] Fix PWOrbitalSet alias types

Reuse SPOSet types
---
 src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h
index 39d67f70b1..25c3e0d5c1 100644
--- a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h
+++ b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h
@@ -29,17 +29,16 @@ namespace qmcplusplus
 template<class T>
 class PWOrbitalSetT : public SPOSetT<T>
 {
-  
 public:
-  using RealType = typename RealAlias_impl<T>::value_type;
+  using RealType    = typename SPOSetT<T>::RealType;
   using ComplexType = T;
-  using PosType = QMCTraits::PosType;
+  using PosType     = typename SPOSetT<T>::PosType;
   using ValueVector = typename SPOSetT<T>::ValueVector;
   using GradVector  = typename SPOSetT<T>::GradVector;
   using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
   using GradMatrix  = typename SPOSetT<T>::GradMatrix;
-  using GradType = QMCTraits::GradType;
-  using IndexType = QMCTraits::IndexType;
+  using GradType    = typename SPOSetT<T>::GradType;
+  using IndexType   = typename SPOSetT<T>::IndexType;
 
   using BasisSet_t = PWBasisT<T>;
   using PWBasisPtr = PWBasisT<T>*;
@@ -55,7 +54,6 @@ class PWOrbitalSetT : public SPOSetT<T>
     PW_MAXINDEX = BasisSet_t::PW_MAXINDEX
   };
 
- 
 
   /** default constructor
   */

From 97cbde57e2e909fa87b35fcbe27183cd01d37567 Mon Sep 17 00:00:00 2001
From: Philip Fackler <facklerpw@ornl.gov>
Date: Fri, 4 Aug 2023 12:38:31 -0400
Subject: [PATCH 14/17] Implement CompositeSPOSetT class

---
 src/QMCWaveFunctions/CMakeLists.txt       |   1 +
 src/QMCWaveFunctions/CompositeSPOSetT.cpp | 193 ++++++++++++++++++++++
 src/QMCWaveFunctions/CompositeSPOSetT.h   | 112 +++++++++++++
 3 files changed, 306 insertions(+)
 create mode 100644 src/QMCWaveFunctions/CompositeSPOSetT.cpp
 create mode 100644 src/QMCWaveFunctions/CompositeSPOSetT.h

diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt
index 70db8c580a..72f20ee447 100644
--- a/src/QMCWaveFunctions/CMakeLists.txt
+++ b/src/QMCWaveFunctions/CMakeLists.txt
@@ -35,6 +35,7 @@ set(WFBASE_SRCS
     SPOSet.cpp
     SPOSetT.cpp
     CompositeSPOSet.cpp
+    CompositeSPOSetT.cpp
     HarmonicOscillator/SHOSet.cpp
     HarmonicOscillator/SHOSetT.cpp
     HarmonicOscillator/SHOSetBuilder.cpp
diff --git a/src/QMCWaveFunctions/CompositeSPOSetT.cpp b/src/QMCWaveFunctions/CompositeSPOSetT.cpp
new file mode 100644
index 0000000000..1d635e8a41
--- /dev/null
+++ b/src/QMCWaveFunctions/CompositeSPOSetT.cpp
@@ -0,0 +1,193 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
+// Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
+//                    Illinois at Urbana-Champaign Mark A. Berrill,
+//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "CompositeSPOSetT.h"
+
+#include "OhmmsData/AttributeSet.h"
+#include "QMCWaveFunctions/SPOSetBuilderFactory.h"
+#include "Utilities/IteratorUtility.h"
+
+#include <algorithm>
+
+namespace qmcplusplus
+{
+namespace MatrixOperators
+{
+/** copy a small matrix (N, M1) to a big matrix (N, M2), M2>M1
+ * @param small input matrix
+ * @param big output matrix
+ * @param offset_c column offset
+ *
+ * @todo smarter and more efficient matrix, move up for others
+ * The columns [0,M1) are inserted into [offset_c,offset_c+M1).
+ */
+template <typename MAT1, typename MAT2>
+inline void
+insert_columns(const MAT1& small, MAT2& big, int offset_c)
+{
+	const int c = small.cols();
+	for (int i = 0; i < small.rows(); ++i)
+		std::copy(small[i], small[i] + c, big[i] + offset_c); // row i of small lands at column offset_c of row i of big
+}
+} // namespace MatrixOperators
+
+template <typename T>
+CompositeSPOSetT<T>::CompositeSPOSetT(const std::string& my_name) :
+	SPOSetT<T>(my_name)
+{
+	this->OrbitalSetSize = 0; // starts empty; add() grows the size by each component's orbital count
+	component_offsets.reserve(4); // capacity hint only, not a limit on the number of components
+}
+
+template <typename T>
+CompositeSPOSetT<T>::CompositeSPOSetT(const CompositeSPOSetT<T>& other) : // deep copy: every component is cloned
+	SPOSetT<T>(other)
+{
+	this->OrbitalSetSize = 0; // the base copy may carry other's size; add() re-accumulates it, so start from zero
+	for (auto& element : other.components)
+		this->add(element->makeClone()); // rebuilds components, scratch buffers and offsets in order
+}
+
+template <typename T>
+CompositeSPOSetT<T>::~CompositeSPOSetT() = default; // defaulted out of line; components are owned through unique_ptr
+
+template <typename T>
+void
+CompositeSPOSetT<T>::add(std::unique_ptr<SPOSetT<T>> component) // takes ownership of component and appends it
+{
+	if (components.empty())
+		component_offsets.push_back(0); // the first component starts at column 0
+
+	int norbs = component->size(); // read the size before the pointer is moved from
+	components.push_back(std::move(component));
+	component_values.emplace_back(norbs); // per-component scratch buffers, each sized to its orbital count
+	component_gradients.emplace_back(norbs);
+	component_laplacians.emplace_back(norbs);
+
+	this->OrbitalSetSize += norbs;
+	component_offsets.push_back(this->OrbitalSetSize); // end offset of this component, i.e. start of the next one
+}
+
+template <typename T>
+void
+CompositeSPOSetT<T>::report() // log the number of components and a basic report for each one
+{
+	app_log() << "CompositeSPOSetT" << std::endl;
+	app_log() << "  ncomponents = " << components.size() << std::endl;
+	app_log() << "  components" << std::endl;
+	for (int i = 0; i < components.size(); ++i) {
+		app_log() << "    " << i << std::endl; // component index, followed by its own report
+		components[i]->basic_report("      ");
+	}
+}
+
+template <typename T>
+std::unique_ptr<SPOSetT<T>>
+CompositeSPOSetT<T>::makeClone() const
+{
+	return std::make_unique<CompositeSPOSetT<T>>(*this); // goes through the copy constructor, which clones each component
+}
+
+template <typename T>
+void
+CompositeSPOSetT<T>::evaluateValue(
+	const ParticleSet& P, int iat, ValueVector& psi)
+{
+	int n = 0; // running offset into psi where the next component's values go
+	for (int c = 0; c < components.size(); ++c) {
+		SPOSetT<T>& component = *components[c];
+		ValueVector& values = component_values[c]; // per-component scratch buffer created in add()
+		component.evaluateValue(P, iat, values);
+		std::copy(values.begin(), values.end(), psi.begin() + n); // assumes psi has room for all components; not checked here
+		n += component.size();
+	}
+}
+
+template <typename T>
+void
+CompositeSPOSetT<T>::evaluateVGL(const ParticleSet& P, int iat,
+	ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+{
+	int n = 0; // running offset shared by psi, dpsi and d2psi
+	for (int c = 0; c < components.size(); ++c) {
+		SPOSetT<T>& component = *components[c];
+		ValueVector& values = component_values[c]; // per-component scratch buffers created in add()
+		GradVector& gradients = component_gradients[c];
+		ValueVector& laplacians = component_laplacians[c];
+		component.evaluateVGL(P, iat, values, gradients, laplacians);
+		std::copy(values.begin(), values.end(), psi.begin() + n); // outputs are assumed large enough; not checked here
+		std::copy(gradients.begin(), gradients.end(), dpsi.begin() + n);
+		std::copy(laplacians.begin(), laplacians.end(), d2psi.begin() + n);
+		n += component.size();
+	}
+}
+
+template <typename T>
+void
+CompositeSPOSetT<T>::evaluate_notranspose(const ParticleSet& P, int first,
+	int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet)
+{
+	const int nat = last - first; // number of rows evaluated, one per particle in [first, last)
+	for (int c = 0; c < components.size(); ++c) {
+		int norb = components[c]->size();
+		ValueMatrix v(nat, norb); // temporaries are allocated on every call, per component
+		GradMatrix g(nat, norb);
+		ValueMatrix l(nat, norb);
+		components[c]->evaluate_notranspose(P, first, last, v, g, l);
+		int n = component_offsets[c]; // first output column owned by component c
+		MatrixOperators::insert_columns(v, logdet, n);
+		MatrixOperators::insert_columns(g, dlogdet, n);
+		MatrixOperators::insert_columns(l, d2logdet, n);
+	}
+}
+
+template <typename T>
+void
+CompositeSPOSetT<T>::evaluate_notranspose(const ParticleSet& P, int first,
+	int last, ValueMatrix& logdet, GradMatrix& dlogdet,
+	HessMatrix& grad_grad_logdet)
+{
+	const int nat = last - first; // number of rows evaluated, one per particle in [first, last)
+	for (int c = 0; c < components.size(); ++c) {
+		int norb = components[c]->size();
+		ValueMatrix v(nat, norb); // temporaries are allocated on every call, per component
+		GradMatrix g(nat, norb);
+		HessMatrix h(nat, norb);
+		components[c]->evaluate_notranspose(P, first, last, v, g, h);
+		int n = component_offsets[c]; // first output column owned by component c
+		MatrixOperators::insert_columns(v, logdet, n);
+		MatrixOperators::insert_columns(g, dlogdet, n);
+		MatrixOperators::insert_columns(h, grad_grad_logdet, n);
+	}
+}
+
+template <typename T>
+void
+CompositeSPOSetT<T>::evaluate_notranspose(const ParticleSet& P, int first,
+	int last, ValueMatrix& logdet, GradMatrix& dlogdet,
+	HessMatrix& grad_grad_logdet, GGGMatrix& grad_grad_grad_logdet)
+{
+	not_implemented( // third-derivative overload is unsupported: all arguments are ignored and the call aborts
+		"evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet,dddlogdet)");
+}
+
+// Explicit instantiations for the concrete value types (the member templates above are defined out of line)
+template class CompositeSPOSetT<double>;
+template class CompositeSPOSetT<float>;
+template class CompositeSPOSetT<std::complex<double>>;
+template class CompositeSPOSetT<std::complex<float>>;
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/CompositeSPOSetT.h b/src/QMCWaveFunctions/CompositeSPOSetT.h
new file mode 100644
index 0000000000..c8d156ac0c
--- /dev/null
+++ b/src/QMCWaveFunctions/CompositeSPOSetT.h
@@ -0,0 +1,112 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
+// Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
+//                    Illinois at Urbana-Champaign Mark A. Berrill,
+//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_COMPOSITE_SPOSETT_H
+#define QMCPLUSPLUS_COMPOSITE_SPOSETT_H
+
+#include "QMCWaveFunctions/BasisSetBase.h"
+#include "QMCWaveFunctions/SPOSetBuilder.h"
+#include "QMCWaveFunctions/SPOSetBuilderFactory.h"
+#include "QMCWaveFunctions/SPOSetT.h"
+
+namespace qmcplusplus
+{
+template <typename T>
+class CompositeSPOSetT : public SPOSetT<T> // SPOSetT that concatenates the orbitals of several owned component SPOSetTs
+{
+public:
+	using ValueVector = typename SPOSetT<T>::ValueVector;
+	using GradVector = typename SPOSetT<T>::GradVector;
+	using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+	using GradMatrix = typename SPOSetT<T>::GradMatrix;
+	using HessMatrix = typename SPOSetT<T>::HessMatrix;
+	using GGGMatrix = typename SPOSetT<T>::GGGMatrix;
+
+	/// component SPOSets
+	std::vector<std::unique_ptr<SPOSetT<T>>> components;
+	/// temporary storage for values
+	std::vector<ValueVector> component_values;
+	/// temporary storage for gradients
+	std::vector<GradVector> component_gradients;
+	/// temporary storage for laplacians
+	std::vector<ValueVector> component_laplacians;
+	/// store the precomputed offsets
+	std::vector<int> component_offsets;
+
+	CompositeSPOSetT(const std::string& my_name);
+	/**
+	 * @TODO: do we want template copy constructor
+	 * (i.e., copy from other with different type argument)?
+	 */
+	CompositeSPOSetT(const CompositeSPOSetT& other);
+	~CompositeSPOSetT() override;
+
+	std::string
+	getClassName() const override
+	{
+		return "CompositeSPOSetT";
+	}
+
+	/// add a sposet component to this composite sposet
+	void
+	add(std::unique_ptr<SPOSetT<T>> component);
+
+	/// print out component info
+	void
+	report();
+
+	// SPOSet interface methods
+	/// size is determined by component sposets and nothing else
+	inline void
+	setOrbitalSetSize(int norbs) override
+	{ // intentionally a no-op: norbs is ignored
+	}
+
+	std::unique_ptr<SPOSetT<T>>
+	makeClone() const override;
+
+	void
+	evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override;
+
+	void
+	evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi,
+		GradVector& dpsi, ValueVector& d2psi) override;
+
+	/// unimplemented functions call this to abort
+	inline void
+	not_implemented(const std::string& method)
+	{
+		APP_ABORT("CompositeSPOSetT::" + method + " has not been implemented");
+	}
+
+	// matrix evaluators; the GGGMatrix overload is not implemented and aborts via not_implemented()
+	void
+	evaluate_notranspose(const ParticleSet& P, int first, int last,
+		ValueMatrix& logdet, GradMatrix& dlogdet,
+		ValueMatrix& d2logdet) override;
+	void
+	evaluate_notranspose(const ParticleSet& P, int first, int last,
+		ValueMatrix& logdet, GradMatrix& dlogdet,
+		HessMatrix& ddlogdet) override;
+	void
+	evaluate_notranspose(const ParticleSet& P, int first, int last,
+		ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& ddlogdet,
+		GGGMatrix& dddlogdet) override;
+};
+
+} // namespace qmcplusplus
+
+#endif

From 154e7a8163cf07c6671a515d9704f7ae4c923d9f Mon Sep 17 00:00:00 2001
From: Cody Melton <cmelton@sandia.gov>
Date: Fri, 25 Aug 2023 10:40:36 -0600
Subject: [PATCH 15/17] added initial missing API

---
 src/QMCWaveFunctions/RotatedSPOsT.cpp | 15 +++++++++++++++
 src/QMCWaveFunctions/RotatedSPOsT.h   |  2 ++
 2 files changed, 17 insertions(+)

diff --git a/src/QMCWaveFunctions/RotatedSPOsT.cpp b/src/QMCWaveFunctions/RotatedSPOsT.cpp
index 5a992ebce8..56a5e55cc9 100644
--- a/src/QMCWaveFunctions/RotatedSPOsT.cpp
+++ b/src/QMCWaveFunctions/RotatedSPOsT.cpp
@@ -1683,6 +1683,21 @@ std::unique_ptr<SPOSetT<T>> RotatedSPOsT<T>::makeClone() const
   return myclone;
 }
 
+template<typename T>
+RefVectorWithLeader<SPOSetT<T>> RotatedSPOsT<T>::extractPhiRefList(const RefVectorWithLeader<SPOSetT<T>>& spo_list)
+{
+  auto& spo_leader = spo_list.template getCastedLeader<RotatedSPOsT<T>>(); // the leader is treated as a RotatedSPOsT
+  const auto nw    = spo_list.size();
+  RefVectorWithLeader<SPOSetT<T>> phi_list(*spo_leader.Phi); // new list led by the leader's wrapped Phi
+  phi_list.reserve(nw);
+  for (int iw = 0; iw < nw; iw++)
+  {
+    RotatedSPOsT<T>& rot = spo_list.template getCastedElement<RotatedSPOsT<T>>(iw);
+    phi_list.emplace_back(*rot.Phi); // one Phi per entry, in the same order as spo_list
+  }
+  return phi_list;
+}
+
 // Class concrete types from ValueType
 template class RotatedSPOsT<double>;
 template class RotatedSPOsT<float>;
diff --git a/src/QMCWaveFunctions/RotatedSPOsT.h b/src/QMCWaveFunctions/RotatedSPOsT.h
index 3273681455..0be3ee0718 100644
--- a/src/QMCWaveFunctions/RotatedSPOsT.h
+++ b/src/QMCWaveFunctions/RotatedSPOsT.h
@@ -406,6 +406,8 @@ class RotatedSPOsT : public SPOSetT<T>, public OptimizableObject
   /// List of previously applied parameters
   std::vector<std::vector<RealType>> history_params_;
 
+  static RefVectorWithLeader<SPOSetT<T>> extractPhiRefList(const RefVectorWithLeader<SPOSetT<T>>& spo_list);
+
   /// Use global rotation or history list
   bool use_global_rot_ = true;
 

From 86036b2663f1b77aaf9f11c5131ee8ced3a8a494 Mon Sep 17 00:00:00 2001
From: Cody Melton <cmelton@sandia.gov>
Date: Fri, 25 Aug 2023 11:18:49 -0600
Subject: [PATCH 16/17] finish mw_ APIs

---
 src/QMCWaveFunctions/RotatedSPOsT.cpp | 118 ++++++++++++++++++++++++++
 src/QMCWaveFunctions/RotatedSPOsT.h   |  84 +++++++++++++++---
 2 files changed, 191 insertions(+), 11 deletions(-)

diff --git a/src/QMCWaveFunctions/RotatedSPOsT.cpp b/src/QMCWaveFunctions/RotatedSPOsT.cpp
index 56a5e55cc9..6dd1f4fdb5 100644
--- a/src/QMCWaveFunctions/RotatedSPOsT.cpp
+++ b/src/QMCWaveFunctions/RotatedSPOsT.cpp
@@ -1683,6 +1683,124 @@ std::unique_ptr<SPOSetT<T>> RotatedSPOsT<T>::makeClone() const
   return myclone;
 }
 
+template<typename T>
+void RotatedSPOsT<T>::mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                           const RefVectorWithLeader<const VirtualParticleSet>& vp_list,
+                                           const RefVector<ValueVector>& psi_list,
+                                           const std::vector<const T*>& invRow_ptr_list,
+                                           std::vector<std::vector<T>>& ratios_list) const
+{
+  auto phi_list = extractPhiRefList(spo_list); // swap each RotatedSPOsT for the Phi it wraps
+  auto& leader  = phi_list.getLeader();
+  leader.mw_evaluateDetRatios(phi_list, vp_list, psi_list, invRow_ptr_list, ratios_list); // forward unchanged to the wrapped sets
+}
+
+template<typename T>
+void RotatedSPOsT<T>::mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                       const RefVectorWithLeader<ParticleSet>& P_list,
+                                       int iat,
+                                       const RefVector<ValueVector>& psi_v_list) const
+{
+  auto phi_list = extractPhiRefList(spo_list); // swap each RotatedSPOsT for the Phi it wraps
+  auto& leader  = phi_list.getLeader();
+  leader.mw_evaluateValue(phi_list, P_list, iat, psi_v_list); // forward unchanged to the wrapped sets
+}
+
+template<typename T>
+void RotatedSPOsT<T>::mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                     const RefVectorWithLeader<ParticleSet>& P_list,
+                                     int iat,
+                                     const RefVector<ValueVector>& psi_v_list,
+                                     const RefVector<GradVector>& dpsi_v_list,
+                                     const RefVector<ValueVector>& d2psi_v_list) const
+{
+  auto phi_list = extractPhiRefList(spo_list); // swap each RotatedSPOsT for the Phi it wraps
+  auto& leader  = phi_list.getLeader();
+  leader.mw_evaluateVGL(phi_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); // forward unchanged to the wrapped sets
+}
+
+template<typename T>
+void RotatedSPOsT<T>::mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                             const RefVectorWithLeader<ParticleSet>& P_list,
+                                             int iat,
+                                             const RefVector<ValueVector>& psi_v_list,
+                                             const RefVector<GradVector>& dpsi_v_list,
+                                             const RefVector<ValueVector>& d2psi_v_list,
+                                             OffloadMatrix<QMCTraits::ComplexType>& mw_dspin) const
+{
+  auto phi_list = extractPhiRefList(spo_list); // swap each RotatedSPOsT for the Phi it wraps
+  auto& leader  = phi_list.getLeader();
+  leader.mw_evaluateVGLWithSpin(phi_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list, mw_dspin); // forward unchanged
+}
+
+template<typename T>
+void RotatedSPOsT<T>::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                                     const RefVectorWithLeader<ParticleSet>& P_list,
+                                                     int iat,
+                                                     const std::vector<const T*>& invRow_ptr_list,
+                                                     OffloadMWVGLArray& phi_vgl_v,
+                                                     std::vector<T>& ratios,
+                                                     std::vector<GradType>& grads) const
+{
+  auto phi_list = extractPhiRefList(spo_list); // swap each RotatedSPOsT for the Phi it wraps
+  auto& leader  = phi_list.getLeader();
+  leader.mw_evaluateVGLandDetRatioGrads(phi_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads); // forward unchanged
+}
+
+template<typename T>
+void RotatedSPOsT<T>::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                                             const RefVectorWithLeader<ParticleSet>& P_list,
+                                                             int iat,
+                                                             const std::vector<const T*>& invRow_ptr_list,
+                                                             OffloadMWVGLArray& phi_vgl_v,
+                                                             std::vector<T>& ratios,
+                                                             std::vector<GradType>& grads,
+                                                             std::vector<T>& spingrads) const
+{
+  auto phi_list = extractPhiRefList(spo_list); // swap each RotatedSPOsT for the Phi it wraps
+  auto& leader  = phi_list.getLeader();
+  leader.mw_evaluateVGLandDetRatioGradsWithSpin(phi_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads, // forward unchanged
+                                                spingrads);
+}
+
+template<typename T>
+void RotatedSPOsT<T>::mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                              const RefVectorWithLeader<ParticleSet>& P_list,
+                                              int first,
+                                              int last,
+                                              const RefVector<ValueMatrix>& logdet_list,
+                                              const RefVector<GradMatrix>& dlogdet_list,
+                                              const RefVector<ValueMatrix>& d2logdet_list) const
+{
+  auto phi_list = extractPhiRefList(spo_list); // swap each RotatedSPOsT for the Phi it wraps
+  auto& leader  = phi_list.getLeader();
+  leader.mw_evaluate_notranspose(phi_list, P_list, first, last, logdet_list, dlogdet_list, d2logdet_list); // forward unchanged
+}
+
+template<typename T>
+void RotatedSPOsT<T>::createResource(ResourceCollection& collection) const
+{
+  Phi->createResource(collection); // this wrapper adds nothing itself; only the wrapped Phi is asked to create resources
+}
+
+template<typename T>
+void RotatedSPOsT<T>::acquireResource(ResourceCollection& collection,
+                                      const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+{
+  auto phi_list = extractPhiRefList(spo_list); // swap each RotatedSPOsT for the Phi it wraps
+  auto& leader  = phi_list.getLeader();
+  leader.acquireResource(collection, phi_list); // the wrapped sets acquire from the collection
+}
+
+template<typename T>
+void RotatedSPOsT<T>::releaseResource(ResourceCollection& collection,
+                                      const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+{
+  auto phi_list = extractPhiRefList(spo_list); // swap each RotatedSPOsT for the Phi it wraps
+  auto& leader  = phi_list.getLeader();
+  leader.releaseResource(collection, phi_list); // the wrapped sets release back to the collection
+}
+
 template<typename T>
 RefVectorWithLeader<SPOSetT<T>> RotatedSPOsT<T>::extractPhiRefList(const RefVectorWithLeader<SPOSetT<T>>& spo_list)
 {
diff --git a/src/QMCWaveFunctions/RotatedSPOsT.h b/src/QMCWaveFunctions/RotatedSPOsT.h
index 0be3ee0718..58bdbcb4a2 100644
--- a/src/QMCWaveFunctions/RotatedSPOsT.h
+++ b/src/QMCWaveFunctions/RotatedSPOsT.h
@@ -33,17 +33,22 @@ template<class T>
 class RotatedSPOsT : public SPOSetT<T>, public OptimizableObject
 {
 public:
-  using IndexType    = typename SPOSetT<T>::IndexType;
-  using RealType     = typename SPOSetT<T>::RealType;
-  using FullRealType = typename SPOSetT<T>::FullRealType;
-  using ValueVector  = typename SPOSetT<T>::ValueVector;
-  using ValueMatrix  = typename SPOSetT<T>::ValueMatrix;
-  using GradVector   = typename SPOSetT<T>::GradVector;
-  using GradMatrix   = typename SPOSetT<T>::GradMatrix;
-  using HessVector   = typename SPOSetT<T>::HessVector;
-  using HessMatrix   = typename SPOSetT<T>::HessMatrix;
-  using GGGVector    = typename SPOSetT<T>::GGGVector;
-  using GGGMatrix    = typename SPOSetT<T>::GGGMatrix;
+  using IndexType         = typename SPOSetT<T>::IndexType;
+  using RealType          = typename SPOSetT<T>::RealType;
+  using FullRealType      = typename SPOSetT<T>::FullRealType;
+  using ValueVector       = typename SPOSetT<T>::ValueVector;
+  using ValueMatrix       = typename SPOSetT<T>::ValueMatrix;
+  using GradVector        = typename SPOSetT<T>::GradVector;
+  using GradMatrix        = typename SPOSetT<T>::GradMatrix;
+  using GradType          = typename SPOSetT<T>::GradType;
+  using HessVector        = typename SPOSetT<T>::HessVector;
+  using HessMatrix        = typename SPOSetT<T>::HessMatrix;
+  using GGGVector         = typename SPOSetT<T>::GGGVector;
+  using GGGMatrix         = typename SPOSetT<T>::GGGMatrix;
+  using OffloadMWVGLArray = typename SPOSetT<T>::OffloadMWVGLArray;
+  using OffloadMWVArray   = typename SPOSetT<T>::OffloadMWVArray;
+  template<typename DT>
+  using OffloadMatrix = typename SPOSetT<T>::template OffloadMatrix<DT>;
 
   // constructor
   RotatedSPOsT(const std::string& my_name, std::unique_ptr<SPOSetT<T>>&& spos);
@@ -393,6 +398,63 @@ class RotatedSPOsT : public SPOSetT<T>, public OptimizableObject
   /// Use history list (false) or global rotation (true)
   void set_use_global_rotation(bool use_global_rotation) { use_global_rot_ = use_global_rotation; }
 
+  void mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                            const RefVectorWithLeader<const VirtualParticleSet>& vp_list,
+                            const RefVector<ValueVector>& psi_list,
+                            const std::vector<const T*>& invRow_ptr_list,
+                            std::vector<std::vector<T>>& ratios_list) const override;
+
+  void mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                        const RefVectorWithLeader<ParticleSet>& P_list,
+                        int iat,
+                        const RefVector<ValueVector>& psi_v_list) const override;
+
+  void mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                      const RefVectorWithLeader<ParticleSet>& P_list,
+                      int iat,
+                      const RefVector<ValueVector>& psi_v_list,
+                      const RefVector<GradVector>& dpsi_v_list,
+                      const RefVector<ValueVector>& d2psi_v_list) const override;
+
+  void mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                              const RefVectorWithLeader<ParticleSet>& P_list,
+                              int iat,
+                              const RefVector<ValueVector>& psi_v_list,
+                              const RefVector<GradVector>& dpsi_v_list,
+                              const RefVector<ValueVector>& d2psi_v_list,
+                              OffloadMatrix<QMCTraits::ComplexType>& mw_dspin) const override;
+
+  void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                      const RefVectorWithLeader<ParticleSet>& P_list,
+                                      int iat,
+                                      const std::vector<const T*>& invRow_ptr_list,
+                                      OffloadMWVGLArray& phi_vgl_v,
+                                      std::vector<T>& ratios,
+                                      std::vector<GradType>& grads) const override;
+
+  void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                              const RefVectorWithLeader<ParticleSet>& P_list,
+                                              int iat,
+                                              const std::vector<const T*>& invRow_ptr_list,
+                                              OffloadMWVGLArray& phi_vgl_v,
+                                              std::vector<T>& ratios,
+                                              std::vector<GradType>& grads,
+                                              std::vector<T>& spingrads) const override;
+
+  void mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                               const RefVectorWithLeader<ParticleSet>& P_list,
+                               int first,
+                               int last,
+                               const RefVector<ValueMatrix>& logdet_list,
+                               const RefVector<GradMatrix>& dlogdet_list,
+                               const RefVector<ValueMatrix>& d2logdet_list) const override;
+
+  void createResource(ResourceCollection& collection) const override;
+
+  void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const override;
+
+  void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const override;
+
 private:
   /// true if SPO parameters (orbital rotation parameters) have been supplied
   /// by input

From aa153a5c5d5dd354905e5a75d4ef24fbb849ba7b Mon Sep 17 00:00:00 2001
From: Cody Melton <cmelton@sandia.gov>
Date: Fri, 25 Aug 2023 11:23:50 -0600
Subject: [PATCH 17/17] Add timer around Phi->applyRotation calls in RotatedSPOsT

---
 src/QMCWaveFunctions/RotatedSPOsT.cpp | 17 ++++++++++++++---
 src/QMCWaveFunctions/RotatedSPOsT.h   |  3 +++
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/src/QMCWaveFunctions/RotatedSPOsT.cpp b/src/QMCWaveFunctions/RotatedSPOsT.cpp
index 6dd1f4fdb5..a8a91bfa6a 100644
--- a/src/QMCWaveFunctions/RotatedSPOsT.cpp
+++ b/src/QMCWaveFunctions/RotatedSPOsT.cpp
@@ -23,7 +23,12 @@ namespace qmcplusplus
 {
 template<typename T>
 RotatedSPOsT<T>::RotatedSPOsT(const std::string& my_name, std::unique_ptr<SPOSetT<T>>&& spos)
-    : SPOSetT<T>(my_name), OptimizableObject(my_name), Phi(std::move(spos)), nel_major_(0), params_supplied(false)
+    : SPOSetT<T>(my_name),
+      OptimizableObject(my_name),
+      Phi(std::move(spos)),
+      nel_major_(0),
+      params_supplied(false),
+      apply_rotation_timer_(createGlobalTimer("RotatedSPOsT::apply_rotation", timer_level_fine))
 {
   this->OrbitalSetSize = Phi->getOrbitalSetSize();
 }
@@ -425,7 +430,10 @@ void RotatedSPOsT<T>::apply_rotation(const std::vector<RealType>& param, bool us
 	  Finally, apply unitary matrix to orbs.
 	*/
   exponentiate_antisym_matrix(rot_mat);
-  Phi->applyRotation(rot_mat, use_stored_copy);
+  {
+    ScopedTimer local(apply_rotation_timer_);
+    Phi->applyRotation(rot_mat, use_stored_copy);
+  }
 }
 
 template<typename T>
@@ -437,7 +445,10 @@ void RotatedSPOsT<T>::applyDeltaRotation(const std::vector<RealType>& delta_para
   ValueMatrix new_rot_mat(nmo, nmo);
   constructDeltaRotation(delta_param, old_param, m_act_rot_inds, m_full_rot_inds, new_param, new_rot_mat);
 
-  Phi->applyRotation(new_rot_mat, true);
+  {
+    ScopedTimer local(apply_rotation_timer_);
+    Phi->applyRotation(new_rot_mat, true);
+  }
 }
 
 template<typename T>
diff --git a/src/QMCWaveFunctions/RotatedSPOsT.h b/src/QMCWaveFunctions/RotatedSPOsT.h
index 58bdbcb4a2..1cdd5246b5 100644
--- a/src/QMCWaveFunctions/RotatedSPOsT.h
+++ b/src/QMCWaveFunctions/RotatedSPOsT.h
@@ -465,6 +465,9 @@ class RotatedSPOsT : public SPOSetT<T>, public OptimizableObject
   /// Full set of rotation matrix parameters for use in global rotation method
   opt_variables_type myVarsFull;
 
+  /// timer for the Phi->applyRotation calls made in apply_rotation and applyDeltaRotation
+  NewTimer& apply_rotation_timer_;
+
   /// List of previously applied parameters
   std::vector<std::vector<RealType>> history_params_;