diff --git a/src/QMCWaveFunctions/BasisSetBase.h b/src/QMCWaveFunctions/BasisSetBase.h
index 7be77b13cb..8837e18832 100644
--- a/src/QMCWaveFunctions/BasisSetBase.h
+++ b/src/QMCWaveFunctions/BasisSetBase.h
@@ -134,9 +134,8 @@ struct SoaBasisSetBase
   using vgl_type          = VectorSoaContainer<T, OHMMS_DIM + 2>;
   using vgh_type          = VectorSoaContainer<T, 10>;
   using vghgh_type        = VectorSoaContainer<T, 20>;
-  using ValueType         = QMCTraits::ValueType;
-  using OffloadMWVGLArray = Array<ValueType, 3, OffloadPinnedAllocator<ValueType>>; // [VGL, walker, Orbs]
-  using OffloadMWVArray   = Array<ValueType, 2, OffloadPinnedAllocator<ValueType>>; // [walker, Orbs]
+  using OffloadMWVGLArray = Array<T, 3, OffloadPinnedAllocator<T>>; // [VGL, walker, Orbs]
+  using OffloadMWVArray   = Array<T, 2, OffloadPinnedAllocator<T>>; // [walker, Orbs]
 
   ///size of the basis set
   int BasisSetSize;
diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h b/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h
new file mode 100644
index 0000000000..ba90502537
--- /dev/null
+++ b/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h
@@ -0,0 +1,249 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2019 QMCPACK developers.
+//
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+/** @file BsplineSetT.h
+ *
+ * BsplineSetT is an SPOSetT-derived class template serving as the base class for B-spline SPO C2C/C2R/R2R implementations
+ */
+#ifndef QMCPLUSPLUS_BSPLINESETT_H
+#define QMCPLUSPLUS_BSPLINESETT_H
+
+#include "QMCWaveFunctions/SPOSetT.h"
+#include "spline/einspline_engine.hpp"
+#include "spline/einspline_util.hpp"
+
+namespace qmcplusplus
+{
+/** BsplineSetT is the base class for SplineC2C, SplineC2R, SplineR2R.
+ * Its derived template classes manage the storage and evaluation at a given precision.
+ * BsplineSetT also implements a few fallback routines in case an optimized implementation is not necessary in the derived class.
+ */
+template<class T>
+class BsplineSetT : public SPOSetT<T>
+{
+public:
+  using PosType     = typename SPOSetT<T>::PosType;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using HessVector  = typename SPOSetT<T>::HessVector;
+  using GGGVector   = typename SPOSetT<T>::GGGVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using HessMatrix  = typename SPOSetT<T>::HessMatrix;
+  using GGGMatrix   = typename SPOSetT<T>::GGGMatrix;
+
+  using value_type = typename SPOSetT<T>::ValueMatrix::value_type;
+  using grad_type  = typename SPOSetT<T>::GradMatrix::value_type;
+
+  // used in derived classes
+  using RealType  = typename SPOSetT<T>::RealType;
+  using ValueType = typename SPOSetT<T>::ValueType;
+
+  BsplineSetT(const std::string& my_name) : SPOSetT<T>(my_name), MyIndex(0), first_spo(0), last_spo(0) {}
+  // Pure-virtual queries: every concrete spline set has to provide its own answers.
+  virtual bool isComplex() const         = 0;
+  virtual std::string getKeyword() const = 0;
+  ///read-only access to HalfG
+  auto& getHalfG() const { return HalfG; }
+
+  inline void init_base(int n) // size all per-band containers to n entries
+  {
+    BandIndexMap.resize(n);
+    MakeTwoCopies.resize(n);
+    kPoints.resize(n);
+    for (int band = 0; band < n; ++band)
+      BandIndexMap[band] = band; // identity map; remap_kpoints() may reorder it later
+  }
+
+  ///remap kpoints to group general kpoints & special kpoints:
+  ///entries flagged in MakeTwoCopies are moved to the front, the others follow, each group keeping its order
+  int remap_kpoints()
+  {
+    const std::vector<PosType> original_k(kPoints);
+    const int num_k = kPoints.size();
+    int slot        = 0;
+    //pass 1: flagged entries
+    for (int ik = 0; ik < num_k; ++ik)
+    {
+      if (!MakeTwoCopies[ik])
+        continue;
+      kPoints[slot]        = original_k[ik];
+      BandIndexMap[slot++] = ik;
+    }
+    const int num_complex = slot; //the number of complex bands
+    //pass 2: unflagged entries
+    for (int ik = 0; ik < num_k; ++ik)
+    {
+      if (MakeTwoCopies[ik])
+        continue;
+      kPoints[slot]        = original_k[ik];
+      BandIndexMap[slot++] = ik;
+    }
+    return num_complex;
+  }
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const override = 0; // stays pure virtual: concrete classes implement cloning
+  // stores the requested orbital count in OrbitalSetSize
+  void setOrbitalSetSize(int norbs) override { this->OrbitalSetSize = norbs; }
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override
+  {
+    for (int ptcl = first, row = 0; ptcl < last; ++ptcl, ++row) // output row `row` <-> particle `ptcl`
+    {
+      ValueVector psi_row(logdet[row], logdet.cols());
+      GradVector dpsi_row(dlogdet[row], dlogdet.cols());
+      ValueVector d2psi_row(d2logdet[row], d2logdet.cols());
+      this->evaluateVGL(P, ptcl, psi_row, dpsi_row, d2psi_row);
+    }
+  }
+
+  void mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                               const RefVectorWithLeader<ParticleSet>& P_list,
+                               int first,
+                               int last,
+                               const RefVector<ValueMatrix>& logdet_list,
+                               const RefVector<GradMatrix>& dlogdet_list,
+                               const RefVector<ValueMatrix>& d2logdet_list) const override
+  {
+    assert(this == &spo_list.getLeader());
+    const size_t nw = spo_list.size();
+    std::vector<ValueVector> mw_psi_v;
+    std::vector<GradVector> mw_dpsi_v;
+    std::vector<ValueVector> mw_d2psi_v;
+    RefVector<ValueVector> psi_v_list;
+    RefVector<GradVector> dpsi_v_list;
+    RefVector<ValueVector> d2psi_v_list;
+    mw_psi_v.reserve(nw);   // reserved up front so the emplace_back calls below do not need to reallocate:
+    mw_dpsi_v.reserve(nw);  // the *_list containers hold references to elements of these three vectors
+    mw_d2psi_v.reserve(nw);
+    psi_v_list.reserve(nw);
+    dpsi_v_list.reserve(nw);
+    d2psi_v_list.reserve(nw);
+    // Fallback path: for every particle in [first, last), wrap row i of each walker's matrices and call mw_evaluateVGL.
+    for (int iat = first, i = 0; iat < last; ++iat, ++i)
+    {
+      mw_psi_v.clear();
+      mw_dpsi_v.clear();
+      mw_d2psi_v.clear();
+      psi_v_list.clear();
+      dpsi_v_list.clear();
+      d2psi_v_list.clear();
+      // one row vector per walker; the index type matches nw to avoid a signed/unsigned comparison
+      for (size_t iw = 0; iw < nw; iw++)
+      {
+        mw_psi_v.emplace_back(logdet_list[iw].get()[i], logdet_list[iw].get().cols());
+        mw_dpsi_v.emplace_back(dlogdet_list[iw].get()[i], dlogdet_list[iw].get().cols());
+        mw_d2psi_v.emplace_back(d2logdet_list[iw].get()[i], d2logdet_list[iw].get().cols());
+        psi_v_list.push_back(mw_psi_v.back());
+        dpsi_v_list.push_back(mw_dpsi_v.back());
+        d2psi_v_list.push_back(mw_d2psi_v.back());
+      }
+      // single multi-walker call for particle iat
+      this->mw_evaluateVGL(spo_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list);
+    }
+  }
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& grad_grad_logdet) override
+  {
+    for (int ptcl = first, row = 0; ptcl < last; ++ptcl, ++row) // output row `row` <-> particle `ptcl`
+    {
+      ValueVector psi_row(logdet[row], logdet.cols());
+      GradVector dpsi_row(dlogdet[row], dlogdet.cols());
+      HessVector hess_row(grad_grad_logdet[row], grad_grad_logdet.cols());
+      this->evaluateVGH(P, ptcl, psi_row, dpsi_row, hess_row);
+    }
+  }
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& grad_grad_logdet,
+                            GGGMatrix& grad_grad_grad_logdet) override
+  {
+    for (int ptcl = first, row = 0; ptcl < last; ++ptcl, ++row) // output row `row` <-> particle `ptcl`
+    {
+      ValueVector psi_row(logdet[row], logdet.cols());
+      GradVector dpsi_row(dlogdet[row], dlogdet.cols());
+      HessVector hess_row(grad_grad_logdet[row], grad_grad_logdet.cols());
+      GGGVector ggg_row(grad_grad_grad_logdet[row], grad_grad_grad_logdet.cols());
+      this->evaluateVGHGH(P, ptcl, psi_row, dpsi_row, hess_row, ggg_row);
+    }
+  }
+
+  void evaluateGradSource(const ParticleSet& P,
+                          int first,
+                          int last,
+                          const ParticleSet& source,
+                          int iat_src,
+                          GradMatrix& gradphi) override
+  {
+    //Intentional no-op (gradphi is left untouched), since Einsplines don't explicitly depend on ion positions.
+  }
+
+  void evaluateGradSource(const ParticleSet& P,
+                          int first,
+                          int last,
+                          const ParticleSet& source,
+                          int iat_src,
+                          GradMatrix& grad_phi,
+                          HessMatrix& grad_grad_phi,
+                          GradMatrix& grad_lapl_phi) override
+  {
+    //Intentional no-op (all three outputs are left untouched), since Einsplines don't explicitly depend on ion positions.
+  }
+
+  template<class BSPLINESPO>
+  friend struct SplineSetReader;
+  friend struct BsplineReaderBase;
+
+
+protected:
+  static const int D = QMCTraits::DIM; // number of components of HalfG
+  ///Index of this adoptor, when multiple adoptors are used for NUMA or distributed cases
+  size_t MyIndex;
+  ///first index of the SPOs this Spline handles
+  size_t first_spo;
+  ///last index of the SPOs this Spline handles
+  size_t last_spo;
+  ///sign bits at the G/2 boundaries
+  TinyVector<int, D> HalfG;
+  ///flags to unpack sin/cos; remap_kpoints() places the flagged entries first
+  std::vector<bool> MakeTwoCopies;
+  /** kpoints for each unique orbitals.
+   * Note: for historic reason, this sign is opposite to what was used in DFT when orbitals were generated.
+   * Changing the sign requires updating all the evaluation code.
+   */
+  std::vector<PosType> kPoints;
+  ///remap splines to orbitals; identity after init_base(), reordered by remap_kpoints()
+  aligned_vector<int> BandIndexMap;
+  ///band offsets used for communication
+  std::vector<int> offset;
+};
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp
new file mode 100644
index 0000000000..155dd8a220
--- /dev/null
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp
@@ -0,0 +1,800 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2019 QMCPACK developers.
+//
+// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#include <complex>
+#include "Concurrency/OpenMP.h"
+#include "SplineC2CT.h"
+#include "spline2/MultiBsplineEval.hpp"
+#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp"
+#include "CPU/math.hpp"
+
+namespace qmcplusplus
+{
+template<class T>
+SplineC2CT<T>::SplineC2CT(const SplineC2CT& in) = default; // compiler-generated member-wise copy
+
+template<class T>
+inline void SplineC2CT<T>::set_spline(SingleSplineType* spline_r, // real part, copied to slot 2 * ispline
+                                      SingleSplineType* spline_i, // imaginary part, copied to slot 2 * ispline + 1
+                                      int twist,                  // not referenced by this implementation
+                                      int ispline,
+                                      int level) // not referenced by this implementation
+{
+  SplineInst->copy_spline(spline_r, 2 * ispline);
+  SplineInst->copy_spline(spline_i, 2 * ispline + 1);
+}
+
+template<class T>
+bool SplineC2CT<T>::read_splines(hdf_archive& h5f)
+{
+  std::ostringstream dataset_name; // becomes "spline_<MyIndex>"
+  dataset_name << "spline_" << this->MyIndex;
+  einspline_engine<SplineType> spline_table(SplineInst->getSplinePtr());
+  return h5f.readEntry(spline_table, dataset_name.str().c_str());
+}
+
+template<class T>
+bool SplineC2CT<T>::write_splines(hdf_archive& h5f)
+{
+  std::ostringstream dataset_name; // becomes "spline_<MyIndex>"
+  dataset_name << "spline_" << this->MyIndex;
+  einspline_engine<SplineType> spline_table(SplineInst->getSplinePtr());
+  return h5f.writeEntry(spline_table, dataset_name.str().c_str());
+}
+
+template<class T>
+void SplineC2CT<T>::storeParamsBeforeRotation()
+{
+  // Snapshot every spline coefficient into coef_copy_; applyRotation() reads its inputs from that snapshot.
+  const auto spline  = SplineInst->getSplinePtr();
+  const auto n_coefs = spline->coefs_size;
+  coef_copy_         = std::make_shared<std::vector<RealType>>(n_coefs);
+  std::copy_n(spline->coefs, n_coefs, coef_copy_->begin());
+}
+
+/*
+  ~~ Notes for rotation ~~
+  spl_coefs      = Raw pointer to spline coefficients
+  basis_set_size = Number of spline coefs per orbital
+  OrbitalSetSize = Number of orbitals (excluding padding)
+
+  spl_coefs has a complicated layout depending on the dimensionality of the
+  splines. Luckily, for our purposes, we can think of spl_coefs as pointing to
+  a matrix of size basis_set_size x Nsplines, with the spline index adjacent in
+  memory. For SplineC2C each orbital occupies two adjacent splines (real, imag).
+  The spline index is SIMD aligned, so Nsplines may include padding.
+
+  As a result, due to SIMD alignment, Nsplines may be larger than the
+  actual number of splined orbitals. This means that in practice rot_mat
+  may be smaller than the number of 'columns' in the coefs array!
+
+      SplineR2R spl_coef layout:
+             ^         | sp1 | ... | spN | pad |
+             |         |=====|=====|=====|=====|
+             |         | c11 | ... | c1N | 0   |
+      basis_set_size   | c21 | ... | c2N | 0   |
+             |         | ... | ... | ... | 0   |
+             |         | cM1 | ... | cMN | 0   |
+             v         |=====|=====|=====|=====|
+                       <------ Nsplines ------>
+
+      SplineC2C spl_coef layout:
+             ^         | sp1_r | sp1_i |  ...  | spN_r | spN_i |  pad  |
+             |         |=======|=======|=======|=======|=======|=======|
+             |         | c11_r | c11_i |  ...  | c1N_r | c1N_i |   0   |
+      basis_set_size   | c21_r | c21_i |  ...  | c2N_r | c2N_i |   0   |
+             |         |  ...  |  ...  |  ...  |  ...  |  ...  |  ...  |
+             |         | cM1_r | cM1_i |  ...  | cMN_r | cMN_i |   0   |
+             v         |=======|=======|=======|=======|=======|=======|
+                       <------------------ Nsplines ------------------>
+
+  NB: For splines (typically) BasisSetSize >> OrbitalSetSize, so the spl_coefs
+  "matrix" is very tall and skinny.
+*/
+template<class T>
+void SplineC2CT<T>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy)
+{
+  // For every basis row i: new coef(i, j) = sum_k stored coef(i, k) * rot_mat[k][j], using complex arithmetic.
+  const auto spline_ptr = SplineInst->getSplinePtr(); // SplineInst is a MultiBspline. See src/spline2/MultiBspline.hpp
+  assert(spline_ptr != nullptr);
+  const auto spl_coefs      = spline_ptr->coefs;
+  const auto Nsplines       = spline_ptr->num_splines; // May include padding
+  const auto coefs_tot_size = spline_ptr->coefs_size;
+  const auto basis_set_size = coefs_tot_size / Nsplines;
+  assert(this->OrbitalSetSize == rot_mat.rows());
+  assert(this->OrbitalSetSize == rot_mat.cols());
+
+  // coef_copy_ is dereferenced below in both modes, so it must exist whether or not it is refreshed here.
+  // use_stored_copy == false: rotate the current coefficients, i.e. refresh the snapshot first.
+  assert(coef_copy_ != nullptr);
+  if (!use_stored_copy)
+    std::copy_n(spl_coefs, coefs_tot_size, coef_copy_->begin());
+
+  for (int i = 0; i < basis_set_size; i++)
+    for (int j = 0; j < this->OrbitalSetSize; j++)
+    {
+      // cur_elem points to the real component of the coefficient.
+      // Imag component is adjacent in memory.
+      const auto cur_elem = Nsplines * i + 2 * j;
+      RealType newval_r{0.};
+      RealType newval_i{0.};
+      for (auto k = 0; k < this->OrbitalSetSize; k++)
+      {
+        const auto index = Nsplines * i + 2 * k;
+        RealType zr      = (*coef_copy_)[index];
+        RealType zi      = (*coef_copy_)[index + 1];
+        RealType wr      = rot_mat[k][j].real();
+        RealType wi      = rot_mat[k][j].imag();
+        newval_r += zr * wr - zi * wi;
+        newval_i += zr * wi + zi * wr;
+      }
+      spl_coefs[cur_elem]     = newval_r;
+      spl_coefs[cur_elem + 1] = newval_i;
+    }
+}
+
+template<class T>
+inline void SplineC2CT<T>::assign_v(const PointType& r,
+                                    const vContainer_type& myV,
+                                    ValueVector& psi,
+                                    int first,
+                                    int last) const
+{
+  const auto num_kpoints = this->kPoints.size();
+  if (last > num_kpoints) // never run past the number of stored k-points
+    last = num_kpoints;
+  const RealType x = r[0], y = r[1], z = r[2];
+  const RealType* restrict kx = myKcart.data(0);
+  const RealType* restrict ky = myKcart.data(1);
+  const RealType* restrict kz = myKcart.data(2);
+#pragma omp simd
+  for (size_t orb = first; orb < last; ++orb)
+  {
+    // (re, im) are stored interleaved in myV; multiply by the phase (cos_kr + i sin_kr) obtained from -(k.r)
+    RealType sin_kr, cos_kr;
+    const RealType re = myV[2 * orb];
+    const RealType im = myV[2 * orb + 1];
+    qmcplusplus::sincos(-(x * kx[orb] + y * ky[orb] + z * kz[orb]), &sin_kr, &cos_kr);
+    psi[orb + this->first_spo] = ComplexT(re * cos_kr - im * sin_kr, im * cos_kr + re * sin_kr);
+  }
+}
+
+template<class T>
+void SplineC2CT<T>::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi)
+{
+  // active position of particle iat, plus the same point passed through PrimLattice.toUnit_floor
+  const PointType& pos = P.activeR(iat);
+  PointType pos_unit(PrimLattice.toUnit_floor(pos));
+#pragma omp parallel
+  {
+    // The split is done over 2 * psi.size() real splines because every complex orbital is stored as two reals.
+    int ibeg, iend;
+    FairDivideAligned(2 * psi.size(), getAlignment<T>(), omp_get_num_threads(), omp_get_thread_num(), ibeg, iend);
+    // spline values for [ibeg, iend) go to myV; assign_v then works on the halved (complex) index range
+    spline2::evaluate3d(SplineInst->getSplinePtr(), pos_unit, myV, ibeg, iend);
+    assign_v(pos, myV, psi, ibeg / 2, iend / 2);
+  }
+}
+
+template<class T>
+void SplineC2CT<T>::evaluateDetRatios(const VirtualParticleSet& VP,
+                                      ValueVector& psi, // scratch: rewritten for every virtual position
+                                      const ValueVector& psiinv,
+                                      std::vector<ValueType>& ratios) // output: ratios[iat] is assigned for each iat
+{
+  const bool need_resize = ratios_private.rows() < VP.getTotalNum();
+  // NOTE(review): only the row count is compared; the column count keeps the thread count of the last resize — confirm.
+#pragma omp parallel
+  {
+    int tid = omp_get_thread_num();
+    // (re)size the per-thread scratch: one row per virtual position, one column per thread of this team
+    if (need_resize)
+    {
+      if (tid == 0) // just like #pragma omp master, but one fewer call to the runtime
+        ratios_private.resize(VP.getTotalNum(), omp_get_num_threads());
+#pragma omp barrier
+    }
+    int first, last;
+    // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type
+    FairDivideAligned(2 * psi.size(), getAlignment<T>(), omp_get_num_threads(), tid, first, last);
+    const int first_cplx   = first / 2;
+    const auto kPointsSize = this->kPoints.size();
+    const int last_cplx    = kPointsSize < last / 2 ? kPointsSize : last / 2;
+    // every thread visits all virtual positions but only handles its own [first, last) spline range
+    for (int iat = 0; iat < VP.getTotalNum(); ++iat)
+    {
+      const PointType& r = VP.activeR(iat);
+      PointType ru(PrimLattice.toUnit_floor(r));
+      // NOTE(review): assign_v stores at index j + first_spo, the dot below reads from first_cplx — same only if first_spo == 0.
+      spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last);
+      assign_v(r, myV, psi, first_cplx, last_cplx);
+      ratios_private[iat][tid] = simd::dot(psi.data() + first_cplx, psiinv.data() + first_cplx, last_cplx - first_cplx);
+    }
+  }
+
+  // do the reduction manually: ratios[iat] is the sum of ratios_private[iat] over all of its columns
+  for (int iat = 0; iat < VP.getTotalNum(); ++iat)
+  {
+    ratios[iat] = ComplexT(0);
+    for (int tid = 0; tid < ratios_private.cols(); tid++)
+      ratios[iat] += ratios_private[iat][tid];
+  }
+}
+
+/** assign_vgl: combine the spline buffers myV, myG, myH with the per-orbital phase and store psi, dpsi, d2psi
+   */
+template<class T>
+inline void SplineC2CT<T>::assign_vgl(const PointType& r, // position entering the phase -(k.r)
+                                      ValueVector& psi,
+                                      GradVector& dpsi,
+                                      ValueVector& d2psi,
+                                      int first, // orbital (complex) index range [first, last)
+                                      int last) const
+{
+  // protect last
+  const auto kPointsSize = this->kPoints.size();
+  last                   = last > kPointsSize ? kPointsSize : last;
+
+  // symGG sums each off-diagonal pair of GGt, matching the six hessian components handed to SymTrace below
+  constexpr RealType two(2);
+  const RealType g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
+                 g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
+                 g22 = PrimLattice.G(8);
+  const RealType x = r[0], y = r[1], z = r[2];
+  const RealType symGG[6] = {GGt[0], GGt[1] + GGt[3], GGt[2] + GGt[6], GGt[4], GGt[5] + GGt[7], GGt[8]};
+
+  const RealType* restrict k0 = myKcart.data(0);
+  const RealType* restrict k1 = myKcart.data(1);
+  const RealType* restrict k2 = myKcart.data(2);
+
+  const RealType* restrict g0  = myG.data(0);
+  const RealType* restrict g1  = myG.data(1);
+  const RealType* restrict g2  = myG.data(2);
+  const RealType* restrict h00 = myH.data(0);
+  const RealType* restrict h01 = myH.data(1);
+  const RealType* restrict h02 = myH.data(2);
+  const RealType* restrict h11 = myH.data(3);
+  const RealType* restrict h12 = myH.data(4);
+  const RealType* restrict h22 = myH.data(5);
+
+#pragma omp simd
+  for (size_t j = first; j < last; ++j)
+  {
+    const size_t jr = j << 1; // index of the real part in the interleaved buffers
+    const size_t ji = jr + 1; // index of the imaginary part
+
+    const RealType kX    = k0[j];
+    const RealType kY    = k1[j];
+    const RealType kZ    = k2[j];
+    const RealType val_r = myV[jr];
+    const RealType val_i = myV[ji];
+
+    //phase
+    RealType s, c;
+    qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
+
+    //dot(PrimLattice.G,myG[j])
+    const RealType dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
+    const RealType dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
+    const RealType dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
+
+    const RealType dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
+    const RealType dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
+    const RealType dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
+
+    // \f$\nabla \psi_r + {\bf k}\psi_i\f$
+    const RealType gX_r = dX_r + val_i * kX;
+    const RealType gY_r = dY_r + val_i * kY;
+    const RealType gZ_r = dZ_r + val_i * kZ;
+    const RealType gX_i = dX_i - val_r * kX;
+    const RealType gY_i = dY_i - val_r * kY;
+    const RealType gZ_i = dZ_i - val_r * kZ;
+
+    const RealType lcart_r = SymTrace(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], symGG);
+    const RealType lcart_i = SymTrace(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], symGG);
+    const RealType lap_r   = lcart_r + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i);
+    const RealType lap_i   = lcart_i + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r);
+    const size_t psiIndex  = j + this->first_spo;
+    psi[psiIndex]          = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r);
+    dpsi[psiIndex][0]      = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r);
+    dpsi[psiIndex][1]      = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r);
+    dpsi[psiIndex][2]      = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r);
+    d2psi[psiIndex]        = ComplexT(c * lap_r - s * lap_i, c * lap_i + s * lap_r);
+  }
+}
+
+/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL are in cartesian; covers last_spo - first_spo orbitals
+   */
+template<class T>
+inline void SplineC2CT<T>::assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+{
+  constexpr RealType two(2);
+  const RealType x = r[0], y = r[1], z = r[2];
+
+  const RealType* restrict k0 = myKcart.data(0);
+  const RealType* restrict k1 = myKcart.data(1);
+  const RealType* restrict k2 = myKcart.data(2);
+
+  const RealType* restrict g0 = myG.data(0);
+  const RealType* restrict g1 = myG.data(1);
+  const RealType* restrict g2 = myG.data(2);
+
+  const size_t N = this->last_spo - this->first_spo;
+#pragma omp simd
+  for (size_t j = 0; j < N; ++j)
+  {
+    const size_t jr = j << 1; // index of the real part in the interleaved buffers
+    const size_t ji = jr + 1; // index of the imaginary part
+
+    const RealType kX    = k0[j];
+    const RealType kY    = k1[j];
+    const RealType kZ    = k2[j];
+    const RealType val_r = myV[jr];
+    const RealType val_i = myV[ji];
+
+    //phase
+    RealType s, c;
+    qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
+
+    //myG is used as is: no PrimLattice.G transform is applied in this routine
+    const RealType dX_r = g0[jr];
+    const RealType dY_r = g1[jr];
+    const RealType dZ_r = g2[jr];
+
+    const RealType dX_i = g0[ji];
+    const RealType dY_i = g1[ji];
+    const RealType dZ_i = g2[ji];
+
+    // \f$\nabla \psi_r + {\bf k}\psi_i\f$
+    const RealType gX_r = dX_r + val_i * kX;
+    const RealType gY_r = dY_r + val_i * kY;
+    const RealType gZ_r = dZ_r + val_i * kZ;
+    const RealType gX_i = dX_i - val_r * kX;
+    const RealType gY_i = dY_i - val_r * kY;
+    const RealType gZ_i = dZ_i - val_r * kZ;
+
+    const RealType lap_r = myL[jr] + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i);
+    const RealType lap_i = myL[ji] + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r);
+
+    const size_t psiIndex = j + this->first_spo;
+    psi[psiIndex]         = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r);
+    dpsi[psiIndex][0]     = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r);
+    dpsi[psiIndex][1]     = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r);
+    dpsi[psiIndex][2]     = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r);
+    d2psi[psiIndex]       = ComplexT(c * lap_r - s * lap_i, c * lap_i + s * lap_r);
+  }
+}
+
+template<class T>
+void SplineC2CT<T>::evaluateVGL(const ParticleSet& P,
+                                const int iat,
+                                ValueVector& psi,
+                                GradVector& dpsi,
+                                ValueVector& d2psi)
+{
+  // active position of particle iat, plus the same point passed through PrimLattice.toUnit_floor
+  const PointType& pos = P.activeR(iat);
+  PointType pos_unit(PrimLattice.toUnit_floor(pos));
+#pragma omp parallel
+  {
+    // The split is done over 2 * psi.size() real splines because every complex orbital is stored as two reals.
+    int ibeg, iend;
+    FairDivideAligned(2 * psi.size(), getAlignment<T>(), omp_get_num_threads(), omp_get_thread_num(), ibeg, iend);
+    // results for [ibeg, iend) go to myV, myG, myH; assign_vgl then works on the halved (complex) index range
+    spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), pos_unit, myV, myG, myH, ibeg, iend);
+    assign_vgl(pos, psi, dpsi, d2psi, ibeg / 2, iend / 2);
+  }
+}
+
+template<class T>
+void SplineC2CT<T>::assign_vgh(const PointType& r, // position entering the phase -(k.r)
+                               ValueVector& psi,
+                               GradVector& dpsi,
+                               HessVector& grad_grad_psi,
+                               int first, // orbital (complex) index range [first, last)
+                               int last) const
+{
+  // Stores value, gradient and all nine hessian entries for each orbital; last is clamped to kPoints.size().
+  const auto kPointsSize = this->kPoints.size();
+  last                   = last > kPointsSize ? kPointsSize : last;
+
+  const RealType g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
+                 g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
+                 g22 = PrimLattice.G(8);
+  const RealType x = r[0], y = r[1], z = r[2];
+
+  const RealType* restrict k0 = myKcart.data(0);
+  const RealType* restrict k1 = myKcart.data(1);
+  const RealType* restrict k2 = myKcart.data(2);
+
+  const RealType* restrict g0  = myG.data(0);
+  const RealType* restrict g1  = myG.data(1);
+  const RealType* restrict g2  = myG.data(2);
+  const RealType* restrict h00 = myH.data(0);
+  const RealType* restrict h01 = myH.data(1);
+  const RealType* restrict h02 = myH.data(2);
+  const RealType* restrict h11 = myH.data(3);
+  const RealType* restrict h12 = myH.data(4);
+  const RealType* restrict h22 = myH.data(5);
+
+#pragma omp simd
+  for (size_t j = first; j < last; ++j)
+  {
+    const size_t jr = j << 1; // real-part index; size_t like j, as in assign_vgl (no narrowing to int)
+    const size_t ji = jr + 1; // imaginary-part index
+
+    const RealType kX    = k0[j];
+    const RealType kY    = k1[j];
+    const RealType kZ    = k2[j];
+    const RealType val_r = myV[jr];
+    const RealType val_i = myV[ji];
+
+    //phase
+    RealType s, c;
+    qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
+
+    //dot(PrimLattice.G,myG[j])
+    const RealType dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
+    const RealType dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
+    const RealType dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
+
+    const RealType dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
+    const RealType dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
+    const RealType dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
+
+    // \f$\nabla \psi_r + {\bf k}\psi_i\f$
+    const RealType gX_r = dX_r + val_i * kX;
+    const RealType gY_r = dY_r + val_i * kY;
+    const RealType gZ_r = dZ_r + val_i * kZ;
+    const RealType gX_i = dX_i - val_r * kX;
+    const RealType gY_i = dY_i - val_r * kY;
+    const RealType gZ_i = dZ_i - val_r * kZ;
+
+    const size_t psiIndex = j + this->first_spo;
+    psi[psiIndex]         = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r);
+    dpsi[psiIndex][0]     = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r);
+    dpsi[psiIndex][1]     = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r);
+    dpsi[psiIndex][2]     = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r);
+    // NOTE(review): off-diagonal k-terms use kA * (gB + dB), not symmetric in A<->B; expected kA * dB + kB * gA — confirm.
+    const RealType h_xx_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i);
+    const RealType h_xy_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i);
+    const RealType h_xz_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i);
+    const RealType h_yx_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i);
+    const RealType h_yy_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i);
+    const RealType h_yz_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i);
+    const RealType h_zx_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i);
+    const RealType h_zy_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i);
+    const RealType h_zz_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i);
+
+    const RealType h_xx_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r);
+    const RealType h_xy_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r);
+    const RealType h_xz_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r);
+    const RealType h_yx_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r);
+    const RealType h_yy_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r);
+    const RealType h_yz_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r);
+    const RealType h_zx_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r);
+    const RealType h_zy_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r);
+    const RealType h_zz_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r);
+
+    grad_grad_psi[psiIndex][0] = ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r);
+    grad_grad_psi[psiIndex][1] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r);
+    grad_grad_psi[psiIndex][2] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r);
+    grad_grad_psi[psiIndex][3] = ComplexT(c * h_yx_r - s * h_yx_i, c * h_yx_i + s * h_yx_r);
+    grad_grad_psi[psiIndex][4] = ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r);
+    grad_grad_psi[psiIndex][5] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r);
+    grad_grad_psi[psiIndex][6] = ComplexT(c * h_zx_r - s * h_zx_i, c * h_zx_i + s * h_zx_r);
+    grad_grad_psi[psiIndex][7] = ComplexT(c * h_zy_r - s * h_zy_i, c * h_zy_i + s * h_zy_r);
+    grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r);
+  }
+}
+
+template<class T>
+void SplineC2CT<T>::evaluateVGH(const ParticleSet& P,      // source of the active position of particle iat
+                                const int iat,             // particle index passed to P.activeR
+                                ValueVector& psi,          // forwarded to assign_vgh; its size sets the orbital count
+                                GradVector& dpsi,          // forwarded to assign_vgh
+                                HessVector& grad_grad_psi) // forwarded to assign_vgh, which fills the 9 Hessian entries
+{
+  const PointType& r = P.activeR(iat);
+  PointType ru(PrimLattice.toUnit_floor(r));
+  // Every thread of the parallel region handles its own slice [first, last) of the real-valued spline storage.
+#pragma omp parallel
+  {
+    int first, last;
+    // Factor of 2: psi is complex but storage is real. Slices are aligned for RealType (the element type), not T.
+    FairDivideAligned(2 * psi.size(), getAlignment<RealType>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
+    // Evaluate the slice into myV/myG/myH, then combine the real pairs into orbitals [first / 2, last / 2).
+    spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last);
+    assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2);
+  }
+}
+
+template<class T>
+void SplineC2CT<T>::assign_vghgh(const PointType& r,            // position used in the phase -(k.r)
+                                 ValueVector& psi,              // out: psi[j + first_spo]
+                                 GradVector& dpsi,              // out: dpsi[j + first_spo][0..2]
+                                 HessVector& grad_grad_psi,     // out: 9 entries per orbital, symmetric pairs duplicated
+                                 GGGVector& grad_grad_grad_psi, // out: [0..2][0..8] entries per orbital
+                                 int first,                     // first orbital index (inclusive)
+                                 int last) const                // one past the last orbital index; see clamp below
+{ // Combines the real spline results in myV/myG/myH/mygH with the per-orbital phase into complex outputs.
+  // protect last: a negative value selects all k-points, larger values are clamped to kPoints.size()
+  const auto kPointsSize = this->kPoints.size();
+  last                   = last < 0 ? kPointsSize : (last > kPointsSize ? kPointsSize : last);
+
+  const RealType g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
+                 g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
+                 g22 = PrimLattice.G(8);
+  const RealType x = r[0], y = r[1], z = r[2];
+
+  const RealType* restrict k0 = myKcart.data(0);
+  const RealType* restrict k1 = myKcart.data(1);
+  const RealType* restrict k2 = myKcart.data(2);
+
+  const RealType* restrict g0  = myG.data(0);
+  const RealType* restrict g1  = myG.data(1);
+  const RealType* restrict g2  = myG.data(2);
+  const RealType* restrict h00 = myH.data(0);
+  const RealType* restrict h01 = myH.data(1);
+  const RealType* restrict h02 = myH.data(2);
+  const RealType* restrict h11 = myH.data(3);
+  const RealType* restrict h12 = myH.data(4);
+  const RealType* restrict h22 = myH.data(5);
+
+  const RealType* restrict gh000 = mygH.data(0);
+  const RealType* restrict gh001 = mygH.data(1);
+  const RealType* restrict gh002 = mygH.data(2);
+  const RealType* restrict gh011 = mygH.data(3);
+  const RealType* restrict gh012 = mygH.data(4);
+  const RealType* restrict gh022 = mygH.data(5);
+  const RealType* restrict gh111 = mygH.data(6);
+  const RealType* restrict gh112 = mygH.data(7);
+  const RealType* restrict gh122 = mygH.data(8);
+  const RealType* restrict gh222 = mygH.data(9);
+
+//NOTE(review): an older note here said SIMD was unreliable and should be commented out, yet the pragma is active.
+#pragma omp simd
+  for (size_t j = first; j < last; ++j)
+  {
+    int jr = j << 1; // index of the real part of orbital j in the interleaved real arrays
+    int ji = jr + 1; // index of the imaginary part
+
+    const RealType kX    = k0[j];
+    const RealType kY    = k1[j];
+    const RealType kZ    = k2[j];
+    const RealType val_r = myV[jr];
+    const RealType val_i = myV[ji];
+
+    //phase: s and c are produced by sincos for the angle -(k.r)
+    RealType s, c;
+    qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
+
+    //dot(PrimLattice.G,myG[j])
+    const RealType dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
+    const RealType dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
+    const RealType dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
+
+    const RealType dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
+    const RealType dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
+    const RealType dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
+
+    // \f$\nabla \psi_r + {\bf k}\psi_i\f$
+    const RealType gX_r = dX_r + val_i * kX;
+    const RealType gY_r = dY_r + val_i * kY;
+    const RealType gZ_r = dZ_r + val_i * kZ;
+    const RealType gX_i = dX_i - val_r * kX;
+    const RealType gY_i = dY_i - val_r * kY;
+    const RealType gZ_i = dZ_i - val_r * kZ;
+
+    const size_t psiIndex = j + this->first_spo; // outputs are offset by first_spo
+    psi[psiIndex]         = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r);
+    dpsi[psiIndex][0]     = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r);
+    dpsi[psiIndex][1]     = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r);
+    dpsi[psiIndex][2]     = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r);
+
+    //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates.
+    const RealType f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02);
+    const RealType f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12);
+    const RealType f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22);
+    const RealType f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12);
+    const RealType f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22);
+    const RealType f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22);
+
+    const RealType f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02);
+    const RealType f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12);
+    const RealType f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22);
+    const RealType f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12);
+    const RealType f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22);
+    const RealType f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22);
+
+    const RealType h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r;
+    const RealType h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r;
+    const RealType h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r;
+    const RealType h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r;
+    const RealType h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r;
+    const RealType h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r;
+
+    const RealType h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i;
+    const RealType h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i;
+    const RealType h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i;
+    const RealType h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i;
+    const RealType h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i;
+    const RealType h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i;
+
+    grad_grad_psi[psiIndex][0] = ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r);
+    grad_grad_psi[psiIndex][1] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r);
+    grad_grad_psi[psiIndex][2] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r);
+    grad_grad_psi[psiIndex][3] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r);
+    grad_grad_psi[psiIndex][4] = ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r);
+    grad_grad_psi[psiIndex][5] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r);
+    grad_grad_psi[psiIndex][6] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r);
+    grad_grad_psi[psiIndex][7] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r);
+    grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r);
+
+    //These are the real and imaginary components of the third SPO derivative.  _xxx denotes
+    // third derivative w.r.t. x, _xyz, a derivative with respect to x,y, and z, and so on.
+
+    const RealType f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                          gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02);
+    const RealType f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                          gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12);
+    const RealType f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                          gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22);
+    const RealType f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                          gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12);
+    const RealType f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                          gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22);
+    const RealType f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                          gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22);
+    const RealType f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                          gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12);
+    const RealType f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                          gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22);
+    const RealType f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                          gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22);
+    const RealType f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                          gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22);
+
+    const RealType f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                          gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02);
+    const RealType f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                          gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12);
+    const RealType f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                          gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22);
+    const RealType f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                          gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12);
+    const RealType f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                          gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22);
+    const RealType f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                          gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22);
+    const RealType f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                          gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12);
+    const RealType f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                          gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22);
+    const RealType f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                          gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22);
+    const RealType f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                          gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22);
+
+    //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r))
+    const RealType gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i;
+    const RealType gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r;
+    const RealType gh_xxy_r =
+        f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i;
+    const RealType gh_xxy_i =
+        f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r;
+    const RealType gh_xxz_r =
+        f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i;
+    const RealType gh_xxz_i =
+        f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r;
+    const RealType gh_xyy_r =
+        f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i;
+    const RealType gh_xyy_i =
+        f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r;
+    const RealType gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) -
+        (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i;
+    const RealType gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) -
+        (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r;
+    const RealType gh_xzz_r =
+        f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i;
+    const RealType gh_xzz_i =
+        f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r;
+    const RealType gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i;
+    const RealType gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r;
+    const RealType gh_yyz_r =
+        f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i;
+    const RealType gh_yyz_i =
+        f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r;
+    const RealType gh_yzz_r =
+        f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i;
+    const RealType gh_yzz_i =
+        f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r;
+    const RealType gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i;
+    const RealType gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r;
+
+    grad_grad_grad_psi[psiIndex][0][0] = ComplexT(c * gh_xxx_r - s * gh_xxx_i, c * gh_xxx_i + s * gh_xxx_r);
+    grad_grad_grad_psi[psiIndex][0][1] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r);
+    grad_grad_grad_psi[psiIndex][0][2] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r);
+    grad_grad_grad_psi[psiIndex][0][3] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r);
+    grad_grad_grad_psi[psiIndex][0][4] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r);
+    grad_grad_grad_psi[psiIndex][0][5] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
+    grad_grad_grad_psi[psiIndex][0][6] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r);
+    grad_grad_grad_psi[psiIndex][0][7] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
+    grad_grad_grad_psi[psiIndex][0][8] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r);
+
+    grad_grad_grad_psi[psiIndex][1][0] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r);
+    grad_grad_grad_psi[psiIndex][1][1] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r);
+    grad_grad_grad_psi[psiIndex][1][2] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
+    grad_grad_grad_psi[psiIndex][1][3] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r);
+    grad_grad_grad_psi[psiIndex][1][4] = ComplexT(c * gh_yyy_r - s * gh_yyy_i, c * gh_yyy_i + s * gh_yyy_r);
+    grad_grad_grad_psi[psiIndex][1][5] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r);
+    grad_grad_grad_psi[psiIndex][1][6] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
+    grad_grad_grad_psi[psiIndex][1][7] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r);
+    grad_grad_grad_psi[psiIndex][1][8] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r);
+
+
+    grad_grad_grad_psi[psiIndex][2][0] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r);
+    grad_grad_grad_psi[psiIndex][2][1] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
+    grad_grad_grad_psi[psiIndex][2][2] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r);
+    grad_grad_grad_psi[psiIndex][2][3] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
+    grad_grad_grad_psi[psiIndex][2][4] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r);
+    grad_grad_grad_psi[psiIndex][2][5] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r);
+    grad_grad_grad_psi[psiIndex][2][6] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r);
+    grad_grad_grad_psi[psiIndex][2][7] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r);
+    grad_grad_grad_psi[psiIndex][2][8] = ComplexT(c * gh_zzz_r - s * gh_zzz_i, c * gh_zzz_i + s * gh_zzz_r);
+  }
+}
+
+template<class T>
+void SplineC2CT<T>::evaluateVGHGH(const ParticleSet& P,          // source of the active position of particle iat
+                                  const int iat,                 // particle index passed to P.activeR
+                                  ValueVector& psi,              // forwarded to assign_vghgh; its size sets the orbital count
+                                  GradVector& dpsi,              // forwarded to assign_vghgh
+                                  HessVector& grad_grad_psi,     // forwarded to assign_vghgh
+                                  GGGVector& grad_grad_grad_psi) // forwarded to assign_vghgh
+{
+  const PointType& r = P.activeR(iat);
+  PointType ru(PrimLattice.toUnit_floor(r));
+#pragma omp parallel
+  {
+    int first, last;
+    // Factor of 2: psi is complex but storage is real. Slices are aligned for RealType (the element type), not T.
+    FairDivideAligned(2 * psi.size(), getAlignment<RealType>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
+    // Evaluate the slice into myV/myG/myH/mygH, then combine the real pairs into orbitals [first / 2, last / 2).
+    spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last);
+    assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, last / 2);
+  }
+}
+
+template class SplineC2CT<std::complex<float>>;  // explicit instantiation for T = std::complex<float>
+template class SplineC2CT<std::complex<double>>; // explicit instantiation for T = std::complex<double>
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h
new file mode 100644
index 0000000000..fd55fcd9f2
--- /dev/null
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h
@@ -0,0 +1,236 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2019 QMCPACK developers.
+//
+// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+/** @file
+ *
+ * class to handle complex splines to complex orbitals with splines of arbitrary precision
+ */
+#ifndef QMCPLUSPLUS_SPLINE_C2CT_H
+#define QMCPLUSPLUS_SPLINE_C2CT_H
+
+#include <memory>
+#include "BsplineSetT.h"
+#include "OhmmsSoA/VectorSoaContainer.h"
+#include "spline2/MultiBspline.hpp"
+#include "Utilities/FairDivide.h"
+
+namespace qmcplusplus
+{
+/** class to match std::complex spline with BsplineSet::ValueType (complex) SPOs
+ * @tparam T complex value type of the output orbitals (aliased as ComplexT below)
+ *
+ * Requires temporary storage and multiplication of phase vectors
+ * The internal storage of complex spline coefficients uses double sized real arrays of RealType, aligned and padded.
+ * All the output orbitals are complex.
+ */
+template<class T>
+class SplineC2CT : public BsplineSetT<T>
+{
+public:
+  using RealType         = typename BsplineSetT<T>::RealType;
+  using SplineType       = typename bspline_traits<RealType, 3>::SplineType;
+  using BCType           = typename bspline_traits<RealType, 3>::BCType;
+  using DataType         = RealType;
+  using PointType        = TinyVector<RealType, 3>;
+  using SingleSplineType = UBspline_3d_d;
+
+
+  // types for evaluation results
+  // only works for Complex
+  using ComplexT    = T;
+  using ValueType   = typename BsplineSetT<T>::ValueType;
+  using GGGVector   = typename BsplineSetT<T>::GGGVector;
+  using GradVector  = typename BsplineSetT<T>::GradVector;
+  using HessVector  = typename BsplineSetT<T>::HessVector;
+  using ValueVector = typename BsplineSetT<T>::ValueVector;
+  using ValueMatrix = typename BsplineSetT<T>::ValueMatrix;
+
+  using vContainer_type  = Vector<RealType, aligned_allocator<RealType>>;
+  using gContainer_type  = VectorSoaContainer<RealType, 3>;
+  using hContainer_type  = VectorSoaContainer<RealType, 6>;
+  using ghContainer_type = VectorSoaContainer<RealType, 10>;
+
+public:
+  SplineC2CT(const std::string& my_name) : BsplineSetT<T>(my_name) {} // injected class name: template-id form is rejected by C++20
+  /// copy constructor, defined out of line
+  SplineC2CT(const SplineC2CT& in);
+  virtual std::string getClassName() const final { return "SplineC2C"; }
+  virtual std::string getKeyword() const final { return "SplineC2C"; }
+  bool isComplex() const final { return true; }
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const final { return std::make_unique<SplineC2CT<T>>(*this); }
+
+  bool isRotationSupported() const final { return true; }
+
+  /// Store an original copy of the spline coefficients for orbital rotation
+  void storeParamsBeforeRotation() final;
+
+  /*
+    Implements orbital rotations via [1,2].
+    Should be called by RotatedSPOs::apply_rotation()
+    This implementation requires that NSPOs > Nelec. In other words,
+    if you want to run a orbopt wfn, you must include some virtual orbitals!
+    Some results (using older Berkeley branch) were published in [3].
+    [1] Filippi & Fahy, JCP 112, (2000)
+    [2] Toulouse & Umrigar, JCP 126, (2007)
+    [3] Townsend et al., PRB 102, (2020)
+  */
+  void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) final;
+  /// size the intermediate buffers for n complex orbitals; nvals is not used here
+  inline void resizeStorage(size_t n, size_t nvals)
+  {
+    this->init_base(n);
+    size_t npad = getAlignedSize<RealType>(2 * n); // 2 reals per complex orbital, padded for the RealType containers
+    myV.resize(npad);
+    myG.resize(npad);
+    myL.resize(npad);
+    myH.resize(npad);
+    mygH.resize(npad);
+  }
+
+  void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); }
+  /// no-op for a single rank; otherwise gathers the spline table using per-group offsets
+  void gather_tables(Communicate* comm)
+  {
+    if (comm->size() == 1)
+      return;
+    const int Nbands      = this->kPoints.size();
+    const int Nbandgroups = comm->size();
+
+    auto& offset = this->offset;
+    offset.resize(Nbandgroups + 1, 0);
+    FairDivideLow(Nbands, Nbandgroups, offset);
+    for (size_t ib = 0; ib < offset.size(); ib++)
+      offset[ib] *= 2; // each band occupies two real splines (real and imaginary part)
+    gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, offset);
+  }
+  /// allocate SplineInst with myV.size() real splines; resizeStorage must have sized myV first
+  template<typename GT, typename BCT>
+  void create_spline(GT& xyz_g, BCT& xyz_bc)
+  {
+    resize_kpoints();
+    SplineInst = std::make_shared<MultiBspline<RealType>>(); // must match the declared type of SplineInst
+    SplineInst->create(xyz_g, xyz_bc, myV.size());
+    app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated "
+              << "for the coefficients in 3D spline orbital representation" << std::endl;
+  }
+
+  inline void flush_zero() { SplineInst->flush_zero(); }
+
+  /** remap kPoints to pack the double copy */
+  inline void resize_kpoints()
+  {
+    const auto& kPoints = this->kPoints;
+    const size_t nk     = kPoints.size();
+    mKK.resize(nk);
+    myKcart.resize(nk);
+    for (size_t i = 0; i < nk; ++i)
+    {
+      mKK[i]     = -dot(kPoints[i], kPoints[i]); // -|k|^2
+      myKcart(i) = kPoints[i];
+    }
+  }
+
+  void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level);
+
+  bool read_splines(hdf_archive& h5f);
+
+  bool write_splines(hdf_archive& h5f);
+
+  void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const;
+
+  void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override;
+
+  void evaluateDetRatios(const VirtualParticleSet& VP,
+                         ValueVector& psi,
+                         const ValueVector& psiinv,
+                         std::vector<ValueType>& ratios) override;
+
+  /** assign_vgl
+   */
+  void assign_vgl(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last)
+      const;
+
+  /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian
+   */
+  void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
+
+  void evaluateVGL(const ParticleSet& P,
+                   const int iat,
+                   ValueVector& psi,
+                   GradVector& dpsi,
+                   ValueVector& d2psi) override;
+
+  void assign_vgh(const PointType& r,
+                  ValueVector& psi,
+                  GradVector& dpsi,
+                  HessVector& grad_grad_psi,
+                  int first,
+                  int last) const;
+
+  void evaluateVGH(const ParticleSet& P,
+                   const int iat,
+                   ValueVector& psi,
+                   GradVector& dpsi,
+                   HessVector& grad_grad_psi) override;
+  /// with the default last = -1 the whole k-point range is processed
+  void assign_vghgh(const PointType& r,
+                    ValueVector& psi,
+                    GradVector& dpsi,
+                    HessVector& grad_grad_psi,
+                    GGGVector& grad_grad_grad_psi,
+                    int first = 0,
+                    int last  = -1) const;
+
+  void evaluateVGHGH(const ParticleSet& P,
+                     const int iat,
+                     ValueVector& psi,
+                     GradVector& dpsi,
+                     HessVector& grad_grad_psi,
+                     GGGVector& grad_grad_grad_psi) override;
+
+  template<class BSPLINESPO>
+  friend struct SplineSetReader;
+  friend struct BsplineReaderBase;
+
+protected:
+  /// intermediate result vectors
+  vContainer_type myV;
+  vContainer_type myL;
+  gContainer_type myG;
+  hContainer_type myH;
+  ghContainer_type mygH;
+
+private:
+  ///primitive cell
+  CrystalLattice<RealType, 3> PrimLattice;
+  ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian
+  Tensor<RealType, 3> GGt;
+  ///multi bspline set
+  std::shared_ptr<MultiBspline<RealType>> SplineInst;
+
+  ///Copy of original splines for orbital rotation
+  std::shared_ptr<std::vector<RealType>> coef_copy_;
+  ///per k-point -|k|^2 (mKK) and a copy of the k-points (myKcart), both filled by resize_kpoints
+  vContainer_type mKK;
+  VectorSoaContainer<RealType, 3> myKcart;
+
+  ///thread private ratios for reduction when using nested threading, numVP x numThread
+  Matrix<ComplexT> ratios_private;
+};
+
+extern template class SplineC2CT<std::complex<float>>;  // must name the same T as the explicit instantiation definition
+extern template class SplineC2CT<std::complex<double>>; // must name the same T as the explicit instantiation definition
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt
index 8a0611a6f5..72f20ee447 100644
--- a/src/QMCWaveFunctions/CMakeLists.txt
+++ b/src/QMCWaveFunctions/CMakeLists.txt
@@ -28,22 +28,27 @@ set(WFBASE_SRCS
     WaveFunctionComponent.cpp
     WaveFunctionComponentBuilder.cpp
     SPOSetBuilder.cpp
+    SPOSetBuilderT.cpp
     SPOInfo.cpp
     SPOSetInfo.cpp
     SPOSetInputInfo.cpp
     SPOSet.cpp
+    SPOSetT.cpp
     CompositeSPOSet.cpp
+    CompositeSPOSetT.cpp
     HarmonicOscillator/SHOSet.cpp
+    HarmonicOscillator/SHOSetT.cpp
     HarmonicOscillator/SHOSetBuilder.cpp
+    HarmonicOscillator/SHOSetBuilderT.cpp
     ExampleHeBuilder.cpp
     ExampleHeComponent.cpp)
 
 if(NOT QMC_COMPLEX)
-  set(WFBASE_SRCS ${WFBASE_SRCS} RotatedSPOs.cpp)
+  set(WFBASE_SRCS ${WFBASE_SRCS} RotatedSPOs.cpp RotatedSPOsT.cpp)
 endif(NOT QMC_COMPLEX)
 
 if(QMC_COMPLEX)
-  set(WFBASE_SRCS ${WFBASE_SRCS} SpinorSet.cpp)
+  set(WFBASE_SRCS ${WFBASE_SRCS} SpinorSet.cpp SpinorSetT.cpp)
 endif(QMC_COMPLEX)
 ########################
 # build jastrows
@@ -62,21 +67,21 @@ set(JASTROW_SRCS
 set(JASTROW_OMPTARGET_SRCS
     Jastrow/TwoBodyJastrow.cpp
     Jastrow/BsplineFunctor.cpp)
-set(FERMION_SRCS ${FERMION_SRCS} ElectronGas/FreeOrbital.cpp ElectronGas/FreeOrbitalBuilder.cpp)
+set(FERMION_SRCS ${FERMION_SRCS} ElectronGas/FreeOrbital.cpp ElectronGas/FreeOrbitalT.cpp ElectronGas/FreeOrbitalBuilder.cpp)
 
 # wavefunctions only availbale to 3-dim problems
 if(OHMMS_DIM MATCHES 3)
 
   set(JASTROW_SRCS ${JASTROW_SRCS} Jastrow/eeI_JastrowBuilder.cpp Jastrow/CountingJastrowBuilder.cpp)
 
-  set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOrbitalSet.cpp LCAO/LCAOrbitalBuilder.cpp LCAO/MultiQuinticSpline1D.cpp
+  set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOrbitalSet.cpp LCAO/LCAOrbitalSetT.cpp LCAO/LCAOrbitalBuilder.cpp LCAO/MultiQuinticSpline1D.cpp
                    LCAO/AOBasisBuilder.cpp LCAO/SoaLocalizedBasisSet.cpp)
   if(QMC_COMPLEX)
     set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOSpinorBuilder.cpp)
   else(QMC_COMPLEX)
     #LCAO cusp correction is not ready for complex
-    set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOrbitalSetWithCorrection.cpp
-                     LCAO/CuspCorrectionConstruction.cpp LCAO/SoaCuspCorrection.cpp)
+    set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOrbitalSetWithCorrection.cpp LCAO/LCAOrbitalSetWithCorrectionT.cpp
+                     LCAO/CuspCorrectionConstruction.cpp LCAO/SoaCuspCorrection.cpp LCAO/SoaCuspCorrectionT.cpp)
   endif(QMC_COMPLEX)
 
   if(HAVE_EINSPLINE)
@@ -98,7 +103,7 @@ if(OHMMS_DIM MATCHES 3)
         BsplineFactory/BsplineReaderBase.cpp)
     set(FERMION_OMPTARGET_SRCS Fermion/DiracDeterminantBatched.cpp Fermion/MultiDiracDeterminant.2.cpp)
     if(QMC_COMPLEX)
-      set(FERMION_SRCS ${FERMION_SRCS} EinsplineSpinorSetBuilder.cpp BsplineFactory/SplineC2C.cpp)
+      set(FERMION_SRCS ${FERMION_SRCS} EinsplineSpinorSetBuilder.cpp BsplineFactory/SplineC2C.cpp BsplineFactory/SplineC2CT.cpp)
       set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS} BsplineFactory/SplineC2COMPTarget.cpp)
     else(QMC_COMPLEX)
       set(FERMION_SRCS ${FERMION_SRCS} BsplineFactory/createRealSingle.cpp BsplineFactory/createRealDouble.cpp
@@ -109,11 +114,11 @@ if(OHMMS_DIM MATCHES 3)
   endif(HAVE_EINSPLINE)
 
   # plane wave SPO
-  set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWBasis.cpp PlaneWave/PWParameterSet.cpp PlaneWave/PWOrbitalBuilder.cpp)
+  set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWBasis.cpp PlaneWave/PWBasisT.cpp PlaneWave/PWParameterSet.cpp PlaneWave/PWOrbitalBuilder.cpp)
   if(QMC_COMPLEX)
-    set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWOrbitalSet.cpp)
+    set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWOrbitalSet.cpp PlaneWave/PWOrbitalSetT.cpp)
   else()
-    set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWRealOrbitalSet.cpp)
+    set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWRealOrbitalSet.cpp PlaneWave/PWRealOrbitalSetT.cpp)
   endif(QMC_COMPLEX)
 
   if(NOT QMC_COMPLEX)
diff --git a/src/QMCWaveFunctions/CompositeSPOSetT.cpp b/src/QMCWaveFunctions/CompositeSPOSetT.cpp
new file mode 100644
index 0000000000..1d635e8a41
--- /dev/null
+++ b/src/QMCWaveFunctions/CompositeSPOSetT.cpp
@@ -0,0 +1,193 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
+// Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
+//                    Illinois at Urbana-Champaign Mark A. Berrill,
+//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "CompositeSPOSetT.h"
+
+#include "OhmmsData/AttributeSet.h"
+#include "QMCWaveFunctions/SPOSetBuilderFactory.h"
+#include "Utilities/IteratorUtility.h"
+
+#include <algorithm>
+
+namespace qmcplusplus
+{
+namespace MatrixOperators
+{
+/** Copy every column of a narrow matrix into a wider one at a column offset.
+ * @param small input matrix of shape (N, M1)
+ * @param big output matrix of shape (N, M2), with M2 > M1
+ * @param offset_c index of the first destination column in big
+ *
+ * Columns [0,M1) of small land in columns [offset_c,offset_c+M1) of big.
+ * @todo smarter and more efficient matrix, move up for others
+ */
+template <typename MAT1, typename MAT2>
+inline void
+insert_columns(const MAT1& small, MAT2& big, int offset_c)
+{
+	const int ncols = small.cols();
+	for (int row = 0; row < small.rows(); ++row) {
+		// Row-wise copy: operator[] yields the start of a row.
+		auto src_begin = small[row];
+		auto dst_begin = big[row] + offset_c;
+		std::copy(src_begin, src_begin + ncols, dst_begin);
+	}
+}
+} // namespace MatrixOperators
+
+/// Build an empty composite; components are attached afterwards through add().
+template <typename T>
+CompositeSPOSetT<T>::CompositeSPOSetT(const std::string& my_name) :
+	SPOSetT<T>(my_name)
+{
+	// Pre-size the offset table for a handful of components.
+	component_offsets.reserve(4);
+	// No components yet, so the composite exposes no orbitals.
+	this->OrbitalSetSize = 0;
+}
+
+/** Copy constructor: rebuilds the composite from clones of other's components.
+ *
+ * Every component of other is cloned and registered through add(), which
+ * recreates the scratch vectors and the offset table for this instance.
+ */
+template <typename T>
+CompositeSPOSetT<T>::CompositeSPOSetT(const CompositeSPOSetT<T>& other) :
+	SPOSetT<T>(other)
+{
+	// add() accumulates into OrbitalSetSize. If the base-class copy carried
+	// over other's total, the size and every offset would be counted twice,
+	// so restart from zero before re-adding the clones.
+	this->OrbitalSetSize = 0;
+	component_offsets.reserve(other.component_offsets.size());
+	for (const auto& element : other.components) {
+		this->add(element->makeClone());
+	}
+}
+
+/// Defaulted out of line; the components are held through std::unique_ptr.
+template <typename T>
+CompositeSPOSetT<T>::~CompositeSPOSetT() = default;
+
+/** Take ownership of a component and append its orbitals to the composite.
+ *
+ * Creates the per-component scratch vectors (values, gradients, laplacians)
+ * sized to the component, and records the running orbital total as the next
+ * entry of component_offsets.
+ */
+template <typename T>
+void
+CompositeSPOSetT<T>::add(std::unique_ptr<SPOSetT<T>> component)
+{
+	// The offset table opens with 0 for the very first component.
+	if (components.empty())
+		component_offsets.push_back(0);
+
+	// Query the size while the pointer is still ours.
+	const int n_new = component->size();
+
+	component_values.emplace_back(n_new);
+	component_gradients.emplace_back(n_new);
+	component_laplacians.emplace_back(n_new);
+	components.push_back(std::move(component));
+
+	this->OrbitalSetSize += n_new;
+	component_offsets.push_back(this->OrbitalSetSize);
+}
+
+/// Write the component count and each component's basic report to app_log().
+template <typename T>
+void
+CompositeSPOSetT<T>::report()
+{
+	app_log() << "CompositeSPOSetT" << std::endl;
+	app_log() << "  ncomponents = " << components.size() << std::endl;
+	app_log() << "  components" << std::endl;
+	int index = 0;
+	for (const auto& component : components) {
+		app_log() << "    " << index << std::endl;
+		component->basic_report("      ");
+		++index;
+	}
+}
+
+/// Clone this composite through its copy constructor, which clones every
+/// component in turn.
+template <typename T>
+std::unique_ptr<SPOSetT<T>>
+CompositeSPOSetT<T>::makeClone() const
+{
+	return std::make_unique<CompositeSPOSetT<T>>(*this);
+}
+
+/** Evaluate all component orbitals for particle iat and concatenate them.
+ *
+ * Each component fills its own scratch vector, which is then copied into psi
+ * right after the values of the preceding components.
+ */
+template <typename T>
+void
+CompositeSPOSetT<T>::evaluateValue(
+	const ParticleSet& P, int iat, ValueVector& psi)
+{
+	int offset = 0; // first slot of psi owned by the current component
+	for (int idx = 0; idx < components.size(); ++idx) {
+		auto& spo = *components[idx];
+		auto& scratch = component_values[idx];
+		spo.evaluateValue(P, iat, scratch);
+		std::copy(scratch.begin(), scratch.end(), psi.begin() + offset);
+		offset += spo.size();
+	}
+}
+
+/** Evaluate value, gradient and laplacian of every component for particle iat.
+ *
+ * Results are produced in the per-component scratch vectors and copied into
+ * psi, dpsi and d2psi at the component's running offset.
+ */
+template <typename T>
+void
+CompositeSPOSetT<T>::evaluateVGL(const ParticleSet& P, int iat,
+	ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+{
+	int offset = 0; // first output slot owned by the current component
+	for (int idx = 0; idx < components.size(); ++idx) {
+		auto& spo = *components[idx];
+		auto& val = component_values[idx];
+		auto& grad = component_gradients[idx];
+		auto& lapl = component_laplacians[idx];
+
+		spo.evaluateVGL(P, iat, val, grad, lapl);
+
+		std::copy(val.begin(), val.end(), psi.begin() + offset);
+		std::copy(grad.begin(), grad.end(), dpsi.begin() + offset);
+		std::copy(lapl.begin(), lapl.end(), d2psi.begin() + offset);
+
+		offset += spo.size();
+	}
+}
+
+/** Fill value, gradient and laplacian matrices for particles [first,last).
+ *
+ * Every component is evaluated into temporary (nat x norb) matrices whose
+ * columns are then inserted into the outputs at the component's offset.
+ */
+template <typename T>
+void
+CompositeSPOSetT<T>::evaluate_notranspose(const ParticleSet& P, int first,
+	int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet)
+{
+	const int n_particles = last - first;
+	for (int idx = 0; idx < components.size(); ++idx) {
+		auto& spo = *components[idx];
+		const int n_orbitals = spo.size();
+
+		ValueMatrix values(n_particles, n_orbitals);
+		GradMatrix grads(n_particles, n_orbitals);
+		ValueMatrix lapls(n_particles, n_orbitals);
+		spo.evaluate_notranspose(P, first, last, values, grads, lapls);
+
+		const int column0 = component_offsets[idx];
+		MatrixOperators::insert_columns(values, logdet, column0);
+		MatrixOperators::insert_columns(grads, dlogdet, column0);
+		MatrixOperators::insert_columns(lapls, d2logdet, column0);
+	}
+}
+
+/** Fill value, gradient and hessian matrices for particles [first,last).
+ *
+ * Every component is evaluated into temporary (nat x norb) matrices whose
+ * columns are then inserted into the outputs at the component's offset.
+ */
+template <typename T>
+void
+CompositeSPOSetT<T>::evaluate_notranspose(const ParticleSet& P, int first,
+	int last, ValueMatrix& logdet, GradMatrix& dlogdet,
+	HessMatrix& grad_grad_logdet)
+{
+	const int n_particles = last - first;
+	for (int idx = 0; idx < components.size(); ++idx) {
+		auto& spo = *components[idx];
+		const int n_orbitals = spo.size();
+
+		ValueMatrix values(n_particles, n_orbitals);
+		GradMatrix grads(n_particles, n_orbitals);
+		HessMatrix hessians(n_particles, n_orbitals);
+		spo.evaluate_notranspose(P, first, last, values, grads, hessians);
+
+		const int column0 = component_offsets[idx];
+		MatrixOperators::insert_columns(values, logdet, column0);
+		MatrixOperators::insert_columns(grads, dlogdet, column0);
+		MatrixOperators::insert_columns(hessians, grad_grad_logdet, column0);
+	}
+}
+
+/// Third-derivative overload: not available for composites. Every call goes
+/// straight to not_implemented(); none of the arguments are read or written.
+template <typename T>
+void
+CompositeSPOSetT<T>::evaluate_notranspose(const ParticleSet& P, int first,
+	int last, ValueMatrix& logdet, GradMatrix& dlogdet,
+	HessMatrix& grad_grad_logdet, GGGMatrix& grad_grad_grad_logdet)
+{
+	not_implemented(
+		"evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet,dddlogdet)");
+}
+
+// Class concrete types from ValueType
+template class CompositeSPOSetT<double>;
+template class CompositeSPOSetT<float>;
+template class CompositeSPOSetT<std::complex<double>>;
+template class CompositeSPOSetT<std::complex<float>>;
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/CompositeSPOSetT.h b/src/QMCWaveFunctions/CompositeSPOSetT.h
new file mode 100644
index 0000000000..c8d156ac0c
--- /dev/null
+++ b/src/QMCWaveFunctions/CompositeSPOSetT.h
@@ -0,0 +1,112 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
+// Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
+//                    Illinois at Urbana-Champaign Mark A. Berrill,
+//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_COMPOSITE_SPOSETT_H
+#define QMCPLUSPLUS_COMPOSITE_SPOSETT_H
+
+#include "QMCWaveFunctions/BasisSetBase.h"
+#include "QMCWaveFunctions/SPOSetBuilder.h"
+#include "QMCWaveFunctions/SPOSetBuilderFactory.h"
+#include "QMCWaveFunctions/SPOSetT.h"
+
+namespace qmcplusplus
+{
+/** Single-particle orbital set assembled from several component SPO sets.
+ *
+ * Components are appended with add(); the composite then exposes their
+ * orbitals concatenated in insertion order.
+ *
+ * @tparam T value type forwarded to SPOSetT<T>
+ */
+template <typename T>
+class CompositeSPOSetT : public SPOSetT<T>
+{
+public:
+	using ValueVector = typename SPOSetT<T>::ValueVector;
+	using GradVector = typename SPOSetT<T>::GradVector;
+	using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+	using GradMatrix = typename SPOSetT<T>::GradMatrix;
+	using HessMatrix = typename SPOSetT<T>::HessMatrix;
+	using GGGMatrix = typename SPOSetT<T>::GGGMatrix;
+
+	/// component SPOSets (owned)
+	std::vector<std::unique_ptr<SPOSetT<T>>> components;
+	/// temporary storage for values, one vector per component
+	std::vector<ValueVector> component_values;
+	/// temporary storage for gradients, one vector per component
+	std::vector<GradVector> component_gradients;
+	/// temporary storage for laplacians, one vector per component
+	std::vector<ValueVector> component_laplacians;
+	/// precomputed orbital offsets: a leading 0, then the running total after
+	/// each added component
+	std::vector<int> component_offsets;
+
+	/// create an empty composite named my_name
+	CompositeSPOSetT(const std::string& my_name);
+	/**
+	 * @TODO: do we want template copy constructor
+	 * (i.e., copy from other with different type argument)?
+	 */
+	CompositeSPOSetT(const CompositeSPOSetT& other);
+	~CompositeSPOSetT() override;
+
+	/// class name reported for this SPO set type
+	std::string
+	getClassName() const override
+	{
+		return "CompositeSPOSetT";
+	}
+
+	/// add a sposet component to this composite sposet (takes ownership)
+	void
+	add(std::unique_ptr<SPOSetT<T>> component);
+
+	/// print out component info
+	void
+	report();
+
+	// SPOSet interface methods
+	/// size is determined by component sposets and nothing else, so the
+	/// requested norbs is ignored
+	inline void
+	setOrbitalSetSize(int norbs) override
+	{
+	}
+
+	/// clone the composite together with all of its components
+	std::unique_ptr<SPOSetT<T>>
+	makeClone() const override;
+
+	/// values of all component orbitals for particle iat, concatenated in psi
+	void
+	evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override;
+
+	/// values, gradients and laplacians of all component orbitals for
+	/// particle iat, concatenated in psi, dpsi and d2psi
+	void
+	evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi,
+		GradVector& dpsi, ValueVector& d2psi) override;
+
+	/// unimplemented functions call this to abort
+	inline void
+	not_implemented(const std::string& method)
+	{
+		APP_ABORT("CompositeSPOSetT::" + method + " has not been implemented");
+	}
+
+	// Matrix evaluations over particles [first,last). The laplacian and
+	// hessian overloads are defined in CompositeSPOSetT.cpp; the overload
+	// taking a GGGMatrix only calls not_implemented().
+	void
+	evaluate_notranspose(const ParticleSet& P, int first, int last,
+		ValueMatrix& logdet, GradMatrix& dlogdet,
+		ValueMatrix& d2logdet) override;
+	void
+	evaluate_notranspose(const ParticleSet& P, int first, int last,
+		ValueMatrix& logdet, GradMatrix& dlogdet,
+		HessMatrix& ddlogdet) override;
+	void
+	evaluate_notranspose(const ParticleSet& P, int first, int last,
+		ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& ddlogdet,
+		GGGMatrix& dddlogdet) override;
+};
+
+} // namespace qmcplusplus
+
+#endif
diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp
new file mode 100644
index 0000000000..82428ebfe1
--- /dev/null
+++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp
@@ -0,0 +1,714 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2022 QMCPACK developers.
+//
+// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Yubo "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron
+//                    William F Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "FreeOrbitalT.h"
+
+namespace qmcplusplus
+{
+
+
+/// Primary template of evaluateVGL with an empty body. The explicit
+/// specializations that follow in this file provide the implementations for
+/// float, double, std::complex<float> and std::complex<double>.
+/// NOTE(review): for any other T this definition leaves the outputs untouched.
+template<class T>
+void FreeOrbitalT<T>::evaluateVGL(const ParticleSet& P,
+                                  int iat,
+                                  ValueVector& pvec,
+                                  GradVector& dpvec,
+                                  ValueVector& d2pvec)
+{}
+
+/** Value, gradient and laplacian of the real (float) orbitals for particle iat.
+ *
+ * For each k-vector ik in [mink,maxk), slot 2*ik-1 receives the cos(k.r)
+ * orbital and slot 2*ik the sin(k.r) orbital. Slot 0 is written last as the
+ * constant orbital (value 1, zero gradient and laplacian).
+ */
+template<>
+void FreeOrbitalT<float>::evaluateVGL(const ParticleSet& P,
+                                      int iat,
+                                      ValueVector& pvec,
+                                      GradVector& dpvec,
+                                      ValueVector& d2pvec)
+{
+  const PosType& pos = P.activeR(iat);
+  RealType s, c;
+  for (int ik = mink; ik < maxk; ik++)
+  {
+    const auto& kvec = kvecs[ik];
+    sincos(dot(kvec, pos), &s, &c);
+    const int sin_slot = 2 * ik;
+    const int cos_slot = sin_slot - 1;
+    // values
+    pvec[cos_slot] = c;
+    pvec[sin_slot] = s;
+    // gradients
+    dpvec[cos_slot] = -s * kvec;
+    dpvec[sin_slot] = c * kvec;
+    // laplacians; k2neg presumably holds -|k|^2 per k-vector (not visible here)
+    d2pvec[cos_slot] = k2neg[ik] * c;
+    d2pvec[sin_slot] = k2neg[ik] * s;
+  }
+  // constant orbital
+  pvec[0]   = 1.0;
+  dpvec[0]  = 0.0;
+  d2pvec[0] = 0.0;
+}
+
+/** Value, gradient and laplacian of the real (double) orbitals for particle iat.
+ *
+ * For each k-vector ik in [mink,maxk), slot 2*ik-1 receives the cos(k.r)
+ * orbital and slot 2*ik the sin(k.r) orbital. Slot 0 is written last as the
+ * constant orbital (value 1, zero gradient and laplacian).
+ */
+template<>
+void FreeOrbitalT<double>::evaluateVGL(const ParticleSet& P,
+                                       int iat,
+                                       ValueVector& pvec,
+                                       GradVector& dpvec,
+                                       ValueVector& d2pvec)
+{
+  const PosType& pos = P.activeR(iat);
+  RealType s, c;
+  for (int ik = mink; ik < maxk; ik++)
+  {
+    const auto& kvec = kvecs[ik];
+    sincos(dot(kvec, pos), &s, &c);
+    const int sin_slot = 2 * ik;
+    const int cos_slot = sin_slot - 1;
+    // values
+    pvec[cos_slot] = c;
+    pvec[sin_slot] = s;
+    // gradients
+    dpvec[cos_slot] = -s * kvec;
+    dpvec[sin_slot] = c * kvec;
+    // laplacians; k2neg presumably holds -|k|^2 per k-vector (not visible here)
+    d2pvec[cos_slot] = k2neg[ik] * c;
+    d2pvec[sin_slot] = k2neg[ik] * s;
+  }
+  // constant orbital
+  pvec[0]   = 1.0;
+  dpvec[0]  = 0.0;
+  d2pvec[0] = 0.0;
+}
+
+
+/** Value, gradient and laplacian of the complex (single precision) orbitals.
+ *
+ * Orbital ik is cos(k.r) + i sin(k.r); each k-vector in [mink,maxk) owns
+ * exactly one slot, indexed by ik.
+ */
+template<>
+void FreeOrbitalT<std::complex<float>>::evaluateVGL(const ParticleSet& P,
+                                                    int iat,
+                                                    ValueVector& pvec,
+                                                    GradVector& dpvec,
+                                                    ValueVector& d2pvec)
+{
+  const PosType& pos = P.activeR(iat);
+  RealType s, c;
+  for (int ik = mink; ik < maxk; ik++)
+  {
+    const auto& kvec = kvecs[ik];
+    sincos(dot(kvec, pos), &s, &c);
+
+    // value: exp(i k.r)
+    pvec[ik] = ValueType(c, s);
+    // gradient: i * exp(i k.r) * k
+    dpvec[ik] = ValueType(-s, c) * kvec;
+    // laplacian: k2neg[ik] * exp(i k.r)
+    d2pvec[ik] = ValueType(k2neg[ik] * c, k2neg[ik] * s);
+  }
+}
+
+/** Value, gradient and laplacian of the complex (double precision) orbitals.
+ *
+ * Orbital ik is cos(k.r) + i sin(k.r); each k-vector in [mink,maxk) owns
+ * exactly one slot, indexed by ik.
+ */
+template<>
+void FreeOrbitalT<std::complex<double>>::evaluateVGL(const ParticleSet& P,
+                                                     int iat,
+                                                     ValueVector& pvec,
+                                                     GradVector& dpvec,
+                                                     ValueVector& d2pvec)
+{
+  const PosType& pos = P.activeR(iat);
+  RealType s, c;
+  for (int ik = mink; ik < maxk; ik++)
+  {
+    const auto& kvec = kvecs[ik];
+    sincos(dot(kvec, pos), &s, &c);
+
+    // value: exp(i k.r)
+    pvec[ik] = ValueType(c, s);
+    // gradient: i * exp(i k.r) * k
+    dpvec[ik] = ValueType(-s, c) * kvec;
+    // laplacian: k2neg[ik] * exp(i k.r)
+    d2pvec[ik] = ValueType(k2neg[ik] * c, k2neg[ik] * s);
+  }
+}
+
+
+/** Values of the real (float) orbitals for particle iat.
+ *
+ * Slot 2*ik-1 receives cos(k.r) and slot 2*ik receives sin(k.r) for every
+ * k-vector in [mink,maxk); slot 0 is set to 1 afterwards.
+ */
+template<>
+void FreeOrbitalT<float>::evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec)
+{
+  const PosType& pos = P.activeR(iat);
+  RealType s, c;
+  for (int ik = mink; ik < maxk; ik++)
+  {
+    sincos(dot(kvecs[ik], pos), &s, &c);
+    const int sin_slot = 2 * ik;
+    const int cos_slot = sin_slot - 1;
+    pvec[cos_slot]     = c;
+    pvec[sin_slot]     = s;
+  }
+  // constant orbital
+  pvec[0] = 1.0;
+}
+
+/** Values of the real (double) orbitals for particle iat.
+ *
+ * Slot 2*ik-1 receives cos(k.r) and slot 2*ik receives sin(k.r) for every
+ * k-vector in [mink,maxk); slot 0 is set to 1 afterwards.
+ */
+template<>
+void FreeOrbitalT<double>::evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec)
+{
+  const PosType& pos = P.activeR(iat);
+  RealType s, c;
+  for (int ik = mink; ik < maxk; ik++)
+  {
+    sincos(dot(kvecs[ik], pos), &s, &c);
+    const int sin_slot = 2 * ik;
+    const int cos_slot = sin_slot - 1;
+    pvec[cos_slot]     = c;
+    pvec[sin_slot]     = s;
+  }
+  // constant orbital
+  pvec[0] = 1.0;
+}
+
+/** Values of the complex (single precision) orbitals for particle iat.
+ *
+ * Orbital ik is cos(k.r) + i sin(k.r), stored in pvec[ik]. This is the same
+ * one-slot-per-k-vector layout the complex evaluateVGL specialization uses.
+ */
+template<>
+void FreeOrbitalT<std::complex<float>>::evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec)
+{
+  const PosType& r = P.activeR(iat);
+  RealType sinkr, coskr;
+  for (int ik = mink; ik < maxk; ik++)
+  {
+    sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+
+    // Exactly one complex entry per k-vector. Writing the cos/sin pair to
+    // slots 2*ik-1 and 2*ik is the real-valued layout; doing so here would
+    // overwrite other complex orbitals and index outside the ik range.
+    pvec[ik] = std::complex<float>(coskr, sinkr);
+  }
+}
+
+/** Values of the complex (double precision) orbitals for particle iat.
+ *
+ * Orbital ik is cos(k.r) + i sin(k.r), stored in pvec[ik]. This is the same
+ * one-slot-per-k-vector layout the complex evaluateVGL specialization uses.
+ */
+template<>
+void FreeOrbitalT<std::complex<double>>::evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec)
+{
+  const PosType& r = P.activeR(iat);
+  RealType sinkr, coskr;
+  for (int ik = mink; ik < maxk; ik++)
+  {
+    sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+
+    // Exactly one complex entry per k-vector. Writing the cos/sin pair to
+    // slots 2*ik-1 and 2*ik is the real-valued layout; doing so here would
+    // overwrite other complex orbitals and index outside the ik range.
+    pvec[ik] = std::complex<double>(coskr, sinkr);
+  }
+}
+
+/// Primary template of the value/gradient/hessian evaluate_notranspose with
+/// an empty body. The explicit specializations that follow in this file
+/// provide the implementations for float, double, std::complex<float> and
+/// std::complex<double>.
+/// NOTE(review): for any other T this definition leaves the outputs untouched.
+template<class T>
+void FreeOrbitalT<T>::evaluate_notranspose(const ParticleSet& P,
+                                           int first,
+                                           int last,
+                                           ValueMatrix& phi,
+                                           GradMatrix& dphi,
+                                           HessMatrix& d2phi_mat)
+{}
+
+
+/** Values, gradients and hessians of the real (float) orbitals for particles
+ *  [first,last).
+ *
+ * Row i of each output matrix corresponds to particle first+i. Within a row,
+ * slot 2*ik-1 holds the cos(k.r) orbital and slot 2*ik the sin(k.r) orbital;
+ * slot 0 is written last as the constant orbital.
+ */
+template<>
+void FreeOrbitalT<float>::evaluate_notranspose(const ParticleSet& P,
+                                               int first,
+                                               int last,
+                                               ValueMatrix& phi,
+                                               GradMatrix& dphi,
+                                               HessMatrix& d2phi_mat)
+{
+  RealType sinkr, coskr;
+  for (int iat = first, i = 0; iat < last; iat++, i++)
+  {
+    // Vectors constructed over row i of each output matrix.
+    ValueVector p(phi[i], this->OrbitalSetSize);
+    GradVector dp(dphi[i], this->OrbitalSetSize);
+    HessVector hess(d2phi_mat[i], this->OrbitalSetSize);
+
+    const PosType& r = P.activeR(iat);
+    for (int ik = mink; ik < maxk; ik++)
+    {
+      const auto& kvec = kvecs[ik];
+      sincos(dot(kvec, r), &sinkr, &coskr);
+      const int j2 = 2 * ik;
+      const int j1 = j2 - 1;
+      p[j1]        = coskr;
+      p[j2]        = sinkr;
+      dp[j1]       = -sinkr * kvec;
+      dp[j2]       = coskr * kvec;
+      // Hessian element (a,b) is -k_a k_b times the orbital; compute the
+      // upper triangle and mirror it.
+      for (int la = 0; la < OHMMS_DIM; la++)
+      {
+        hess[j1](la, la) = -coskr * kvec[la] * kvec[la];
+        hess[j2](la, la) = -sinkr * kvec[la] * kvec[la];
+        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
+        {
+          hess[j1](la, lb) = -coskr * kvec[la] * kvec[lb];
+          hess[j2](la, lb) = -sinkr * kvec[la] * kvec[lb];
+          hess[j1](lb, la) = hess[j1](la, lb);
+          hess[j2](lb, la) = hess[j2](la, lb);
+        }
+      }
+    }
+    // constant orbital
+    p[0]    = 1.0;
+    dp[0]   = 0.0;
+    hess[0] = 0.0;
+  }
+}
+
+/** Values, gradients and hessians of the real (double) orbitals for particles
+ *  [first,last).
+ *
+ * Row i of each output matrix corresponds to particle first+i. Within a row,
+ * slot 2*ik-1 holds the cos(k.r) orbital and slot 2*ik the sin(k.r) orbital;
+ * slot 0 is written last as the constant orbital.
+ */
+template<>
+void FreeOrbitalT<double>::evaluate_notranspose(const ParticleSet& P,
+                                                int first,
+                                                int last,
+                                                ValueMatrix& phi,
+                                                GradMatrix& dphi,
+                                                HessMatrix& d2phi_mat)
+{
+  RealType sinkr, coskr;
+  for (int iat = first, i = 0; iat < last; iat++, i++)
+  {
+    // Vectors constructed over row i of each output matrix.
+    ValueVector p(phi[i], this->OrbitalSetSize);
+    GradVector dp(dphi[i], this->OrbitalSetSize);
+    HessVector hess(d2phi_mat[i], this->OrbitalSetSize);
+
+    const PosType& r = P.activeR(iat);
+    for (int ik = mink; ik < maxk; ik++)
+    {
+      const auto& kvec = kvecs[ik];
+      sincos(dot(kvec, r), &sinkr, &coskr);
+      const int j2 = 2 * ik;
+      const int j1 = j2 - 1;
+      p[j1]        = coskr;
+      p[j2]        = sinkr;
+      dp[j1]       = -sinkr * kvec;
+      dp[j2]       = coskr * kvec;
+      // Hessian element (a,b) is -k_a k_b times the orbital; compute the
+      // upper triangle and mirror it.
+      for (int la = 0; la < OHMMS_DIM; la++)
+      {
+        hess[j1](la, la) = -coskr * kvec[la] * kvec[la];
+        hess[j2](la, la) = -sinkr * kvec[la] * kvec[la];
+        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
+        {
+          hess[j1](la, lb) = -coskr * kvec[la] * kvec[lb];
+          hess[j2](la, lb) = -sinkr * kvec[la] * kvec[lb];
+          hess[j1](lb, la) = hess[j1](la, lb);
+          hess[j2](lb, la) = hess[j2](la, lb);
+        }
+      }
+    }
+    // constant orbital
+    p[0]    = 1.0;
+    dp[0]   = 0.0;
+    hess[0] = 0.0;
+  }
+}
+
+
+/** Values, gradients and hessians of the complex (single precision) orbitals
+ *  for particles [first,last).
+ *
+ * Row i of each output matrix corresponds to particle first+i; orbital ik is
+ * cos(k.r) + i sin(k.r) and occupies slot ik.
+ */
+template<>
+void FreeOrbitalT<std::complex<float>>::evaluate_notranspose(const ParticleSet& P,
+                                                             int first,
+                                                             int last,
+                                                             ValueMatrix& phi,
+                                                             GradMatrix& dphi,
+                                                             HessMatrix& d2phi_mat)
+{
+  RealType s, c;
+  for (int iat = first, row = 0; iat < last; iat++, row++)
+  {
+    ValueVector values(phi[row], this->OrbitalSetSize);
+    GradVector grads(dphi[row], this->OrbitalSetSize);
+    HessVector hessians(d2phi_mat[row], this->OrbitalSetSize);
+
+    const PosType& pos = P.activeR(iat);
+    for (int ik = mink; ik < maxk; ik++)
+    {
+      const auto& kvec = kvecs[ik];
+      sincos(dot(kvec, pos), &s, &c);
+
+      const std::complex<float> phase(c, s);
+      values[ik] = phase;
+      grads[ik]  = std::complex<float>(-s, c) * kvec;
+
+      // Hessian element (a,b) is -k_a k_b * phase; fill the upper triangle
+      // and mirror it.
+      auto& h = hessians[ik];
+      for (int a = 0; a < OHMMS_DIM; a++)
+      {
+        h(a, a) = -phase * kvec[a] * kvec[a];
+        for (int b = a + 1; b < OHMMS_DIM; b++)
+        {
+          h(a, b) = -phase * kvec[a] * kvec[b];
+          h(b, a) = h(a, b);
+        }
+      }
+    }
+  }
+}
+
+/** Values, gradients and hessians of the complex (double precision) orbitals
+ *  for particles [first,last).
+ *
+ * Row i of each output matrix corresponds to particle first+i; orbital ik is
+ * cos(k.r) + i sin(k.r) and occupies slot ik.
+ */
+template<>
+void FreeOrbitalT<std::complex<double>>::evaluate_notranspose(const ParticleSet& P,
+                                                              int first,
+                                                              int last,
+                                                              ValueMatrix& phi,
+                                                              GradMatrix& dphi,
+                                                              HessMatrix& d2phi_mat)
+{
+  RealType s, c;
+  for (int iat = first, row = 0; iat < last; iat++, row++)
+  {
+    ValueVector values(phi[row], this->OrbitalSetSize);
+    GradVector grads(dphi[row], this->OrbitalSetSize);
+    HessVector hessians(d2phi_mat[row], this->OrbitalSetSize);
+
+    const PosType& pos = P.activeR(iat);
+    for (int ik = mink; ik < maxk; ik++)
+    {
+      const auto& kvec = kvecs[ik];
+      sincos(dot(kvec, pos), &s, &c);
+
+      const std::complex<double> phase(c, s);
+      values[ik] = phase;
+      grads[ik]  = std::complex<double>(-s, c) * kvec;
+
+      // Hessian element (a,b) is -k_a k_b * phase; fill the upper triangle
+      // and mirror it.
+      auto& h = hessians[ik];
+      for (int a = 0; a < OHMMS_DIM; a++)
+      {
+        h(a, a) = -phase * kvec[a] * kvec[a];
+        for (int b = a + 1; b < OHMMS_DIM; b++)
+        {
+          h(a, b) = -phase * kvec[a] * kvec[b];
+          h(b, a) = h(a, b);
+        }
+      }
+    }
+  }
+}
+
+/// Primary template of the value/gradient/hessian/third-derivative
+/// evaluate_notranspose with an empty body. The explicit specializations that
+/// follow in this file provide the implementations for the supported types.
+/// NOTE(review): for any other T this definition leaves the outputs untouched.
+template<class T>
+void FreeOrbitalT<T>::evaluate_notranspose(const ParticleSet& P,
+                                           int first,
+                                           int last,
+                                           ValueMatrix& phi,
+                                           GradMatrix& dphi,
+                                           HessMatrix& d2phi_mat,
+                                           GGGMatrix& d3phi_mat)
+{}
+
+/** Values, gradients, hessians and third derivatives of the real (float)
+ *  orbitals for particles [first,last).
+ *
+ * Row i of each output matrix corresponds to particle first+i. Within a row,
+ * slot 2*ik-1 holds the cos(k.r) orbital and slot 2*ik the sin(k.r) orbital;
+ * slot 0 is written last as the constant orbital with all derivatives zero.
+ * The third-derivative tensor is symmetric in its three indices, so each
+ * distinct index combination is computed once and copied to its permutations.
+ */
+template<>
+void FreeOrbitalT<float>::evaluate_notranspose(const ParticleSet& P,
+                                               int first,
+                                               int last,
+                                               ValueMatrix& phi,
+                                               GradMatrix& dphi,
+                                               HessMatrix& d2phi_mat,
+                                               GGGMatrix& d3phi_mat)
+{
+  RealType sinkr, coskr;
+  // NOTE(review): phi_of_r is never used in this specialization.
+  ValueType phi_of_r;
+  for (int iat = first, i = 0; iat < last; iat++, i++)
+  {
+    // Vectors constructed over row i of each output matrix.
+    ValueVector p(phi[i], OrbitalSetSize);
+    GradVector dp(dphi[i], OrbitalSetSize);
+    HessVector hess(d2phi_mat[i], OrbitalSetSize);
+    GGGVector ggg(d3phi_mat[i], OrbitalSetSize);
+
+    const PosType& r = P.activeR(iat);
+    for (int ik = mink; ik < maxk; ik++)
+    {
+      sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+      // j1: cos orbital slot, j2: sin orbital slot
+      const int j2 = 2 * ik;
+      const int j1 = j2 - 1;
+      p[j1]        = coskr;
+      p[j2]        = sinkr;
+      dp[j1]       = -sinkr * kvecs[ik];
+      dp[j2]       = coskr * kvecs[ik];
+      for (int la = 0; la < OHMMS_DIM; la++)
+      {
+        // all three indices equal (la,la,la)
+        hess[j1](la, la)    = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la];
+        hess[j2](la, la)    = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la];
+        ggg[j1][la](la, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la];
+        ggg[j2][la](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la];
+        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
+        {
+          hess[j1](la, lb)    = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb];
+          hess[j2](la, lb)    = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb];
+          hess[j1](lb, la)    = hess[j1](la, lb);
+          hess[j2](lb, la)    = hess[j2](la, lb);
+          // index set {la,la,lb} and its permutations
+          ggg[j1][la](lb, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la];
+          ggg[j2][la](lb, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la];
+          ggg[j1][la](la, lb) = ggg[j1][la](lb, la);
+          ggg[j2][la](la, lb) = ggg[j2][la](lb, la);
+          ggg[j1][lb](la, la) = ggg[j1][la](lb, la);
+          ggg[j2][lb](la, la) = ggg[j2][la](lb, la);
+          // index set {la,lb,lb} and its permutations
+          ggg[j1][la](lb, lb) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb];
+          ggg[j2][la](lb, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb];
+          ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb);
+          ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb);
+          ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb);
+          ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb);
+          for (int lc = lb + 1; lc < OHMMS_DIM; lc++)
+          {
+            // all three indices distinct: compute once, copy to the other five
+            ggg[j1][la](lb, lc) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc];
+            ggg[j2][la](lb, lc) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc];
+            ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc);
+            ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc);
+            ggg[j1][lb](la, lc) = ggg[j1][la](lb, lc);
+            ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc);
+            ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc);
+            ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc);
+            ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc);
+            ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc);
+            ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc);
+            ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc);
+          }
+        }
+      }
+    }
+
+    // constant orbital
+    p[0]    = 1.0;
+    dp[0]   = 0.0;
+    hess[0] = 0.0;
+    ggg[0]  = 0.0;
+  }
+}
+
+/** Values, gradients, hessians and third derivatives of the real (double)
+ *  orbitals for particles [first,last).
+ *
+ * Row i of each output matrix corresponds to particle first+i. Within a row,
+ * slot 2*ik-1 holds the cos(k.r) orbital and slot 2*ik the sin(k.r) orbital;
+ * slot 0 is written last as the constant orbital with all derivatives zero.
+ * The third-derivative tensor is symmetric in its three indices, so each
+ * distinct index combination is computed once and copied to its permutations.
+ */
+template<>
+void FreeOrbitalT<double>::evaluate_notranspose(const ParticleSet& P,
+                                                int first,
+                                                int last,
+                                                ValueMatrix& phi,
+                                                GradMatrix& dphi,
+                                                HessMatrix& d2phi_mat,
+                                                GGGMatrix& d3phi_mat)
+{
+  RealType sinkr, coskr;
+  // NOTE(review): phi_of_r is never used in this specialization.
+  ValueType phi_of_r;
+  for (int iat = first, i = 0; iat < last; iat++, i++)
+  {
+    // Vectors constructed over row i of each output matrix.
+    ValueVector p(phi[i], OrbitalSetSize);
+    GradVector dp(dphi[i], OrbitalSetSize);
+    HessVector hess(d2phi_mat[i], OrbitalSetSize);
+    GGGVector ggg(d3phi_mat[i], OrbitalSetSize);
+
+    const PosType& r = P.activeR(iat);
+    for (int ik = mink; ik < maxk; ik++)
+    {
+      sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+      // j1: cos orbital slot, j2: sin orbital slot
+      const int j2 = 2 * ik;
+      const int j1 = j2 - 1;
+      p[j1]        = coskr;
+      p[j2]        = sinkr;
+      dp[j1]       = -sinkr * kvecs[ik];
+      dp[j2]       = coskr * kvecs[ik];
+      for (int la = 0; la < OHMMS_DIM; la++)
+      {
+        // all three indices equal (la,la,la)
+        hess[j1](la, la)    = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la];
+        hess[j2](la, la)    = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la];
+        ggg[j1][la](la, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la];
+        ggg[j2][la](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la];
+        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
+        {
+          hess[j1](la, lb)    = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb];
+          hess[j2](la, lb)    = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb];
+          hess[j1](lb, la)    = hess[j1](la, lb);
+          hess[j2](lb, la)    = hess[j2](la, lb);
+          // index set {la,la,lb} and its permutations
+          ggg[j1][la](lb, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la];
+          ggg[j2][la](lb, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la];
+          ggg[j1][la](la, lb) = ggg[j1][la](lb, la);
+          ggg[j2][la](la, lb) = ggg[j2][la](lb, la);
+          ggg[j1][lb](la, la) = ggg[j1][la](lb, la);
+          ggg[j2][lb](la, la) = ggg[j2][la](lb, la);
+          // index set {la,lb,lb} and its permutations
+          ggg[j1][la](lb, lb) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb];
+          ggg[j2][la](lb, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb];
+          ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb);
+          ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb);
+          ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb);
+          ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb);
+          for (int lc = lb + 1; lc < OHMMS_DIM; lc++)
+          {
+            // all three indices distinct: compute once, copy to the other five
+            ggg[j1][la](lb, lc) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc];
+            ggg[j2][la](lb, lc) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc];
+            ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc);
+            ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc);
+            ggg[j1][lb](la, lc) = ggg[j1][la](lb, lc);
+            ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc);
+            ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc);
+            ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc);
+            ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc);
+            ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc);
+            ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc);
+            ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc);
+          }
+        }
+      }
+    }
+
+    // constant orbital
+    p[0]    = 1.0;
+    dp[0]   = 0.0;
+    hess[0] = 0.0;
+    ggg[0]  = 0.0;
+  }
+}
+
+/** Values, gradients, hessians and third derivatives of the complex (single
+ *  precision) orbitals for particles [first,last).
+ *
+ * Row i of each output matrix corresponds to particle first+i; orbital ik is
+ * cos(k.r) + i sin(k.r) and occupies slot ik. Each further derivative
+ * multiplies by i times a k-vector component.
+ */
+template<>
+void FreeOrbitalT<std::complex<float>>::evaluate_notranspose(const ParticleSet& P,
+                                                             int first,
+                                                             int last,
+                                                             ValueMatrix& phi,
+                                                             GradMatrix& dphi,
+                                                             HessMatrix& d2phi_mat,
+                                                             GGGMatrix& d3phi_mat)
+{
+  RealType s, c;
+  const ValueType imag_unit(0, 1);
+  for (int iat = first, row = 0; iat < last; iat++, row++)
+  {
+    ValueVector values(phi[row], OrbitalSetSize);
+    GradVector grads(dphi[row], OrbitalSetSize);
+    HessVector hessians(d2phi_mat[row], OrbitalSetSize);
+    GGGVector thirds(d3phi_mat[row], OrbitalSetSize);
+
+    const PosType& pos = P.activeR(iat);
+    for (int ik = mink; ik < maxk; ik++)
+    {
+      const auto& kvec = kvecs[ik];
+      sincos(dot(kvec, pos), &s, &c);
+
+      const ValueType phase(c, s);
+      values[ik] = phase;
+      grads[ik]  = imag_unit * phase * kvec;
+
+      // Hessian element (a,b) is -k_a k_b * phase; fill the upper triangle
+      // and mirror it.
+      auto& h = hessians[ik];
+      for (int a = 0; a < OHMMS_DIM; a++)
+      {
+        h(a, a) = -phase * kvec[a] * kvec[a];
+        for (int b = a + 1; b < OHMMS_DIM; b++)
+        {
+          h(a, b) = -phase * kvec[a] * kvec[b];
+          h(b, a) = h(a, b);
+        }
+      }
+
+      // Needs the finished hessian, hence the separate pass over a.
+      for (int a = 0; a < OHMMS_DIM; a++)
+      {
+        thirds[ik][a] = imag_unit * kvec[a] * h;
+      }
+    }
+  }
+}
+
+template<> // complex<double> specialization: fills values and first, second and third derivatives of exp(i k.r)
+void FreeOrbitalT<std::complex<double>>::evaluate_notranspose(const ParticleSet& P,
+                                                              int first,
+                                                              int last,
+                                                              ValueMatrix& phi,
+                                                              GradMatrix& dphi,
+                                                              HessMatrix& d2phi_mat,
+                                                              GGGMatrix& d3phi_mat)
+{
+  RealType sinkr, coskr; // sin(k.r) and cos(k.r) of the k-vector being processed
+  ValueType phi_of_r;
+  for (int iat = first, i = 0; iat < last; iat++, i++) // particle iat fills output row i
+  {
+    ValueVector p(phi[i], OrbitalSetSize);
+    GradVector dp(dphi[i], OrbitalSetSize);
+    HessVector hess(d2phi_mat[i], OrbitalSetSize);
+    GGGVector ggg(d3phi_mat[i], OrbitalSetSize);
+    // Orbital ik is the plane wave exp(i k.r) evaluated at this particle's active position.
+    const PosType& r = P.activeR(iat);
+    for (int ik = mink; ik < maxk; ik++)
+    {
+      sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+      const ValueType compi(0, 1); // imaginary unit
+      phi_of_r = ValueType(coskr, sinkr);
+      p[ik]    = phi_of_r;
+      dp[ik]   = compi * phi_of_r * kvecs[ik]; // gradient: i * phi * k
+      for (int la = 0; la < OHMMS_DIM; la++)
+      {
+        hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; // Hessian entry: -phi * k_a * k_b
+        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
+        {
+          hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb];
+          hess[ik](lb, la) = hess[ik](la, lb); // mirror into the lower triangle
+        }
+      }
+      for (int la = 0; la < OHMMS_DIM; la++)
+      {
+        ggg[ik][la] = compi * (kvecs[ik])[la] * hess[ik]; // third derivatives: i * k_la * Hessian
+      }
+    }
+  }
+}
+
+// generic implementation
+
+// Nothing to release by hand: the members and the base class clean up after themselves.
+template<class T>
+FreeOrbitalT<T>::~FreeOrbitalT() = default;
+
+template<class T>
+void FreeOrbitalT<T>::evaluate_notranspose(const ParticleSet& P,
+                                           int first,
+                                           int last,
+                                           ValueMatrix& phi,
+                                           GradMatrix& dphi,
+                                           ValueMatrix& d2phi)
+{
+  const auto norb = this->OrbitalSetSize;
+  for (int row = 0; first + row < last; ++row) // output row i <-> particle first + i
+  {
+    ValueVector values(phi[row], norb), laplacians(d2phi[row], norb);
+    GradVector grads(dphi[row], norb);
+    evaluateVGL(P, first + row, values, grads, laplacians);
+  }
+}
+
+//Explicit template specialization
+template<>
+FreeOrbitalT<float>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
+    : SPOSetT<float>(my_name),
+      kvecs(kpts_cart),
+      mink(1), // index 0 (k=0) is handled apart from the paired orbitals
+      maxk(kpts_cart.size()),
+      k2neg(maxk)
+{
+  for (int k = 0; k < maxk; ++k) // cache -|k|^2 for every k-vector, k=0 included
+    k2neg[k] = -dot(kvecs[k], kvecs[k]);
+  this->OrbitalSetSize = 2 * maxk - 1; // SPOSet member: one k=0 orbital, no (cos, sin) split for it
+}
+
+template<>
+FreeOrbitalT<double>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
+    : SPOSetT<double>(my_name),
+      kvecs(kpts_cart),
+      mink(1), // index 0 (k=0) is handled apart from the paired orbitals
+      maxk(kpts_cart.size()),
+      k2neg(maxk)
+{
+  for (int k = 0; k < maxk; ++k) // cache -|k|^2 for every k-vector, k=0 included
+    k2neg[k] = -dot(kvecs[k], kvecs[k]);
+  this->OrbitalSetSize = 2 * maxk - 1; // SPOSet member: one k=0 orbital, no (cos, sin) split for it
+}
+
+template<>
+FreeOrbitalT<std::complex<float>>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
+    : SPOSetT<std::complex<float>>(my_name),
+      kvecs(kpts_cart),
+      mink(0), // complex orbitals start at index 0: every k-vector, k=0 included, maps to one orbital
+      maxk(kpts_cart.size()),
+      k2neg(maxk)
+{
+  this->OrbitalSetSize = maxk; // SPOSet member; one orbital per k-vector
+  for (int ik = 0; ik < maxk; ik++)
+    k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); // cache -|k|^2
+}
+
+template<>
+FreeOrbitalT<std::complex<double>>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
+    : SPOSetT<std::complex<double>>(my_name),
+      kvecs(kpts_cart),
+      mink(0), // complex orbitals start at index 0: every k-vector, k=0 included, maps to one orbital
+      maxk(kpts_cart.size()),
+      k2neg(maxk)
+{
+  this->OrbitalSetSize = maxk; // SPOSet member; one orbital per k-vector
+  for (int ik = 0; ik < maxk; ik++)
+    k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); // cache -|k|^2
+}
+
+
+template<class T>
+void FreeOrbitalT<T>::report(const std::string& pad) const
+{
+  // Header line, then one "<index> <k-vector>" line per stored k-vector, then a footer line.
+  app_log() << pad << "FreeOrbital report" << std::endl;
+  int index = 0;
+  for (const auto& kvec : kvecs)
+    app_log() << pad << index++ << " " << kvec << std::endl;
+  app_log() << pad << "end FreeOrbital report" << std::endl;
+  app_log().flush();
+}
+
+template class FreeOrbitalT<float>;
+template class FreeOrbitalT<double>;
+template class FreeOrbitalT<std::complex<float>>;
+template class FreeOrbitalT<std::complex<double>>;
+
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h
new file mode 100644
index 0000000000..d2f2f450b8
--- /dev/null
+++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h
@@ -0,0 +1,88 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2022 QMCPACK developers.
+//
+// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Yubo "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron
+//                    William F Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_FREE_ORBITALT_H
+#define QMCPLUSPLUS_FREE_ORBITALT_H
+
+#include "QMCWaveFunctions/SPOSetT.h"
+
+namespace qmcplusplus
+{
+template<class T> // orbital set built from a list of k-vectors: sin/cos orbitals for real T, exp(i k.r) for complex T
+class FreeOrbitalT : public SPOSetT<T>
+{
+public:
+  using ValueVector = typename SPOSetT<T>::ValueVector; // aliases re-exported from the SPOSetT<T> base
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using HessVector  = typename SPOSetT<T>::HessVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using HessMatrix  = typename SPOSetT<T>::HessMatrix;
+  using GGGMatrix   = typename SPOSetT<T>::GGGMatrix;
+  using RealType    = typename SPOSetT<T>::RealType;
+  using PosType     = typename SPOSetT<T>::PosType;
+  using ValueType   = typename SPOSetT<T>::ValueType;
+
+  FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart); // kpts_cart is copied into kvecs
+  ~FreeOrbitalT();
+
+  inline std::string getClassName() const final { return "FreeOrbital"; }
+
+  // Values, gradients and Laplacians for particles in [first, last).
+  // phi[i][j] is phi_j(r_i), i.e. particle first + i in orbital j.
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& phi,
+                            GradMatrix& dphi,
+                            ValueMatrix& d2phi) final;
+
+  // plug the position of particle i (resp. iat) into all orbitals
+  void evaluateVGL(const ParticleSet& P, int i, ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec) final;
+  void evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec) final;
+
+  // same as above with the full Hessian matrix, which is needed by backflow
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& phi,
+                            GradMatrix& dphi,
+                            HessMatrix& d2phi_mat) final;
+
+  // additionally fills the derivative of the Hessian, which is needed to optimize backflow
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& phi,
+                            GradMatrix& dphi,
+                            HessMatrix& d2phi_mat,
+                            GGGMatrix& d3phi_mat) override;
+
+  void report(const std::string& pad) const override; // logs every stored k-vector, each line prefixed by pad
+  // ---- begin required overrides
+  std::unique_ptr<SPOSetT<T>> makeClone() const final { return std::make_unique<FreeOrbitalT<T>>(*this); }
+  void setOrbitalSetSize(int norbs) final { throw std::runtime_error("not implemented"); } // always throws
+  // required overrides end ----
+private:
+  const std::vector<PosType> kvecs; // k-vectors, copied from the constructor argument
+  const int mink;                   // first k index visited by the evaluation loops
+  const int maxk;                   // number of k-vectors; evaluation loops run while ik < maxk
+  std::vector<RealType> k2neg;      // -|k|^2 for each k-vector
+};
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp
new file mode 100644
index 0000000000..0e1638f765
--- /dev/null
+++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp
@@ -0,0 +1,209 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#include "SHOSetBuilderT.h"
+#include "QMCWaveFunctions/SPOSetInputInfo.h"
+#include "OhmmsData/AttributeSet.h"
+#include "Utilities/IteratorUtility.h"
+#include "Utilities/string_utils.h"
+
+
+namespace qmcplusplus
+{
+template<class T>
+SHOSetBuilderT<T>::SHOSetBuilderT(ParticleSet& P, Communicate* comm) : SPOSetBuilderT<T>("SHO", comm), Ps(P)
+{
+  this->legacy    = false; // builder-base flag
+  this->ClassName = "SHOSetBuilderT";
+  app_log() << "Constructing SHOSetBuilderT" << std::endl;
+  reset(); // start from the "unset" parameter values
+}
+
+template<class T> // out-of-line defaulted destructor (declared with override in the class)
+SHOSetBuilderT<T>::~SHOSetBuilderT() = default;
+
+template<class T>
+void SHOSetBuilderT<T>::reset()
+{
+  center  = 0.0;
+  length  = -1.0; // negative mass/energy/length mean "not provided yet"
+  energy  = -1.0;
+  mass    = -1.0;
+  nstates = 0;
+}
+
+template<class T> // legacy entry point: this builder rejects it up front via APP_ABORT
+std::unique_ptr<SPOSetT<T>> SHOSetBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
+{
+  APP_ABORT("SHOSetBuilderT::createSPOSetFromXML  SHOSetBuilder should not use legacy interface");
+  // NOTE(review): presumably APP_ABORT never returns, so the statements below only supply a return value -- confirm
+  app_log() << "SHOSetBuilderT::createSHOSet(xml) " << std::endl;
+
+  SPOSetInputInfo input(cur);
+
+  return createSPOSet(cur, input);
+}
+
+template<class T> // reads the oscillator parameters from cur, sizes the basis from input, and returns a new SHOSetT
+std::unique_ptr<SPOSetT<T>> SHOSetBuilderT<T>::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input)
+{
+  app_log() << "SHOSetBuilderT::createSHOSet(indices) " << std::endl;
+  reset(); // back to sentinel values so missing attributes can be detected below
+
+  // read parameters ("name" and "id" both set spo_name; "energy" and "frequency" both set energy)
+  std::string spo_name = "sho";
+  OhmmsAttributeSet attrib;
+  attrib.add(spo_name, "name");
+  attrib.add(spo_name, "id");
+  attrib.add(mass, "mass");
+  attrib.add(energy, "energy");
+  attrib.add(energy, "frequency");
+  attrib.add(length, "length");
+  attrib.add(center, "center");
+  attrib.add(nstates, "size");
+  attrib.put(cur);
+
+  if (energy < 0.0) // still the sentinel: default energy
+    energy = 1.0;
+  if (mass < 0.0 && length < 0.0) // neither given: default length, mass is derived next
+    length = 1.0;
+  if (mass < 0.0)
+    mass = 1.0 / (energy * length * length); // derive the missing one from mass * energy * length^2 = 1
+  else if (length < 0.0)
+    length = 1.0 / std::sqrt(mass * energy);
+
+  // initialize states and/or adjust basis: smax is the largest state index that must exist
+  int smax = -1;
+  if (input.has_index_info)
+    smax = std::max(smax, input.max_index());
+  if (input.has_energy_info)
+  {
+    smax = std::max(smax, (int)std::ceil(input.max_energy() / energy));
+  }
+  if (smax < 0) // neither index nor energy information was supplied
+    APP_ABORT("SHOSetBuilderT::Initialize\n  invalid basis size");
+  update_basis_states(smax);
+
+  // create sho state request: pick the basis states named by the requested indices
+  indices_t& indices = input.get_indices(this->states);
+  std::vector<SHOState*> sho_states;
+  for (int i = 0; i < indices.size(); ++i)
+    sho_states.push_back(basis_states[indices[i]]);
+
+  // make the sposet
+  auto sho = std::make_unique<SHOSetT<T>>(spo_name, length, center, sho_states);
+
+  sho->report("  ");
+  return sho;
+}
+
+// Make sure basis_states holds at least smax + 1 oscillator states, refresh every state's energy
+// from the current `energy`, and rebuild states[0] from basis_states (aborts if still too few).
+template<class T>
+void SHOSetBuilderT<T>::update_basis_states(int smax)
+{
+  int states_required = smax - basis_states.size() + 1;
+  if (states_required > 0)
+  {
+    RealType N = smax + 1; // number of states that must be available
+    if (QMCTraits::DIM == 1)
+      nmax = smax;
+    else if (QMCTraits::DIM == 2)
+      nmax = std::ceil(.5 * std::sqrt(8. * N + 1.) - 1.5); // from (nmax + 1)(nmax + 2) / 2 >= N
+    else if (QMCTraits::DIM == 3)
+    {
+      RealType f = std::exp(1.0 / 3.0 * std::log(81. * N + 3. * std::sqrt(729. * N * N - 3.)));
+      nmax       = std::ceil(f / 3. + 1. / f - 2.); // closed-form shell estimate for N states in 3D
+    }
+    else
+      APP_ABORT("SHOSetBuilderT::update_basis_states  dimensions other than 1, 2, or 3 are not supported");
+    int ndim                     = nmax + 1; // quantum numbers 0..nmax along each axis
+    ind_dims[QMCTraits::DIM - 1] = 1;
+    for (int d = QMCTraits::DIM - 2; d > -1; --d)
+      ind_dims[d] = ind_dims[d + 1] * ndim; // strides that decode a flat index into quantum numbers
+    int s    = 0;
+    int ntot = ind_dims[0] * ndim; // ndim^DIM in exact integer arithmetic (ind_dims[0] == ndim^(DIM-1))
+    TinyVector<int, QMCTraits::DIM> qnumber;
+    for (int m = 0; m < ntot; ++m)
+    {
+      int n    = 0; // principal quantum number
+      int nrem = m;
+      for (int d = 0; d < QMCTraits::DIM; ++d)
+      {
+        int i = nrem / ind_dims[d];
+        nrem -= i * ind_dims[d];
+        qnumber[d] = i;
+        n += i;
+      }
+      if (n <= nmax) // keep only combinations within the highest shell
+      {
+        SHOState* st;
+        if (s < basis_states.size())
+          st = basis_states[s]; // reuse an existing slot
+        else
+        {
+          st = new SHOState();
+          basis_states.add(st);
+        }
+        RealType e = energy * (n + .5 * QMCTraits::DIM);
+        st->set(qnumber, e);
+        s++;
+      }
+    }
+    basis_states.energy_sort(1e-6, true);
+  }
+
+  // reset energy scale even if no states need to be added
+  for (int i = 0; i < basis_states.size(); ++i)
+  {
+    SHOState& state                                = *basis_states[i];
+    const TinyVector<int, QMCTraits::DIM>& qnumber = state.quantum_number;
+    int n                                          = 0;
+    for (int d = 0; d < QMCTraits::DIM; ++d)
+      n += qnumber[d];
+    state.energy = energy * (n + .5 * QMCTraits::DIM);
+  }
+  //somewhat redundant, but necessary
+  this->clear_states(0);
+  this->states[0]->finish(basis_states.states);
+  if (basis_states.size() <= smax)
+    APP_ABORT("SHOSetBuilderT::update_basis_states  failed to make enough states");
+}
+
+template<class T> // logs the builder parameters and every basis state, each line prefixed by pad
+void SHOSetBuilderT<T>::report(const std::string& pad)
+{
+  app_log() << pad << "SHOSetBuilderT report" << std::endl;
+  app_log() << pad << "  dimension = " << QMCTraits::DIM << std::endl;
+  app_log() << pad << "  mass      = " << mass << std::endl;
+  app_log() << pad << "  frequency = " << energy << std::endl; // same member as the "energy" line below
+  app_log() << pad << "  energy    = " << energy << std::endl;
+  app_log() << pad << "  length    = " << length << std::endl;
+  app_log() << pad << "  center    = " << center << std::endl;
+  app_log() << pad << "  nstates   = " << nstates << std::endl;
+  app_log() << pad << "  nmax      = " << nmax << std::endl;
+  app_log() << pad << "  ind_dims  = " << ind_dims << std::endl;
+  app_log() << pad << "  # basis states = " << basis_states.size() << std::endl;
+  app_log() << pad << "  basis_states" << std::endl;
+  for (int s = 0; s < basis_states.size(); ++s)
+    basis_states[s]->report(pad + "  " + int2string(s) + " "); // each state reports with its index in the prefix
+  app_log() << pad << "end SHOSetBuilderT report" << std::endl;
+  app_log().flush();
+}
+
+template class SHOSetBuilderT<double>;
+template class SHOSetBuilderT<float>;
+template class SHOSetBuilderT<std::complex<double>>;
+template class SHOSetBuilderT<std::complex<float>>;
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h
new file mode 100644
index 0000000000..7b3e9430d8
--- /dev/null
+++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h
@@ -0,0 +1,63 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef QMCPLUSPLUS_SHO_BASIS_BUILDERT_H
+#define QMCPLUSPLUS_SHO_BASIS_BUILDERT_H
+
+#include "QMCWaveFunctions/HarmonicOscillator/SHOSetT.h"
+#include "QMCWaveFunctions/SPOSetBuilderT.h"
+#include "QMCWaveFunctions/SPOSetInfo.h"
+
+namespace qmcplusplus
+{
+template<class T> // builder that creates SHOSetT<T> orbital sets from XML input
+class SHOSetBuilderT : public SPOSetBuilderT<T>
+{
+public:
+  using RealType  = typename SPOSetT<T>::RealType;
+  using PosType   = typename SPOSetT<T>::PosType;
+  using indices_t = typename SPOSetBuilderT<T>::indices_t;
+
+  ParticleSet& Ps; // particle set passed to the constructor
+
+  RealType length; // oscillator parameters; negative means "not provided" after reset()
+  RealType mass;
+  RealType energy;
+  PosType center;
+
+  int nstates;                              // bound to the "size" attribute
+  int nmax;                                 // highest principal quantum number enumerated in the basis
+  TinyVector<int, QMCTraits::DIM> ind_dims; // strides used to decode a flat index into quantum numbers
+
+  SPOSetInfoSimple<SHOState> basis_states; // states enumerated so far
+
+  //construction/destruction
+  SHOSetBuilderT(ParticleSet& P, Communicate* comm);
+
+  ~SHOSetBuilderT() override;
+
+  //reset parameters to their "unset" values
+  void reset();
+
+  //SPOSetBuilder interface
+  std::unique_ptr<SPOSetT<T>> createSPOSetFromXML(xmlNodePtr cur) override;
+
+  std::unique_ptr<SPOSetT<T>> createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) override;
+
+  //local functions
+  void update_basis_states(int smax); // ensure states 0..smax exist in basis_states
+  void report(const std::string& pad = ""); // log parameters and basis states
+};
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp
new file mode 100644
index 0000000000..76a606151d
--- /dev/null
+++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp
@@ -0,0 +1,577 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#include "SHOSetT.h"
+#include "Utilities/string_utils.h"
+
+namespace qmcplusplus
+{
+template <typename T>
+SHOSetT<T>::SHOSetT(const std::string& my_name, RealType l, PosType c, const std::vector<SHOState*>& sho_states)
+    : SPOSetT<T>(my_name), length(l), center(c)
+{
+  state_info.resize(sho_states.size());
+  for (std::size_t idx = 0; idx < sho_states.size(); ++idx)
+    state_info[idx] = *sho_states[idx]; // copy by value: the set keeps its own SHOState objects
+  initialize();
+}
+
+template <typename T> // sizes the work arrays from state_info and precomputes the per-order prefactors
+void SHOSetT<T>::initialize()
+{
+  using std::sqrt;
+
+  this->OrbitalSetSize = state_info.size();
+  // qn_max[d]: largest quantum number along dimension d over all states, plus one
+  qn_max = -1;
+  for (int s = 0; s < state_info.size(); ++s)
+    for (int d = 0; d < QMCTraits::DIM; ++d)
+      qn_max[d] = std::max(qn_max[d], state_info[s].quantum_number[d]);
+  qn_max += 1;
+  // nmax: largest entry of qn_max; it sizes the arrays below
+  nmax = -1;
+  for (int d = 0; d < QMCTraits::DIM; ++d)
+    nmax = std::max(nmax, qn_max[d]);
+
+  prefactors.resize(nmax);
+  hermite.resize(QMCTraits::DIM, nmax);
+  bvalues.resize(QMCTraits::DIM, nmax);
+
+  if (nmax > 0) // skipped when there are no states, so prefactors[0] is never touched on an empty array
+  {
+    prefactors[0] = 1.0 / (sqrt(sqrt(M_PI) * length));
+    for (int n = 1; n < nmax; ++n)
+      prefactors[n] = prefactors[n - 1] / sqrt(2. * n); // each order divides the previous one by sqrt(2n)
+  }
+}
+
+template <typename T> // out-of-line defaulted destructor
+SHOSetT<T>::~SHOSetT() = default;
+
+template <typename T> // returns a new SHOSetT copy-constructed from *this
+std::unique_ptr<SPOSetT<T>> SHOSetT<T>::makeClone() const { return std::make_unique<SHOSetT<T>>(*this); }
+
+template <typename T> // logs the set's parameters and every state, each line prefixed by pad
+void SHOSetT<T>::report(const std::string& pad) const
+{
+  app_log() << pad << "SHOSet report" << std::endl;
+  app_log() << pad << "  length    = " << length << std::endl;
+  app_log() << pad << "  center    = " << center << std::endl;
+  app_log() << pad << "  nmax      = " << nmax << std::endl;
+  app_log() << pad << "  qn_max    = " << qn_max << std::endl;
+  app_log() << pad << "  # states  = " << state_info.size() << std::endl;
+  app_log() << pad << "  states" << std::endl;
+  for (int s = 0; s < state_info.size(); ++s)
+    state_info[s].sho_report(pad + "    " + int2string(s) + " "); // each state reports with its index in the prefix
+  app_log() << pad << "end SHOSet report" << std::endl;
+  app_log().flush();
+}
+
+template <typename T>
+void SHOSetT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+{
+  // Orbital values at the active position of particle iat, written through a vector built on psi's storage.
+  ValueVector values(&psi[0], this->size());
+  evaluate_v(P.activeR(iat), values);
+}
+
+template <typename T>
+void SHOSetT<T>::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+{
+  // Values, gradients and Laplacians of all orbitals at the active position of particle iat.
+  const auto norb = this->size();
+  ValueVector values(&psi[0], norb), laplacians(&d2psi[0], norb);
+  GradVector grads(&dpsi[0], norb);
+  evaluate_vgl(P.activeR(iat), values, grads, laplacians);
+}
+
+template <typename T>
+void SHOSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                      int first,
+                                      int last,
+                                      ValueMatrix& logdet,
+                                      GradMatrix& dlogdet,
+                                      ValueMatrix& d2logdet)
+{
+  const auto norb = this->size();
+  for (int row = 0; first + row < last; ++row) // output row i <-> particle first + i, position taken from P.R
+  {
+    ValueVector values(logdet[row], norb), laplacians(d2logdet[row], norb);
+    GradVector grads(dlogdet[row], norb);
+    evaluate_vgl(P.R[first + row], values, grads, laplacians);
+  }
+}
+
+template <typename T>
+void SHOSetT<T>::evaluate_v(PosType r, ValueVector& psi)
+{
+  const PosType scaled = (r - center) / length; // displacement from center in units of length
+  evaluate_hermite(scaled);
+  evaluate_d0(scaled, psi);
+}
+
+template <typename T>
+void SHOSetT<T>::evaluate_vgl(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+{
+  const PosType scaled = (r - center) / length; // displacement from center in units of length
+  evaluate_hermite(scaled);
+  evaluate_d0(scaled, psi); // values first: the derivative steps below scale by psi
+  evaluate_d1(scaled, psi, dpsi);
+  evaluate_d2(scaled, psi, d2psi);
+}
+
+template <typename T>
+void SHOSetT<T>::evaluate_hermite(const PosType& xpos)
+{
+  // Fill hermite(d, n) by the recurrence H_n = 2 (x H_{n-1} - (n - 1) H_{n-2}), starting from H_0 = 1.
+  for (int dim = 0; dim < QMCTraits::DIM; ++dim)
+  {
+    const int count = qn_max[dim];
+    if (count <= 0)
+      continue; // nothing requested along this axis
+    const RealType x = xpos[dim];
+    RealType older   = 0.0; // H_{n-2}
+    RealType prev    = 1.0; // H_{n-1}
+    hermite(dim, 0)  = 1.0;
+    for (int n = 1; n < count; ++n)
+    {
+      const RealType current = 2 * (x * prev - (n - 1) * older);
+      hermite(dim, n)        = current;
+      older                  = prev;
+      prev                   = current;
+    }
+  }
+}
+
+template <typename T>
+void SHOSetT<T>::evaluate_d0(const PosType& xpos, ValueVector& psi)
+{
+  using std::exp;
+  // Stage 1: per-axis 1D factors prefactor * exp(-x^2 / 2) * H_n(x), stored in bvalues.
+  for (int axis = 0; axis < QMCTraits::DIM; ++axis)
+  {
+    const RealType x        = xpos[axis];
+    const RealType gaussian = exp(-.5 * x * x);
+    for (int n = 0; n < qn_max[axis]; ++n)
+      bvalues(axis, n) = prefactors[n] * gaussian * hermite(axis, n);
+  }
+  // Stage 2: each orbital value is the product over axes of its 1D factors.
+  for (int s = 0; s < state_info.size(); ++s)
+  {
+    const SHOState& state = state_info[s];
+    RealType product      = 1.0;
+    for (int axis = 0; axis < QMCTraits::DIM; ++axis)
+      product *= bvalues(axis, state.quantum_number[axis]);
+    psi[s] = product;
+  }
+}
+
+template <typename T>
+void SHOSetT<T>::evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi)
+{
+  const RealType inv_len = 1.0 / length;
+  // bvalues(d, n) = (-x + 2 n H_{n-1} / H_n) / length. NOTE(review): not finite where H_n(x) == 0 (n = 1 at x = 0).
+  for (int axis = 0; axis < QMCTraits::DIM; ++axis)
+  {
+    const RealType x = xpos[axis];
+    RealType lower   = 0.0; // H_{n-1}; zero when entering n = 0
+    for (int n = 0; n < qn_max[axis]; ++n)
+    {
+      const RealType current = hermite(axis, n);
+      bvalues(axis, n)       = (-x + 2 * n * lower / current) * inv_len;
+      lower                  = current;
+    }
+  }
+  for (int s = 0; s < state_info.size(); ++s)
+  {
+    TinyVector<T, QMCTraits::DIM> grad; // per-axis factors first, then scaled by the orbital value
+    for (int axis = 0; axis < QMCTraits::DIM; ++axis)
+      grad[axis] = bvalues(axis, state_info[s].quantum_number[axis]);
+    grad *= psi[s];
+    dpsi[s] = grad;
+  }
+}
+
+template <typename T>
+void SHOSetT<T>::evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& d2psi)
+{
+  const RealType inv_len_sq = 1.0 / (length * length);
+  // Per axis and quantum number n, store (x^2 - 1 - 2 n) / length^2 in bvalues.
+  for (int axis = 0; axis < QMCTraits::DIM; ++axis)
+  {
+    const RealType x    = xpos[axis];
+    const RealType x_sq = x * x;
+    for (int n = 0; n < qn_max[axis]; ++n)
+      bvalues(axis, n) = (-1.0 + x_sq - 2 * n) * inv_len_sq;
+  }
+  // Laplacian of each orbital: the axis terms are summed, then scaled by the orbital value.
+  for (int s = 0; s < state_info.size(); ++s)
+  {
+    const SHOState& state = state_info[s];
+    T laplacian           = 0.0;
+    for (int axis = 0; axis < QMCTraits::DIM; ++axis)
+      laplacian += bvalues(axis, state.quantum_number[axis]);
+    laplacian *= psi[s];
+    d2psi[s] = laplacian;
+  }
+}
+
+// Debug aid: evaluates the orbitals at r, then logs, for each dimension, closed-form reference values
+// (Hermite orders 0..5 only) next to the stored ones so the two can be compared by eye.
+template <typename T>
+void SHOSetT<T>::evaluate_check(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+{
+  using std::exp;
+  using std::sqrt;
+  evaluate_vgl(r, psi, dpsi, d2psi);
+  const int N = 6; // number of orders with hard-coded reference formulas below
+  RealType H[N], dH[N], d2H[N], pre[N];
+  RealType p[N], dp[N], d2p[N];
+  pre[0] = 1.0 / (sqrt(sqrt(M_PI) * length));
+  for (int n = 1; n < N; ++n)
+    pre[n] = pre[n - 1] / sqrt(2. * n);
+
+  for (int d = 0; d < QMCTraits::DIM; ++d)
+  {
+    const int ncheck = std::min(N, qn_max[d]); // never index the N-entry reference arrays past their end
+    RealType x  = (r[d] - center[d]) / length;
+    RealType x2 = x * x, x3 = x * x * x, x4 = x * x * x * x, x5 = x * x * x * x * x;
+    H[0]       = 1;
+    dH[0]      = 0;
+    d2H[0]     = 0;
+    H[1]       = 2 * x;
+    dH[1]      = 2;
+    d2H[1]     = 0;
+    H[2]       = 4 * x2 - 2;
+    dH[2]      = 8 * x;
+    d2H[2]     = 8;
+    H[3]       = 8 * x3 - 12 * x;
+    dH[3]      = 24 * x2 - 12;
+    d2H[3]     = 48 * x;
+    H[4]       = 16 * x4 - 48 * x2 + 12;
+    dH[4]      = 64 * x3 - 96 * x;
+    d2H[4]     = 192 * x2 - 96;
+    H[5]       = 32 * x5 - 160 * x3 + 120 * x;
+    dH[5]      = 160 * x4 - 480 * x2 + 120;
+    d2H[5]     = 640 * x3 - 960 * x;
+    RealType g = exp(-x2 / 2);
+    for (int n = 0; n < N; ++n)
+    {
+      p[n]   = pre[n] * g * H[n];
+      dp[n]  = pre[n] * g * (-x * H[n] + dH[n]);
+      d2p[n] = pre[n] * g * ((x2 - 1) * H[n] - 2 * x * dH[n] + d2H[n]);
+    }
+    app_log() << "eval check dim = " << d << "  x = " << x << std::endl;
+    app_log() << "  hermite check" << std::endl;
+    for (int n = 0; n < ncheck; ++n) // each pair of lines: reference value, then stored value
+    {
+      app_log() << "    " << n << " " << H[n] << std::endl;
+      app_log() << "    " << n << " " << hermite(d, n) << std::endl;
+    }
+    app_log() << "  phi d0 check" << std::endl;
+    for (int n = 0; n < ncheck; ++n)
+    {
+      app_log() << "    " << n << " " << p[n] << std::endl;
+      app_log() << "    " << n << " " << d0_values(d, n) << std::endl;
+    }
+    app_log() << "  phi d1 check" << std::endl;
+    for (int n = 0; n < ncheck; ++n)
+    {
+      app_log() << "    " << n << " " << dp[n] / p[n] << std::endl;
+      app_log() << "    " << n << " " << d1_values(d, n) << std::endl;
+    }
+    app_log() << "  phi d2 check" << std::endl;
+    for (int n = 0; n < ncheck; ++n)
+    {
+      app_log() << "    " << n << " " << d2p[n] / p[n] << std::endl;
+      app_log() << "    " << n << " " << d2_values(d, n) << std::endl;
+    }
+  }
+}
+
+template <typename T> // debug aid: logs analytic vs. central-finite-difference gradients and Laplacians on a small grid
+void SHOSetT<T>::test_derivatives()
+{
+  int n       = 3;        // grid points per axis
+  PosType c   = 5.123;    // grid origin
+  PosType L   = 1.0;
+  PosType drg = L / n;    // grid spacing
+  PosType dr  = L / 1000; // finite-difference step
+  int nphi    = state_info.size();
+
+  PosType o2dr, odr2; // 1 / (2 dr) and 1 / dr^2 per dimension
+
+  ValueVector vpsi, vpsitmp;
+  GradVector vdpsi, vdpsin;
+  ValueVector vd2psi, vd2psin;
+
+
+  vpsi.resize(nphi);
+  vdpsi.resize(nphi);
+  vd2psi.resize(nphi);
+
+  vpsitmp.resize(nphi);
+  vdpsin.resize(nphi);
+  vd2psin.resize(nphi);
+
+
+  ValueVector psi(&vpsi[0], this->size());
+  GradVector dpsi(&vdpsi[0], this->size());
+  ValueVector d2psi(&vd2psi[0], this->size());
+
+  ValueVector psitmp(&vpsitmp[0], this->size());
+  GradVector dpsin(&vdpsin[0], this->size());
+  ValueVector d2psin(&vd2psin[0], this->size());
+
+
+  app_log() << " loading dr" << std::endl;
+
+  RealType odr2sum = 0.0; // sum over dimensions of 1 / dr^2
+  for (int d = 0; d < QMCTraits::DIM; ++d)
+  {
+    RealType odr = 1.0 / dr[d];
+    o2dr[d]      = .5 * odr;
+    odr2[d]      = odr * odr;
+    odr2sum += odr2[d];
+  }
+
+  app_log() << "SHOSet::test_derivatives" << std::endl;
+
+  const SimulationCell simulation_cell;
+  ParticleSet Ps(simulation_cell); // NOTE(review): Ps is not referenced again in this function
+
+  int p = 0; // running index of the grid point, used only for logging
+  PosType r, rtmp;
+  for (int i = 0; i < n; ++i) // NOTE(review): components 0..2 of r are hard-coded, so this assumes three dimensions
+  {
+    r[0] = c[0] + i * drg[0];
+    for (int j = 0; j < n; ++j)
+    {
+      r[1] = c[1] + j * drg[1];
+      for (int k = 0; k < n; ++k)
+      {
+        r[2] = c[2] + k * drg[2];
+
+        evaluate_vgl(r, psi, dpsi, d2psi); // analytic values and derivatives at r
+
+        for (int m = 0; m < nphi; ++m)
+          d2psin[m] = -2 * odr2sum * psi[m]; // central term of the second difference, summed over dimensions
+        for (int d = 0; d < QMCTraits::DIM; ++d)
+        {
+          rtmp = r;
+          rtmp[d] += dr[d]; // forward-displaced point
+          evaluate_v(rtmp, psitmp);
+          for (int m = 0; m < nphi; ++m)
+          {
+            T phi = psitmp[m];
+            dpsin[m][d]   = phi * o2dr[d];
+            d2psin[m] += phi * odr2[d];
+          }
+          rtmp = r;
+          rtmp[d] -= dr[d]; // backward-displaced point
+          evaluate_v(rtmp, psitmp);
+          for (int m = 0; m < nphi; ++m)
+          {
+            T phi = psitmp[m];
+            dpsin[m][d] -= phi * o2dr[d];
+            d2psin[m] += phi * odr2[d];
+          }
+        }
+
+        RealType dphi_diff  = 0.0; // largest relative deviations between analytic and numerical results
+        RealType d2phi_diff = 0.0;
+        for (int m = 0; m < nphi; ++m)
+          for (int d = 0; d < QMCTraits::DIM; ++d)
+            dphi_diff = std::max<RealType>(dphi_diff, std::abs(dpsi[m][d] - dpsin[m][d]) / std::abs(dpsin[m][d]));
+        for (int m = 0; m < nphi; ++m)
+          d2phi_diff = std::max<RealType>(d2phi_diff, std::abs(d2psi[m] - d2psin[m]) / std::abs(d2psin[m]));
+        app_log() << "  " << p << " " << dphi_diff << " " << d2phi_diff << std::endl;
+        app_log() << "    derivatives" << std::endl;
+        for (int m = 0; m < nphi; ++m) // per state: analytic gradient line, then numerical gradient line
+        {
+          std::string qn = "";
+          for (int d = 0; d < QMCTraits::DIM; ++d)
+            qn += int2string(state_info[m].quantum_number[d]) + " ";
+          app_log() << "    " << qn;
+          for (int d = 0; d < QMCTraits::DIM; ++d)
+            app_log() << real(dpsi[m][d]) << " ";
+          app_log() << std::endl;
+          app_log() << "    " << qn;
+          for (int d = 0; d < QMCTraits::DIM; ++d)
+            app_log() << real(dpsin[m][d]) << " ";
+          app_log() << std::endl;
+        }
+        app_log() << "    laplacians" << std::endl;
+        PosType x = r / length; // NOTE(review): x is not referenced again in this function
+        for (int m = 0; m < nphi; ++m) // per state: analytic then numerical Laplacian, each divided by psi
+        {
+          std::string qn = "";
+          for (int d = 0; d < QMCTraits::DIM; ++d)
+            qn += int2string(state_info[m].quantum_number[d]) + " ";
+          app_log() << "    " << qn << real(d2psi[m] / psi[m]) << std::endl;
+          app_log() << "    " << qn << real(d2psin[m] / psi[m]) << std::endl;
+        }
+        p++;
+      }
+    }
+  }
+
+  app_log() << "end SHOSet::test_derivatives" << std::endl;
+}
+
+template <typename T>
+void SHOSetT<T>::test_overlap() // diagnostic: logs a 1d and a 3d overlap matrix to app_log()
+{
+  app_log() << "SHOSet::test_overlap" << std::endl;
+
+
+  // linear: overlap of the bvalues along dimension d = 0 only
+  int d = 0;
+
+  app_log() << "  length = " << length << std::endl;
+  app_log() << "  prefactors" << std::endl;
+  for (int n = 0; n < qn_max[d]; ++n)
+    app_log() << "    " << n << " " << prefactors[n] << std::endl;
+
+  app_log() << "  1d overlap" << std::endl;
+
+  ValueVector vpsi;
+  vpsi.resize(this->size());
+  ValueVector psi(&vpsi[0], this->size()); // built on vpsi's storage; receives the orbital values below
+
+  double xmax = 4.0; // grid covers [-xmax, xmax) in every sampled direction
+  double dx   = .1; // grid step of the sampled coordinate
+  double dr   = length * dx; // weight of one grid step in the quadrature sum
+
+  int nphi = qn_max[d];
+  Array<double, 2> omat;
+  omat.resize(nphi, nphi);
+  for (int i = 0; i < nphi; ++i)
+    for (int j = 0; j < nphi; ++j)
+      omat(i, j) = 0.0;
+
+  PosType xp = 0.0;
+  for (double x = -xmax; x < xmax; x += dx)
+  {
+    xp[d] = x;
+    evaluate_hermite(xp);
+    evaluate_d0(xp, psi); // NOTE(review): psi is not read by the 1d accumulation below
+
+    for (int i = 0; i < nphi; ++i)
+      for (int j = 0; j < nphi; ++j)
+        omat(i, j) += bvalues(d, i) * bvalues(d, j) * dr;
+  }
+
+  for (int i = 0; i < nphi; ++i)
+  {
+    app_log() << std::endl;
+    for (int j = 0; j < nphi; ++j)
+      app_log() << omat(i, j) << " ";
+  }
+  app_log() << std::endl;
+
+
+  // volumetric: sums |psi_i * psi_j| * dV over the same grid in x, y and z
+  app_log() << "  3d overlap" << std::endl;
+  double dV = dr * dr * dr;
+  nphi      = this->size();
+  omat.resize(nphi, nphi);
+  for (int i = 0; i < nphi; ++i)
+    for (int j = 0; j < nphi; ++j)
+      omat(i, j) = 0.0;
+  for (double x = -xmax; x < xmax; x += dx)
+    for (double y = -xmax; y < xmax; y += dx)
+      for (double z = -xmax; z < xmax; z += dx)
+      {
+        xp[0] = x;
+        xp[1] = y;
+        xp[2] = z;
+        evaluate_hermite(xp);
+        evaluate_d0(xp, psi);
+
+        for (int i = 0; i < nphi; ++i)
+          for (int j = 0; j < nphi; ++j)
+            omat(i, j) += std::abs(psi[i] * psi[j]) * dV; // absolute value of the product, not a signed overlap
+      }
+  for (int i = 0; i < nphi; ++i)
+  {
+    app_log() << std::endl;
+    for (int j = 0; j < nphi; ++j)
+      app_log() << omat(i, j) << " ";
+  }
+  app_log() << std::endl;
+
+
+  app_log() << "end SHOSet::test_overlap" << std::endl;
+}
+
+template <typename T>
+void SHOSetT<T>::evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& grad_grad_grad_logdet)
+{ // Unimplemented: every argument is ignored and not_implemented() is called with the method name.
+  not_implemented("evaluateThirdDeriv(P,first,last,dddlogdet)");
+}
+
+template <typename T>
+void SHOSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                  int first,
+                                  int last,
+                                  ValueMatrix& logdet,
+                                  GradMatrix& dlogdet,
+                                  HessMatrix& grad_grad_logdet)
+{ // Unimplemented Hessian overload: arguments are ignored and not_implemented() is called.
+  not_implemented("evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet)");
+}
+
+template <typename T>
+void SHOSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                  int first,
+                                  int last,
+                                  ValueMatrix& logdet,
+                                  GradMatrix& dlogdet,
+                                  HessMatrix& grad_grad_logdet,
+                                  GGGMatrix& grad_grad_grad_logdet)
+{ // Unimplemented Hessian + third-derivative overload: arguments are ignored and not_implemented() is called.
+  not_implemented("evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet,dddlogdet)");
+}
+
+template <typename T>
+void SHOSetT<T>::evaluateGradSource(const ParticleSet& P,
+                                int first,
+                                int last,
+                                const ParticleSet& source,
+                                int iat_src,
+                                GradMatrix& gradphi)
+{ // Unimplemented gradient-only overload: arguments are ignored and not_implemented() is called.
+  not_implemented("evaluateGradSource(P,first,last,source,iat,dphi)");
+}
+
+template <typename T>
+void SHOSetT<T>::evaluateGradSource(const ParticleSet& P,
+                                int first,
+                                int last,
+                                const ParticleSet& source,
+                                int iat_src,
+                                GradMatrix& grad_phi,
+                                HessMatrix& grad_grad_phi,
+                                GradMatrix& grad_lapl_phi)
+{ // Unimplemented gradient/Hessian/Laplacian-gradient overload: arguments are ignored and not_implemented() is called.
+  not_implemented("evaluateGradSource(P,first,last,source,iat,dphi,ddphi,dd2phi)");
+}
+
+// Explicit instantiations of SHOSetT for real and complex value types in double and single precision
+template class SHOSetT<double>;
+template class SHOSetT<float>;
+template class SHOSetT<std::complex<double>>;
+template class SHOSetT<std::complex<float>>;
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h
new file mode 100644
index 0000000000..6ef256df92
--- /dev/null
+++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h
@@ -0,0 +1,158 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef QMCPLUSPLUS_SHOSETT_H
+#define QMCPLUSPLUS_SHOSETT_H
+
+#include "QMCWaveFunctions/SPOSetT.h"
+#include "QMCWaveFunctions/SPOInfo.h"
+
+namespace qmcplusplus
+{
+struct SHOState : public SPOInfo // SPOInfo extended with a DIM-component integer quantum number
+{
+  TinyVector<int, QMCTraits::DIM> quantum_number; // one integer per spatial dimension
+
+  SHOState() // default state: quantum_number assigned -1, energy assigned 0.0
+  {
+    quantum_number = -1;
+    energy         = 0.0;
+  }
+
+  ~SHOState() override {}
+
+  inline void set(TinyVector<int, QMCTraits::DIM> qn, RealType e) // overwrite both the quantum number and the energy
+  {
+    quantum_number = qn;
+    energy         = e;
+  }
+
+  inline void sho_report(const std::string& pad = "") const // one log line "<pad>qn=...  e=..." on app_log()
+  {
+    app_log() << pad << "qn=" << quantum_number << "  e=" << energy << std::endl;
+  }
+};
+
+template<typename T>
+class SHOSetT : public SPOSetT<T> // harmonic-oscillator orbital set, templated on the orbital value type T
+{
+public:
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using value_type  = typename ValueMatrix::value_type;
+  using grad_type   = typename GradMatrix::value_type;
+  using RealType    = typename SPOSetT<T>::RealType;
+  using PosType     = TinyVector<RealType, QMCTraits::DIM>;
+  using HessType    = typename OrbitalSetTraits<T>::HessType;
+  using HessMatrix  = typename OrbitalSetTraits<T>::HessMatrix;
+  using GGGType     = TinyVector<HessType, OHMMS_DIM>;
+  using GGGVector   = Vector<GGGType>;
+  using GGGMatrix   = Matrix<GGGType>;
+
+  RealType length; // set from constructor argument l (presumably) — TODO confirm in the constructor
+  PosType center; // set from constructor argument c (presumably) — TODO confirm in the constructor
+
+  int nmax;
+  TinyVector<int, QMCTraits::DIM> qn_max; // per-dimension bound used by test_overlap when looping over prefactors/bvalues
+  std::vector<SHOState> state_info; // per-orbital state; quantum_number is read when logging
+  std::vector<RealType> prefactors;
+  Array<RealType, 2> hermite;
+  Array<RealType, 2> bvalues; // indexed as bvalues(dimension, n) in test_overlap
+  Array<RealType, 2> d0_values;
+  Array<RealType, 2> d1_values;
+  Array<RealType, 2> d2_values;
+
+  // construction/destruction
+  SHOSetT(const std::string& my_name, RealType l, PosType c, const std::vector<SHOState*>& sho_states);
+
+  ~SHOSetT() override;
+
+  std::string getClassName() const override { return "SHOSet"; }
+
+  void initialize();
+
+  // SPOSet interface methods
+  std::unique_ptr<SPOSetT<T>> makeClone() const override;
+
+  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override;
+
+  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override;
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override;
+
+
+  // local functions (evaluation at a raw position instead of a ParticleSet entry)
+  void evaluate_v(PosType r, ValueVector& psi);
+  void evaluate_vgl(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
+  void evaluate_hermite(const PosType& xpos);
+  void evaluate_d0(const PosType& xpos, ValueVector& psi);
+  void evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi);
+  void evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& d2psi);
+  void report(const std::string& pad = "") const override;
+  void test_derivatives(); // logs analytic derivatives next to finite-difference estimates
+  void test_overlap(); // logs 1d and 3d overlap matrices
+  void evaluate_check(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
+
+  // empty methods
+  /// number of orbitals is determined only by initial request
+  inline void setOrbitalSetSize(int norbs) override {}
+
+  /// unimplemented functions call this to abort; method is spliced into the abort message
+  inline void not_implemented(const std::string& method)
+  {
+    APP_ABORT("SHOSet::" + method + " has not been implemented.");
+  }
+
+
+  // methods to be implemented in the future (possibly); each definition currently calls not_implemented()
+  void evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& dddlogdet) override;
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& ddlogdet) override;
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& ddlogdet,
+                            GGGMatrix& dddlogdet) override;
+  void evaluateGradSource(const ParticleSet& P,
+                          int first,
+                          int last,
+                          const ParticleSet& source,
+                          int iat_src,
+                          GradMatrix& gradphi) override;
+  void evaluateGradSource(const ParticleSet& P,
+                          int first,
+                          int last,
+                          const ParticleSet& source,
+                          int iat_src,
+                          GradMatrix& dphi,
+                          HessMatrix& ddphi,
+                          GradMatrix& dlapl_phi) override;
+};
+
+} // namespace qmcplusplus
+
+
+#endif
diff --git a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.h b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.h
index d4f3208b61..3d1854cea4 100644
--- a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.h
+++ b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.h
@@ -15,7 +15,7 @@
 #define QMCPLUSPLUS_CUSP_CORRECTION_CONSTRUCTOR_H
 
 #include "LCAOrbitalSet.h"
-#include "LCAOrbitalSetWithCorrection.h"
+#include "SoaCuspCorrection.h"
 #include "CuspCorrection.h"
 
 class Communicate;
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp
new file mode 100644
index 0000000000..dba20478b7
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp
@@ -0,0 +1,966 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#include "LCAOrbitalSetT.h"
+#include "Numerics/MatrixOperators.h"
+#include "CPU/BLAS.hpp"
+#include <ResourceCollection.h>
+
+namespace qmcplusplus
+{
+
+template<class T>
+struct LCAOrbitalSetT<T>::LCAOMultiWalkerMem : public Resource // scratch arrays shared by the multi-walker (mw_) evaluations
+{
+  LCAOMultiWalkerMem() : Resource("LCAOrbitalSetT") {}
+  LCAOMultiWalkerMem(const LCAOMultiWalkerMem&) : LCAOMultiWalkerMem() {} // copy delegates to the default ctor: array contents are not copied
+
+  std::unique_ptr<Resource> makeClone() const override { return std::make_unique<LCAOMultiWalkerMem>(*this); }
+
+  OffloadMWVGLArray phi_vgl_v; // [5][NW][NumMO]
+  OffloadMWVGLArray basis_mw;  // [5][NW][NumAO]
+  OffloadMWVArray phi_v;       // [NW][NumMO]
+  OffloadMWVArray basis_v_mw;  // [NW][NumAO] (resized to nw x BasisSetSize in mw_evaluateValueImplGEMM)
+};
+
+template<class T>
+LCAOrbitalSetT<T>::LCAOrbitalSetT(const std::string& my_name, std::unique_ptr<basis_type>&& bs)
+    : SPOSetT<T>(my_name),
+      BasisSetSize(bs ? bs->getBasisSetSize() : 0), // guarded so a null bs reaches the explicit throw below
+      Identity(true), // starts without a coefficient matrix; setOrbitalSetSize() switches this off
+      basis_timer_(createGlobalTimer("LCAOrbitalSetT::Basis", timer_level_fine)),
+      mo_timer_(createGlobalTimer("LCAOrbitalSetT::MO", timer_level_fine))
+{
+  if (!bs)
+    throw std::runtime_error("LCAOrbitalSetT cannot take nullptr as its  basis set!");
+  myBasisSet = std::move(bs); // takes ownership of the basis set
+  Temp.resize(BasisSetSize);
+  Temph.resize(BasisSetSize);
+  Tempgh.resize(BasisSetSize);
+  this->OrbitalSetSize = BasisSetSize; // required by checkObject() while Identity is true
+  LCAOrbitalSetT<T>::checkObject(); // qualified call: runs this class's check, not a virtual override
+}
+
+template<class T>
+LCAOrbitalSetT<T>::LCAOrbitalSetT(const LCAOrbitalSetT<T>& in)
+    : SPOSetT<T>(in),
+      myBasisSet(in.myBasisSet->makeClone()), // the basis set is cloned, not shared
+      C(in.C), // C is copied as a handle (it is created with std::make_shared in setOrbitalSetSize)
+      BasisSetSize(in.BasisSetSize),
+      C_copy(in.C_copy),
+      Identity(in.Identity),
+      basis_timer_(in.basis_timer_),
+      mo_timer_(in.mo_timer_)
+{
+  Temp.resize(BasisSetSize);
+  Temph.resize(BasisSetSize);
+  Tempgh.resize(BasisSetSize);
+  if (!in.Identity) // MO-sized scratch is only needed when a coefficient matrix is applied
+  {
+    Tempv.resize(this->OrbitalSetSize);
+    Temphv.resize(this->OrbitalSetSize);
+    Tempghv.resize(this->OrbitalSetSize);
+  }
+  LCAOrbitalSetT<T>::checkObject();
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::setOrbitalSetSize(int norbs)
+{ // Leaves identity mode: allocates C as norbs x BasisSetSize and sizes the MO scratch buffers.
+  if (C)
+    throw std::runtime_error("LCAOrbitalSetT::setOrbitalSetSize cannot reset existing MO coefficients");
+
+  Identity       = false;
+  this->OrbitalSetSize = norbs;
+  C              = std::make_shared<ValueMatrix>(this->OrbitalSetSize, BasisSetSize);
+  Tempv.resize(this->OrbitalSetSize);
+  Temphv.resize(this->OrbitalSetSize);
+  Tempghv.resize(this->OrbitalSetSize);
+  LCAOrbitalSetT<T>::checkObject();
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::checkObject() const
+{ // Throws std::runtime_error when Identity, C and the two sizes are mutually inconsistent.
+  if (Identity)
+  {
+    if (this->OrbitalSetSize != BasisSetSize)
+      throw std::runtime_error(
+          "LCAOrbitalSetT::checkObject OrbitalSetSize and BasisSetSize must be equal if Identity = true!");
+    if (C)
+      throw std::runtime_error("LCAOrbitalSetT::checkObject C should be nullptr if Identity = true!");
+  }
+  else
+  {
+    if (!C)
+      throw std::runtime_error("LCAOrbitalSetT::checkObject C should not be nullptr if Identity = false!");
+    if (this->OrbitalSetSize != C->rows()) // C must be OrbitalSetSize x BasisSetSize
+      throw std::runtime_error("LCAOrbitalSetT::checkObject C rows doesn't match OrbitalSetSize.");
+    if (BasisSetSize != C->cols())
+      throw std::runtime_error("LCAOrbitalSetT::checkObject C columns doesn't match BasisSetSize.");
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::createResource(ResourceCollection& collection) const
+{ // Registers a fresh LCAOMultiWalkerMem with the collection.
+  auto resource_index = collection.addResource(std::make_unique<LCAOMultiWalkerMem>()); // NOTE(review): resource_index is never read
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+{ // Borrows an LCAOMultiWalkerMem from the collection and stores the handle on the leader.
+  assert(this == &spo_list.getLeader()); // must be invoked on the leader of spo_list
+  auto& spo_leader          = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+  spo_leader.mw_mem_handle_ = collection.lendResource<LCAOMultiWalkerMem>();
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+{ // Hands the leader's LCAOMultiWalkerMem handle back to the collection.
+  assert(this == &spo_list.getLeader()); // must be invoked on the leader of spo_list
+  auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+  collection.takebackResource(spo_leader.mw_mem_handle_);
+}
+
+template<class T>
+std::unique_ptr<SPOSetT<T>> LCAOrbitalSetT<T>::makeClone() const { return std::make_unique<LCAOrbitalSetT<T>>(*this); } // clone via the copy constructor
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+{ // Orbital values for particle iat: raw basis values in identity mode, otherwise C times the basis values.
+  if (Identity)
+  { //PAY ATTENTION TO COMPLEX
+    myBasisSet->evaluateV(P, iat, psi.data()); // the basis set writes straight into psi
+  }
+  else
+  {
+    Vector<T> vTemp(Temp.data(0), BasisSetSize); // built on component 0 of Temp, used as basis-value scratch
+    this->myBasisSet->evaluateV(P, iat, vTemp.data());
+    assert(psi.size() <= this->OrbitalSetSize);
+    ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); // only the leading psi.size() rows of C
+    MatrixOperators::product(C_partial_view, vTemp, psi);
+  }
+}
+
+/** One GEMM that applies B to each of the D components of A and stores the result in C. Find a better place for other user classes, Matrix should be padded as well */
+template<typename T, unsigned D>
+static void Product_ABt(const VectorSoaContainer<T, D>& A, const Matrix<T>& B, VectorSoaContainer<T, D>& C)
+{
+  constexpr char transa = 't';
+  constexpr char transb = 'n';
+  constexpr T zone(1);
+  constexpr T zero(0);
+  BLAS::gemm(transa, transb, B.rows(), D, B.cols(), zone, B.data(), B.cols(), A.data(), A.capacity(), zero, C.data(), // capacity() is passed as the leading dimension of A
+             C.capacity()); // and of C
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_vgl_impl(const vgl_type& temp,
+                                             ValueVector& psi,
+                                             GradVector& dpsi,
+                                             ValueVector& d2psi) const
+{ // Unpacks SoA components of temp: 0 -> psi, 1..3 -> dpsi (x, y, z), 4 -> d2psi.
+  const size_t output_size = psi.size(); // psi decides how many orbitals are copied
+  std::copy_n(temp.data(0), output_size, psi.data());
+  const T* restrict gx = temp.data(1);
+  const T* restrict gy = temp.data(2);
+  const T* restrict gz = temp.data(3);
+  for (size_t j = 0; j < output_size; j++)
+  {
+    dpsi[j][0] = gx[j];
+    dpsi[j][1] = gy[j];
+    dpsi[j][2] = gz[j];
+  }
+  std::copy_n(temp.data(4), output_size, d2psi.data());
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_vgh_impl(const vgh_type& temp,
+                                             ValueVector& psi,
+                                             GradVector& dpsi,
+                                             HessVector& d2psi) const
+{ // Unpacks SoA components of temp: 0 -> psi, 1..3 -> dpsi, 4..9 -> the six unique Hessian entries.
+  const size_t output_size = psi.size(); // psi decides how many orbitals are copied
+  std::copy_n(temp.data(0), output_size, psi.data());
+  const T* restrict gx  = temp.data(1);
+  const T* restrict gy  = temp.data(2);
+  const T* restrict gz  = temp.data(3);
+  const T* restrict hxx = temp.data(4);
+  const T* restrict hxy = temp.data(5);
+  const T* restrict hxz = temp.data(6);
+  const T* restrict hyy = temp.data(7);
+  const T* restrict hyz = temp.data(8);
+  const T* restrict hzz = temp.data(9);
+
+  for (size_t j = 0; j < output_size; j++)
+  {
+    dpsi[j][0] = gx[j];
+    dpsi[j][1] = gy[j];
+    dpsi[j][2] = gz[j];
+
+    d2psi[j](0, 0) = hxx[j];
+    d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j]; // off-diagonal entries are mirrored to fill the full 3x3
+    d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j];
+    d2psi[j](1, 1)                  = hyy[j];
+    d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j];
+    d2psi[j](2, 2)                  = hzz[j];
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_vghgh_impl(const vghgh_type& temp,
+                                               int i,
+                                               ValueMatrix& psi,
+                                               GradMatrix& dpsi,
+                                               HessMatrix& d2psi,
+                                               GGGMatrix& dghpsi) const
+{ // Unpacks the 20 SoA components of temp into row i of psi, dpsi, d2psi and dghpsi.
+  const size_t output_size = psi.cols(); // one entry per column of the output matrices
+  std::copy_n(temp.data(0), output_size, psi[i]);
+  const T* restrict gx     = temp.data(1);
+  const T* restrict gy     = temp.data(2);
+  const T* restrict gz     = temp.data(3);
+  const T* restrict hxx    = temp.data(4);
+  const T* restrict hxy    = temp.data(5);
+  const T* restrict hxz    = temp.data(6);
+  const T* restrict hyy    = temp.data(7);
+  const T* restrict hyz    = temp.data(8);
+  const T* restrict hzz    = temp.data(9);
+  const T* restrict gh_xxx = temp.data(10);
+  const T* restrict gh_xxy = temp.data(11);
+  const T* restrict gh_xxz = temp.data(12);
+  const T* restrict gh_xyy = temp.data(13);
+  const T* restrict gh_xyz = temp.data(14);
+  const T* restrict gh_xzz = temp.data(15);
+  const T* restrict gh_yyy = temp.data(16);
+  const T* restrict gh_yyz = temp.data(17);
+  const T* restrict gh_yzz = temp.data(18);
+  const T* restrict gh_zzz = temp.data(19);
+
+  for (size_t j = 0; j < output_size; j++)
+  {
+    dpsi[i][j][0] = gx[j];
+    dpsi[i][j][1] = gy[j];
+    dpsi[i][j][2] = gz[j];
+
+    d2psi[i][j](0, 0) = hxx[j];
+    d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j];
+    d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j];
+    d2psi[i][j](1, 1)                     = hyy[j];
+    d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j];
+    d2psi[i][j](2, 2)                     = hzz[j];
+
+    dghpsi[i][j][0](0, 0) = gh_xxx[j]; //x|xx
+    dghpsi[i][j][0](0, 1) = gh_xxy[j]; //x|xy
+    dghpsi[i][j][0](0, 2) = gh_xxz[j]; //x|xz
+    dghpsi[i][j][0](1, 0) = gh_xxy[j]; //x|yx = xxy
+    dghpsi[i][j][0](1, 1) = gh_xyy[j]; //x|yy
+    dghpsi[i][j][0](1, 2) = gh_xyz[j]; //x|yz
+    dghpsi[i][j][0](2, 0) = gh_xxz[j]; //x|zx = xxz
+    dghpsi[i][j][0](2, 1) = gh_xyz[j]; //x|zy = xyz
+    dghpsi[i][j][0](2, 2) = gh_xzz[j]; //x|zz
+
+    dghpsi[i][j][1](0, 0) = gh_xxy[j]; //y|xx = xxy
+    dghpsi[i][j][1](0, 1) = gh_xyy[j]; //y|xy = xyy
+    dghpsi[i][j][1](0, 2) = gh_xyz[j]; //y|xz = xyz
+    dghpsi[i][j][1](1, 0) = gh_xyy[j]; //y|yx = xyy
+    dghpsi[i][j][1](1, 1) = gh_yyy[j]; //y|yy
+    dghpsi[i][j][1](1, 2) = gh_yyz[j]; //y|yz
+    dghpsi[i][j][1](2, 0) = gh_xyz[j]; //y|zx = xyz
+    dghpsi[i][j][1](2, 1) = gh_yyz[j]; //y|zy = yyz
+    dghpsi[i][j][1](2, 2) = gh_yzz[j]; //y|zz
+
+    dghpsi[i][j][2](0, 0) = gh_xxz[j]; //z|xx = xxz
+    dghpsi[i][j][2](0, 1) = gh_xyz[j]; //z|xy = xyz
+    dghpsi[i][j][2](0, 2) = gh_xzz[j]; //z|xz = xzz
+    dghpsi[i][j][2](1, 0) = gh_xyz[j]; //z|yx = xyz
+    dghpsi[i][j][2](1, 1) = gh_yyz[j]; //z|yy = yyz
+    dghpsi[i][j][2](1, 2) = gh_yzz[j]; //z|yz = yzz
+    dghpsi[i][j][2](2, 0) = gh_xzz[j]; //z|zx = xzz
+    dghpsi[i][j][2](2, 1) = gh_yzz[j]; //z|zy = yzz
+    dghpsi[i][j][2](2, 2) = gh_zzz[j]; //z|zz
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_vghgh_impl(const vghgh_type& temp,
+                                               ValueVector& psi,
+                                               GradVector& dpsi,
+                                               HessVector& d2psi,
+                                               GGGVector& dghpsi) const
+{ // Unpacks the 20 SoA components of temp (value, 3 gradient, 6 Hessian, 10 third-derivative) into the per-orbital vectors.
+  const size_t output_size = psi.size(); // psi decides how many orbitals are copied
+  std::copy_n(temp.data(0), output_size, psi.data());
+  const T* restrict gx     = temp.data(1);
+  const T* restrict gy     = temp.data(2);
+  const T* restrict gz     = temp.data(3);
+  const T* restrict hxx    = temp.data(4);
+  const T* restrict hxy    = temp.data(5);
+  const T* restrict hxz    = temp.data(6);
+  const T* restrict hyy    = temp.data(7);
+  const T* restrict hyz    = temp.data(8);
+  const T* restrict hzz    = temp.data(9);
+  const T* restrict gh_xxx = temp.data(10);
+  const T* restrict gh_xxy = temp.data(11);
+  const T* restrict gh_xxz = temp.data(12);
+  const T* restrict gh_xyy = temp.data(13);
+  const T* restrict gh_xyz = temp.data(14);
+  const T* restrict gh_xzz = temp.data(15);
+  const T* restrict gh_yyy = temp.data(16);
+  const T* restrict gh_yyz = temp.data(17);
+  const T* restrict gh_yzz = temp.data(18);
+  const T* restrict gh_zzz = temp.data(19);
+
+  for (size_t j = 0; j < output_size; j++)
+  {
+    dpsi[j][0] = gx[j];
+    dpsi[j][1] = gy[j];
+    dpsi[j][2] = gz[j];
+
+    d2psi[j](0, 0) = hxx[j];
+    d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j]; // off-diagonal entries are mirrored to fill the full 3x3
+    d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j];
+    d2psi[j](1, 1)                  = hyy[j];
+    d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j];
+    d2psi[j](2, 2)                  = hzz[j];
+
+    dghpsi[j][0](0, 0) = gh_xxx[j]; //x|xx
+    dghpsi[j][0](0, 1) = gh_xxy[j]; //x|xy
+    dghpsi[j][0](0, 2) = gh_xxz[j]; //x|xz
+    dghpsi[j][0](1, 0) = gh_xxy[j]; //x|yx = xxy
+    dghpsi[j][0](1, 1) = gh_xyy[j]; //x|yy
+    dghpsi[j][0](1, 2) = gh_xyz[j]; //x|yz
+    dghpsi[j][0](2, 0) = gh_xxz[j]; //x|zx = xxz
+    dghpsi[j][0](2, 1) = gh_xyz[j]; //x|zy = xyz
+    dghpsi[j][0](2, 2) = gh_xzz[j]; //x|zz
+
+    dghpsi[j][1](0, 0) = gh_xxy[j]; //y|xx = xxy
+    dghpsi[j][1](0, 1) = gh_xyy[j]; //y|xy = xyy
+    dghpsi[j][1](0, 2) = gh_xyz[j]; //y|xz = xyz
+    dghpsi[j][1](1, 0) = gh_xyy[j]; //y|yx = xyy
+    dghpsi[j][1](1, 1) = gh_yyy[j]; //y|yy
+    dghpsi[j][1](1, 2) = gh_yyz[j]; //y|yz
+    dghpsi[j][1](2, 0) = gh_xyz[j]; //y|zx = xyz
+    dghpsi[j][1](2, 1) = gh_yyz[j]; //y|zy = yyz
+    dghpsi[j][1](2, 2) = gh_yzz[j]; //y|zz
+
+    dghpsi[j][2](0, 0) = gh_xxz[j]; //z|xx = xxz
+    dghpsi[j][2](0, 1) = gh_xyz[j]; //z|xy = xyz
+    dghpsi[j][2](0, 2) = gh_xzz[j]; //z|xz = xzz
+    dghpsi[j][2](1, 0) = gh_xyz[j]; //z|yx = xyz
+    dghpsi[j][2](1, 1) = gh_yyz[j]; //z|yy = yyz
+    dghpsi[j][2](1, 2) = gh_yzz[j]; //z|yz = yzz
+    dghpsi[j][2](2, 0) = gh_xzz[j]; //z|zx = xzz
+    dghpsi[j][2](2, 1) = gh_yzz[j]; //z|zy = yzz
+    dghpsi[j][2](2, 2) = gh_zzz[j]; //z|zz
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_ionderiv_v_row_impl(const vgl_type& temp, GradVector& dpsi) const
+{ // Writes the negated gradient components (1..3) of temp into dpsi.
+  const size_t output_size     = dpsi.size();
+  const T* restrict gx = temp.data(1);
+  const T* restrict gy = temp.data(2);
+  const T* restrict gz = temp.data(3);
+
+  for (size_t j = 0; j < output_size; j++)
+  {
+    //As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property that
+    // for an atomic center, the ion gradient is the negative of the electron gradient.
+    // Hence minus signs for each of these.
+    dpsi[j][0] = -gx[j];
+    dpsi[j][1] = -gy[j];
+    dpsi[j][2] = -gz[j];
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+{ // Value, gradient and component-4 output for particle iat, unpacked by evaluate_vgl_impl.
+  //TAKE CARE OF IDENTITY
+  {
+    ScopedTimer local(basis_timer_); // times only the basis-set evaluation
+    myBasisSet->evaluateVGL(P, iat, Temp);
+  }
+
+  if (Identity)
+    evaluate_vgl_impl(Temp, psi, dpsi, d2psi); // identity mode: basis functions are the orbitals
+  else
+  {
+    assert(psi.size() <= this->OrbitalSetSize);
+    {
+      ScopedTimer local(mo_timer_); // times only the coefficient product
+      ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); // only the leading psi.size() rows of C
+      Product_ABt(Temp, C_partial_view, Tempv);
+    }
+    evaluate_vgl_impl(Tempv, psi, dpsi, d2psi);
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                   const RefVectorWithLeader<ParticleSet>& P_list,
+                                   int iat,
+                                   const RefVector<ValueVector>& psi_v_list,
+                                   const RefVector<GradVector>& dpsi_v_list,
+                                   const RefVector<ValueVector>& d2psi_v_list) const
+{ // Multi-walker VGL: fills the leader's phi_vgl_v scratch, then scatters it into the per-walker outputs.
+  assert(this == &spo_list.getLeader()); // must be invoked on the leader of spo_list
+  auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+  auto& phi_vgl_v  = spo_leader.mw_mem_handle_.getResource().phi_vgl_v;
+
+  phi_vgl_v.resize(QMCTraits::DIM_VGL, spo_list.size(), this->OrbitalSetSize);
+  mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v);
+
+  const size_t nw = phi_vgl_v.size(1);
+
+  //TODO: make this cleaner?
+  for (size_t iw = 0; iw < nw; iw++) // size_t index: avoids a signed/unsigned comparison against nw
+  {
+    const size_t output_size = psi_v_list[iw].get().size();
+    std::copy_n(phi_vgl_v.data_at(0, iw, 0), output_size, psi_v_list[iw].get().data());
+    std::copy_n(phi_vgl_v.data_at(4, iw, 0), output_size, d2psi_v_list[iw].get().data());
+    // grads are [dim, walker, orb] in phi_vgl_v
+    //           [walker][orb, dim] in dpsi_v_list
+    for (size_t idim = 0; idim < QMCTraits::DIM; idim++)
+      BLAS::copy(output_size, phi_vgl_v.data_at(idim + 1, iw, 0), 1, &dpsi_v_list[iw].get().data()[0][idim], QMCTraits::DIM);
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::mw_evaluateVGLImplGEMM(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                           const RefVectorWithLeader<ParticleSet>& P_list,
+                                           int iat,
+                                           OffloadMWVGLArray& phi_vgl_v) const
+{ // Fills phi_vgl_v for all walkers: a per-row copy in identity mode, otherwise a single GEMM with C.
+  assert(this == &spo_list.getLeader()); // must be invoked on the leader of spo_list
+  auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+  auto& basis_mw   = spo_leader.mw_mem_handle_.getResource().basis_mw;
+  basis_mw.resize(QMCTraits::DIM_VGL, spo_list.size(), BasisSetSize);
+
+  {
+    ScopedTimer local(basis_timer_); // times only the basis-set evaluation
+    myBasisSet->mw_evaluateVGL(P_list, iat, basis_mw);
+  }
+
+  if (Identity)
+  {
+    // output_size can be smaller than BasisSetSize
+    const size_t output_size = phi_vgl_v.size(2);
+    const size_t nw          = phi_vgl_v.size(1);
+
+    for (size_t idim = 0; idim < QMCTraits::DIM_VGL; idim++)
+      for (size_t iw = 0; iw < nw; iw++) // size_t index: avoids a signed/unsigned comparison against nw
+        std::copy_n(basis_mw.data_at(idim, iw, 0), output_size, phi_vgl_v.data_at(idim, iw, 0));
+  }
+  else
+  {
+    const size_t requested_orb_size = phi_vgl_v.size(2);
+    assert(requested_orb_size <= this->OrbitalSetSize);
+    {
+      ScopedTimer local(mo_timer_); // times only the coefficient product
+      ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize); // only the leading rows of C
+      // TODO: make class for general blas interface in Platforms
+      // have instance of that class as member of LCAOrbitalSetT, call gemm through that
+      BLAS::gemm('T', 'N',
+                 requested_orb_size,        // MOs
+                 spo_list.size() * QMCTraits::DIM_VGL, // walkers * DIM_VGL
+                 BasisSetSize,              // AOs
+                 1, C_partial_view.data(), BasisSetSize, basis_mw.data(), BasisSetSize, 0, phi_vgl_v.data(),
+                 requested_orb_size);
+    }
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                     const RefVectorWithLeader<ParticleSet>& P_list,
+                                     int iat,
+                                     const RefVector<ValueVector>& psi_v_list) const
+{ // Multi-walker values: fills the leader's phi_v scratch, then copies one row per walker into psi_v_list.
+  assert(this == &spo_list.getLeader()); // must be invoked on the leader of spo_list
+  auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+  auto& phi_v      = spo_leader.mw_mem_handle_.getResource().phi_v;
+  phi_v.resize(spo_list.size(), this->OrbitalSetSize);
+  mw_evaluateValueImplGEMM(spo_list, P_list, iat, phi_v);
+
+  const size_t output_size = phi_v.size(1);
+  const size_t nw          = phi_v.size(0);
+
+  for (size_t iw = 0; iw < nw; iw++) // size_t index: avoids a signed/unsigned comparison against nw
+    std::copy_n(phi_v.data_at(iw, 0), output_size, psi_v_list[iw].get().data());
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::mw_evaluateValueImplGEMM(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                             const RefVectorWithLeader<ParticleSet>& P_list,
+                                             int iat,
+                                             OffloadMWVArray& phi_v) const
+{ // Fills phi_v for all walkers: a flat copy in identity mode, otherwise a single GEMM with C.
+  assert(this == &spo_list.getLeader()); // must be invoked on the leader of spo_list
+  auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+  const size_t nw  = spo_list.size();
+  auto& basis_v_mw = spo_leader.mw_mem_handle_.getResource().basis_v_mw;
+  basis_v_mw.resize(nw, BasisSetSize);
+
+  myBasisSet->mw_evaluateValue(P_list, iat, basis_v_mw);
+
+  if (Identity)
+  { // NOTE(review): the flat copy assumes phi_v holds nw * OrbitalSetSize values; the VGL variant copies per row — confirm callers
+    std::copy_n(basis_v_mw.data_at(0, 0), this->OrbitalSetSize * nw, phi_v.data_at(0, 0));
+  }
+  else
+  {
+    const size_t requested_orb_size = phi_v.size(1);
+    assert(requested_orb_size <= this->OrbitalSetSize);
+    ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize); // only the leading rows of C
+    BLAS::gemm('T', 'N',
+               requested_orb_size, // MOs
+               spo_list.size(),    // walkers
+               BasisSetSize,       // AOs
+               1, C_partial_view.data(), BasisSetSize, basis_v_mw.data(), BasisSetSize, 0, phi_v.data(),
+               requested_orb_size);
+  }
+}
+
+template<class T> // for each walker and each of its virtual particles: ratio = dot(orbital values, invRow)
+void LCAOrbitalSetT<T>::mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                         const RefVectorWithLeader<const VirtualParticleSet>& vp_list,
+                                         const RefVector<ValueVector>& psi_list,
+                                         const std::vector<const T*>& invRow_ptr_list,
+                                         std::vector<std::vector<T>>& ratios_list) const
+{
+  const size_t nw = spo_list.size();
+  for (size_t iw = 0; iw < nw; iw++)
+  {
+    for (size_t iat = 0; iat < vp_list[iw].getTotalNum(); iat++) // iat runs over the virtual particles of walker iw
+    {
+      spo_list[iw].evaluateValue(vp_list[iw], iat, psi_list[iw]); // single-walker call; psi_list[iw] is the per-walker buffer
+      ratios_list[iw][iat] = simd::dot(psi_list[iw].get().data(), invRow_ptr_list[iw], psi_list[iw].get().size());
+    }
+  }
+}
+
+template<class T> // ratios[j] for every virtual particle j of VP; psi is not referenced by this implementation
+void LCAOrbitalSetT<T>::evaluateDetRatios(const VirtualParticleSet& VP,
+                                      ValueVector& psi,
+                                      const ValueVector& psiinv,
+                                      std::vector<T>& ratios)
+{
+  Vector<T> vTemp(Temp.data(0), BasisSetSize); // built on row 0 of Temp: receives the basis values at one virtual particle
+  Vector<T> invTemp(Temp.data(1), BasisSetSize); // built on row 1 of Temp: receives psiinv combined with C (computed once)
+
+  {
+    ScopedTimer local(mo_timer_);
+    // when only a subset of orbitals is used, extract limited rows of C.
+    Matrix<T> C_occupied(C->data(), psiinv.size(), BasisSetSize); // NOTE(review): C is dereferenced regardless of Identity - confirm callers never get here with Identity set
+    MatrixOperators::product_Atx(C_occupied, psiinv, invTemp); // presumably invTemp = C_occupied^T * psiinv - confirm against MatrixOperators
+  }
+
+  for (size_t j = 0; j < VP.getTotalNum(); j++)
+  {
+    {
+      ScopedTimer local(basis_timer_);
+      myBasisSet->evaluateV(VP, j, vTemp.data());
+    }
+    ratios[j] = simd::dot(vTemp.data(), invTemp.data(), BasisSetSize); // dot over basis functions, so no per-particle product with C is needed
+  }
+}
+
+template<class T> // batched VGL for particle iat, plus per-walker ratio and gradient divided by that ratio
+void LCAOrbitalSetT<T>::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                                   const RefVectorWithLeader<ParticleSet>& P_list,
+                                                   int iat,
+                                                   const std::vector<const T*>& invRow_ptr_list,
+                                                   OffloadMWVGLArray& phi_vgl_v,
+                                                   std::vector<T>& ratios,
+                                                   std::vector<GradType>& grads) const
+{
+  assert(this == &spo_list.getLeader()); // must be called on the leader of spo_list
+  assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL); // phi_vgl_v is laid out [VGL, walker, Orbs]
+  assert(phi_vgl_v.size(1) == spo_list.size());
+
+  mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v);
+  // Device data of phi_vgl_v must be up-to-date upon return
+  phi_vgl_v.updateTo();
+
+  const size_t nw             = spo_list.size();
+  const size_t norb_requested = phi_vgl_v.size(2);
+  for (size_t iw = 0; iw < nw; iw++) // size_t index matches nw and avoids a signed/unsigned comparison
+  {
+    ratios[iw] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(0, iw, 0), norb_requested); // slot 0 holds the values
+    GradType dphi;
+    for (size_t idim = 0; idim < QMCTraits::DIM; idim++) // slots 1..DIM hold the gradient components
+      dphi[idim] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(idim + 1, iw, 0), norb_requested) / ratios[iw];
+    grads[iw] = dphi;
+  }
+}
+
+template<class T> // value, gradient and hessian of the orbitals for particle iat
+void LCAOrbitalSetT<T>::evaluateVGH(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, HessVector& dhpsi)
+{
+  // Identity: unpack the basis data directly; otherwise apply C first.
+  myBasisSet->evaluateVGH(P, iat, Temph);
+  if (Identity)
+    evaluate_vgh_impl(Temph, psi, dpsi, dhpsi);
+  else
+  {
+    assert(psi.size() <= this->OrbitalSetSize);
+    ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); // leading psi.size() rows of C
+    Product_ABt(Temph, C_partial_view, Temphv); // Temphv receives the basis data combined with C
+    evaluate_vgh_impl(Temphv, psi, dpsi, dhpsi);
+  }
+}
+
+template<class T> // value, gradient, hessian and gradient-of-hessian of the orbitals for particle iat
+void LCAOrbitalSetT<T>::evaluateVGHGH(const ParticleSet& P,
+                                  int iat,
+                                  ValueVector& psi,
+                                  GradVector& dpsi,
+                                  HessVector& dhpsi,
+                                  GGGVector& dghpsi)
+{
+  // APP_ABORT("LCAORbitalSet::evaluate(psi,gpsi,hpsi,ghpsi) not implemented\n");
+
+  // Identity: unpack the basis data directly; otherwise apply C first.
+  myBasisSet->evaluateVGHGH(P, iat, Tempgh);
+  if (Identity)
+    evaluate_vghgh_impl(Tempgh, psi, dpsi, dhpsi, dghpsi);
+  else
+  {
+    assert(psi.size() <= this->OrbitalSetSize);
+    ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); // leading psi.size() rows of C
+    Product_ABt(Tempgh, C_partial_view, Tempghv); // Tempghv receives the basis data combined with C
+    evaluate_vghgh_impl(Tempghv, psi, dpsi, dhpsi, dghpsi);
+  }
+}
+
+/* unpack temp (rows V, Gx, Gy, Gz, L) into row i of logdet, dlogdet and d2logdet */
+template<class T>
+inline void LCAOrbitalSetT<T>::evaluate_vgl_impl(const vgl_type& temp,
+                                             int i,
+                                             ValueMatrix& logdet,
+                                             GradMatrix& dlogdet,
+                                             ValueMatrix& d2logdet) const
+{
+  const size_t output_size = logdet.cols(); // number of entries written into row i
+  std::copy_n(temp.data(0), output_size, logdet[i]); // row 0: values
+  const T* restrict gx = temp.data(1);
+  const T* restrict gy = temp.data(2);
+  const T* restrict gz = temp.data(3);
+  for (size_t j = 0; j < output_size; j++) // interleave the three separate gradient rows into per-orbital gradients
+  {
+    dlogdet[i][j][0] = gx[j];
+    dlogdet[i][j][1] = gy[j];
+    dlogdet[i][j][2] = gz[j];
+  }
+  std::copy_n(temp.data(4), output_size, d2logdet[i]); // row 4: laplacians
+}
+template<class T> // unpack temp (value, 3 gradient rows, 6 hessian rows) into row i of psi, dpsi and d2psi
+void LCAOrbitalSetT<T>::evaluate_vgh_impl(const vgh_type& temp,
+                                             int i,
+                                             ValueMatrix& psi,
+                                             GradMatrix& dpsi,
+                                             HessMatrix& d2psi) const
+{
+  const size_t output_size = psi.cols(); // number of entries written into row i
+  std::copy_n(temp.data(0), output_size, psi[i]); // row 0: values
+  const T* restrict gx  = temp.data(1);
+  const T* restrict gy  = temp.data(2);
+  const T* restrict gz  = temp.data(3);
+  const T* restrict hxx = temp.data(4);
+  const T* restrict hxy = temp.data(5);
+  const T* restrict hxz = temp.data(6);
+  const T* restrict hyy = temp.data(7);
+  const T* restrict hyz = temp.data(8);
+  const T* restrict hzz = temp.data(9);
+
+  for (size_t j = 0; j < output_size; j++)
+  {
+    dpsi[i][j][0] = gx[j];
+    dpsi[i][j][1] = gy[j];
+    dpsi[i][j][2] = gz[j];
+
+    d2psi[i][j](0, 0) = hxx[j]; // only 6 hessian components are stored; off-diagonals are mirrored below
+    d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j];
+    d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j];
+    d2psi[i][j](1, 1)                     = hyy[j];
+    d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j];
+    d2psi[i][j](2, 2)                     = hzz[j];
+  }
+}
+
+template<class T> // writes the negated gradient rows of temp into row i of dpsi (ion derivative of the value)
+void LCAOrbitalSetT<T>::evaluate_ionderiv_v_impl(const vgl_type& temp, int i, GradMatrix& dpsi) const
+{
+  const size_t output_size     = dpsi.cols();
+  const T* restrict gx = temp.data(1);
+  const T* restrict gy = temp.data(2);
+  const T* restrict gz = temp.data(3);
+
+  for (size_t j = 0; j < output_size; j++)
+  {
+    //As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property that
+    // for an atomic center, the ion gradient is the negative of the electron gradient.
+    // Hence minus signs for each of these.
+    dpsi[i][j][0] = -gx[j];
+    dpsi[i][j][1] = -gy[j];
+    dpsi[i][j][2] = -gz[j];
+  }
+}
+
+template<class T> // writes negated gradient, hessian and traced grad-hessian of temp into row i of dpsi, dgpsi, dlpsi
+void LCAOrbitalSetT<T>::evaluate_ionderiv_vgl_impl(const vghgh_type& temp,
+                                                      int i,
+                                                      GradMatrix& dpsi,
+                                                      HessMatrix& dgpsi,
+                                                      GradMatrix& dlpsi) const
+{
+  const size_t output_size         = dpsi.cols();
+  const T* restrict gx     = temp.data(1);
+  const T* restrict gy     = temp.data(2);
+  const T* restrict gz     = temp.data(3);
+  const T* restrict hxx    = temp.data(4);
+  const T* restrict hxy    = temp.data(5);
+  const T* restrict hxz    = temp.data(6);
+  const T* restrict hyy    = temp.data(7);
+  const T* restrict hyz    = temp.data(8);
+  const T* restrict hzz    = temp.data(9);
+  const T* restrict gh_xxx = temp.data(10);
+  const T* restrict gh_xxy = temp.data(11);
+  const T* restrict gh_xxz = temp.data(12);
+  const T* restrict gh_xyy = temp.data(13);
+  const T* restrict gh_xzz = temp.data(15); // row 14 is not read; presumably the xyz component, which the traces below do not need
+  const T* restrict gh_yyy = temp.data(16);
+  const T* restrict gh_yyz = temp.data(17);
+  const T* restrict gh_yzz = temp.data(18);
+  const T* restrict gh_zzz = temp.data(19);
+
+  for (size_t j = 0; j < output_size; j++)
+  {
+    //As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property that
+    // for an atomic center, the ion gradient is the negative of the electron gradient.
+    // Hence minus signs for each of these.
+    dpsi[i][j][0] = -gx[j];
+    dpsi[i][j][1] = -gy[j];
+    dpsi[i][j][2] = -gz[j];
+
+    dgpsi[i][j](0, 0) = -hxx[j];
+    dgpsi[i][j](0, 1) = dgpsi[i][j](1, 0) = -hxy[j];
+    dgpsi[i][j](0, 2) = dgpsi[i][j](2, 0) = -hxz[j];
+    dgpsi[i][j](1, 1)                     = -hyy[j];
+    dgpsi[i][j](2, 1) = dgpsi[i][j](1, 2) = -hyz[j];
+    dgpsi[i][j](2, 2)                     = -hzz[j];
+
+    //Since this returns the ion gradient of the laplacian, we have to trace the grad hessian vector.
+    dlpsi[i][j][0] = -(gh_xxx[j] + gh_xyy[j] + gh_xzz[j]);
+    dlpsi[i][j][1] = -(gh_xxy[j] + gh_yyy[j] + gh_yzz[j]);
+    dlpsi[i][j][2] = -(gh_xxz[j] + gh_yyz[j] + gh_zzz[j]);
+  }
+}
+
+template<class T> // value/gradient/laplacian for particles [first, last); particle iat fills row (iat - first)
+void LCAOrbitalSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                         int first,
+                                         int last,
+                                         ValueMatrix& logdet,
+                                         GradMatrix& dlogdet,
+                                         ValueMatrix& d2logdet)
+{
+  if (Identity) // basis data is unpacked directly, C is not applied
+  {
+    for (size_t i = 0, iat = first; iat < last; i++, iat++) // i: output row, iat: particle index
+    {
+      myBasisSet->evaluateVGL(P, iat, Temp);
+      evaluate_vgl_impl(Temp, i, logdet, dlogdet, d2logdet);
+    }
+  }
+  else
+  {
+    assert(logdet.cols() <= this->OrbitalSetSize);
+    ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); // leading logdet.cols() rows of C, set up once for all particles
+    for (size_t i = 0, iat = first; iat < last; i++, iat++) // i: output row, iat: particle index
+    {
+      myBasisSet->evaluateVGL(P, iat, Temp);
+      Product_ABt(Temp, C_partial_view, Tempv);
+      evaluate_vgl_impl(Tempv, i, logdet, dlogdet, d2logdet);
+    }
+  }
+}
+
+template<class T> // value/gradient/hessian for particles [first, last); particle iat fills row (iat - first)
+void LCAOrbitalSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                         int first,
+                                         int last,
+                                         ValueMatrix& logdet,
+                                         GradMatrix& dlogdet,
+                                         HessMatrix& grad_grad_logdet)
+{
+  if (Identity) // basis data is unpacked directly, C is not applied
+  {
+    for (size_t i = 0, iat = first; iat < last; i++, iat++) // i: output row, iat: particle index
+    {
+      myBasisSet->evaluateVGH(P, iat, Temph);
+      evaluate_vgh_impl(Temph, i, logdet, dlogdet, grad_grad_logdet);
+    }
+  }
+  else
+  {
+    assert(logdet.cols() <= this->OrbitalSetSize);
+    ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); // leading logdet.cols() rows of C, set up once for all particles
+    for (size_t i = 0, iat = first; iat < last; i++, iat++) // i: output row, iat: particle index
+    {
+      myBasisSet->evaluateVGH(P, iat, Temph);
+      Product_ABt(Temph, C_partial_view, Temphv);
+      evaluate_vgh_impl(Temphv, i, logdet, dlogdet, grad_grad_logdet);
+    }
+  }
+}
+
+template<class T> // value/gradient/hessian/grad-hessian for particles [first, last); particle iat fills row (iat - first)
+void LCAOrbitalSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                         int first,
+                                         int last,
+                                         ValueMatrix& logdet,
+                                         GradMatrix& dlogdet,
+                                         HessMatrix& grad_grad_logdet,
+                                         GGGMatrix& grad_grad_grad_logdet)
+{
+  if (Identity) // basis data is unpacked directly, C is not applied
+  {
+    for (size_t i = 0, iat = first; iat < last; i++, iat++) // i: output row, iat: particle index
+    {
+      myBasisSet->evaluateVGHGH(P, iat, Tempgh);
+      evaluate_vghgh_impl(Tempgh, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet);
+    }
+  }
+  else
+  {
+    assert(logdet.cols() <= this->OrbitalSetSize);
+    ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); // leading logdet.cols() rows of C, set up once for all particles
+    for (size_t i = 0, iat = first; iat < last; i++, iat++) // i: output row, iat: particle index
+    {
+      myBasisSet->evaluateVGHGH(P, iat, this->Tempgh);
+      Product_ABt(this->Tempgh, C_partial_view, this->Tempghv);
+      evaluate_vghgh_impl(this->Tempghv, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet);
+    }
+  }
+}
+
+template<class T> // ion-gradient of the orbitals w.r.t. ion iat_src of source, for particles [first, last)
+void LCAOrbitalSetT<T>::evaluateGradSource(const ParticleSet& P,
+                                       int first,
+                                       int last,
+                                       const ParticleSet& source,
+                                       int iat_src,
+                                       GradMatrix& gradphi)
+{
+  if (Identity) // basis data is unpacked directly, C is not applied
+  {
+    for (size_t i = 0, iat = first; iat < last; i++, iat++) // i: output row, iat: particle index
+    {
+      myBasisSet->evaluateGradSourceV(P, iat, source, iat_src, this->Temp);
+      evaluate_ionderiv_v_impl(Temp, i, gradphi);
+    }
+  }
+  else
+  {
+    for (size_t i = 0, iat = first; iat < last; i++, iat++) // i: output row, iat: particle index
+    {
+      myBasisSet->evaluateGradSourceV(P, iat, source, iat_src, this->Temp);
+      Product_ABt(this->Temp, *C, this->Tempv); // uses the whole of C; no partial row view is taken here
+      evaluate_ionderiv_v_impl(this->Tempv, i, gradphi);
+    }
+  }
+}
+
+template<class T> // ion-gradients of value, electron gradient and electron laplacian for particles [first, last)
+void LCAOrbitalSetT<T>::evaluateGradSource(const ParticleSet& P,
+                                       int first,
+                                       int last,
+                                       const ParticleSet& source,
+                                       int iat_src,
+                                       GradMatrix& grad_phi,
+                                       HessMatrix& grad_grad_phi,
+                                       GradMatrix& grad_lapl_phi)
+{
+  if (Identity) // basis data is unpacked directly, C is not applied
+  {
+    for (size_t i = 0, iat = first; iat < last; i++, iat++) // i: output row, iat: particle index
+    {
+      myBasisSet->evaluateGradSourceVGL(P, iat, source, iat_src, this->Tempgh);
+      evaluate_ionderiv_vgl_impl(this->Tempgh, i, grad_phi, grad_grad_phi, grad_lapl_phi);
+    }
+  }
+  else
+  {
+    for (size_t i = 0, iat = first; iat < last; i++, iat++) // i: output row, iat: particle index
+    {
+      myBasisSet->evaluateGradSourceVGL(P, iat, source, iat_src, this->Tempgh);
+      Product_ABt(this->Tempgh, *C, this->Tempghv); // uses the whole of C; no partial row view is taken here
+      evaluate_ionderiv_vgl_impl(this->Tempghv, i, grad_phi, grad_grad_phi, grad_lapl_phi);
+    }
+  }
+}
+
+template<class T> // single-electron variant: ion-gradient of all orbitals for electron iel and ion iat_src
+void LCAOrbitalSetT<T>::evaluateGradSourceRow(const ParticleSet& P,
+                                          int iel,
+                                          const ParticleSet& source,
+                                          int iat_src,
+                                          GradVector& gradphi)
+{
+  if (Identity) // basis data is unpacked directly, C is not applied
+  {
+    myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp);
+    evaluate_ionderiv_v_row_impl(this->Temp, gradphi);
+  }
+  else
+  {
+    myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp);
+    Product_ABt(Temp, *C, this->Tempv); // uses the whole of C; no partial row view is taken here
+    evaluate_ionderiv_v_row_impl(this->Tempv, gradphi);
+  }
+}
+
+template<class T> // overwrites C with C_copy combined with rot_mat through one gemm call
+void LCAOrbitalSetT<T>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy)
+{
+  if (!use_stored_copy)
+    *C_copy = *C; // refresh the stored copy from the current C; NOTE(review): requires C_copy to be allocated already - confirm
+  //gemm is out-of-place: it reads C_copy and rot_mat and writes C
+  BLAS::gemm('N', 'T', BasisSetSize, this->OrbitalSetSize, this->OrbitalSetSize, RealType(1.0), C_copy->data(), BasisSetSize,
+             rot_mat.data(), this->OrbitalSetSize, RealType(0.0), C->data(), BasisSetSize);
+
+  /* debugging code
+  app_log() << "PRINTING MO COEFFICIENTS AFTER ROTATION " << objectName << std::endl;
+  for (int j = 0; j < OrbitalSetSize; j++)
+    for (int i = 0; i < BasisSetSize; i++)
+    {
+      app_log() << " " << std::right << std::fixed << std::setprecision(16) << std::setw(23) << std::scientific
+                << *(C->data() + j * BasisSetSize + i);
+
+      if ((j * BasisSetSize + i + 1) % 4 == 0)
+        app_log() << std::endl;
+    }
+  */
+}
+
+// Explicit instantiations of LCAOrbitalSetT for the real and complex value types, in double and single precision
+template class LCAOrbitalSetT<double>;
+template class LCAOrbitalSetT<float>;
+template class LCAOrbitalSetT<std::complex<double>>;
+template class LCAOrbitalSetT<std::complex<float>>;
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h
new file mode 100644
index 0000000000..6df0013bd5
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h
@@ -0,0 +1,336 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by:
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSETT_H
+#define QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSETT_H
+
+#include <memory>
+#include "QMCWaveFunctions/SPOSetT.h"
+#include "QMCWaveFunctions/BasisSetBase.h"
+
+#include "Numerics/MatrixOperators.h"
+#include "Numerics/DeterminantOperators.h"
+
+namespace qmcplusplus
+{
+/** class to handle linear combinations of basis orbitals used to evaluate the Dirac determinants.
+   *
+   * SoA version of LCOrbitalSet
+   * Localized basis set is always real 
+   */
+template<class T>  
+class LCAOrbitalSetT : public SPOSetT<T>
+{
+public:
+  using basis_type = SoaBasisSetBase<T>;
+  using vgl_type   = typename basis_type::vgl_type;
+  using vgh_type   = typename basis_type::vgh_type;
+  using vghgh_type = typename basis_type::vghgh_type;
+
+  using IndexType   = typename SPOSetT<T>::IndexType;
+  using RealType    = typename SPOSetT<T>::RealType;
+  using ComplexType = typename SPOSetT<T>::ComplexType;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using HessMatrix  = typename SPOSetT<T>::HessMatrix;
+  using PosType     = typename SPOSetT<T>::PosType;
+  using HessVector  = typename SPOSetT<T>::HessVector;
+  using GGGMatrix  = typename SPOSetT<T>::GGGMatrix;
+  using GGGVector  = typename SPOSetT<T>::GGGVector;
+  using GradType = typename SPOSetT<T>::GradType;
+  using OffloadMWVGLArray = Array<T, 3, OffloadPinnedAllocator<T>>; // [VGL, walker, Orbs]
+  using OffloadMWVArray   = Array<T, 2, OffloadPinnedAllocator<T>>; // [walker, Orbs]
+
+  ///pointer to the basis set
+  std::unique_ptr<basis_type> myBasisSet;
+  /// pointer to matrix containing the coefficients
+  std::shared_ptr<ValueMatrix> C;
+
+  /** constructor
+     * @param bs pointer to the BasisSet
+     */
+  LCAOrbitalSetT(const std::string& my_name, std::unique_ptr<basis_type>&& bs);
+
+  LCAOrbitalSetT(const LCAOrbitalSetT& in);
+
+  std::string getClassName() const final { return "LCAOrbitalSetT"; }
+
+  bool isRotationSupported() const final { return true; }
+
+  bool hasIonDerivs() const final { return true; }
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const final;
+
+  void storeParamsBeforeRotation() final { C_copy = std::make_shared<ValueMatrix>(*C); }
+
+  void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) final;
+
+  /** set the OrbitalSetSize and Identity=false and initialize internal storages
+    */
+  void setOrbitalSetSize(int norbs) final;
+
+  /** return the size of the basis set
+    */
+  int getBasisSetSize() const { return (myBasisSet == nullptr) ? 0 : myBasisSet->getBasisSetSize(); }
+
+  bool isIdentity() const { return Identity; };
+
+  /** check consistency between Identity and C
+    *
+    */
+  void checkObject() const final;
+
+  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) final;
+
+  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final;
+
+  void mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                        const RefVectorWithLeader<ParticleSet>& P_list,
+                        int iat,
+                        const RefVector<ValueVector>& psi_v_list) const final;
+
+  void mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                      const RefVectorWithLeader<ParticleSet>& P_list,
+                      int iat,
+                      const RefVector<ValueVector>& psi_v_list,
+                      const RefVector<GradVector>& dpsi_v_list,
+                      const RefVector<ValueVector>& d2psi_v_list) const final;
+
+  void mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                            const RefVectorWithLeader<const VirtualParticleSet>& vp_list,
+                            const RefVector<ValueVector>& psi_list,
+                            const std::vector<const T*>& invRow_ptr_list,
+                            std::vector<std::vector<T>>& ratios_list) const final;
+
+  void evaluateDetRatios(const VirtualParticleSet& VP,
+                         ValueVector& psi,
+                         const ValueVector& psiinv,
+                         std::vector<T>& ratios) final;
+
+  void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                      const RefVectorWithLeader<ParticleSet>& P_list,
+                                      int iat,
+                                      const std::vector<const T*>& invRow_ptr_list,
+                                      OffloadMWVGLArray& phi_vgl_v,
+                                      std::vector<T>& ratios,
+                                      std::vector<GradType>& grads) const final;
+
+  void evaluateVGH(const ParticleSet& P,
+                   int iat,
+                   ValueVector& psi,
+                   GradVector& dpsi,
+                   HessVector& grad_grad_psi) final;
+
+  void evaluateVGHGH(const ParticleSet& P,
+                     int iat,
+                     ValueVector& psi,
+                     GradVector& dpsi,
+                     HessVector& grad_grad_psi,
+                     GGGVector& grad_grad_grad_psi) final;
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) final;
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& grad_grad_logdet) final;
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& grad_grad_logdet,
+                            GGGMatrix& grad_grad_grad_logdet) final;
+
+  //NOTE:  The data types get complicated here, so here's an overview of the
+  //       data types associated with ionic derivatives, and how to get their data.
+  //
+  //NOTE:  These data structures hold the data for one particular ion, and so the ID is implicit.
+  //       It's up to the user to keep track of which ion these derivatives refer to.
+  //
+  // 1.) GradMatrix grad_phi:  Holds the ionic derivatives of each SPO for each electron.
+  //            Example:  grad_phi[iel][iorb][idim].  iel  -- electron index.
+  //                                                iorb -- orbital index.
+  //                                                idim  -- cartesian index of ionic derivative.
+  //                                                        X=0, Y=1, Z=2.
+  //
+  // 2.) HessMatrix grad_grad_phi:  Holds the ionic derivatives of the electron gradient components
+  //                                   for each SPO and each electron.
+  //            Example:  grad_grad_phi[iel][iorb](idim,edim)  iel  -- electron index.
+  //                                                           iorb -- orbital index.
+  //                                                           idim -- ionic derivative's cartesian index.
+  //                                                              X=0, Y=1, Z=2
+  //                                                           edim -- electron derivative's cartesian index.
+  //                                                              x=0, y=1, z=2.
+  //
+  // 3.) GradMatrix grad_lapl_phi:  Holds the ionic derivatives of the electron laplacian for each SPO and each electron.
+  //            Example:  grad_lapl_phi[iel][iorb][idim].  iel  -- electron index.
+  //                                                       iorb -- orbital index.
+  //                                                       idim -- cartesian index of ionic derivative.
+  //                                                           X=0, Y=1, Z=2.
+
+  /**
+ * \brief Calculate ion derivatives of SPO's.
+ *  
+ *  @param P Electron particle set.
+ *  @param first index of first electron 
+ *  @param last one past the index of the last electron (the loop runs while iat < last)
+ *  @param source Ion particle set.
+ *  @param iat_src  Index of ion.
+ *  @param grad_phi Container storing ion gradients for all particles and all orbitals.
+ */
+  void evaluateGradSource(const ParticleSet& P,
+                          int first,
+                          int last,
+                          const ParticleSet& source,
+                          int iat_src,
+                          GradMatrix& grad_phi) final;
+
+  /**
+ * \brief Calculate ion derivatives of SPO's, their gradients, and their laplacians.
+ *  
+ *  @param P Electron particle set.
+ *  @param first index of first electron.
+ *  @param last one past the index of the last electron (the loop runs while iat < last)
+ *  @param source Ion particle set.
+ *  @param iat_src  Index of ion.
+ *  @param grad_phi Container storing ion gradients for all particles and all orbitals.
+ *  @param grad_grad_phi Container storing ion gradients of electron gradients for all particles and all orbitals.
+ *  @param grad_lapl_phi Container storing ion gradients of SPO laplacians for all particles and all orbitals.
+ */
+  void evaluateGradSource(const ParticleSet& P,
+                          int first,
+                          int last,
+                          const ParticleSet& source,
+                          int iat_src,
+                          GradMatrix& grad_phi,
+                          HessMatrix& grad_grad_phi,
+                          GradMatrix& grad_lapl_phi) final;
+
+  void evaluateGradSourceRow(const ParticleSet& P,
+                             int iel,
+                             const ParticleSet& source,
+                             int iat_src,
+                             GradVector& grad_phi) final;
+
+  void createResource(ResourceCollection& collection) const final;
+  void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const final;
+  void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const final;
+
+protected:
+  ///number of basis functions (AOs), i.e. the size of the basis set
+  const IndexType BasisSetSize;
+  /// a copy of the original C before orbital rotation is applied;
+  std::shared_ptr<ValueMatrix> C_copy;
+
+  ///true if C is an identity matrix
+  bool Identity;
+  ///Temp(BasisSetSize) : Row index=V,Gx,Gy,Gz,L
+  vgl_type Temp;
+  ///Tempv(OrbitalSetSize) Tempv=C*Temp
+  vgl_type Tempv;
+
+  ///These are temporary VectorSoAContainers to hold value, gradient, and hessian for
+  ///all basis or SPO functions evaluated at a given point.
+  ///Nbasis x [1(value)+3(gradient)+6(hessian)]
+  vgh_type Temph;
+  ///Norbitals x [1(value)+3(gradient)+6(hessian)]
+  vgh_type Temphv;
+
+  ///These are temporary VectorSoAContainers to hold value, gradient, hessian, and
+  /// gradient hessian for all basis or SPO functions evaluated at a given point.
+  ///Nbasis x [1(value)+3(gradient)+6(hessian)+10(grad_hessian)]
+  vghgh_type Tempgh;
+  ///Norbitals x [1(value)+3(gradient)+6(hessian)+10(grad_hessian)]
+  vghgh_type Tempghv;
+
+private:
+  ///helper functions to handle Identity
+  void evaluate_vgl_impl(const vgl_type& temp, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) const;
+
+  void evaluate_vgl_impl(const vgl_type& temp,
+                         int i,
+                         ValueMatrix& logdet,
+                         GradMatrix& dlogdet,
+                         ValueMatrix& d2logdet) const;
+  ///These two functions unpack the data in vgh_type temp object into wavefunction friendly data structures.
+
+
+  ///This unpacks temp into vectors psi, dpsi, and d2psi.
+  void evaluate_vgh_impl(const vgh_type& temp, ValueVector& psi, GradVector& dpsi, HessVector& d2psi) const;
+
+  ///Unpacks temp into the ith row (or electron index) of logdet, dlogdet, dhlogdet.
+  void evaluate_vgh_impl(const vgh_type& temp,
+                         int i,
+                         ValueMatrix& logdet,
+                         GradMatrix& dlogdet,
+                         HessMatrix& dhlogdet) const;
+  ///Unpacks data in vghgh_type temp object into wavefunction friendly data structures for value, gradient, hessian
+  ///and gradient hessian.
+  void evaluate_vghgh_impl(const vghgh_type& temp,
+                           ValueVector& psi,
+                           GradVector& dpsi,
+                           HessVector& d2psi,
+                           GGGVector& dghpsi) const;
+
+  void evaluate_vghgh_impl(const vghgh_type& temp,
+                           int i,
+                           ValueMatrix& logdet,
+                           GradMatrix& dlogdet,
+                           HessMatrix& dhlogdet,
+                           GGGMatrix& dghlogdet) const;
+
+
+  ///Unpacks data in vgl object and calculates/places ionic gradient result into dlogdet.
+  void evaluate_ionderiv_v_impl(const vgl_type& temp, int i, GradMatrix& dlogdet) const;
+
+  ///Unpacks data in vghgh object and calculates/places ionic gradient of value,
+  ///  electron gradient, and electron laplacian result into dlogdet, dglogdet, and dllogdet respectively.
+  void evaluate_ionderiv_vgl_impl(const vghgh_type& temp,
+                                  int i,
+                                  GradMatrix& dlogdet,
+                                  HessMatrix& dglogdet,
+                                  GradMatrix& dllogdet) const;
+
+  ///Unpacks data in vgl object and calculates/places ionic gradient of a single row (phi_j(r)) into dlogdet.
+  void evaluate_ionderiv_v_row_impl(const vgl_type& temp, GradVector& dlogdet) const;
+
+  void mw_evaluateVGLImplGEMM(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                              const RefVectorWithLeader<ParticleSet>& P_list,
+                              int iat,
+                              OffloadMWVGLArray& phi_vgl_v) const;
+
+  /// packed walker GEMM implementation
+  void mw_evaluateValueImplGEMM(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                const RefVectorWithLeader<ParticleSet>& P_list,
+                                int iat,
+                                OffloadMWVArray& phi_v) const;
+
+  struct LCAOMultiWalkerMem;
+  ResourceHandle<LCAOMultiWalkerMem> mw_mem_handle_;
+  /// timer for basis set
+  NewTimer& basis_timer_;
+  /// timer for MO
+  NewTimer& mo_timer_;
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp
new file mode 100644
index 0000000000..f713646d82
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp
@@ -0,0 +1,73 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2018 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#include "LCAOrbitalSetWithCorrectionT.h"
+
+namespace qmcplusplus
+{
+/// Wraps bs in an LCAO set named my_name + "_modified" and builds the cusp correction from (ions, els).
+template<typename T>
+LCAOrbitalSetWithCorrectionT<T>::LCAOrbitalSetWithCorrectionT(const std::string& my_name,
+                                                              ParticleSet& ions, ParticleSet& els,
+                                                              std::unique_ptr<basis_type>&& bs)
+    : SPOSetT<T>(my_name), lcao(my_name + "_modified", std::move(bs)), cusp(ions, els)
+{}
+
+template<typename T>
+void LCAOrbitalSetWithCorrectionT<T>::setOrbitalSetSize(int norbs)
+{ // lcao is not resized here: it must already hold norbs orbitals (checked by the assert below)
+  assert(lcao.getOrbitalSetSize() == norbs && "norbs doesn't agree with lcao!");
+  this->OrbitalSetSize = norbs;
+  cusp.setOrbitalSetSize(norbs); // sizes the cusp-correction scratch for norbs orbitals
+}
+
+template<typename T>
+auto LCAOrbitalSetWithCorrectionT<T>::makeClone() const -> std::unique_ptr<SPOSetT<T>>
+{ // the copy constructor is defaulted, so lcao and cusp are copied member by member
+  return std::make_unique<LCAOrbitalSetWithCorrectionT>(*this);
+}
+
+template<typename T>
+void LCAOrbitalSetWithCorrectionT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+{ // the LCAO values are evaluated first; cusp.addV then adds (+=) the correction into the same psi
+  lcao.evaluateValue(P, iat, psi);
+  cusp.addV(P, iat, psi);
+}
+
+/// Value, gradient and laplacian of all orbitals for particle iat: the LCAO part is evaluated first,
+/// then the cusp correction is added (+=) into the same psi, dpsi and d2psi.
+template<typename T>
+void LCAOrbitalSetWithCorrectionT<T>::evaluateVGL(const ParticleSet& P,
+                                                  int iat,
+                                                  ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+{
+  lcao.evaluateVGL(P, iat, psi, dpsi, d2psi);
+  cusp.add_vector_vgl(P, iat, psi, dpsi, d2psi);
+}
+
+/// Evaluate the LCAO part for particles [first, last), then add the cusp correction particle by particle;
+/// the correction for particle iat goes into row iat - first of logdet, dlogdet and d2logdet.
+template<typename T>
+void LCAOrbitalSetWithCorrectionT<T>::evaluate_notranspose(const ParticleSet& P, int first, int last,
+                                                           ValueMatrix& logdet, GradMatrix& dlogdet,
+                                                           ValueMatrix& d2logdet)
+{
+  lcao.evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet);
+  // int counters match the int parameters: no signed/unsigned comparison, no narrowing at the call
+  for (int iat = first; iat < last; ++iat)
+    cusp.add_vgl(P, iat, iat - first, logdet, dlogdet, d2logdet);
+}
+
+template class LCAOrbitalSetWithCorrectionT<double>; // explicit instantiations for the two real value types;
+template class LCAOrbitalSetWithCorrectionT<float>;  // the member templates above are defined only in this file
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h
new file mode 100644
index 0000000000..b1fc69cf6e
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h
@@ -0,0 +1,75 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_WITH_CORRECTIONT_H
+#define QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_WITH_CORRECTIONT_H
+
+#include "QMCWaveFunctions/SPOSetT.h"
+#include "QMCWaveFunctions/BasisSetBase.h"
+#include "LCAOrbitalSetT.h"
+#include "SoaCuspCorrectionT.h"
+
+
+namespace qmcplusplus
+{
+/** SPO set that adds a cusp correction on top of an LCAOrbitalSetT.
+   * Each evaluate function first evaluates the wrapped lcao and then lets cusp add its contribution.
+   */
+
+template<typename T>
+class LCAOrbitalSetWithCorrectionT : public SPOSetT<T>
+{
+public:
+  using basis_type  = typename LCAOrbitalSetT<T>::basis_type;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  /** constructor
+     * @param my_name name of this SPO set; the wrapped lcao is named my_name + "_modified"
+     * @param ions passed, together with els, to the cusp correction
+     * @param els passed, together with ions, to the cusp correction
+     * @param bs pointer to the BasisSet; ownership moves into the wrapped lcao
+     */
+  LCAOrbitalSetWithCorrectionT(const std::string& my_name,
+                               ParticleSet& ions,
+                               ParticleSet& els,
+                               std::unique_ptr<basis_type>&& bs);
+
+  LCAOrbitalSetWithCorrectionT(const LCAOrbitalSetWithCorrectionT& in) = default;
+
+  std::string getClassName() const final { return "LCAOrbitalSetWithCorrectionT"; }
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const final; // copy of *this made with the copy constructor
+
+  void setOrbitalSetSize(int norbs) final; // norbs must equal lcao's orbital count (asserted)
+
+  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) final;
+
+  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final;
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) final;
+
+  friend class LCAOrbitalBuilder;
+
+private:
+  LCAOrbitalSetT<T> lcao; // uncorrected LCAO orbitals
+
+  SoaCuspCorrectionT<T> cusp; // correction added to the lcao results
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp
new file mode 100644
index 0000000000..57a1312447
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp
@@ -0,0 +1,171 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2021 QMCPACK developers.
+//
+// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+/** @file SoaCuspCorrectionT.cpp
+ */
+#include "SoaCuspCorrectionT.h"
+#include "SoaCuspCorrectionBasisSet.h"
+
+namespace qmcplusplus
+{
+template<class T>
+SoaCuspCorrectionT<T>::SoaCuspCorrectionT(ParticleSet& ions, ParticleSet& els)
+    : NumCenters(ions.getTotalNum()), NumTargets(els.getTotalNum()), myTableIndex(els.addTable(ions))
+{
+  // one slot per center, all empty (null) until add() installs a correction basis
+  LOBasisSet.resize(NumCenters);
+}
+
+template<class T>
+SoaCuspCorrectionT<T>::SoaCuspCorrectionT(const SoaCuspCorrectionT&) = default; // memberwise; COT objects stay shared
+
+template<class T>
+void SoaCuspCorrectionT<T>::setOrbitalSetSize(int norbs)
+{ // five scratch rows of norbs entries; the evaluate functions use them as value, gradient x/y/z, laplacian
+  MaxOrbSize = norbs;
+  myVGL.resize(5, MaxOrbSize);
+}
+
+/// Accumulate the cusp correction of every orbital for particle iat into vgl:
+/// component 0 receives the value, 1-3 the gradient (x, y, z) and 4 the laplacian; existing content is kept (+=).
+/// Defined without `inline`: the declaration in the header is not inline and the header-side addVGL() calls it.
+/// @param P particle set that provides the distance table myTableIndex
+/// @param iat particle index
+/// @param vgl output container holding at most MaxOrbSize orbitals (asserted)
+template<class T>
+void SoaCuspCorrectionT<T>::evaluateVGL(const ParticleSet& P, int iat, VGLVector& vgl)
+{
+  assert(MaxOrbSize >= vgl.size());
+  // the scratch matrix is cleared on every call
+  myVGL = 0.0;
+
+  // temporary distances/displacements when iat is the active particle of P, the stored row iat otherwise
+  const bool is_active = (P.getActivePtcl() == iat);
+  const auto& d_table  = P.getDistTableAB(myTableIndex);
+  const auto& dist     = is_active ? d_table.getTempDists() : d_table.getDistRow(iat);
+  const auto& displ    = is_active ? d_table.getTempDispls() : d_table.getDisplRow(iat);
+
+  // every center that has a correction is evaluated into the five rows of the scratch matrix
+  for (int c = 0; c < NumCenters; c++)
+    if (LOBasisSet[c])
+      LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], myVGL[2], myVGL[3], myVGL[4]);
+
+  // row k of myVGL is added to component k of vgl
+  const size_t norb = vgl.size();
+  for (int k = 0; k < 5; ++k)
+  {
+    const auto row_in = myVGL[k];
+    auto row_out      = vgl.data(k);
+    for (size_t i = 0; i < norb; ++i)
+      row_out[i] += row_in[i];
+  }
+}
+
+/// Add the cusp correction for particle iat to psi (value), dpsi (gradient) and d2psi (laplacian),
+/// one entry per orbital. The outputs are accumulated with +=, so their previous content is kept.
+template<class T>
+void SoaCuspCorrectionT<T>::evaluate_vgl(const ParticleSet& P,
+                                         int iat,
+                                         ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+{
+  assert(MaxOrbSize >= psi.size());
+  myVGL = 0.0;
+
+  const bool use_temp = (P.getActivePtcl() == iat);
+  const auto& table   = P.getDistTableAB(myTableIndex);
+  const auto& r       = use_temp ? table.getTempDists() : table.getDistRow(iat);
+  const auto& dr      = use_temp ? table.getTempDispls() : table.getDisplRow(iat);
+  for (int ic = 0; ic < NumCenters; ++ic)
+  {
+    if (!LOBasisSet[ic])
+      continue; // no correction stored for this center
+    LOBasisSet[ic]->evaluate_vgl(r[ic], dr[ic], myVGL[0], myVGL[1], myVGL[2], myVGL[3], myVGL[4]);
+  }
+
+  const auto val = myVGL[0];
+  const auto lap = myVGL[4];
+  for (size_t iorb = 0; iorb < psi.size(); ++iorb)
+  {
+    psi[iorb] += val[iorb];
+    for (int d = 0; d < 3; ++d)
+      dpsi[iorb][d] += myVGL[1 + d][iorb]; // scratch rows 1-3 hold gradient x, y, z
+    d2psi[iorb] += lap[iorb];
+  }
+}
+
+/// Matrix flavour of evaluate_vgl: adds the cusp correction for particle iat into row idx of psi, dpsi and d2psi
+/// (one column per orbital). Only row idx is written; its previous content is kept (+=).
+template<class T>
+void SoaCuspCorrectionT<T>::evaluate_vgl(const ParticleSet& P,
+                                         int iat,
+                                         int idx,
+                                         ValueMatrix& psi, GradMatrix& dpsi, ValueMatrix& d2psi)
+{
+  assert(MaxOrbSize >= psi.cols());
+  myVGL = 0.0;
+
+  const bool use_temp = (P.getActivePtcl() == iat);
+  const auto& table   = P.getDistTableAB(myTableIndex);
+  const auto& r       = use_temp ? table.getTempDists() : table.getDistRow(iat);
+  const auto& dr      = use_temp ? table.getTempDispls() : table.getDisplRow(iat);
+  for (int ic = 0; ic < NumCenters; ++ic)
+  {
+    if (!LOBasisSet[ic])
+      continue; // no correction stored for this center
+    LOBasisSet[ic]->evaluate_vgl(r[ic], dr[ic], myVGL[0], myVGL[1], myVGL[2], myVGL[3], myVGL[4]);
+  }
+
+  // scratch rows: 0 value, 1-3 gradient x/y/z, 4 laplacian
+  const size_t norb = psi.cols();
+  for (size_t iorb = 0; iorb < norb; ++iorb)
+  {
+    psi[idx][iorb] += myVGL[0][iorb];
+    for (int d = 0; d < 3; ++d)
+      dpsi[idx][iorb][d] += myVGL[1 + d][iorb];
+    d2psi[idx][iorb] += myVGL[4][iorb];
+  }
+}
+
+/// Add the cusp-correction values for particle iat to psi (one entry per orbital, accumulated with +=).
+template<class T>
+void SoaCuspCorrectionT<T>::evaluateV(const ParticleSet& P, int iat, ValueVector& psi)
+{
+  assert(MaxOrbSize >= psi.size());
+  // row 0 of myVGL serves as the value scratch; fill_n clears myVGL.size() entries starting there
+  T* scratch = myVGL[0];
+  std::fill_n(scratch, myVGL.size(), 0.0);
+
+  // temporary distances when iat is the active particle of P, the stored row iat otherwise
+  const auto& table = P.getDistTableAB(myTableIndex);
+  const auto& r     = (P.getActivePtcl() == iat) ? table.getTempDists() : table.getDistRow(iat);
+
+  //THIS IS SERIAL, only way to avoid this is to use myVGL
+  for (int ic = 0; ic < NumCenters; ++ic)
+  {
+    if (LOBasisSet[ic])
+      LOBasisSet[ic]->evaluate(r[ic], scratch);
+  }
+  for (size_t iorb = 0; iorb < psi.size(); ++iorb) // collect
+    psi[iorb] += scratch[iorb];
+}
+
+template<class T>
+void SoaCuspCorrectionT<T>::add(int icenter, std::unique_ptr<COT> aos)
+{ // slot icenter takes over ownership of aos; aos must provide exactly MaxOrbSize orbitals (asserted)
+  assert(MaxOrbSize == aos->getNumOrbs() && "All the centers should support the same number of orbitals!");
+  LOBasisSet[icenter].reset(aos.release());
+}
+
+template class SoaCuspCorrectionT<double>; // real value types only: COT is built on RealType and evaluateV()
+template class SoaCuspCorrectionT<float>;  // hands a myVGL row (RealType*) to a T* ("COMPLEX WON'T WORK" in the header)
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h
new file mode 100644
index 0000000000..f20bfa5730
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h
@@ -0,0 +1,117 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2021 QMCPACK developers.
+//
+// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+/** @file SoaCuspCorrectionT.h
+ */
+#ifndef QMCPLUSPLUS_SOA_CUSPCORRECTIONT_H
+#define QMCPLUSPLUS_SOA_CUSPCORRECTIONT_H // T suffix like the other *T.h guards; never shared with a non-T header
+
+#include "Configuration.h"
+#include "QMCWaveFunctions/SPOSetT.h"
+
+namespace qmcplusplus
+{
+template<typename T>
+class CuspCorrectionAtomicBasis;
+
+/** Cusp correction that is added on top of molecular orbitals.
+ *
+ * For each center (ion) that has one, a COT (CuspCorrectionAtomicBasis) is evaluated
+ * from the center-particle distance and displacement, and the result is accumulated
+ * into the caller's value/gradient/laplacian containers. The template parameter T is the
+ * orbital value type; COT itself is instantiated with the real type RealType.
+ */
+template<class T>
+class SoaCuspCorrectionT
+{
+  using RealType    = typename SPOSetT<T>::RealType;
+  using VGLVector   = VectorSoaContainer<T, 5>;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using PosType     = typename SPOSetT<T>::PosType;
+
+  ///number of centers, e.g., ions
+  size_t NumCenters;
+  ///number of quantum particles
+  size_t NumTargets;
+  ///index of the distance table obtained from els.addTable(ions) in the constructor
+  const int myTableIndex;
+  /** Maximal number of supported MOs
+   * this is not the AO basis because cusp correction is applied on the MO directly.
+   */
+  int MaxOrbSize = 0;
+
+  ///COMPLEX WON'T WORK
+  using COT = CuspCorrectionAtomicBasis<RealType>;
+
+  /** container of the unique pointers to the Atomic Orbitals
+   *
+   * size of LOBasisSet = number of centers (atoms)
+   * should use unique_ptr once COT is fixed for better performance
+   */
+  std::vector<std::shared_ptr<const COT>> LOBasisSet;
+
+  Matrix<RealType> myVGL; // scratch, resized to 5 x MaxOrbSize by setOrbitalSetSize()
+
+public:
+  /** constructor
+   * @param ions ionic system
+   * @param els electronic system
+   */
+  SoaCuspCorrectionT(ParticleSet& ions, ParticleSet& els);
+
+  /** copy constructor */
+  SoaCuspCorrectionT(const SoaCuspCorrectionT& a);
+
+  /** set the number of orbitals this cusp correction may serve. call this before adding any correction centers.
+   */
+  void setOrbitalSetSize(int norbs);
+
+  /** compute VGL and add it to vgl
+   * @param P quantum particleset
+   * @param iat particle index; temporary distances are used when iat is the active particle of P
+   * @param vgl five components (value, gradient x/y/z, laplacian), one entry per orbital
+   * The correction is accumulated (+=) into vgl.
+   */
+  void evaluateVGL(const ParticleSet& P, int iat, VGLVector& vgl);
+
+  void evaluate_vgl(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
+
+  void evaluate_vgl(const ParticleSet& P, int iat, int idx, ValueMatrix& psi, GradMatrix& dpsi, ValueMatrix& d2psi);
+
+  /** add the correction values for particle iat to psi
+   *
+   * Uses getTempDists() when iat is the active particle of P, getDistRow(iat) otherwise
+   */
+  void evaluateV(const ParticleSet& P, int iat, ValueVector& psi);
+
+  /** add a new set of Centered Atomic Orbitals
+   * @param icenter the index of the center
+   * @param aos a set of Centered Atomic Orbitals
+   */
+  void add(int icenter, std::unique_ptr<COT> aos);
+
+  void addVGL(const ParticleSet& P, int iat, VGLVector& vgl) { evaluateVGL(P, iat, vgl); }
+  void addV(const ParticleSet& P, int iat, ValueVector& psi) { evaluateV(P, iat, psi); }
+  void add_vgl(const ParticleSet& P, int iat, int idx, ValueMatrix& vals, GradMatrix& dpsi, ValueMatrix& d2psi)
+  {
+    evaluate_vgl(P, iat, idx, vals, dpsi, d2psi);
+  }
+  void add_vector_vgl(const ParticleSet& P, int iat, ValueVector& vals, GradVector& dpsi, ValueVector& d2psi)
+  {
+    evaluate_vgl(P, iat, vals, dpsi, d2psi);
+  }
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/PlaneWave/PWBasisT.cpp b/src/QMCWaveFunctions/PlaneWave/PWBasisT.cpp
new file mode 100644
index 0000000000..fe00655309
--- /dev/null
+++ b/src/QMCWaveFunctions/PlaneWave/PWBasisT.cpp
@@ -0,0 +1,197 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+/** @file PWBasisT.cpp
+ * @brief Definition of member functions of Plane-wave basis set
+ */
+#include "PWBasisT.h"
+
+namespace qmcplusplus
+{
+/** Read the g-vectors of the basis and, if requested, trim/resize through reset().
+ * The g-vectors always come from "/electrons/kpoint_0/gvectors"; pwname is unused here and
+ * pwmultname is only written to the log. Aborts when no plane wave is available.
+ * @return the number of plane waves
+ */
+template<class T>
+int PWBasisT<T>::readbasis(hdf_archive& h5basisgroup,
+                           RealType ecutoff,
+                           const ParticleLayout& lat,
+                           const std::string& pwname,
+                           const std::string& pwmultname,
+                           bool resizeContainer)
+{
+  ///make a local copy
+  ecut    = ecutoff;
+  Lattice = lat;
+  app_log() << "  PWBasisT<T>::" << pwmultname << " is found " << std::endl;
+  h5basisgroup.read(gvecs, "/electrons/kpoint_0/gvectors");
+
+  NumPlaneWaves = std::max(gvecs.size(), kplusgvecs_cart.size());
+  if (NumPlaneWaves == 0)
+  {
+    app_error() << "  PWBasisT<T>::readbasis Basis is missing. Abort " << std::endl;
+    abort(); //FIX_ABORT
+  }
+
+  //Cartesian vectors are only derived from the reduced ones when none are stored yet
+  if (kplusgvecs_cart.empty())
+  {
+    kplusgvecs_cart.resize(NumPlaneWaves);
+    for (int ig = 0; ig < NumPlaneWaves; ++ig)
+      kplusgvecs_cart[ig] = Lattice.k_cart(gvecs[ig]);
+  }
+
+  //reset() runs trimforecut(), which fills k+G, -|k+G|^2 and inputmap, and then resizes Z, Zv and phi
+  if (resizeContainer)
+    reset();
+  return NumPlaneWaves;
+}
+
+/// Adopt twist tang and rebuild via reset(), unless maxmaxg is already non-zero and tang equals the stored twist.
+template<class T>
+void PWBasisT<T>::setTwistAngle(const PosType& tang)
+{
+  const PosType shift = twist - tang;
+  if (maxmaxg && dot(shift, shift) < std::numeric_limits<RealType>::epsilon())
+    return; // same twist and maxmaxg already set: nothing to rebuild
+  twist = tang;
+  reset();
+}
+
+template<class T>
+void PWBasisT<T>::reset()
+{
+  //trim first: every buffer below is sized by the NumPlaneWaves that trimforecut() leaves behind
+  trimforecut();
+  phi.resize(NumPlaneWaves);
+  Zv.resize(NumPlaneWaves);
+  Z.resize(NumPlaneWaves, 2 + DIM);
+}
+
+/** Rebuild gvecs, k+G (Cartesian) and -|k+G|^2 for the current twist and fill inputmap.
+ *
+ * No plane wave is dropped at present: the ecut test is compiled out (#if 0), so inputmap[ig] == ig.
+ */
+template<class T>
+void PWBasisT<T>::trimforecut()
+{
+  //Convert the twist angle to Cartesian coordinates.
+  twist_cart = Lattice.k_cart(twist);
+  inputmap.resize(NumPlaneWaves);
+  app_log() << "  PWBasisT<T>::TwistAngle (unit) =" << twist << std::endl;
+  app_log() << "  PWBasisT<T>::TwistAngle (cart) =" << twist_cart << std::endl;
+  app_log() << "  PWBasisT<T>::trimforecut NumPlaneWaves (before) =" << NumPlaneWaves << std::endl;
+  std::vector<GIndex_t> gvecCopy(gvecs);
+  std::vector<PosType> gcartCopy(kplusgvecs_cart);
+  gvecs.clear();
+  kplusgvecs_cart.clear();
+  minusModKplusG2.clear(); //rebuilt below like the two lists above; without this a second call appends stale entries
+  minusModKplusG2.reserve(NumPlaneWaves);
+  //  RealType kcutoff2 = 2.0*ecut; //std::sqrt(2.0*ecut);
+  int ngIn = NumPlaneWaves;
+  for (int ig = 0, newig = 0; ig < ngIn; ig++)
+  {
+    //PosType tempvec = Lattice.k_cart(gvecCopy[ig]+twist);
+    //NOTE(review): gcartCopy already contains the twist added by a previous call, so a second call
+    //  (e.g. through setTwistAngle) adds a twist again - confirm whether repeated calls are expected.
+    PosType tempvec = gcartCopy[ig] + twist_cart;
+    RealType mod2   = dot(tempvec, tempvec);
+
+    // Keep all the g-vectors
+    // The cutoff energy is not stored in the HDF file now.
+    // Is truncating the gvectors to a spherical shell necessary?
+    gvecs.push_back(gvecCopy[ig]);
+    kplusgvecs_cart.push_back(tempvec);
+    minusModKplusG2.push_back(-mod2);
+    //Remember which position in the HDF5 file this came from...for coefficients
+    inputmap[ig] = newig++;
+#if 0
+    if(mod2<=kcutoff2)
+    {
+      gvecs.push_back(gvecCopy[ig]);
+      kplusgvecs_cart.push_back(tempvec);
+      minusModKplusG2.push_back(-mod2);
+      //Remember which position in the HDF5 file this came from...for coefficients
+      inputmap[ig] = newig++;
+    }
+    else
+    {
+      inputmap[ig] = -1; //Temporary value...need to know final NumPlaneWaves.
+      NumPlaneWaves--;
+    }
+#endif
+  }
+#if defined(PWBASIS_USE_RECURSIVE) //the switch PWBasisT.h tests; the former spelling PWBasisT_USE_RECURSIVE never matched it
+  //Store the maximum number of translations, within ecut, of any reciprocal cell vector.
+  for (int ig = 0; ig < NumPlaneWaves; ig++)
+    for (int i = 0; i < OHMMS_DIM; i++)
+      if (std::abs(gvecs[ig][i]) > maxg[i])
+        maxg[i] = std::abs(gvecs[ig][i]);
+  gvecs_shifted.resize(NumPlaneWaves);
+  for (int ig = 0; ig < NumPlaneWaves; ig++)
+    gvecs_shifted[ig] = gvecs[ig] + maxg;
+  maxmaxg = std::max(maxg[0], std::max(maxg[1], maxg[2]));
+  //changes the order???? ok
+  C.resize(3, 2 * maxmaxg + 2);
+#else
+  maxmaxg = 1;
+#endif
+  //    //make a copy of input to gvecCopy
+  ////    for(int ig=0, newig=0; ig<ngIn; ig++) {
+  //      //Check size of this g-vector
+  //      PosType tempvec = Lattice.k_cart(gvecCopy[ig]+twist);
+  //      RealType mod2 = dot(tempvec,tempvec);
+  //      if(mod2<=kcutoff2){ //Keep this element
+  //        gvecs.push_back(gvecCopy[ig]);
+  //        kplusgvecs_cart.push_back(tempvec);
+  //        minusModKplusG2.push_back(-mod2);
+  //        //Remember which position in the HDF5 file this came from...for coefficients
+  //        inputmap[ig] = newig++;
+  ////#if !defined(QMC_COMPLEX)
+  ////        //Build the negative vector. See comment at declaration (above) for details.
+  ////        if(gvecCopy[ig][0] < 0)
+  ////          negative.push_back(0);
+  ////        else if(gvecCopy[ig][0] > 0)
+  ////          negative.push_back(1);
+  ////        else { //gx == 0, test gy
+  ////          if(gvecCopy[ig][1] < 0)
+  ////            negative.push_back(0);
+  ////          else if(gvecCopy[ig][1] > 0)
+  ////            negative.push_back(1);
+  ////          else { //gx == gy == 0; test gz. If gz==0 also, take negative=1 (arbitrary)
+  ////            if(gvecCopy[ig][2] < 0)
+  ////              negative.push_back(0);
+  ////            else
+  ////              negative.push_back(1);
+  ////          }
+  ////        }
+  ////#endif
+  //      } else {
+  //        inputmap[ig] = -1; //Temporary value...need to know final NumPlaneWaves.
+  //        NumPlaneWaves--;
+  //      }
+  //    }
+  //Finalize the basis. Fix temporary values of inputmap.
+  //for(int ig=0; ig<inputmap.size(); ig++)
+  //  if(inputmap[ig] == -1)
+  //    inputmap[ig] = NumPlaneWaves; //For dumping coefficients of PWs>ecut
+  app_log() << "                       NumPlaneWaves (after)  =" << NumPlaneWaves << std::endl;
+}
+// template class PWBasisT<double>;
+// template class PWBasisT<float>;
+template class PWBasisT<std::complex<double>>; // only complex T is instantiated: the header uses T as ComplexType
+template class PWBasisT<std::complex<float>>;
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/PlaneWave/PWBasisT.h b/src/QMCWaveFunctions/PlaneWave/PWBasisT.h
new file mode 100644
index 0000000000..a3acaf7aad
--- /dev/null
+++ b/src/QMCWaveFunctions/PlaneWave/PWBasisT.h
@@ -0,0 +1,343 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+/** @file PWBasisT.h
+ * @brief Declaration of Plane-wave basis set
+ */
+#ifndef QMCPLUSPLUS_PLANEWAVEBASIST_BLAS_H
+#define QMCPLUSPLUS_PLANEWAVEBASIST_BLAS_H
+
+#include "Configuration.h"
+#include "Particle/ParticleSet.h"
+#include "Message/Communicate.h"
+#include "type_traits/complex_help.hpp"
+#include "CPU/e2iphi.h"
+#include "hdf/hdf_archive.h"
+
+/** If defined, use recursive method to build the basis set for each position
+ *
+ * performance improvement is questionable: load vs sin/cos
+ */
+//#define PWBASIS_USE_RECURSIVE
+
+namespace qmcplusplus
+{
+/** Plane-wave basis set
+ *
+ * Rewrite of PlaneWaveBasis to utilize blas II or III
+ * Support more general input tags
+ */
+template<typename T>
+class PWBasisT : public QMCTraits
+{
+public:
+  using RealType = typename RealAlias_impl<T>::value_type;
+  using ComplexType = T;
+  using PosType = TinyVector<RealType, DIM>;
+  using IndexType = QMCTraits::IndexType;
+  using ParticleLayout = ParticleSet::ParticleLayout;
+  using GIndex_t       = TinyVector<IndexType, 3>;
+
+private:
+  ///max of maxg[i]
+  int maxmaxg;
+  //Need to store the maximum translation in each dimension to use recursive PW generation.
+  GIndex_t maxg;
+  //The PlaneWave data - keep all of these strictly private to prevent inconsistencies.
+  RealType ecut;
+  ///twist angle in reduced
+  PosType twist;
+  ///twist angle in cartesian
+  PosType twist_cart; //Twist angle in reduced and Cartesian.
+
+  ///gvecs in reduced coordinates
+  std::vector<GIndex_t> gvecs;
+  ///Reduced coordinates with offset gvecs_shifted[][idim]=gvecs[][idim]+maxg[idim]
+  std::vector<GIndex_t> gvecs_shifted;
+
+  std::vector<RealType> minusModKplusG2;
+  std::vector<PosType> kplusgvecs_cart; //Cartesian.
+
+  Matrix<ComplexType> C;
+  //Real wavefunctions here. Now the basis states are cos(Gr) or sin(Gr), not exp(iGr)
+  //We need a way of switching between them for G -> -G, otherwise the
+  //determinant will have multiple rows that are equal (to within a constant factor)
+  //of others, giving a zero determinant. For this, we build a vector (negative) which
+  //stores whether a vector is "+" or "-" (with some criterion, to be defined). We
+  //then switch from cos() to sin() based on the value of this input.
+  std::vector<int> negative;
+
+public:
+  //enumeration for the value, laplacian, gradients and size
+  enum
+  {
+    PW_VALUE,
+    PW_LAP,
+    PW_GRADX,
+    PW_GRADY,
+    PW_GRADZ,
+    PW_MAXINDEX
+  };
+
+  Matrix<ComplexType> Z;
+
+  Vector<ComplexType> Zv;
+  /* inputmap is used for a memory efficient way of
+   *
+   * importing the basis-set and coefficients when the desired energy cutoff may be
+   * lower than that represented by all data in the wavefunction input file.
+   * The steps taken are:
+   *  - Read all basis data.
+   *  - Create map. inputmap[i] = j; j is correct PW index, i is input coef index.
+   *    For basis elements outside cutoff, inputmap[i] = gvecs.size();
+   *  - Coefficients are in same order as PWs in inputfile => simply file into
+   *    storage matrix using the map as the input. All excess coefficients are
+   *    put into [gvecs.size()] and not used. i.e. coefs need to be allocated 1 higher.
+   * Such an approach is not needed for Gamma-point only calculations because the
+   * basis is spherically ordered. However, when a twist-angle is used, the "sphere"
+   * of allowed planewaves is shifted.
+   */
+
+  Vector<RealType> phi;
+
+  std::vector<int> inputmap;
+
+  ///total number of basis functions
+  int NumPlaneWaves;
+
+  ///local copy of Lattice
+  ParticleLayout Lattice;
+
+  ///default constructor
+  PWBasisT() : maxmaxg(0), NumPlaneWaves(0) {}
+
+  ///constructor
+  PWBasisT(const PosType& twistangle) : maxmaxg(0), twist(twistangle), NumPlaneWaves(0) {}
+
+  ~PWBasisT() {}
+
+  ///set the twist angle
+  void setTwistAngle(const PosType& tang);
+
+  ///reset
+  void reset();
+
+  /** Read basisset from hdf5 file. Apply ecut.
+   * @param h5basisgroup h5 node where basis is located
+   * @param ecutoff cutoff energy
+   * @param lat CrystalLattice
+   * @param resizeContainer if true, resize internal storage.
+   * @return the number of plane waves
+   */
+  int readbasis(hdf_archive& h5basisgroup,
+                RealType ecutoff,
+                const ParticleLayout& lat,
+                const std::string& pwname     = "planewaves",
+                const std::string& pwmultname = "multipliers",
+                bool resizeContainer          = true);
+
+  /** Remove basis elements if kinetic energy > ecut.
+   *
+   * Keep and indexmap so we know how to match coefficients on read.
+   */
+  void trimforecut();
+
+#if defined(PWBASIS_USE_RECURSIVE)
+  /** Fill the recursion coefficients matrix: row idim of C gets exp(i*(n - maxg[idim])*phi) for
+   * n = 0..2*maxg[idim], with phi = TWOPI * tau_red[idim] and tau_red = Lattice.toUnit(pos).
+   * @todo Generalize to non-orthorhombic cells
+   */
+  inline void BuildRecursionCoefs(const PosType& pos)
+  {
+    PosType tau_red(Lattice.toUnit(pos));
+//      RealType phi=TWOPI*tau_red[0];
+//      RealType nphi=maxg0*phi;
+//      ComplexType ct0(std::cos(phi),std::sin(phi));
+//      ComplexType t(std::cos(nphi),-std::sin(nphi));
+//      C0[0]=t;
+//      for(int n=1; n<=2*maxg0; n++) C0[n] = (t *= ct0);
+//
+//      phi=TWOPI*tau_red[1];
+//      nphi=maxg1*phi;
+//      ct0=ComplexType(std::cos(phi),std::sin(phi));
+//      t=ComplexType(std::cos(nphi),-std::sin(nphi));
+//      C1[0]=t;
+//      for(int n=1; n<=2*maxg1; n++) C1[n] = (t *= ct0);
+//
+//      phi=TWOPI*tau_red[2];
+//      nphi=maxg2*phi;
+//      ct0=ComplexType(std::cos(phi),std::sin(phi));
+//      t=ComplexType(std::cos(nphi),-std::sin(nphi));
+//      C2[0]=t;
+//      for(int n=1; n<=2*maxg2; n++) C2[n] = (t *= ct0);
+#pragma ivdep
+    for (int idim = 0; idim < 3; idim++)
+    {
+      int ng        = maxg[idim];
+      RealType phi  = TWOPI * tau_red[idim];
+      RealType nphi = ng * phi;
+      ComplexType Ctemp(std::cos(phi), std::sin(phi)); // exp(i*phi): factor applied at every step
+      ComplexType t(std::cos(nphi), -std::sin(nphi)); // exp(-i*ng*phi): entry n = 0
+      ComplexType* restrict cp_ptr = C[idim];
+      *cp_ptr++                    = t;
+      for (int n = 1; n <= 2 * ng; n++)
+      {
+        *cp_ptr++ = (t *= Ctemp);
+      }
+    }
+    //Base version
+    //#pragma ivdep
+    //      for(int idim=0; idim<3; idim++){
+    //        RealType phi=TWOPI*tau_red[idim];
+    //        ComplexType Ctemp(std::cos(phi),std::sin(phi));
+    //        int ng=maxg[idim];
+    //        ComplexType* restrict cp_ptr=C[idim]+ng;
+    //        ComplexType* restrict cn_ptr=C[idim]+ng-1;
+    //        *cp_ptr=1.0;
+    //        for(int n=1; n<=ng; n++,cn_ptr--){
+    //          ComplexType t(Ctemp*(*cp_ptr++));
+    //          *cp_ptr = t;
+    //          *cn_ptr = conj(t);
+    //        }
+    //      }
+    //Not valid for general supercell
+    //      // Cartesian of twist for 1,1,1 (reduced coordinates)
+    //      PosType G111(1.0,1.0,1.0);
+    //      G111 = Lattice.k_cart(G111);
+    //
+    //      //Precompute a small number of complex factors (PWs along b1,b2,b3 lines)
+    //      //using a fast recursion algorithm
+    //#pragma ivdep
+    //      for(int idim=0; idim<3; idim++){
+    //        //start the recursion with the 111 vector.
+    //        RealType phi = pos[idim] * G111[idim];
+    //        register ComplexType Ctemp(std::cos(phi), std::sin(phi));
+    //        int ng=maxg[idim];
+    //        ComplexType* restrict cp_ptr=C[idim]+ng;
+    //        ComplexType* restrict cn_ptr=C[idim]+ng-1;
+    //        *cp_ptr=1.0;
+    //        for(int n=1; n<=ng; n++,cn_ptr--){
+    //          ComplexType t(Ctemp*(*cp_ptr++));
+    //          *cp_ptr = t;
+    //          *cn_ptr = conj(t);
+    //        }
+    //      }
+  }
+
+  /// Fill Zv[ig] with the value of plane wave ig at pos, for ig in [0, NumPlaneWaves).
+  inline void evaluate(const PosType& pos)
+  {
+    // Rebuild the per-dimension factor table C for this position before it is read below.
+    BuildRecursionCoefs(pos);
+    const RealType twist_phase = dot(twist_cart, pos);
+    const ComplexType seed(std::cos(twist_phase), std::sin(twist_phase));
+    for (int ig = 0; ig < NumPlaneWaves; ++ig)
+    {
+      // Start from exp(i*twist.r) so that the accumulated phase is that of (twist+G).r
+      ComplexType wave(seed);
+      const auto& shifted = gvecs_shifted[ig];
+      // Multiply in the tabulated factor of each dimension, in the order 0, 1, 2.
+      for (int d = 0; d < 3; ++d)
+        wave *= C(d, shifted[d]);
+      Zv[ig] = wave;
+    }
+  }
+  /** Evaluate every plane wave and its derivatives for particle iat.
+   *
+   * The active position of particle iat is read from P and the recursion table is
+   * rebuilt for it. For each plane wave ig, five consecutive entries of Z are
+   * written: the value, minusModKplusG2[ig] times the value, and the three
+   * components of kplusgvecs_cart[ig] times i times the value.
+   * NOTE(review): Z is assumed to hold 5 * NumPlaneWaves entries - confirm where Z is sized.
+   */
+  inline void evaluateAll(const ParticleSet& P, int iat)
+  {
+    const PosType& pos(P.activeR(iat));
+    BuildRecursionCoefs(pos);
+    const RealType twist_phase = dot(twist_cart, pos);
+    const ComplexType seed(std::cos(twist_phase), std::sin(twist_phase));
+    // Z is filled through a raw pointer that advances five entries per plane wave.
+    ComplexType* restrict out = Z.data();
+    for (int ig = 0; ig < NumPlaneWaves; ++ig, out += 5)
+    {
+      // Seeded with exp(i*twist.r) so that the final phase is that of (twist+G).r
+      ComplexType wave(seed);
+      const auto& shifted = gvecs_shifted[ig];
+      for (int d = 0; d < 3; ++d)
+        wave *= C(d, shifted[d]);
+      // i * wave, shared by the three gradient entries below
+      const ComplexType i_wave(-wave.imag(), wave.real());
+      const auto& kpg = kplusgvecs_cart[ig];
+      out[0]          = wave;
+      out[1]          = minusModKplusG2[ig] * wave;
+      out[2]          = kpg[0] * i_wave;
+      out[3]          = kpg[1] * i_wave;
+      out[4]          = kpg[2] * i_wave;
+    }
+  }
+#else
+  inline void evaluate(const PosType& pos)
+  {
+    auto* const phase = phi.data(); // one phase (k+G).pos per plane wave
+    for (int ig = 0; ig < NumPlaneWaves; ++ig)
+      phase[ig] = dot(kplusgvecs_cart[ig], pos);
+    eval_e2iphi(NumPlaneWaves, phase, Zv.data()); // fills Zv from the phases (presumably exp(i*phi) - confirm)
+  }
+  /// Evaluate the values through evaluate(), then expand each one into five entries of Z.
+  inline void evaluateAll(const ParticleSet& P, int iat)
+  {
+    evaluate(P.activeR(iat)); // Zv now holds one value per plane wave
+    ComplexType* restrict out = Z.data();
+    for (int ig = 0; ig < NumPlaneWaves; ++ig, out += 5)
+    {
+      const ComplexType& wave = Zv[ig];
+      const ComplexType i_wave(-wave.imag(), wave.real()); // i * wave, reused by the gradient entries
+      const auto& kpg = kplusgvecs_cart[ig];
+      out[0]          = wave;
+      out[1]          = minusModKplusG2[ig] * wave;
+      out[2]          = kpg[0] * i_wave;
+      out[3]          = kpg[1] * i_wave;
+      out[4]          = kpg[2] * i_wave;
+    }
+  }
+#endif
+  //    /** Fill the recursion coefficients matrix.
+  //     *
+  //     * @todo Generalize to non-orthorhombic cells
+  //     */
+  //    void BuildRecursionCoefsByAdd(const PosType& pos)
+  //    {
+  //      // Cartesian of twist for 1,1,1 (reduced coordinates)
+  //      PosType G111(1.0,1.0,1.0);
+  //      G111 = Lattice.k_cart(G111);
+  //      //PosType redP=P.Lattice.toUnit(P.R[iat]);
+  //      //Precompute a small number of complex factors (PWs along b1,b2,b3 lines)
+  //      for(int idim=0; idim<3; idim++){
+  //        //start the recursion with the 111 vector.
+  //        RealType phi = pos[idim] * G111[idim];
+  //        int ng(maxg[idim]);
+  //        RealType* restrict cp_ptr=logC[idim]+ng;
+  //        RealType* restrict cn_ptr=logC[idim]+ng-1;
+  //        *cp_ptr=0.0;
+  //        //add INTEL vectorization
+  //        for(int n=1; n<=ng; n++,cn_ptr--){
+  //          RealType t(phi+*cp_ptr++);
+  //          *cp_ptr = t;
+  //          *cn_ptr = -t;
+  //        }
+  //      }
+  //    }
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSet.h b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSet.h
index 5add827a86..225033214b 100644
--- a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSet.h
+++ b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSet.h
@@ -15,8 +15,8 @@
 /** @file PWOrbitalSet.h
  * @brief Definition of member functions of Plane-wave basis set
  */
-#ifndef QMCPLUSPLUS_PLANEWAVE_ORBITALSET_BLAS_H
-#define QMCPLUSPLUS_PLANEWAVE_ORBITALSET_BLAS_H
+#ifndef QMCPLUSPLUS_PLANEWAVE_ORBITALSETT_BLAS_H
+#define QMCPLUSPLUS_PLANEWAVE_ORBITALSETT_BLAS_H
 
 #include "QMCWaveFunctions/PlaneWave/PWBasis.h"
 #include "QMCWaveFunctions/SPOSet.h"
diff --git a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp
new file mode 100644
index 0000000000..a3b1e135ec
--- /dev/null
+++ b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp
@@ -0,0 +1,145 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#include "Message/Communicate.h"
+#include "PWOrbitalSetT.h"
+#include "Numerics/MatrixOperators.h"
+
+namespace qmcplusplus
+{
+template<class T>
+PWOrbitalSetT<T>::~PWOrbitalSetT()
+{
+  if (OwnBasisSet)
+    delete myBasisSet; // deleting a null pointer is a no-op, so no separate null test is needed
+  if (!IsCloned)
+    delete this->C; // a clone only holds a copy of the C pointer, hence the IsCloned guard
+}
+
+template<class T>
+std::unique_ptr<SPOSetT<T>> PWOrbitalSetT<T>::makeClone() const
+{
+  auto twin        = std::make_unique<PWOrbitalSetT<T>>(*this); // copy-constructed: the C pointer is copied, not the matrix
+  twin->myBasisSet = new PWBasisT<T>(*myBasisSet); // NOTE(review): freed only if the copied OwnBasisSet is true - confirm
+  twin->IsCloned   = true;                         // tells the destructor of the copy to leave C alone
+  return twin;
+}
+
+template<class T>
+void PWOrbitalSetT<T>::setOrbitalSetSize(int /*norbs*/) { /* intentionally empty: the argument is ignored */ }
+
+template<class T>
+void PWOrbitalSetT<T>::resize(PWBasisPtr bset, int nbands, bool cleanup)
+{
+  // Adopt the basis; `cleanup` decides whether the destructor deletes it.
+  OwnBasisSet          = cleanup;
+  myBasisSet           = bset;
+  BasisSetSize         = bset->NumPlaneWaves;
+  this->OrbitalSetSize = nbands;
+  this->C = new ValueMatrix(this->OrbitalSetSize, BasisSetSize); // NOTE(review): a previously allocated C is not released here
+  this->Temp.resize(this->OrbitalSetSize, PW_MAXINDEX);
+  app_log() << "  PWOrbitalSetT<T>::resize OrbitalSetSize =" << this->OrbitalSetSize << " BasisSetSize = " << BasisSetSize << std::endl;
+}
+
+template<class T>
+void PWOrbitalSetT<T>::addVector(const std::vector<ComplexType>& coefs, int jorb)
+{
+  const std::vector<int>& gmap(myBasisSet->inputmap); // input index -> column of C; negative entries are dropped
+  const int ng = gmap.size();
+  if (ng != coefs.size())
+  {
+    app_error() << "  Input G map does not match the basis size of wave functions " << std::endl;
+    OHMMS::Controller->abort();
+  }
+  // Scatter the accepted complex coefficients into row jorb of the coefficient matrix.
+  for (int ig = 0; ig < ng; ++ig)
+  {
+    if (gmap[ig] >= 0)
+      (*this->C)[jorb][gmap[ig]] = coefs[ig];
+  }
+}
+
+template<class T>
+void PWOrbitalSetT<T>::addVector(const std::vector<RealType>& coefs, int jorb)
+{
+  const std::vector<int>& gmap(myBasisSet->inputmap); // input index -> column of C; negative entries are dropped
+  const int ng = gmap.size();
+  if (ng != coefs.size())
+  {
+    app_error() << "  Input G map does not match the basis size of wave functions " << std::endl;
+    OHMMS::Controller->abort();
+  }
+  // Scatter the accepted real coefficients into row jorb of the coefficient matrix.
+  for (int ig = 0; ig < ng; ++ig)
+  {
+    if (gmap[ig] >= 0)
+      (*this->C)[jorb][gmap[ig]] = coefs[ig];
+  }
+}
+
+template<class T>
+void PWOrbitalSetT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+{
+  // Step 1: evaluate the plane-wave basis at the active position of particle iat.
+  myBasisSet->evaluate(P.activeR(iat));
+  // Step 2: combine the basis values Zv with the coefficients (presumably psi = C * Zv - confirm).
+  ValueMatrix& coef_matrix = *(this->C);
+  MatrixOperators::product<T>(coef_matrix, myBasisSet->Zv, psi);
+}
+
+template<class T>
+void PWOrbitalSetT<T>::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+{
+  myBasisSet->evaluateAll(P, iat); // basis values and derivatives for particle iat only
+  MatrixOperators::product<T>(*(this->C), myBasisSet->Z, this->Temp);
+  // Temp is read as consecutive groups of PW_MAXINDEX entries, one group per orbital.
+  const T* const base = this->Temp.data();
+  for (int j = 0; j < this->OrbitalSetSize; ++j)
+  {
+    const T* restrict row = base + j * PW_MAXINDEX;
+    psi[j]                = row[PW_VALUE];
+    dpsi[j]               = GradType(row[PW_GRADX], row[PW_GRADY], row[PW_GRADZ]);
+    d2psi[j]              = row[PW_LAP];
+  }
+}
+
+template<class T>
+void PWOrbitalSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                            int first,
+                                            int last,
+                                            ValueMatrix& logdet,
+                                            GradMatrix& dlogdet,
+                                            ValueMatrix& d2logdet)
+{
+  for (int i = 0; first + i < last; ++i) // output row i belongs to particle first + i
+  {
+    myBasisSet->evaluateAll(P, first + i);
+    MatrixOperators::product<T>(*(this->C), myBasisSet->Z, this->Temp);
+    for (int j = 0; j < this->OrbitalSetSize; ++j)
+    {
+      const T* restrict row = this->Temp.data() + j * PW_MAXINDEX; // PW_MAXINDEX entries per orbital
+      logdet(i, j)          = row[PW_VALUE];
+      dlogdet(i, j)         = GradType(row[PW_GRADX], row[PW_GRADY], row[PW_GRADZ]);
+      d2logdet(i, j)        = row[PW_LAP];
+    }
+  }
+}
+
+// Explicit instantiations: the member definitions above are emitted for these T only.
+// NOTE: per the original author, this class only gets compiled if QMC_COMPLEX is defined,
+// so it is inherently complex and the real-valued instantiations
+// (PWOrbitalSetT<double>, PWOrbitalSetT<float>) are left out.
+template class PWOrbitalSetT<std::complex<double>>;
+template class PWOrbitalSetT<std::complex<float>>;
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h
new file mode 100644
index 0000000000..25c3e0d5c1
--- /dev/null
+++ b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h
@@ -0,0 +1,128 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+/** @file PWOrbitalSetT.h
+ * @brief Definition of member functions of Plane-wave basis set
+ */
+#ifndef QMCPLUSPLUS_PLANEWAVE_ORBITALSET_BLAS_H
+#define QMCPLUSPLUS_PLANEWAVE_ORBITALSET_BLAS_H
+
+#include "QMCWaveFunctions/PlaneWave/PWBasisT.h"
+#include "type_traits/complex_help.hpp"
+#include "QMCWaveFunctions/SPOSetT.h"
+#include "CPU/BLAS.hpp"
+
+namespace qmcplusplus
+{
+
+/// Orbital set whose orbitals are combinations, with coefficient matrix C, of the plane
+/// waves of a PWBasisT<T>. NOTE(review): C is a raw pointer shared between an object and its clones.
+template<class T>
+class PWOrbitalSetT : public SPOSetT<T>
+{
+public:
+  using RealType    = typename SPOSetT<T>::RealType;
+  using ComplexType = T;
+  using PosType     = typename SPOSetT<T>::PosType;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using GradType    = typename SPOSetT<T>::GradType;
+  using IndexType   = typename SPOSetT<T>::IndexType;
+
+  using BasisSet_t = PWBasisT<T>;
+  using PWBasisPtr = PWBasisT<T>*;
+
+  /** inherit the enum of BasisSet_t */
+  enum
+  {
+    PW_VALUE    = BasisSet_t::PW_VALUE,
+    PW_LAP      = BasisSet_t::PW_LAP,
+    PW_GRADX    = BasisSet_t::PW_GRADX,
+    PW_GRADY    = BasisSet_t::PW_GRADY,
+    PW_GRADZ    = BasisSet_t::PW_GRADZ,
+    PW_MAXINDEX = BasisSet_t::PW_MAXINDEX
+  };
+
+  /** Construct an empty set named my_name: no basis and no coefficient matrix yet;
+   *  both are provided later through resize(). */
+  PWOrbitalSetT(const std::string& my_name)
+      : SPOSetT<T>(my_name), OwnBasisSet(false), myBasisSet(nullptr), BasisSetSize(0), C(nullptr), IsCloned(false)
+  {}
+
+  std::string getClassName() const override { return "PWOrbitalSetT"; }
+
+  /** Deletes the basis set only if OwnBasisSet is true, and C only if this is not a clone.
+   *
+   * Builder takes care of who owns what
+   */
+  ~PWOrbitalSetT() override;
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const override;
+  /** resize the orbital base and allocate the coefficient matrix C
+   * @param bset PWBasis
+   * @param nbands number of bands
+   * @param cleanup if true, owns PWBasis. Will clean up.
+   */
+  void resize(PWBasisPtr bset, int nbands, bool cleanup = false);
+
+  /** Store the coefficients of orbital jorb. Builder class takes care of the assertion
+  */
+  void addVector(const std::vector<ComplexType>& coefs, int jorb);
+  void addVector(const std::vector<RealType>& coefs, int jorb);
+
+  void setOrbitalSetSize(int norbs) override;
+
+  inline T evaluate(int ib, const PosType& pos)
+  {
+    myBasisSet->evaluate(pos);
+    return BLAS::dot(BasisSetSize, (*C)[ib], myBasisSet->Zv.data());
+  }
+
+  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override;
+
+  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override;
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override;
+
+  /** boolean
+   *
+   * If true, the destructor has to delete the BasisSet
+   */
+  bool OwnBasisSet;
+  ///TwistAngle of this PWOrbitalSetT
+  PosType TwistAngle;
+  ///My basis set
+  PWBasisPtr myBasisSet;
+  ///number of basis functions, taken from the basis set in resize()
+  IndexType BasisSetSize;
+  /** pointer to matrix containing the coefficients
+   *
+   * makeClone copies only this pointer (not the matrix) and sets IsCloned on the copy
+   */
+  ValueMatrix* C;
+  ///if true, the destructor does not delete C
+  bool IsCloned;
+
+  /** temporary array to perform gemm operation */
+  Matrix<T> Temp;
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.cpp b/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.cpp
new file mode 100644
index 0000000000..3286624090
--- /dev/null
+++ b/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.cpp
@@ -0,0 +1,165 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+/** @file PWRealOrbitalSetT.cpp
+ * @brief declaration of the member functions of PWRealOrbitalSetT
+ *
+ * Not the most optimized method to use wavefunctions in a plane-wave basis.
+ */
+#include "Message/Communicate.h"
+#include "PWRealOrbitalSetT.h"
+#include "Numerics/MatrixOperators.h"
+#include "type_traits/ConvertToReal.h"
+
+namespace qmcplusplus
+{
+template<class T>
+PWRealOrbitalSetT<T>::~PWRealOrbitalSetT()
+{
+  if (OwnBasisSet)
+    delete myBasisSet; // owned basis only; deleting a null pointer is a no-op
+}
+
+template<class T>
+std::unique_ptr<SPOSetT<T>> PWRealOrbitalSetT<T>::makeClone() const
+{
+  auto twin        = std::make_unique<PWRealOrbitalSetT<T>>(*this); // copy-constructed from *this
+  twin->myBasisSet = new PWBasis(*myBasisSet); // NOTE(review): freed only if the copied OwnBasisSet is true - confirm
+  return twin;
+}
+
+template<class T>
+void PWRealOrbitalSetT<T>::setOrbitalSetSize(int /*norbs*/)
+{} // intentionally empty: the argument is ignored
+
+template<class T>
+void PWRealOrbitalSetT<T>::resize(PWBasisPtr bset, int nbands, bool cleanup)
+{
+  // Adopt the basis; `cleanup` decides whether the destructor deletes it.
+  OwnBasisSet          = cleanup;
+  myBasisSet           = bset;
+  BasisSetSize         = bset->NumPlaneWaves;
+  this->OrbitalSetSize = nbands;
+  CC.resize(this->OrbitalSetSize, BasisSetSize);  // one coefficient per (orbital, plane wave)
+  Temp.resize(this->OrbitalSetSize, PW_MAXINDEX); // PW_MAXINDEX entries per orbital
+  tempPsi.resize(this->OrbitalSetSize);           // one complex value per orbital
+  app_log() << "  PWRealOrbitalSetT::resize OrbitalSetSize =" << this->OrbitalSetSize << " BasisSetSize = " << BasisSetSize << std::endl;
+}
+
+template<class T>
+void PWRealOrbitalSetT<T>::addVector(const std::vector<RealType>& coefs, int jorb)
+{
+  const std::vector<int>& gmap(myBasisSet->inputmap); // input index -> column of CC; negative entries are dropped
+  const int ng = gmap.size();
+  if (ng != coefs.size())
+  {
+    app_error() << "  Input G map does not match the basis size of wave functions " << std::endl;
+    OHMMS::Controller->abort();
+  }
+  // Scatter the accepted real coefficients into row jorb of CC.
+  for (int ig = 0; ig < ng; ++ig)
+  {
+    if (gmap[ig] >= 0)
+      CC[jorb][gmap[ig]] = coefs[ig];
+  }
+}
+
+template<class T>
+void PWRealOrbitalSetT<T>::addVector(const std::vector<ComplexType>& coefs, int jorb)
+{
+  const std::vector<int>& gmap(myBasisSet->inputmap); // input index -> column of CC; negative entries are dropped
+  const int ng = gmap.size();
+  if (ng != coefs.size())
+  {
+    app_error() << "  Input G map does not match the basis size of wave functions " << std::endl;
+    OHMMS::Controller->abort();
+  }
+  // Scatter the accepted complex coefficients into row jorb of CC.
+  for (int ig = 0; ig < ng; ++ig)
+  {
+    if (gmap[ig] >= 0)
+      CC[jorb][gmap[ig]] = coefs[ig];
+  }
+}
+
+template<class T>
+void PWRealOrbitalSetT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+{
+  myBasisSet->evaluate(P.activeR(iat));                  // plane-wave values at the active position of iat
+  MatrixOperators::product(CC, myBasisSet->Zv, tempPsi); // complex orbital values are written to tempPsi
+  for (int j = 0; j < this->OrbitalSetSize; ++j)
+    psi[j] = tempPsi[j].real(); // only the real part is handed back
+}
+
+template<class T>
+void PWRealOrbitalSetT<T>::evaluateVGL(const ParticleSet& P,
+                                       int iat,
+                                       ValueVector& psi,
+                                       GradVector& dpsi,
+                                       ValueVector& d2psi)
+{
+  myBasisSet->evaluateAll(P, iat);                   // basis values and derivatives for particle iat
+  MatrixOperators::product(CC, myBasisSet->Z, Temp); // complex results, PW_MAXINDEX entries per orbital
+  const ComplexType* restrict tptr = Temp.data();
+  for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX)
+  { // GradType is spelled through SPOSetT<T>:: because unqualified names are not looked up in a dependent base
+    psi[j]   = tptr[PW_VALUE].real(); // only the real parts are kept
+    d2psi[j] = tptr[PW_LAP].real();
+#if OHMMS_DIM == 3
+    dpsi[j] = typename SPOSetT<T>::GradType(tptr[PW_GRADX].real(), tptr[PW_GRADY].real(), tptr[PW_GRADZ].real());
+#elif OHMMS_DIM == 2
+    dpsi[j] = typename SPOSetT<T>::GradType(tptr[PW_GRADX].real(), tptr[PW_GRADY].real());
+#elif OHMMS_DIM == 1
+    dpsi[j] = typename SPOSetT<T>::GradType(tptr[PW_GRADX].real());
+#else
+#error "Only physical dimensions 1/2/3 are supported."
+#endif
+  }
+}
+
+template<class T>
+void PWRealOrbitalSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                                int first,
+                                                int last,
+                                                ValueMatrix& logdet,
+                                                GradMatrix& dlogdet,
+                                                ValueMatrix& d2logdet)
+{
+  for (int iat = first, i = 0; iat < last; iat++, i++) // output row i belongs to particle iat
+  {
+    myBasisSet->evaluateAll(P, iat);
+    MatrixOperators::product(CC, myBasisSet->Z, Temp);
+    const ComplexType* restrict tptr = Temp.data();
+    for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX)
+    { // GradType is spelled through SPOSetT<T>:: because unqualified names are not looked up in a dependent base
+      convertToReal(tptr[PW_VALUE], logdet(i, j));
+      convertToReal(tptr[PW_LAP], d2logdet(i, j));
+#if OHMMS_DIM == 3
+      dlogdet(i, j) = typename SPOSetT<T>::GradType(tptr[PW_GRADX].real(), tptr[PW_GRADY].real(), tptr[PW_GRADZ].real());
+#elif OHMMS_DIM == 2
+      dlogdet(i, j) = typename SPOSetT<T>::GradType(tptr[PW_GRADX].real(), tptr[PW_GRADY].real());
+#elif OHMMS_DIM == 1
+      dlogdet(i, j) = typename SPOSetT<T>::GradType(tptr[PW_GRADX].real());
+#else
+#error "Only physical dimensions 1/2/3 are supported."
+#endif
+    }
+  }
+}
+
+template class PWRealOrbitalSetT<double>; // explicit instantiations of the class defined in this file,
+template class PWRealOrbitalSetT<float>;  // so that the member definitions above are actually emitted
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.h b/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.h
new file mode 100644
index 0000000000..8455b1e561
--- /dev/null
+++ b/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.h
@@ -0,0 +1,143 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+/** @file PWRealOrbitalSetT.h
+ * @brief Define PWRealOrbitalSetT derived from SPOSetT
+ *
+ * This is a specialized single-particle orbital set for real trial
+ * wavefunctions and enabled with QMC_COMPLEX=0
+ */
+#ifndef QMCPLUSPLUS_PLANEWAVE_REALORBITALSETT_BLAS_H
+#define QMCPLUSPLUS_PLANEWAVE_REALORBITALSETT_BLAS_H
+
+#include "QMCWaveFunctions/PlaneWave/PWBasis.h"
+#include "QMCWaveFunctions/SPOSetT.h"
+#include "CPU/BLAS.hpp"
+
+namespace qmcplusplus
+{
+template<class T>
+class PWRealOrbitalSetT : public SPOSetT<T>
+{
+public:
+  using IndexType   = typename SPOSetT<T>::IndexType;
+  using RealType    = typename SPOSetT<T>::RealType;
+  using ComplexType = typename SPOSetT<T>::ComplexType;
+  using PosType     = typename SPOSetT<T>::PosType;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using HessMatrix  = typename SPOSetT<T>::HessMatrix;
+
+  using BasisSet_t = PWBasis;
+  using PWBasisPtr = BasisSet_t*;
+
+  /** re-export the index constants of BasisSet_t under the same names */
+  enum
+  {
+    PW_VALUE    = BasisSet_t::PW_VALUE,
+    PW_LAP      = BasisSet_t::PW_LAP,
+    PW_GRADX    = BasisSet_t::PW_GRADX,
+    PW_GRADY    = BasisSet_t::PW_GRADY,
+    PW_GRADZ    = BasisSet_t::PW_GRADZ,
+    PW_MAXINDEX = BasisSet_t::PW_MAXINDEX
+  };
+
+  /** Construct an empty set named my_name; the basis is attached later through resize()
+  */
+  PWRealOrbitalSetT(const std::string& my_name)
+      : SPOSetT<T>(my_name), OwnBasisSet(false), myBasisSet(nullptr), BasisSetSize(0)
+  {}
+
+  std::string getClassName() const override { return "PWRealOrbitalSetT"; }
+
+  /** deletes the BasisSet only if this object owns it (OwnBasisSet)
+   *
+   * Builder takes care of who owns what
+   */
+  ~PWRealOrbitalSetT() override;
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const override;
+
+  /** attach a basis and size the coefficient and work arrays
+   * @param bset PWBasis
+   * @param nbands number of bands
+   * @param cleanup if true, owns PWBasis. Will clean up.
+   */
+  void resize(PWBasisPtr bset, int nbands, bool cleanup = false);
+
+  /** store the coefficients of the jorb-th orbital
+   * @param coefs real input data
+   * @param jorb orbital index
+   */
+  void addVector(const std::vector<RealType>& coefs, int jorb);
+
+  /** store the coefficients of the jorb-th orbital
+   * @param coefs complex input data
+   * @param jorb orbital index
+   */
+  void addVector(const std::vector<ComplexType>& coefs, int jorb);
+
+  void setOrbitalSetSize(int norbs) override;
+
+  inline T evaluate(int ib, const PosType& pos)
+  {
+    myBasisSet->evaluate(pos);
+    const auto overlap = BLAS::dot(BasisSetSize, CC[ib], myBasisSet->Zv.data());
+    return real(overlap);
+  }
+
+  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override;
+
+  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override;
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override;
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& grad_grad_logdet) override
+  {
+    APP_ABORT("Need specialization of evaluate_notranspose() for grad_grad_logdet. \n");
+  }
+
+  /** ownership flag
+   *
+   * If true, the destructor deletes the BasisSet
+   */
+  bool OwnBasisSet;
+  ///TwistAngle of this PWRealOrbitalSetT
+  PosType TwistAngle;
+  ///the plane-wave basis; not owned unless OwnBasisSet is true
+  PWBasisPtr myBasisSet;
+  ///number of basis functions, taken from the basis set in resize()
+  IndexType BasisSetSize;
+  ///complex plane-wave coefficients, indexed (iband, g-vector)
+  Matrix<ComplexType> CC;
+  ///work matrix receiving the gemm result, PW_MAXINDEX entries per orbital
+  Matrix<ComplexType> Temp;
+  ///complex work vector filled before the real parts are copied to psi
+  Vector<ComplexType> tempPsi;
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/RotatedSPOsT.cpp b/src/QMCWaveFunctions/RotatedSPOsT.cpp
new file mode 100644
index 0000000000..a8a91bfa6a
--- /dev/null
+++ b/src/QMCWaveFunctions/RotatedSPOsT.cpp
@@ -0,0 +1,1834 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) QMCPACK developers.
+//
+// File developed by: Sergio D. Pineda Flores, sergio_pinedaflores@berkeley.edu,
+//                      University of California, Berkeley
+//                    Eric Neuscamman, eneuscamman@berkeley.edu, University of California, Berkeley
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//
+// File created by: Sergio D. Pineda Flores, sergio_pinedaflores@berkeley.edu,
+//                    University of California, Berkeley
+//////////////////////////////////////////////////////////////////////////////////////
+#include "RotatedSPOsT.h"
+
+#include "CPU/BLAS.hpp"
+#include "Numerics/DeterminantOperators.h"
+#include "Numerics/MatrixOperators.h"
+#include "io/hdf/hdf_archive.h"
+
+namespace qmcplusplus
+{
+template<typename T>
+RotatedSPOsT<T>::RotatedSPOsT(const std::string& my_name, std::unique_ptr<SPOSetT<T>>&& spos)
+    : SPOSetT<T>(my_name),
+      OptimizableObject(my_name), // both bases are constructed with the same name
+      Phi(std::move(spos)),       // the wrapped orbital set is moved in
+      nel_major_(0),
+      params_supplied(false), // switched on by setRotationParameters()
+      apply_rotation_timer_(createGlobalTimer("RotatedSPOsT::apply_rotation", timer_level_fine))
+{
+  this->OrbitalSetSize = Phi->getOrbitalSetSize(); // report the same orbital count as the wrapped set
+}
+
+// Destructor: no manual cleanup is performed; members are destroyed as usual.
+template<typename T>
+RotatedSPOsT<T>::~RotatedSPOsT() = default;
+
+template<typename T>
+void RotatedSPOsT<T>::setRotationParameters(const std::vector<RealType>& param_list)
+{
+  params          = param_list; // keep a private copy of the caller's values
+  params_supplied = true;       // buildOptVariables() tests this flag before reading params
+}
+
+template<typename T>
+void RotatedSPOsT<T>::createRotationIndices(int nel, int nmo, RotationIndices& rot_indices)
+{
+  for (int core = 0; core < nel; ++core)   // first index runs over orbitals [0, nel)
+    for (int act = nel; act < nmo; ++act)  // second index runs over orbitals [nel, nmo)
+      rot_indices.emplace_back(core, act); // appended; existing entries are kept
+}
+
+template<typename T>
+void RotatedSPOsT<T>::createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices)
+{
+  // Room for one entry per unordered orbital pair: nmo choose 2.
+  rot_indices.reserve(nmo * (nmo - 1) / 2);
+  // Core-active pairs go first, generated by the same loops as in
+  // createRotationIndices(), so this list matches the other one up front.
+  for (int core = 0; core < nel; ++core)
+    for (int act = nel; act < nmo; ++act)
+      rot_indices.emplace_back(core, act);
+
+  // Core-core pairs (first < second) follow.
+  for (int lo = 0; lo < nel; ++lo)
+    for (int hi = lo + 1; hi < nel; ++hi)
+      rot_indices.emplace_back(lo, hi);
+
+  // Active-active pairs (first < second) close the list.
+  for (int lo = nel; lo < nmo; ++lo)
+    for (int hi = lo + 1; hi < nmo; ++hi)
+      rot_indices.emplace_back(lo, hi);
+}
+
+template<typename T>
+void RotatedSPOsT<T>::constructAntiSymmetricMatrix(const RotationIndices& rot_indices,
+                                                   const std::vector<RealType>& param,
+                                                   ValueMatrix& rot_mat)
+{
+  // One parameter per index pair; rot_mat must already be sized by the caller.
+  assert(rot_indices.size() == param.size());
+  // Clear everything first so entries not named by rot_indices are zero.
+  rot_mat = 0.0;
+
+  for (int k = 0; k < rot_indices.size(); k++)
+  {
+    const auto& pq       = rot_indices[k];
+    const RealType value = param[k];
+    // For the pair (p, q) = (first, second): +value goes to [q][p] and
+    // -value to the mirrored entry [p][q].
+    rot_mat[pq.second][pq.first] = value;
+    rot_mat[pq.first][pq.second] = -value;
+  }
+}
+
+template<typename T>
+void RotatedSPOsT<T>::extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices,
+                                                           const ValueMatrix& rot_mat,
+                                                           std::vector<RealType>& param)
+{
+  // param must already hold one slot per index pair (rot_mat is assumed big enough).
+  assert(rot_indices.size() == param.size());
+
+  // Reads back the [second][first] entry that constructAntiSymmetricMatrix() writes.
+  for (int k = 0; k < rot_indices.size(); k++)
+  {
+    const auto& pq = rot_indices[k];
+    param[k]       = rot_mat[pq.second][pq.first];
+  }
+}
+
+template<typename T>
+void RotatedSPOsT<T>::resetParametersExclusive(const opt_variables_type& active)
+{
+  // Adopt the optimizer's values from `active` for this object's rotation
+  // parameters and rotate the orbitals by the resulting change.
+  const int nact = m_act_rot_inds.size();
+  std::vector<RealType> delta_param(nact); // value-initialized: untouched entries mean "no change"
+
+  for (int i = 0; i < nact; i++)
+  {
+    const int loc = this->myVars.where(i);
+    // A negative location means the variable is absent from `active` (the
+    // derivative routines in this file test where() >= 0 for the same reason);
+    // indexing active[loc] would then read out of bounds, so leave it as is.
+    if (loc < 0)
+      continue;
+    delta_param[i]  = active[loc] - this->myVars[i];
+    this->myVars[i] = active[loc];
+  }
+
+  if (use_global_rot_)
+  {
+    const int nfull = m_full_rot_inds.size();
+    assert(nfull >= nact);
+    std::vector<RealType> old_param(nfull);
+    std::vector<RealType> new_param(nfull);
+    for (int i = 0; i < nfull; i++)
+      old_param[i] = myVarsFull[i];
+
+    // Compose the delta with the stored global rotation; new_param receives
+    // the parameters of the combined rotation.
+    applyDeltaRotation(delta_param, old_param, new_param);
+
+    for (int i = 0; i < nfull; i++)
+      myVarsFull[i] = new_param[i];
+  }
+  else
+  {
+    apply_rotation(delta_param, false);
+    history_params_.push_back(delta_param); // recorded so applyRotationHistory() can replay it
+  }
+}
+
+template<typename T>
+void RotatedSPOsT<T>::writeVariationalParameters(hdf_archive& hout)
+{
+  // Everything goes under the "RotatedSPOsT" group; dataset names end with this SPO set's name.
+  const std::string spo_name = SPOSetT<T>::getName();
+  hout.push("RotatedSPOsT");
+  if (use_global_rot_)
+  {
+    // Global mode: one vector with every full rotation parameter.
+    hout.push("rotation_global");
+    std::string global_dataset = std::string("rotation_global_") + spo_name;
+    const int n_full           = myVarsFull.size();
+    std::vector<RealType> full_values(n_full);
+    for (int k = 0; k < n_full; k++)
+      full_values[k] = myVarsFull[k];
+
+    hout.write(full_values, global_dataset);
+    hout.pop();
+  }
+  else
+  {
+    // History mode: one matrix row per recorded rotation step. The column
+    // count is taken from the first row (0 for an empty history).
+    hout.push("rotation_history");
+    const size_t n_rows = history_params_.size();
+    const size_t n_cols = (n_rows > 0) ? history_params_[0].size() : 0;
+
+    Matrix<RealType> history(n_rows, n_cols);
+    for (size_t r = 0; r < n_rows; r++)
+      for (size_t c = 0; c < n_cols; c++)
+        history(r, c) = history_params_[r][c];
+
+    std::string history_dataset = std::string("rotation_history_") + spo_name;
+    hout.write(history, history_dataset);
+    hout.pop();
+  }
+
+  // myVars is saved as well so the object state can be restored exactly.
+  // The values aren't meaningful on their own, but they need to match those
+  // saved in VariableSet.
+  hout.push("rotation_params");
+  std::string params_dataset = std::string("rotation_params_") + spo_name;
+  const int n_active         = this->myVars.size();
+  std::vector<RealType> active_values(n_active);
+  for (int k = 0; k < n_active; k++)
+    active_values[k] = this->myVars[k];
+
+  hout.write(active_values, params_dataset);
+  hout.pop();
+  hout.pop();
+}
+
+template<typename T>
+void RotatedSPOsT<T>::readVariationalParameters(hdf_archive& hin)
+{
+  // Counterpart of writeVariationalParameters(): restores the global rotation
+  // or the rotation history (re-applying it to the orbitals), then myVars.
+  hin.push("RotatedSPOsT", false);
+
+  bool grp_hist_exists   = hin.is_group("rotation_history");
+  bool grp_global_exists = hin.is_group("rotation_global");
+  if (!grp_hist_exists && !grp_global_exists)
+    app_warning() << "Rotation parameters not found in VP file" << std::endl;
+
+  if (grp_global_exists)
+  {
+    hin.push("rotation_global", false);
+    std::string rot_global_name = std::string("rotation_global_") + SPOSetT<T>::getName();
+    std::vector<int> sizes(1);
+    if (!hin.getShape<RealType>(rot_global_name, sizes))
+      throw std::runtime_error("Failed to read rotation_global in VP file");
+
+    int nparam_full_actual = sizes[0];
+    int nparam_full        = myVarsFull.size();
+    if (nparam_full != nparam_full_actual)
+    {
+      std::ostringstream tmp_err;
+      tmp_err << "Expected number of full rotation parameters (" << nparam_full << ") does not match number in file ("
+              << nparam_full_actual << ")";
+      throw std::runtime_error(tmp_err.str());
+    }
+    std::vector<RealType> full_params(nparam_full);
+    hin.read(full_params, rot_global_name);
+    for (int i = 0; i < nparam_full; i++)
+      myVarsFull[i] = full_params[i];
+
+    hin.pop();
+    applyFullRotation(full_params, true);
+  }
+  else if (grp_hist_exists)
+  {
+    hin.push("rotation_history", false);
+    std::string rot_hist_name = std::string("rotation_history_") + SPOSetT<T>::getName();
+    std::vector<int> sizes(2);
+    if (!hin.getShape<RealType>(rot_hist_name, sizes))
+      throw std::runtime_error("Failed to read rotation history in VP file");
+
+    const int rows = sizes[0];
+    const int cols = sizes[1];
+    // Every row is replayed through apply_rotation(), which needs one value per
+    // active rotation but only asserts it; check the file's shape up front.
+    if (rows > 0 && cols != static_cast<int>(m_act_rot_inds.size()))
+      throw std::runtime_error("Rotation history in VP file does not match the number of rotation parameters");
+    history_params_.resize(rows);
+    Matrix<RealType> tmp(rows, cols);
+    hin.read(tmp, rot_hist_name);
+    for (int i = 0; i < rows; i++)
+    {
+      history_params_[i].resize(cols);
+      for (int j = 0; j < cols; j++)
+        history_params_[i][j] = tmp(i, j);
+    }
+
+    hin.pop();
+    applyRotationHistory();
+  }
+
+  hin.push("rotation_params", false);
+  std::string rot_param_name = std::string("rotation_params_") + SPOSetT<T>::getName();
+  std::vector<int> sizes(1);
+  if (!hin.getShape<RealType>(rot_param_name, sizes))
+    throw std::runtime_error("Failed to read rotation_params in VP file");
+
+  int nparam_actual = sizes[0];
+  int nparam        = this->myVars.size();
+  if (nparam != nparam_actual)
+  {
+    std::ostringstream tmp_err;
+    tmp_err << "Expected number of rotation parameters (" << nparam << ") does not match number in file ("
+            << nparam_actual << ")";
+    throw std::runtime_error(tmp_err.str());
+  }
+
+  std::vector<RealType> stored_params(nparam); // distinct name: does not shadow the member `params`
+  hin.read(stored_params, rot_param_name);
+  for (int i = 0; i < nparam; i++)
+    this->myVars[i] = stored_params[i];
+
+  hin.pop();
+
+  hin.pop();
+}
+
+template<typename T>
+void RotatedSPOsT<T>::buildOptVariables(const size_t nel)
+{
+#if !defined(QMC_COMPLEX)
+  /* Only rebuild the optimized variables if more after-rotation orbitals
+   * are needed. Consider ROHF: there is only one set of SPOs for both spin
+   * up and down with Nup > Ndown, and nel_major_ will be set to Nup.
+   *
+   * The size of myVars is used as a flag to avoid building the rotation
+   * parameters again when a clone is made (the DiracDeterminant constructor
+   * calls buildOptVariables).
+   */
+  if (nel <= nel_major_ || this->myVars.size() != 0)
+    return;
+
+  nel_major_       = nel;
+  const size_t nmo = Phi->getOrbitalSetSize();
+
+  // The active (core-active) pair list is always built. The full pair list
+  // is only filled in global-rotation mode and is passed on empty
+  // otherwise.
+  RotationIndices active_pairs;
+  createRotationIndices(nel, nmo, active_pairs);
+
+  RotationIndices full_pairs;
+  if (use_global_rot_)
+    createRotationIndicesFull(nel, nmo, full_pairs);
+
+  buildOptVariables(active_pairs, full_pairs);
+#endif
+}
+
+template<typename T>
+void RotatedSPOsT<T>::buildOptVariables(const RotationIndices& rotations, const RotationIndices& full_rotations)
+{
+#if !defined(QMC_COMPLEX)
+  // Stores the rotation index lists and registers one optimizable variable
+  // per active rotation in myVars (plus, in global-rotation mode, one per
+  // full rotation in myVarsFull). Values come from the parameters supplied
+  // through setRotationParameters() when there are any, otherwise zero.
+  m_act_rot_inds = rotations;
+
+  // Report which rotation bookkeeping is in use.
+  if (use_global_rot_)
+  {
+    m_full_rot_inds = full_rotations;
+    app_log() << "Orbital rotation using global rotation" << std::endl;
+  }
+  else
+  {
+    app_log() << "Orbital rotation using history" << std::endl;
+  }
+
+  const int nparams_active = m_act_rot_inds.size();
+
+  app_log() << "nparams_active: " << nparams_active << " params2.size(): " << params.size() << std::endl;
+  // A supplied parameter list must have exactly one entry per active rotation.
+  if (params_supplied && nparams_active != params.size())
+    throw std::runtime_error("The number of supplied orbital rotation parameters does not "
+                             "match number produced by the slater "
+                             "expansion. \n");
+
+  // Variable names are "<spo name>_orb_rot_<p>_<q>" with both indices
+  // zero-padded to at least four digits. One helper builds them for both
+  // variable sets so the two naming schemes cannot drift apart.
+  const std::string spo_name = SPOSetT<T>::getName();
+  const auto rotation_name   = [&spo_name](int p, int q) {
+    std::stringstream sstr;
+    sstr << spo_name << "_orb_rot_" << (p < 10 ? "0" : "") << (p < 100 ? "0" : "") << (p < 1000 ? "0" : "") << p << "_"
+         << (q < 10 ? "0" : "") << (q < 100 ? "0" : "") << (q < 1000 ? "0" : "") << q;
+    return sstr.str();
+  };
+
+  this->myVars.clear();
+  for (int i = 0; i < nparams_active; i++)
+  {
+    const std::string name = rotation_name(m_act_rot_inds[i].first, m_act_rot_inds[i].second);
+
+    // Use the user's value when parameters were supplied, otherwise start
+    // from zero.
+    if (params_supplied)
+      this->myVars.insert(name, params[i]);
+    else
+      this->myVars.insert(name, 0.0);
+  }
+
+  if (use_global_rot_)
+  {
+    myVarsFull.clear();
+    for (int i = 0; i < m_full_rot_inds.size(); i++)
+    {
+      const std::string name = rotation_name(m_full_rot_inds[i].first, m_full_rot_inds[i].second);
+
+      // Only the leading (active) entries have a supplied value; every
+      // other full-rotation parameter starts at zero.
+      if (params_supplied && i < m_act_rot_inds.size())
+        myVarsFull.insert(name, params[i]);
+      else
+        myVarsFull.insert(name, 0.0);
+    }
+  }
+
+  // Print the registered active parameters (always on).
+  app_log() << std::string(16, ' ') << "Parameter name" << std::string(15, ' ') << "Value\n";
+  this->myVars.print(app_log());
+
+  // Supplied parameters describe a rotation that still has to be applied to
+  // the orbitals; the values are read back from myVars.
+  if (params_supplied)
+  {
+    std::vector<RealType> param(m_act_rot_inds.size());
+    for (int i = 0; i < m_act_rot_inds.size(); i++)
+      param[i] = this->myVars[i];
+    apply_rotation(param, false);
+  }
+#endif
+}
+
+template<typename T>
+void RotatedSPOsT<T>::apply_rotation(const std::vector<RealType>& param, bool use_stored_copy)
+{
+  // param carries one value per active rotation pair.
+  assert(param.size() == m_act_rot_inds.size());
+
+  // Step 1: scatter the parameters into a square antisymmetric matrix.
+  const size_t norb = Phi->getOrbitalSetSize();
+  ValueMatrix rotation(norb, norb);
+  constructAntiSymmetricMatrix(m_act_rot_inds, param, rotation);
+
+  // Step 2: replace it, in place, by its matrix exponential; that is the
+  // matrix handed to the wrapped orbital set.
+  exponentiate_antisym_matrix(rotation);
+
+  // Step 3: rotate the orbitals. Only this call is covered by the timer.
+  {
+    ScopedTimer local(apply_rotation_timer_);
+    Phi->applyRotation(rotation, use_stored_copy);
+  }
+}
+
+template<typename T>
+void RotatedSPOsT<T>::applyDeltaRotation(const std::vector<RealType>& delta_param,
+                                         const std::vector<RealType>& old_param,
+                                         std::vector<RealType>& new_param)
+{
+  // Build the combined rotation matrix (new_param is filled along the way), then apply it.
+  const size_t norb = Phi->getOrbitalSetSize();
+  ValueMatrix combined(norb, norb);
+  constructDeltaRotation(delta_param, old_param, m_act_rot_inds, m_full_rot_inds, new_param, combined);
+  {
+    ScopedTimer local(apply_rotation_timer_); // times only the orbital update
+    Phi->applyRotation(combined, true);       // second argument: use_stored_copy = true
+  }
+}
+
+template<typename T>
+void RotatedSPOsT<T>::constructDeltaRotation(const std::vector<RealType>& delta_param,
+                                             const std::vector<RealType>& old_param,
+                                             const RotationIndices& act_rot_inds,
+                                             const RotationIndices& full_rot_inds,
+                                             std::vector<RealType>& new_param,
+                                             ValueMatrix& new_rot_mat)
+{
+  // Sizes must agree: delta with the active pairs, old/new with the full pairs.
+  assert(delta_param.size() == act_rot_inds.size());
+  assert(old_param.size() == full_rot_inds.size());
+  assert(new_param.size() == full_rot_inds.size());
+  assert(new_rot_mat.rows() == new_rot_mat.cols());
+  const size_t norb = new_rot_mat.rows();
+  // Rotation described by the stored (full) parameters.
+  ValueMatrix prev_rotation(norb, norb);
+  constructAntiSymmetricMatrix(full_rot_inds, old_param, prev_rotation);
+  exponentiate_antisym_matrix(prev_rotation);
+
+  // Rotation described by the incoming change in the active parameters.
+  ValueMatrix step_rotation(norb, norb);
+  constructAntiSymmetricMatrix(act_rot_inds, delta_param, step_rotation);
+  exponentiate_antisym_matrix(step_rotation);
+
+  // Apply the delta rotation to the old rotation; the product lands in new_rot_mat.
+  BLAS::gemm('N', 'N', norb, norb, norb, 1.0, step_rotation.data(), norb, prev_rotation.data(), norb, 0.0,
+             new_rot_mat.data(), norb);
+
+  // Take the matrix log of the combined rotation and read the new full parameters from it.
+  ValueMatrix combined_log(norb, norb);
+  log_antisym_matrix(new_rot_mat, combined_log);
+  extractParamsFromAntiSymmetricMatrix(full_rot_inds, combined_log, new_param);
+}
+
+template<typename T>
+void RotatedSPOsT<T>::applyFullRotation(const std::vector<RealType>& full_param, bool use_stored_copy)
+{
+  // full_param carries one value per entry of the full rotation index list.
+  assert(full_param.size() == m_full_rot_inds.size());
+  const size_t norb = Phi->getOrbitalSetSize();
+  ValueMatrix rotation(norb, norb);
+  rotation = T(0);
+
+  // Scatter the parameters into an antisymmetric matrix ...
+  constructAntiSymmetricMatrix(m_full_rot_inds, full_param, rotation);
+
+  // ... replace it, in place, by its matrix exponential ...
+  exponentiate_antisym_matrix(rotation);
+
+  // ... and hand the result to the wrapped orbital set. Unlike
+  // apply_rotation(), this call is not wrapped in the rotation timer.
+  Phi->applyRotation(rotation, use_stored_copy);
+}
+
+template<typename T>
+void RotatedSPOsT<T>::applyRotationHistory()
+{
+  // Replay every stored rotation step in order. Bind by const reference: each
+  // entry is a whole parameter vector, and a by-value loop variable copies it.
+  for (const auto& delta_param : history_params_)
+    apply_rotation(delta_param, false);
+}
+
+// Compute the exponential of a real, antisymmetric matrix (in place) by
+// diagonalizing its hermitian counterpart and exponentiating the eigenvalues.
+template<typename T>
+void RotatedSPOsT<T>::exponentiate_antisym_matrix(ValueMatrix& mat)
+{
+  const int n = mat.rows();
+  std::vector<std::complex<RealType>> mat_h(n * n, 0);
+  std::vector<RealType> eval(n, 0);
+  std::vector<std::complex<RealType>> work(2 * n, 0);
+  std::vector<RealType> rwork(3 * n, 0);
+  std::vector<std::complex<RealType>> mat_d(n * n, 0);
+  std::vector<std::complex<RealType>> mat_t(n * n, 0);
+  // exponentiating e^X = e^iY (Y hermitian)
+  // i(-iX) = X, so -iX is hermitian
+  // diagonalize -iX = U D U^H, exponentiate to e^iD, and return U e^iD U^H
+  // construct the hermitian analogue of mat by multiplying by -i
+  for (int i = 0; i < n; ++i)
+  {
+    for (int j = i; j < n; ++j)
+    {
+      mat_h[i + n * j] = std::complex<RealType>(0, -1.0 * mat[j][i]);
+      mat_h[j + n * i] = std::complex<RealType>(0, 1.0 * mat[j][i]);
+    }
+  }
+  // diagonalize the matrix: eigenvalues go to eval, eigenvectors overwrite mat_h
+  char JOBZ('V');
+  char UPLO('U');
+  int N(n);
+  int LDA(n);
+  int LWORK(2 * n);
+  int info = 0;
+  LAPACK::heev(JOBZ, UPLO, N, &mat_h.at(0), LDA, &eval.at(0), &work.at(0), LWORK, &rwork.at(0), info);
+  if (info != 0)
+  {
+    std::ostringstream msg;
+    msg << "heev failed with info = " << info << " in RotatedSPOsT::exponentiate_antisym_matrix";
+    throw std::runtime_error(msg.str());
+  }
+  // iterate through diagonal matrix, exponentiate terms: mat_d = diag(e^{i eval})
+  for (int i = 0; i < n; ++i)
+  {
+    for (int j = 0; j < n; ++j)
+    {
+      mat_d[i + j * n] = (i == j) ? std::exp(std::complex<RealType>(0.0, eval[i])) : std::complex<RealType>(0.0, 0.0);
+    }
+  }
+  // perform matrix multiplication: mat_t = mat_d * mat_h^H, then mat_d = mat_h * mat_t
+  // assume row major
+  BLAS::gemm('N', 'C', n, n, n, std::complex<RealType>(1.0, 0), &mat_d.at(0), n, &mat_h.at(0), n,
+             std::complex<RealType>(0.0, 0.0), &mat_t.at(0), n);
+  BLAS::gemm('N', 'N', n, n, n, std::complex<RealType>(1.0, 0), &mat_h.at(0), n, &mat_t.at(0), n,
+             std::complex<RealType>(0.0, 0.0), &mat_d.at(0), n);
+  for (int i = 0; i < n; ++i)
+    for (int j = 0; j < n; ++j)
+    {
+      if (std::abs(mat_d[i + n * j].imag()) > 1e-12) // magnitude test: a negative residue is just as wrong
+      {
+        app_log() << "warning: large imaginary value in orbital "
+                     "rotation matrix: (i,j) = ("
+                  << i << "," << j << "), im = " << mat_d[i + n * j].imag() << std::endl;
+      }
+      mat[j][i] = mat_d[i + n * j].real();
+    }
+}
+
+template<typename T>
+void RotatedSPOsT<T>::log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output)
+{
+  const int n = mat.rows();
+  std::vector<RealType> mat_h(n * n, 0);
+  std::vector<RealType> eval_r(n, 0);
+  std::vector<RealType> eval_i(n, 0);
+  std::vector<RealType> mat_l(n * n, 0);
+  std::vector<RealType> work(4 * n, 0);
+  // Complex work matrices: log-eigenvalue diagonal, eigenvectors, intermediate product.
+  std::vector<std::complex<RealType>> mat_cd(n * n, 0);
+  std::vector<std::complex<RealType>> mat_cl(n * n, 0);
+  std::vector<std::complex<RealType>> mat_ch(n * n, 0);
+
+  for (int i = 0; i < n; ++i)
+    for (int j = 0; j < n; ++j)
+      mat_h[i + n * j] = mat[i][j];
+
+  // diagonalize the matrix (general real eigenproblem; eigenvalues come back as eval_r + i eval_i)
+  char JOBL('V');
+  char JOBR('N');
+  int N(n);
+  int LDA(n);
+  int LWORK(4 * n);
+  int info = 0;
+  LAPACK::geev(&JOBL, &JOBR, &N, &mat_h.at(0), &LDA, &eval_r.at(0), &eval_i.at(0), &mat_l.at(0), &LDA, nullptr, &LDA,
+               &work.at(0), &LWORK, &info);
+  if (info != 0)
+  {
+    std::ostringstream msg;
+    msg << "geev failed with info = " << info << " in RotatedSPOsT::log_antisym_matrix";
+    throw std::runtime_error(msg.str());
+  }
+  // iterate through diagonal matrix, take log; in the same pass rebuild the
+  // complex eigenvectors from the real storage geev returned in mat_l
+  for (int i = 0; i < n; ++i)
+  {
+    for (int j = 0; j < n; ++j)
+    {
+      auto tmp = (i == j) ? std::log(std::complex<RealType>(eval_r[i], eval_i[i])) : std::complex<RealType>(0.0, 0.0);
+      mat_cd[i + j * n] = tmp;
+      // eval_i[j] > 0: columns j and j+1 hold the real and imaginary parts of a conjugate pair (LAPACK geev layout)
+      if (eval_i[j] > 0.0)
+      {
+        mat_cl[i + j * n]       = std::complex<RealType>(mat_l[i + j * n], mat_l[i + (j + 1) * n]);
+        mat_cl[i + (j + 1) * n] = std::complex<RealType>(mat_l[i + j * n], -mat_l[i + (j + 1) * n]);
+      }
+      else if (!(eval_i[j] < 0.0))
+      {
+        mat_cl[i + j * n] = std::complex<RealType>(mat_l[i + j * n], 0.0);
+      }
+    }
+  }
+  // mat_ch = mat_cl * mat_cd, then mat_cd = mat_ch * mat_cl^H
+  RealType one(1.0);
+  RealType zero(0.0);
+  BLAS::gemm('N', 'N', n, n, n, one, &mat_cl.at(0), n, &mat_cd.at(0), n, zero, &mat_ch.at(0), n);
+  BLAS::gemm('N', 'C', n, n, n, one, &mat_ch.at(0), n, &mat_cl.at(0), n, zero, &mat_cd.at(0), n);
+
+  for (int i = 0; i < n; ++i)
+    for (int j = 0; j < n; ++j)
+    {
+      if (std::abs(mat_cd[i + n * j].imag()) > 1e-12) // magnitude test: a negative residue is just as wrong
+      {
+        app_log() << "warning: large imaginary value in antisymmetric "
+                     "matrix: (i,j) = ("
+                  << i << "," << j << "), im = " << mat_cd[i + n * j].imag() << std::endl;
+      }
+      output[i][j] = mat_cd[i + n * j].real();
+    }
+}
+
+template<typename T>
+void RotatedSPOsT<T>::evaluateDerivRatios(const VirtualParticleSet& VP,
+                                          const opt_variables_type& optvars, // not read in this function
+                                          ValueVector& psi,
+                                          const ValueVector& psiinv,
+                                          std::vector<T>& ratios,
+                                          Matrix<T>& dratios,
+                                          int FirstIndex,
+                                          int LastIndex)
+{
+  Phi->evaluateDetRatios(VP, psi, psiinv, ratios); // the ratios themselves are delegated to the wrapped orbital set
+  // The rest fills dratios(iat, kk): row = virtual particle, column = location from myVars.where().
+  const size_t nel = LastIndex - FirstIndex;
+  const size_t nmo = Phi->getOrbitalSetSize();
+  // Size and zero the member scratch matrices.
+  psiM_inv.resize(nel, nel);
+  psiM_all.resize(nel, nmo);
+  dpsiM_all.resize(nel, nmo);
+  d2psiM_all.resize(nel, nmo);
+
+  psiM_inv   = 0;
+  psiM_all   = 0;
+  dpsiM_all  = 0;
+  d2psiM_all = 0;
+
+  const ParticleSet& P = VP.getRefPS();
+  int iel              = VP.refPtcl; // row (iel - FirstIndex) is the one replaced per virtual particle below
+
+  Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
+  // Copy the leading nel x nel square of psiM_all and invert it in place.
+  for (int i = 0; i < nel; i++)
+    for (int j = 0; j < nel; j++)
+      psiM_inv(i, j) = psiM_all(i, j);
+
+  Invert(psiM_inv.data(), nel, nel);
+  // T_orig: the gemm product of psiM_all and that inverse for the reference configuration.
+  const T* const A(psiM_all.data());
+  const T* const Ainv(psiM_inv.data());
+  ValueMatrix T_orig;
+  T_orig.resize(nel, nmo);
+
+  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_orig.data(), nmo);
+
+  ValueMatrix T_mat;
+  T_mat.resize(nel, nmo);
+
+  ValueVector tmp_psi;
+  tmp_psi.resize(nmo);
+
+  for (int iat = 0; iat < VP.getTotalNum(); iat++)
+  {
+    Phi->evaluateValue(VP, iat, tmp_psi); // orbital values for virtual particle iat
+    // Overwrite the reference particle's row with them, then redo the inverse and the product.
+    for (int j = 0; j < nmo; j++)
+      psiM_all(iel - FirstIndex, j) = tmp_psi[j];
+
+    for (int i = 0; i < nel; i++)
+      for (int j = 0; j < nel; j++)
+        psiM_inv(i, j) = psiM_all(i, j);
+
+    Invert(psiM_inv.data(), nel, nel);
+    // These two locals shadow the A/Ainv declared before the loop (same buffers).
+    const T* const A(psiM_all.data());
+    const T* const Ainv(psiM_inv.data());
+
+    // The matrix A is rectangular.  Ainv is the inverse of the square part
+    // of the matrix. The multiply of Ainv and the square part of A is just
+    // the identity. This multiply could be reduced to Ainv and the
+    // non-square part of A.
+    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_mat.data(), nmo);
+    // Per active rotation (p, q): difference between the moved and the reference product entry.
+    for (int i = 0; i < m_act_rot_inds.size(); i++)
+    {
+      int kk = this->myVars.where(i);
+      if (kk >= 0) // negative kk: no slot for this variable, skip it
+      {
+        const int p      = m_act_rot_inds.at(i).first;
+        const int q      = m_act_rot_inds.at(i).second;
+        dratios(iat, kk) = T_mat(p, q) - T_orig(p, q); // dratio size is (nknot, num_vars)
+      }
+    }
+  }
+}
+
+template<typename T>
+void RotatedSPOsT<T>::evaluateDerivativesWF(ParticleSet& P,
+                                            const opt_variables_type& optvars, // not read in this function
+                                            Vector<T>& dlogpsi,
+                                            int FirstIndex,
+                                            int LastIndex)
+{
+  const size_t nel = LastIndex - FirstIndex;
+  const size_t nmo = Phi->getOrbitalSetSize();
+  // Writes one dlogpsi entry per active rotation variable that has a slot (where() >= 0).
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1
+  // Size and zero the member scratch matrices, fill them, and invert the square part.
+  psiM_inv.resize(nel, nel);
+  psiM_all.resize(nel, nmo);
+  dpsiM_all.resize(nel, nmo);
+  d2psiM_all.resize(nel, nmo);
+
+  psiM_inv   = 0;
+  psiM_all   = 0;
+  dpsiM_all  = 0;
+  d2psiM_all = 0;
+
+  Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
+  // Leading nel x nel square of psiM_all, inverted in place.
+  for (int i = 0; i < nel; i++)
+    for (int j = 0; j < nel; j++)
+      psiM_inv(i, j) = psiM_all(i, j);
+
+  Invert(psiM_inv.data(), nel, nel);
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2
+  const T* const A(psiM_all.data());
+  const T* const Ainv(psiM_inv.data());
+  ValueMatrix T_mat;
+  T_mat.resize(nel, nmo);
+  // T_mat: gemm product of psiM_all and the inverse of its square part.
+  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_mat.data(), nmo);
+  // NOTE(review): this assigns, whereas the dlogpsi/dhpsioverpsi overload accumulates with += -- confirm intent.
+  for (int i = 0; i < m_act_rot_inds.size(); i++)
+  {
+    int kk = this->myVars.where(i);
+    if (kk >= 0)
+    {
+      const int p = m_act_rot_inds.at(i).first;
+      const int q = m_act_rot_inds.at(i).second;
+      dlogpsi[kk] = T_mat(p, q);
+    }
+  }
+}
+
+template<typename T>
+void RotatedSPOsT<T>::evaluateDerivatives(ParticleSet& P,
+                                          const opt_variables_type& optvars, // not read in this function
+                                          Vector<T>& dlogpsi,
+                                          Vector<T>& dhpsioverpsi,
+                                          const int& FirstIndex,
+                                          const int& LastIndex)
+{
+  const size_t nel = LastIndex - FirstIndex;
+  const size_t nmo = Phi->getOrbitalSetSize();
+  // Accumulates (+=) into dlogpsi and dhpsioverpsi for every active rotation variable with a slot.
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1
+  myG_temp.resize(nel);
+  myG_J.resize(nel);
+  myL_temp.resize(nel);
+  myL_J.resize(nel);
+
+  myG_temp = 0;
+  myG_J    = 0;
+  myL_temp = 0;
+  myL_J    = 0;
+
+  Bbar.resize(nel, nmo);
+  psiM_inv.resize(nel, nel);
+  psiM_all.resize(nel, nmo);
+  dpsiM_all.resize(nel, nmo);
+  d2psiM_all.resize(nel, nmo);
+
+  Bbar       = 0;
+  psiM_inv   = 0;
+  psiM_all   = 0;
+  dpsiM_all  = 0;
+  d2psiM_all = 0;
+
+  Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
+  // Leading nel x nel square of psiM_all, inverted in place.
+  for (int i = 0; i < nel; i++)
+    for (int j = 0; j < nel; j++)
+      psiM_inv(i, j) = psiM_all(i, j);
+
+  Invert(psiM_inv.data(), nel, nel);
+
+  // current value of Gradient and Laplacian
+  //  gradient components
+  for (int a = 0; a < nel; a++)
+    for (int i = 0; i < nel; i++)
+      for (int k = 0; k < 3; k++) // three components are hard-coded here
+        myG_temp[a][k] += psiM_inv(i, a) * dpsiM_all(a, i)[k];
+  // laplacian components
+  for (int a = 0; a < nel; a++)
+  {
+    for (int i = 0; i < nel; i++)
+      myL_temp[a] += psiM_inv(i, a) * d2psiM_all(a, i);
+  }
+
+  // calculation of myG_J which will be used to represent
+  // \frac{\nabla\psi_{J}}{\psi_{J}} calculation of myL_J will be used to
+  // represent \frac{\nabla^2\psi_{J}}{\psi_{J}} IMPORTANT NOTE:  The value of
+  // P.L holds \nabla^2 ln[\psi] but we need  \frac{\nabla^2 \psi}{\psi} and
+  // this is what myL_J will hold
+  for (int a = 0, iat = FirstIndex; a < nel; a++, iat++)
+  {
+    myG_J[a] = (P.G[iat] - myG_temp[a]);
+    myL_J[a] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[a]);
+  }
+  // possibly replace wit BLAS calls
+  for (int i = 0; i < nel; i++)
+    for (int j = 0; j < nmo; j++)
+      Bbar(i, j) = d2psiM_all(i, j) + 2 * dot(myG_J[i], dpsiM_all(i, j)) + myL_J[i] * psiM_all(i, j);
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2
+  const T* const A(psiM_all.data());
+  const T* const Ainv(psiM_inv.data());
+  const T* const B(Bbar.data());
+  ValueMatrix T_mat;
+  ValueMatrix Y1;
+  ValueMatrix Y2;
+  ValueMatrix Y3;
+  ValueMatrix Y4;
+  T_mat.resize(nel, nmo);
+  Y1.resize(nel, nel);
+  Y2.resize(nel, nmo);
+  Y3.resize(nel, nmo);
+  Y4.resize(nel, nmo);
+  // Four gemm products: T_mat from (A, Ainv); Y1 from (B, Ainv), square; Y2 from (T_mat, Y1); Y3 from (B, Ainv).
+  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_mat.data(), nmo);
+  BLAS::gemm('N', 'N', nel, nel, nel, T(1.0), B, nmo, Ainv, nel, T(0.0), Y1.data(), nel);
+  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), T_mat.data(), nmo, Y1.data(), nel, T(0.0), Y2.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), B, nmo, Ainv, nel, T(0.0), Y3.data(), nmo);
+
+  // possibly replace with BLAS call
+  Y4 = Y3 - Y2;
+  // Per active rotation (p, q): add T_mat(p, q) to dlogpsi and -0.5 * Y4(p, q) to dhpsioverpsi.
+  for (int i = 0; i < m_act_rot_inds.size(); i++)
+  {
+    int kk = this->myVars.where(i);
+    if (kk >= 0)
+    {
+      const int p = m_act_rot_inds.at(i).first;
+      const int q = m_act_rot_inds.at(i).second;
+      dlogpsi[kk] += T_mat(p, q);
+      dhpsioverpsi[kk] += T(-0.5) * Y4(p, q);
+    }
+  }
+}
+
+template<typename T>
+void RotatedSPOsT<T>::evaluateDerivatives(ParticleSet& P,
+                                          const opt_variables_type& optvars,
+                                          Vector<T>& dlogpsi,
+                                          Vector<T>& dhpsioverpsi,
+                                          const T& psiCurrent,
+                                          const std::vector<T>& Coeff,
+                                          const std::vector<size_t>& C2node_up,
+                                          const std::vector<size_t>& C2node_dn,
+                                          const ValueVector& detValues_up,
+                                          const ValueVector& detValues_dn,
+                                          const GradMatrix& grads_up,
+                                          const GradMatrix& grads_dn,
+                                          const ValueMatrix& lapls_up,
+                                          const ValueMatrix& lapls_dn,
+                                          const ValueMatrix& M_up,
+                                          const ValueMatrix& M_dn,
+                                          const ValueMatrix& Minv_up,
+                                          const ValueMatrix& Minv_dn,
+                                          const GradMatrix& B_grad,
+                                          const ValueMatrix& B_lapl,
+                                          const std::vector<int>& detData_up,
+                                          const size_t N1,
+                                          const size_t N2,
+                                          const size_t NP1,
+                                          const size_t NP2,
+                                          const std::vector<std::vector<int>>& lookup_tbl)
+{
+  bool recalculate(false); // becomes true if any of this object's variables with a slot is flagged by optvars
+  for (int k = 0; k < this->myVars.size(); ++k)
+  {
+    int kk = this->myVars.where(k);
+    if (kk < 0)
+      continue;
+    if (optvars.recompute(kk))
+      recalculate = true;
+  }
+  if (recalculate) // otherwise dlogpsi and dhpsioverpsi are left untouched
+  {
+    ParticleSet::ParticleGradient myG_temp, myG_J; // locals; they hide the members of the same name
+    ParticleSet::ParticleLaplacian myL_temp, myL_J;
+    const int NP = P.getTotalNum();
+    myG_temp.resize(NP);
+    myG_temp = 0.0;
+    myL_temp.resize(NP);
+    myL_temp = 0.0;
+    myG_J.resize(NP);
+    myG_J = 0.0;
+    myL_J.resize(NP);
+    myL_J            = 0.0;
+    const size_t nmo = Phi->getOrbitalSetSize();
+    const size_t nel = P.last(0) - P.first(0); // size of particle group 0
+    // Sum over the expansion: each term's coefficient times the other spin's determinant value weights this spin.
+    const T* restrict C_p = Coeff.data();
+    for (int i = 0; i < Coeff.size(); i++)
+    {
+      const size_t upC = C2node_up[i];
+      const size_t dnC = C2node_dn[i];
+      const T tmp1     = C_p[i] * detValues_dn[dnC];
+      const T tmp2     = C_p[i] * detValues_up[upC];
+      for (size_t k = 0, j = N1; k < NP1; k++, j++) // up block: particles N1 .. N1 + NP1 - 1
+      {
+        myG_temp[j] += tmp1 * grads_up(upC, k);
+        myL_temp[j] += tmp1 * lapls_up(upC, k);
+      }
+      for (size_t k = 0, j = N2; k < NP2; k++, j++) // down block: particles N2 .. N2 + NP2 - 1
+      {
+        myG_temp[j] += tmp2 * grads_dn(dnC, k);
+        myL_temp[j] += tmp2 * lapls_dn(dnC, k);
+      }
+    }
+    // Divide the accumulated sums by the current wavefunction value.
+    myG_temp *= (1 / psiCurrent);
+    myL_temp *= (1 / psiCurrent);
+
+    // calculation of myG_J which will be used to represent
+    // \frac{\nabla\psi_{J}}{\psi_{J}} calculation of myL_J will be used to
+    // represent \frac{\nabla^2\psi_{J}}{\psi_{J}} IMPORTANT NOTE:  The
+    // value of P.L holds \nabla^2 ln[\psi] but we need  \frac{\nabla^2
+    // \psi}{\psi} and this is what myL_J will hold
+    for (int iat = 0; iat < (myL_temp.size()); iat++)
+    {
+      myG_J[iat] = (P.G[iat] - myG_temp[iat]);
+      myL_J[iat] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[iat]);
+    }
+    // The derivative accumulation itself is delegated to table_method_eval().
+    table_method_eval(dlogpsi, dhpsioverpsi, myL_J, myG_J, nel, nmo, psiCurrent, Coeff, C2node_up, C2node_dn,
+                      detValues_up, detValues_dn, grads_up, grads_dn, lapls_up, lapls_dn, M_up, M_dn, Minv_up, Minv_dn,
+                      B_grad, B_lapl, detData_up, N1, N2, NP1, NP2, lookup_tbl);
+  }
+}
+
+template<typename T>
+void RotatedSPOsT<T>::evaluateDerivativesWF(ParticleSet& P,
+                                            const opt_variables_type& optvars,
+                                            Vector<T>& dlogpsi,
+                                            const FullRealType& psiCurrent,
+                                            const std::vector<T>& Coeff,
+                                            const std::vector<size_t>& C2node_up,
+                                            const std::vector<size_t>& C2node_dn,
+                                            const ValueVector& detValues_up,
+                                            const ValueVector& detValues_dn,
+                                            const ValueMatrix& M_up,
+                                            const ValueMatrix& M_dn,
+                                            const ValueMatrix& Minv_up,
+                                            const ValueMatrix& Minv_dn,
+                                            const std::vector<int>& detData_up,
+                                            const std::vector<std::vector<int>>& lookup_tbl)
+{
+  // Work is only needed when at least one of this object's variables maps to a
+  // non-negative index in optvars and that entry is flagged for recomputation.
+  bool needs_update = false;
+  for (int ivar = 0; ivar < this->myVars.size(); ++ivar)
+  {
+    const int loc = this->myVars.where(ivar);
+    if (loc >= 0 && optvars.recompute(loc))
+      needs_update = true;
+  }
+  if (!needs_update)
+    return;
+
+  // Orbital count comes from the wrapped SPO set, electron count from the span of group 0 in P.
+  const size_t num_orbs = Phi->getOrbitalSetSize();
+  const size_t num_elec = P.last(0) - P.first(0);
+  table_method_evalWF(dlogpsi, num_elec, num_orbs, psiCurrent, Coeff, C2node_up, C2node_dn, detValues_up, detValues_dn,
+                      M_up, M_dn, Minv_up, Minv_dn, detData_up, lookup_tbl);
+}
+
+template<typename T>
+void RotatedSPOsT<T>::table_method_eval(Vector<T>& dlogpsi,
+                                        Vector<T>& dhpsioverpsi,
+                                        const ParticleSet::ParticleLaplacian& myL_J,
+                                        const ParticleSet::ParticleGradient& myG_J,
+                                        const size_t nel,
+                                        const size_t nmo,
+                                        const T& psiCurrent,
+                                        const std::vector<T>& Coeff,
+                                        const std::vector<size_t>& C2node_up,
+                                        const std::vector<size_t>& C2node_dn,
+                                        const ValueVector& detValues_up,
+                                        const ValueVector& detValues_dn,
+                                        const GradMatrix& grads_up,
+                                        const GradMatrix& grads_dn,
+                                        const ValueMatrix& lapls_up,
+                                        const ValueMatrix& lapls_dn,
+                                        const ValueMatrix& M_up,
+                                        const ValueMatrix& M_dn,
+                                        const ValueMatrix& Minv_up,
+                                        const ValueMatrix& Minv_dn,
+                                        const GradMatrix& B_grad,
+                                        const ValueMatrix& B_lapl,
+                                        const std::vector<int>& detData_up,
+                                        const size_t N1,
+                                        const size_t N2,
+                                        const size_t NP1,
+                                        const size_t NP2,
+                                        const std::vector<std::vector<int>>& lookup_tbl)
+/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+GUIDE TO THE MATRICES BEING BUILT
+----------------------------------------------
+The idea here is that there is a loop over all unique determinants. For each
+determinant the table method is employed to calculate the contributions to the
+parameter derivatives (dhpsioverpsi/dlogpsi)
+
+  loop through unique determinants
+	loop through parameters
+	  evaluate contribution to dlogpsi and dhpsioverpsi
+\noindent
+
+  BLAS GUIDE  for matrix multiplication of  [  alpha * A.B + beta * C = C ]
+  Matrix A is of dimensions a1,a2 and Matrix B is b1,b2   in which a2=b1
+  The BLAS command is as follows...
+
+ BLAS::gemm('N','N', b2, a1, a2 ,alpha, B, b2, A, a2, beta, C, b2);
+
+Below is a human readable format for the matrix multiplications performed
+below...
+
+This notation is inspired by http://dx.doi.org/10.1063/1.4948778
+\newline
+\hfill\break
+$
+	A_{i,j}=\phi_j(r_{i}) \\
+	T = A^{-1} \widetilde{A} \\
+	B_{i,j} =\nabla^2 \phi_{j}(r_i) + \frac{\nabla_{i}J}{J} \cdot \nabla
+\phi_{j}(r_{i})  + \frac{\nabla^2_i J}{J} \phi_{j}(r_{i}) \\
+	\hat{O_{I}} = \hat{O}D_{I} \\
+	D_{I}=det(A_{I}) \newline
+	\psi_{MS} = \sum_{I=0} C_{I} D_{I\uparrow}D_{I\downarrow} \\
+	\Psi_{total} = \psi_{J}\psi_{MS} \\
+	\alpha_{I} = P^{T}_{I}TQ_{I} \\
+	M_{I} = P^{T}_{I} \widetilde{M} Q_{I} = P^{T}_{I} (A^{-1}\widetilde{B} -
+A^{-1} B A^{-1}\widetilde{A} )Q_{I} \\
+$
+\newline
+There are three constants I use in the expressions for dhpsioverpsi and dlogpsi
+\newline
+\hfill\break
+$
+  const0 = C_{0}*det(A_{0\downarrow})+\sum_{I=1} C_{I}*det(A_{I\downarrow})*
+det(\alpha_{I\uparrow}) \\
+  const1 = C_{0}*\hat{O} det(A_{0\downarrow})+\sum_{I=1}
+C_{I}*\hat{O}det(A_{I\downarrow})* det(\alpha_{I\uparrow}) \\
+  const2 = \sum_{I=1} C_{I}*det(A_{I\downarrow})*
+Tr[\alpha_{I}^{-1}M_{I}]*det(\alpha_{I}) \\
+$
+\newline
+Below is a translation of the shorthand I use to represent matrices independent
+of ``excitation matrix". \newline \hfill\break
+$
+	Y_{1} =  A^{-1}B   \\
+	Y_{2} = A^{-1}BA^{-1}\widetilde{A} \\
+	Y_{3} = A^{-1}\widetilde{B} \\
+	Y_{4} = \widetilde{M} = (A^{-1}\widetilde{B} - A^{-1} B A^{-1}\widetilde{A}
+)\\
+$
+\newline
+Below is a translation of the shorthand I use to represent matrices dependent on
+``excitation" with respect to the reference Matrix and sums of matrices. Above
+this line I have represented these excitation matrices with a subscript ``I" but
+from this point on the subscript will be omitted and it is clear that whenever a
+matrix depends on $P^{T}_I$ and $Q_{I}$ that this is an excitation matrix. The
+reference matrix is always $A_{0}$ and is always the Hartree Fock Matrix.
+\newline
+\hfill\break
+$
+	Y_{5} = TQ \\
+	Y_{6} = (P^{T}TQ)^{-1} = \alpha_{I}^{-1}\\
+	Y_{7} = \alpha_{I}^{-1} P^{T} \\
+	Y_{11} = \widetilde{M}Q \\
+	Y_{23} = P^{T}\widetilde{M}Q \\
+	Y_{24} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q \\
+	Y_{25} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q\alpha_{I}^{-1} \\
+	Y_{26} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q\alpha_{I}^{-1}P^{T}\\
+$
+\newline
+So far you will notice that I have not included up or down arrows to specify
+what spin the matrices are of. This is because we are calculating the derivative
+of all up or all down spin orbital rotation parameters at a time. If we are
+finding the up spin derivatives then any term that is down spin will be
+constant. The following assumes that we are taking up-spin MO rotation parameter
+derivatives. Of course the down spin expression can be retrieved by swapping the
+up and down arrows. I have dubbed any expression with lowercase p prefix as a
+"precursor" to an expression actually used... \newline \hfill\break
+$
+	\dot{C_{I}} = C_{I}*det(A_{I\downarrow})\\
+	\ddot{C_{I}} = C_{I}*\hat{O}det(A_{I\downarrow}) \\
+	pK1 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) Tr[\alpha_{I}^{-1}M_{I}]
+(Q\alpha_{I}^{-1}P^{T}) \\
+	pK2 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}) \\
+	pK3 = \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}) \\
+	pK4 = \sum_{I=1} \dot{C_{I}} det(A_{I}) (Q\alpha_{I}^{-1}P^{T}) \\
+	pK5 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1} M_{I}
+\alpha_{I}^{-1}P^{T}) \\
+$
+\newline
+Now these p matrices will be used to make various expressions via BLAS commands.
+\newline
+\hfill\break
+$
+	K1T = const0^{-1}*pK1.T =const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+Tr[\alpha_{I}^{-1}M_{I}] (Q\alpha_{I}^{-1}P^{T}T) \\
+	TK1T = T.K1T = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+Tr[\alpha_{I}^{-1}M_{I}] (TQ\alpha_{I}^{-1}P^{T}T)\\ \\
+	K2AiB = const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+(Q\alpha_{I}^{-1}P^{T}A^{-1}\widetilde{B})\\
+	TK2AiB = T.K2AiB = const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+(TQ\alpha_{I}^{-1}P^{T}A^{-1}\widetilde{B})\\
+	K2XA =  const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+(Q\alpha_{I}^{-1}P^{T}X\widetilde{A})\\
+	TK2XA = T.K2XA = const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+(TQ\alpha_{I}^{-1}P^{T}X\widetilde{A})\\ \\
+	K2T = \frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+(Q\alpha_{I}^{-1}P^{T}T) \\
+	TK2T = T.K2T =\frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}}
+det(\alpha_{I}) (TQ\alpha_{I}^{-1}P^{T}T) \\
+	MK2T = \frac{const0}{const1} Y_{4}.K2T= const0^{-1}  \sum_{I=1} \dot{C_{I}}
+det(\alpha_{I}) (\widetilde{M}Q\alpha_{I}^{-1}P^{T}T)\\ \\
+	K3T = const0^{-1}  \sum_{I=1} \ddot{C_{I}} det(\alpha_{I})
+(Q\alpha_{I}^{-1}P^{T}T) \\
+	TK3T = T.K3T  = const0^{-1}  \sum_{I=1} \ddot{C_{I}} det(\alpha_{I})
+(TQ\alpha_{I}^{-1}P^{T}T)\\ \\
+	K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (Q\alpha_{I}^{-1}P^{T}T) \\
+	TK4T = T.K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (TQ\alpha_{I}^{-1}P^{T}T)
+\\ \\
+	K5T =  const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}
+M_{I} \alpha_{I}^{-1}P^{T} T)  \\
+	TK5T = T.K5T  = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (T Q\alpha_{I}^{-1}
+M_{I} \alpha_{I}^{-1}P^{T} T)  \\
+$
+\newline
+Now with all these matrices and constants the expressions of dhpsioverpsi and
+dlogpsi can be created.
+
+NOTES ON THIS ROUTINE: the body below only accumulates into dhpsioverpsi (pK4,
+K4T and TK4T are not built here). The parameters dlogpsi, psiCurrent, grads_up,
+lapls_up, M_dn, Minv_dn and NP1 are accepted but not referenced in the body.
+
+In addition I will be using a special generalization of the kinetic operator
+which I will denote as O. Our Slater matrix with the special O operator applied
+to each element will be called B_bar
+
+$
+``Bbar"_{i,j} =\nabla^2 \phi_{j}(r_i) + \frac{\nabla_{i}J}{J} \cdot \nabla
+\phi_{j}(r_{i})  + \frac{\nabla^2_i J}{J} \phi_{j}(r_{i})
+$
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+{
+  ValueMatrix Table;
+  ValueMatrix Bbar;
+  ValueMatrix Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y11, Y23, Y24, Y25, Y26;
+  ValueMatrix pK1, K1T, TK1T, pK2, K2AiB, TK2AiB, K2XA, TK2XA, K2T, TK2T, MK2T, pK3, K3T, TK3T, pK5, K5T, TK5T;
+
+  Table.resize(nel, nmo);
+
+  Bbar.resize(nel, nmo);
+
+  Y1.resize(nel, nel);
+  Y2.resize(nel, nmo);
+  Y3.resize(nel, nmo);
+  Y4.resize(nel, nmo);
+
+  pK1.resize(nmo, nel);
+  K1T.resize(nmo, nmo);
+  TK1T.resize(nel, nmo);
+
+  pK2.resize(nmo, nel);
+  K2AiB.resize(nmo, nmo);
+  TK2AiB.resize(nel, nmo);
+  K2XA.resize(nmo, nmo);
+  TK2XA.resize(nel, nmo);
+  K2T.resize(nmo, nmo);
+  TK2T.resize(nel, nmo);
+  MK2T.resize(nel, nmo);
+
+  pK3.resize(nmo, nel);
+  K3T.resize(nmo, nmo);
+  TK3T.resize(nel, nmo);
+
+  pK5.resize(nmo, nel);
+  K5T.resize(nmo, nmo);
+  TK5T.resize(nel, nmo);
+
+  const int parameters_size(m_act_rot_inds.size());
+  const int parameter_start_index(0);
+
+  const size_t num_unique_up_dets(detValues_up.size());
+  const size_t num_unique_dn_dets(detValues_dn.size());
+
+  const T* restrict cptr = Coeff.data();
+  const size_t nc        = Coeff.size();
+  const size_t* restrict upC(C2node_up.data());
+  const size_t* restrict dnC(C2node_dn.data());
+  // B_grad holds the gradient operator
+  // B_lapl holds the laplacian operator
+  // B_bar will hold our special O operator
+
+  const int offset1(N1);
+  const int offset2(N2);
+  const int NPother(NP2);
+
+  T* T_(Table.data());
+
+  // possibly replace with BLAS calls
+  for (int i = 0; i < nel; i++)
+    for (int j = 0; j < nmo; j++)
+      Bbar(i, j) = B_lapl(i, j) + 2 * dot(myG_J[i + offset1], B_grad(i, j)) + myL_J[i + offset1] * M_up(i, j);
+
+  const T* restrict B(Bbar.data());
+  const T* restrict A(M_up.data());
+  const T* restrict Ainv(Minv_up.data());
+  // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF
+  // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR
+  // THIS CASE
+  //  The T matrix should be calculated and stored for use
+  //  T = A^{-1} \widetilde A
+  // REMINDER: that the ValueMatrix "matrix" stores data in a row major order
+  // and that BLAS commands assume column major
+  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, RealType(0.0), T_, nmo);
+
+  BLAS::gemm('N', 'N', nel, nel, nel, T(1.0), B, nmo, Ainv, nel, RealType(0.0), Y1.data(), nel);
+  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), T_, nmo, Y1.data(), nel, RealType(0.0), Y2.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), B, nmo, Ainv, nel, RealType(0.0), Y3.data(), nmo);
+
+  // possibly replace with BLAS call
+  Y4 = Y3 - Y2;
+
+  // Need to create the constants: (Oi, const0, const1, const2)to take
+  // advantage of minimal BLAS commands; Oi is the special operator applied to
+  // the slater matrix "A subscript i" from the total CI expansion \hat{O_{i}}
+  //= \hat{O}D_{i} with D_{i}=det(A_{i}) and Multi-Slater component defined as
+  //\sum_{i=0} C_{i} D_{i\uparrow}D_{i\downarrow}
+  std::vector<RealType> Oi(num_unique_dn_dets);
+
+  for (int index = 0; index < num_unique_dn_dets; index++)
+    for (int iat = 0; iat < NPother; iat++)
+      Oi[index] += lapls_dn(index, iat) + 2 * dot(grads_dn(index, iat), myG_J[offset2 + iat]) +
+          myL_J[offset2 + iat] * detValues_dn[index];
+
+  // const0 = C_{0}*det(A_{0\downarrow}) + \sum_{i=1} C_{i}*det(A_{i\downarrow})*det(\alpha_{i\uparrow})
+  // const1 = C_{0}*\hat{O}det(A_{0\downarrow}) + \sum_{i=1} C_{i}*\hat{O}det(A_{i\downarrow})*det(\alpha_{i\uparrow})
+  // const2 = \sum_{i=1} C_{i}*det(A_{i\downarrow})*Tr[\alpha_{i}^{-1}M_{i}]*det(\alpha_{i})
+  // In the code the ratio detValues_up[up] / detValues_up[0] takes the place
+  // of det(\alpha_{i\uparrow}). const0 and const1 are summed here; const2 is
+  // accumulated inside the loop over unique determinants below.
+  RealType const0(0.0), const1(0.0), const2(0.0);
+  for (size_t i = 0; i < nc; ++i)
+  {
+    const RealType c  = cptr[i];
+    const size_t up   = upC[i];
+    const size_t down = dnC[i];
+
+    const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]);
+    const1 += c * Oi[down] * (detValues_up[up] / detValues_up[0]);
+  }
+
+  std::fill(pK1.begin(), pK1.end(), 0.0);
+  std::fill(pK2.begin(), pK2.end(), 0.0);
+  std::fill(pK3.begin(), pK3.end(), 0.0);
+  std::fill(pK5.begin(), pK5.end(), 0.0);
+
+  // Now we are going to loop through all unique determinants.
+  // The few lines above are for the reference matrix contribution.
+  // Although I start the loop below from index 0, the loop only performs
+  // actions when the index is >= 1 the detData object contains all the
+  // information about the P^T and Q matrices (projection matrices) needed in
+  // the table method
+  const int* restrict data_it = detData_up.data();
+  for (int index = 0, datum = 0; index < num_unique_up_dets; index++)
+  {
+    const int k = data_it[datum];
+
+    if (k == 0)
+    {
+      datum += 3 * k + 1;
+    }
+
+    else
+    {
+      // k is read from detData_up (presumably the excitation rank of this
+      // determinant). With it the projectors have the shapes
+      //   P^T : k x nel
+      //   Q   : nmo x k
+      // so Y5 = T Q is nel x k and Y6 = P^T T Q is k x k. The projectors are
+      // never formed explicitly; see the copy-based scheme described below.
+
+      Y5.resize(nel, k);
+      Y6.resize(k, k);
+
+      // Any matrix multiplication of P^T or Q is simply a projection
+      // Explicit matrix multiplication can be avoided; instead column or
+      // row copying can be done BLAS::copy(size of col/row being copied,
+      //            Matrix pointer + place to begin copying,
+      //            storage spacing (number of elements btw next row/col
+      //            element), Pointer to resultant matrix + place to begin
+      //            pasting, storage spacing of resultant matrix)
+      // For example the next 4 lines is the matrix multiplication of T*Q
+      // = Y5
+      std::fill(Y5.begin(), Y5.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo, Y5.data() + i, k);
+      }
+
+      std::fill(Y6.begin(), Y6.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, (Y6.data() + i * k), 1);
+      }
+
+      Vector<T> WS;
+      Vector<IndexType> Piv;
+      WS.resize(k);
+      Piv.resize(k);
+      std::complex<RealType> logdet = 0.0;
+      InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet);
+
+      Y11.resize(nel, k);
+      Y23.resize(k, k);
+      Y24.resize(k, k);
+      Y25.resize(k, k);
+      Y26.resize(k, nel);
+
+      std::fill(Y11.begin(), Y11.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(nel, Y4.data() + (data_it[datum + 1 + k + i]), nmo, Y11.data() + i, k);
+      }
+
+      std::fill(Y23.begin(), Y23.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(k, Y11.data() + (data_it[datum + 1 + i]) * k, 1, (Y23.data() + i * k), 1);
+      }
+
+      BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y23.data(), k, Y6.data(), k, RealType(0.0), Y24.data(), k);
+      BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y6.data(), k, Y24.data(), k, RealType(0.0), Y25.data(), k);
+
+      // Y26 (k x nel, sized above) receives the columns of Y25 scattered to the selected positions: Y26 = Y25 P^T
+
+      std::fill(Y26.begin(), Y26.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(k, Y25.data() + i, k, Y26.data() + (data_it[datum + 1 + i]), nel);
+      }
+
+      Y7.resize(k, nel);
+
+      std::fill(Y7.begin(), Y7.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(k, Y6.data() + i, k, Y7.data() + (data_it[datum + 1 + i]), nel);
+      }
+
+      // c_Tr_AlphaI_MI is a constant contributing to constant const2
+      // c_Tr_AlphaI_MI = Tr[\alpha_{I}^{-1}(P^{T}\widetilde{M} Q)]
+      RealType c_Tr_AlphaI_MI = 0.0;
+      for (int i = 0; i < k; i++)
+      {
+        c_Tr_AlphaI_MI += Y24(i, i);
+      }
+
+      for (int p = 0; p < lookup_tbl[index].size(); p++)
+      {
+        // el_p is the element position that contains information about
+        // the CI coefficient, and det up/dn values associated with the
+        // current unique determinant
+        const int el_p(lookup_tbl[index][p]);
+        const RealType c  = cptr[el_p];
+        const size_t up   = upC[el_p];
+        const size_t down = dnC[el_p];
+
+        const RealType alpha_1(c * detValues_dn[down] * detValues_up[up] / detValues_up[0] * c_Tr_AlphaI_MI);
+        const RealType alpha_2(c * detValues_dn[down] * detValues_up[up] / detValues_up[0]);
+        const RealType alpha_3(c * Oi[down] * detValues_up[up] / detValues_up[0]);
+
+        const2 += alpha_1;
+
+        for (int i = 0; i < k; i++)
+        {
+          BLAS::axpy(nel, alpha_1, Y7.data() + i * nel, 1, pK1.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+          BLAS::axpy(nel, alpha_2, Y7.data() + i * nel, 1, pK2.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+          BLAS::axpy(nel, alpha_3, Y7.data() + i * nel, 1, pK3.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+          BLAS::axpy(nel, alpha_2, Y26.data() + i * nel, 1, pK5.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+        }
+      }
+      datum += 3 * k + 1;
+    }
+  }
+
+  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK1.data(), nel, RealType(0.0), K1T.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K1T.data(), nmo, T_, nmo, RealType(0.0), TK1T.data(), nmo);
+
+  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y3.data(), nmo, pK2.data(), nel, RealType(0.0), K2AiB.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2AiB.data(), nmo, T_, nmo, RealType(0.0), TK2AiB.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y2.data(), nmo, pK2.data(), nel, RealType(0.0), K2XA.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2XA.data(), nmo, T_, nmo, RealType(0.0), TK2XA.data(), nmo);
+
+  BLAS::gemm('N', 'N', nmo, nmo, nel, const1 / (const0 * const0), T_, nmo, pK2.data(), nel, RealType(0.0), K2T.data(),
+             nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2T.data(), nmo, T_, nmo, RealType(0.0), TK2T.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, const0 / const1, K2T.data(), nmo, Y4.data(), nmo, RealType(0.0), MK2T.data(),
+             nmo);
+
+  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK3.data(), nel, RealType(0.0), K3T.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K3T.data(), nmo, T_, nmo, RealType(0.0), TK3T.data(), nmo);
+
+  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK5.data(), nel, RealType(0.0), K5T.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K5T.data(), nmo, T_, nmo, RealType(0.0), TK5T.data(), nmo);
+
+  for (int mu = 0, k = parameter_start_index; k < (parameter_start_index + parameters_size); k++, mu++)
+  {
+    int kk = this->myVars.where(k);
+    if (kk >= 0)
+    {
+      const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second);
+      if (i <= nel - 1 && j > nel - 1)
+      {
+        dhpsioverpsi[kk] +=
+            T(-0.5 * Y4(i, j) -
+              0.5 *
+                  (-K5T(i, j) + K5T(j, i) + TK5T(i, j) + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) - K2XA(i, j) +
+                   K2XA(j, i) + TK2XA(i, j) - MK2T(i, j) + K1T(i, j) - K1T(j, i) - TK1T(i, j) -
+                   const2 / const1 * K2T(i, j) + const2 / const1 * K2T(j, i) + const2 / const1 * TK2T(i, j) +
+                   K3T(i, j) - K3T(j, i) - TK3T(i, j) - K2T(i, j) + K2T(j, i) + TK2T(i, j)));
+      }
+      else if (i <= nel - 1 && j <= nel - 1)
+      {
+        dhpsioverpsi[kk] +=
+            T(-0.5 * (Y4(i, j) - Y4(j, i)) -
+              0.5 *
+                  (-K5T(i, j) + K5T(j, i) + TK5T(i, j) - TK5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) +
+                   TK2AiB(j, i) - K2XA(i, j) + K2XA(j, i) + TK2XA(i, j) - TK2XA(j, i) - MK2T(i, j) + MK2T(j, i) +
+                   K1T(i, j) - K1T(j, i) - TK1T(i, j) + TK1T(j, i) - const2 / const1 * K2T(i, j) +
+                   const2 / const1 * K2T(j, i) + const2 / const1 * TK2T(i, j) - const2 / const1 * TK2T(j, i) +
+                   K3T(i, j) - K3T(j, i) - TK3T(i, j) + TK3T(j, i) - K2T(i, j) + K2T(j, i) + TK2T(i, j) - TK2T(j, i)));
+      }
+      else
+      {
+        dhpsioverpsi[kk] += T(-0.5 *
+                              (-K5T(i, j) + K5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - K2XA(i, j) + K2XA(j, i)
+
+                               + K1T(i, j) - K1T(j, i) - const2 / const1 * K2T(i, j) + const2 / const1 * K2T(j, i) +
+                               K3T(i, j) - K3T(j, i) - K2T(i, j) + K2T(j, i)));
+      }
+    }
+  }
+}
+
+template<typename T>
+void RotatedSPOsT<T>::table_method_evalWF(Vector<T>& dlogpsi,
+                                          const size_t nel,
+                                          const size_t nmo,
+                                          const T& psiCurrent,
+                                          const std::vector<T>& Coeff,
+                                          const std::vector<size_t>& C2node_up,
+                                          const std::vector<size_t>& C2node_dn,
+                                          const ValueVector& detValues_up,
+                                          const ValueVector& detValues_dn,
+                                          const ValueMatrix& M_up,
+                                          const ValueMatrix& M_dn,
+                                          const ValueMatrix& Minv_up,
+                                          const ValueMatrix& Minv_dn,
+                                          const std::vector<int>& detData_up,
+                                          const std::vector<std::vector<int>>& lookup_tbl)
+{
+  // Table-method accumulation of dlogpsi for every rotation pair (i, j) in m_act_rot_inds whose variable has a
+  // non-negative index in myVars. M_dn and Minv_dn are accepted but not referenced in this body.
+  ValueMatrix Table;
+  ValueMatrix Y5, Y6, Y7;
+  ValueMatrix pK4, K4T, TK4T;
+
+  Table.resize(nel, nmo);
+  // NOTE(review): Bbar is not declared in this function (presumably a class member) and is never read here.
+  // The resize is kept so that object state is unchanged; confirm whether it can be dropped.
+  Bbar.resize(nel, nmo);
+  pK4.resize(nmo, nel);
+  K4T.resize(nmo, nmo);
+  TK4T.resize(nel, nmo);
+
+  const int parameters_size(m_act_rot_inds.size());
+  const int parameter_start_index(0);
+
+  const size_t num_unique_up_dets(detValues_up.size());
+
+  const T* restrict cptr = Coeff.data();
+  const size_t nc        = Coeff.size();
+  const size_t* restrict upC(C2node_up.data());
+  const size_t* restrict dnC(C2node_dn.data());
+
+  T* T_(Table.data());
+
+  const T* restrict A(M_up.data());
+  const T* restrict Ainv(Minv_up.data());
+  // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF
+  // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR
+  // THIS CASE
+  //  The T matrix should be calculated and stored for use
+  //  T = A^{-1} \widetilde A
+  // REMINDER: that the ValueMatrix "matrix" stores data in a row major order
+  // and that BLAS commands assume column major
+  BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), A, nmo, Ainv, nel, RealType(0.0), T_, nmo);
+
+  // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1}
+  // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow})
+  RealType const0(0.0);
+  for (size_t i = 0; i < nc; ++i)
+  {
+    const RealType c  = cptr[i];
+    const size_t up   = upC[i];
+    const size_t down = dnC[i];
+
+    const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]);
+  }
+
+  std::fill(pK4.begin(), pK4.end(), 0.0);
+
+  // Now we are going to loop through all unique determinants.
+  // The few lines above are for the reference matrix contribution.
+  // Although I start the loop below from index 0, the loop only performs
+  // actions when the index is >= 1 the detData object contains all the
+  // information about the P^T and Q matrices (projection matrices) needed in
+  // the table method
+  const int* restrict data_it = detData_up.data();
+  for (int index = 0, datum = 0; index < num_unique_up_dets; index++)
+  {
+    const int k = data_it[datum];
+
+    if (k == 0)
+    {
+      datum += 3 * k + 1;
+    }
+
+    else
+    {
+      // k is read from detData_up (presumably the excitation rank of this
+      // determinant). With it the projectors have the shapes
+      //   P^T : k x nel
+      //   Q   : nmo x k
+      // so Y5 = T Q is nel x k and Y6 = P^T T Q is k x k. The projectors are
+      // never formed explicitly; see the copy-based scheme described below.
+
+      Y5.resize(nel, k);
+      Y6.resize(k, k);
+
+      // Any matrix multiplication of P^T or Q is simply a projection
+      // Explicit matrix multiplication can be avoided; instead column or
+      // row copying can be done BLAS::copy(size of col/row being copied,
+      //            Matrix pointer + place to begin copying,
+      //            storage spacing (number of elements btw next row/col
+      //            element), Pointer to resultant matrix + place to begin
+      //            pasting, storage spacing of resultant matrix)
+      // For example the next 4 lines is the matrix multiplication of T*Q
+      // = Y5
+      std::fill(Y5.begin(), Y5.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo, Y5.data() + i, k);
+      }
+
+      std::fill(Y6.begin(), Y6.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, (Y6.data() + i * k), 1);
+      }
+
+      Vector<T> WS;
+      Vector<IndexType> Piv;
+      WS.resize(k);
+      Piv.resize(k);
+      std::complex<RealType> logdet = 0.0;
+      InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet);
+
+      Y7.resize(k, nel);
+
+      std::fill(Y7.begin(), Y7.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(k, Y6.data() + i, k, Y7.data() + (data_it[datum + 1 + i]), nel);
+      }
+
+      for (int p = 0; p < lookup_tbl[index].size(); p++)
+      {
+        // el_p is the element position that contains information about
+        // the CI coefficient, and det up/dn values associated with the
+        // current unique determinant
+        const int el_p(lookup_tbl[index][p]);
+        const RealType c  = cptr[el_p];
+        const size_t up   = upC[el_p];
+        const size_t down = dnC[el_p];
+
+        const RealType alpha_4(c * detValues_dn[down] * detValues_up[up] * (1 / psiCurrent));
+
+        for (int i = 0; i < k; i++)
+        {
+          BLAS::axpy(nel, alpha_4, Y7.data() + i * nel, 1, pK4.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+        }
+      }
+      datum += 3 * k + 1;
+    }
+  }
+
+  BLAS::gemm('N', 'N', nmo, nmo, nel, RealType(1.0), T_, nmo, pK4.data(), nel, RealType(0.0), K4T.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K4T.data(), nmo, T_, nmo, RealType(0.0), TK4T.data(), nmo);
+
+  for (int mu = 0, k = parameter_start_index; k < (parameter_start_index + parameters_size); k++, mu++)
+  {
+    int kk = this->myVars.where(k);
+    if (kk >= 0)
+    {
+      const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second);
+      if (i <= nel - 1 && j > nel - 1)
+      {
+        dlogpsi[kk] +=
+            T(detValues_up[0] * (Table(i, j)) * const0 * (1 / psiCurrent) + (K4T(i, j) - K4T(j, i) - TK4T(i, j)));
+      }
+      else if (i <= nel - 1 && j <= nel - 1)
+      {
+        dlogpsi[kk] += T(detValues_up[0] * (Table(i, j) - Table(j, i)) * const0 * (1 / psiCurrent) +
+                         (K4T(i, j) - TK4T(i, j) - K4T(j, i) + TK4T(j, i)));
+      }
+      else
+      {
+        dlogpsi[kk] += T((K4T(i, j) - K4T(j, i)));
+      }
+    }
+  }
+}
+
+template<typename T>
+std::unique_ptr<SPOSetT<T>> RotatedSPOsT<T>::makeClone() const
+{
+  auto copy = std::make_unique<RotatedSPOsT>(SPOSetT<T>::getName(), std::unique_ptr<SPOSetT<T>>(Phi->makeClone()));
+  // The new object wraps a clone of Phi under the same name; the rotation bookkeeping is copied member by member.
+  copy->myVars          = this->myVars;
+  copy->myVarsFull      = this->myVarsFull;
+  copy->m_act_rot_inds  = this->m_act_rot_inds;
+  copy->m_full_rot_inds = this->m_full_rot_inds;
+  copy->params          = this->params;
+  copy->params_supplied = this->params_supplied;
+  copy->history_params_ = this->history_params_;
+  copy->use_global_rot_ = this->use_global_rot_;
+  return copy;
+}
+
+template<typename T>
+void RotatedSPOsT<T>::mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                           const RefVectorWithLeader<const VirtualParticleSet>& vp_list,
+                                           const RefVector<ValueVector>& psi_list,
+                                           const std::vector<const T*>& invRow_ptr_list,
+                                           std::vector<std::vector<T>>& ratios_list) const
+{
+  // Forward the whole batch to the leader of the list returned by extractPhiRefList(spo_list).
+  auto phi_refs = extractPhiRefList(spo_list);
+  phi_refs.getLeader().mw_evaluateDetRatios(phi_refs, vp_list, psi_list, invRow_ptr_list, ratios_list);
+}
+
+/// Multi-walker value evaluation: unwrap each RotatedSPOsT to its Phi and forward to the leader Phi.
+template<typename T>
+void RotatedSPOsT<T>::mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                       const RefVectorWithLeader<ParticleSet>& P_list,
+                                       int iat,
+                                       const RefVector<ValueVector>& psi_v_list) const
+{
+  auto phis = extractPhiRefList(spo_list);
+  phis.getLeader().mw_evaluateValue(phis, P_list, iat, psi_v_list);
+}
+
+/// Multi-walker value/gradient/laplacian: unwrap each RotatedSPOsT to its Phi and forward to the leader Phi.
+template<typename T>
+void RotatedSPOsT<T>::mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                     const RefVectorWithLeader<ParticleSet>& P_list,
+                                     int iat,
+                                     const RefVector<ValueVector>& psi_v_list,
+                                     const RefVector<GradVector>& dpsi_v_list,
+                                     const RefVector<ValueVector>& d2psi_v_list) const
+{
+  auto phis = extractPhiRefList(spo_list);
+  phis.getLeader().mw_evaluateVGL(phis, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list);
+}
+
+/// Multi-walker VGL with spin output (mw_dspin): unwrap each RotatedSPOsT to its Phi and forward to the leader Phi.
+template<typename T>
+void RotatedSPOsT<T>::mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                             const RefVectorWithLeader<ParticleSet>& P_list,
+                                             int iat,
+                                             const RefVector<ValueVector>& psi_v_list,
+                                             const RefVector<GradVector>& dpsi_v_list,
+                                             const RefVector<ValueVector>& d2psi_v_list,
+                                             OffloadMatrix<QMCTraits::ComplexType>& mw_dspin) const
+{
+  auto phis = extractPhiRefList(spo_list);
+  phis.getLeader().mw_evaluateVGLWithSpin(phis, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list, mw_dspin);
+}
+
+/// Multi-walker VGL plus ratios and gradients: unwrap each RotatedSPOsT to its Phi and forward to the leader Phi.
+template<typename T>
+void RotatedSPOsT<T>::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                                     const RefVectorWithLeader<ParticleSet>& P_list,
+                                                     int iat,
+                                                     const std::vector<const T*>& invRow_ptr_list,
+                                                     OffloadMWVGLArray& phi_vgl_v,
+                                                     std::vector<T>& ratios,
+                                                     std::vector<GradType>& grads) const
+{
+  auto phis = extractPhiRefList(spo_list);
+  phis.getLeader().mw_evaluateVGLandDetRatioGrads(phis, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads);
+}
+
+/// Spin variant of mw_evaluateVGLandDetRatioGrads: unwrap each RotatedSPOsT to its Phi and forward to the leader Phi.
+template<typename T>
+void RotatedSPOsT<T>::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                                             const RefVectorWithLeader<ParticleSet>& P_list,
+                                                             int iat,
+                                                             const std::vector<const T*>& invRow_ptr_list,
+                                                             OffloadMWVGLArray& phi_vgl_v,
+                                                             std::vector<T>& ratios,
+                                                             std::vector<GradType>& grads,
+                                                             std::vector<T>& spingrads) const
+{
+  auto phis = extractPhiRefList(spo_list);
+  phis.getLeader().mw_evaluateVGLandDetRatioGradsWithSpin(phis, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios,
+                                                          grads, spingrads);
+}
+
+/// Multi-walker evaluate_notranspose: unwrap each RotatedSPOsT to its Phi and forward to the leader Phi.
+template<typename T>
+void RotatedSPOsT<T>::mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                              const RefVectorWithLeader<ParticleSet>& P_list,
+                                              int first,
+                                              int last,
+                                              const RefVector<ValueMatrix>& logdet_list,
+                                              const RefVector<GradMatrix>& dlogdet_list,
+                                              const RefVector<ValueMatrix>& d2logdet_list) const
+{
+  auto phis = extractPhiRefList(spo_list);
+  phis.getLeader().mw_evaluate_notranspose(phis, P_list, first, last, logdet_list, dlogdet_list, d2logdet_list);
+}
+
+template<typename T>
+void RotatedSPOsT<T>::createResource(ResourceCollection& collection) const
+{
+  (*Phi).createResource(collection); // resource creation is delegated to the wrapped SPO set
+}
+
+/// Acquire resources on behalf of the wrapped Phi objects: unwrap spo_list and forward to the leader Phi.
+template<typename T>
+void RotatedSPOsT<T>::acquireResource(ResourceCollection& collection,
+                                      const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+{
+  auto phis = extractPhiRefList(spo_list);
+  phis.getLeader().acquireResource(collection, phis);
+}
+
+/// Release resources on behalf of the wrapped Phi objects: unwrap spo_list and forward to the leader Phi.
+template<typename T>
+void RotatedSPOsT<T>::releaseResource(ResourceCollection& collection,
+                                      const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+{
+  auto phis = extractPhiRefList(spo_list);
+  phis.getLeader().releaseResource(collection, phis);
+}
+
+/// Collect the wrapped Phi of every RotatedSPOsT in spo_list, in order.
+/// The leader of the returned list is the Phi of spo_list's leader.
+template<typename T>
+RefVectorWithLeader<SPOSetT<T>> RotatedSPOsT<T>::extractPhiRefList(const RefVectorWithLeader<SPOSetT<T>>& spo_list)
+{
+  using Rotated = RotatedSPOsT<T>;
+  RefVectorWithLeader<SPOSetT<T>> phi_list(*spo_list.template getCastedLeader<Rotated>().Phi);
+  const size_t num_walkers = spo_list.size();
+  phi_list.reserve(num_walkers);
+  // unsigned index: avoids comparing a signed int against the container size
+  for (size_t iw = 0; iw < num_walkers; ++iw)
+    phi_list.emplace_back(*spo_list.template getCastedElement<Rotated>(iw).Phi);
+  return phi_list;
+}
+
+// Explicit instantiations of RotatedSPOsT for the real value types double and float
+template class RotatedSPOsT<double>;
+template class RotatedSPOsT<float>;
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/RotatedSPOsT.h b/src/QMCWaveFunctions/RotatedSPOsT.h
new file mode 100644
index 0000000000..1cdd5246b5
--- /dev/null
+++ b/src/QMCWaveFunctions/RotatedSPOsT.h
@@ -0,0 +1,487 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) QMCPACK developers.
+//
+// File developed by: Sergio D. Pineda Flores, sergio_pinedaflores@berkeley.edu,
+//                        University of California, Berkeley
+//                    Eric Neuscamman, eneuscamman@berkeley.edu, University of California, Berkeley
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//
+// File created by: Sergio D. Pineda Flores, sergio_pinedaflores@berkeley.edu,
+//                      University of California, Berkeley
+//////////////////////////////////////////////////////////////////////////////////////
+#ifndef QMCPLUSPLUS_ROTATEDSPOST_H
+#define QMCPLUSPLUS_ROTATEDSPOST_H
+
+#include "QMCWaveFunctions/SPOSetT.h"
+
+namespace qmcplusplus
+{
+template<typename T>
+class RotatedSPOsT; // forward declaration so the testing accessors below can name the class
+namespace testing // free functions befriended by RotatedSPOsT (see the friend declarations in the class)
+{
+opt_variables_type& getMyVarsFull(RotatedSPOsT<double>& rot);
+opt_variables_type& getMyVarsFull(RotatedSPOsT<float>& rot);
+std::vector<std::vector<double>>& getHistoryParams(RotatedSPOsT<double>& rot);
+std::vector<std::vector<float>>& getHistoryParams(RotatedSPOsT<float>& rot);
+} // namespace testing
+
+template<class T>
+class RotatedSPOsT : public SPOSetT<T>, public OptimizableObject // wraps an SPOSetT (Phi) and adds orbital rotation
+{
+public:
+  using IndexType         = typename SPOSetT<T>::IndexType;
+  using RealType          = typename SPOSetT<T>::RealType;
+  using FullRealType      = typename SPOSetT<T>::FullRealType;
+  using ValueVector       = typename SPOSetT<T>::ValueVector;
+  using ValueMatrix       = typename SPOSetT<T>::ValueMatrix;
+  using GradVector        = typename SPOSetT<T>::GradVector;
+  using GradMatrix        = typename SPOSetT<T>::GradMatrix;
+  using GradType          = typename SPOSetT<T>::GradType;
+  using HessVector        = typename SPOSetT<T>::HessVector;
+  using HessMatrix        = typename SPOSetT<T>::HessMatrix;
+  using GGGVector         = typename SPOSetT<T>::GGGVector;
+  using GGGMatrix         = typename SPOSetT<T>::GGGMatrix;
+  using OffloadMWVGLArray = typename SPOSetT<T>::OffloadMWVGLArray;
+  using OffloadMWVArray   = typename SPOSetT<T>::OffloadMWVArray;
+  template<typename DT>
+  using OffloadMatrix = typename SPOSetT<T>::template OffloadMatrix<DT>;
+
+  // constructor; spos is the SPO set to be rotated (presumably stored in Phi - defined out of line)
+  RotatedSPOsT(const std::string& my_name, std::unique_ptr<SPOSetT<T>>&& spos);
+  // destructor
+  ~RotatedSPOsT() override;
+
+  std::string getClassName() const override { return "RotatedSPOsT"; }
+  bool isOptimizable() const override { return true; }
+  bool isOMPoffload() const override { return Phi->isOMPoffload(); }
+  bool hasIonDerivs() const override { return Phi->hasIonDerivs(); }
+
+  // Vector of rotation matrix indices
+  using RotationIndices = std::vector<std::pair<int, int>>;
+
+  // Active orbital rotation parameter indices
+  RotationIndices m_act_rot_inds;
+
+  // Full set of rotation values for global rotation
+  RotationIndices m_full_rot_inds;
+
+  // Construct a list of the matrix indices for non-zero rotation parameters.
+  // (The structure for a sparse representation of the matrix)
+  // Only core->active rotations are created.
+  static void createRotationIndices(int nel, int nmo, RotationIndices& rot_indices);
+
+  // Construct a list for all the matrix indices, including core->active,
+  // core->core and active->active
+  static void createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices);
+
+  // Fill in antisymmetric matrix from the list of rotation parameter indices
+  // and a list of parameter values.
+  // This function assumes rot_mat is properly sized upon input and is set to
+  // zero.
+  static void constructAntiSymmetricMatrix(const RotationIndices& rot_indices,
+                                           const std::vector<RealType>& param,
+                                           ValueMatrix& rot_mat);
+
+  // Extract the list of rotation parameters from the entries in an
+  // antisymmetric matrix. This function expects rot_indices and param to be
+  // the same length.
+  static void extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices,
+                                                   const ValueMatrix& rot_mat,
+                                                   std::vector<RealType>& param);
+
+  // function to perform orbital rotations
+  void apply_rotation(const std::vector<RealType>& param, bool use_stored_copy);
+
+  // For global rotation, inputs are the old parameters and the delta
+  // parameters. The corresponding rotation matrices are constructed,
+  // multiplied together, and the new parameters extracted. The new rotation
+  // is applied to the underlying SPO coefficients
+  void applyDeltaRotation(const std::vector<RealType>& delta_param,
+                          const std::vector<RealType>& old_param,
+                          std::vector<RealType>& new_param);
+
+  // Perform the construction of matrices and extraction of parameters for a
+  // delta rotation. Split out and made static for testing.
+  static void constructDeltaRotation(const std::vector<RealType>& delta_param,
+                                     const std::vector<RealType>& old_param,
+                                     const RotationIndices& act_rot_inds,
+                                     const RotationIndices& full_rot_inds,
+                                     std::vector<RealType>& new_param,
+                                     ValueMatrix& new_rot_mat);
+
+  // When initializing the rotation from VP files,
+  // this function applies the rotation history
+  void applyRotationHistory();
+
+  // This function applies the global rotation (similar to apply_rotation, but
+  // for the full set of rotation parameters)
+  void applyFullRotation(const std::vector<RealType>& full_param, bool use_stored_copy);
+
+  // Compute matrix exponential of an antisymmetric matrix (result is rotation
+  // matrix)
+  static void exponentiate_antisym_matrix(ValueMatrix& mat);
+
+  // Compute matrix log of rotation matrix to produce antisymmetric matrix
+  static void log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output);
+
+  // The wrapped SPOSet providing the orbitals; the evaluate* members below forward to it
+  std::unique_ptr<SPOSetT<T>> Phi;
+
+  /// Set the rotation parameters (usually from input file)
+  void setRotationParameters(const std::vector<RealType>& param_list);
+
+  /// the number of electrons of the majority spin
+  size_t nel_major_;
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const override;
+
+  // myG_temp (myL_temp) is the Gradient (Laplacian) value of the Determinant
+  // part of the wfn.
+  // myG_J is the Gradient of all the other parts of the wavefunction
+  // (typically just the Jastrow). It represents \frac{\nabla\psi_{J}}{\psi_{J}}
+  // myL_J will be used to represent \frac{\nabla^2\psi_{J}}{\psi_{J}}, the
+  // Laplacian portion.
+  // IMPORTANT NOTE: The value of P.L holds \nabla^2 ln[\psi] but we need
+  // \frac{\nabla^2 \psi}{\psi} and this is what myL_J will hold
+  ParticleSet::ParticleGradient myG_temp, myG_J;
+  ParticleSet::ParticleLaplacian myL_temp, myL_J;
+
+  ValueMatrix Bbar;
+  ValueMatrix psiM_inv;
+  ValueMatrix psiM_all;
+  GradMatrix dpsiM_all;
+  ValueMatrix d2psiM_all;
+
+  // Single Slater creation
+  void buildOptVariables(size_t nel);
+
+  // For the MSD case rotations must be created in MultiSlaterDetTableMethod
+  // class
+  void buildOptVariables(const RotationIndices& rotations, const RotationIndices& full_rotations);
+
+  void evaluateDerivatives(ParticleSet& P,
+                           const opt_variables_type& optvars,
+                           Vector<T>& dlogpsi,
+                           Vector<T>& dhpsioverpsi,
+                           const int& FirstIndex,
+                           const int& LastIndex) override;
+
+  void evaluateDerivativesWF(ParticleSet& P,
+                             const opt_variables_type& optvars,
+                             Vector<T>& dlogpsi,
+                             int FirstIndex,
+                             int LastIndex) override;
+
+  void evaluateDerivatives(ParticleSet& P,
+                           const opt_variables_type& optvars,
+                           Vector<T>& dlogpsi,
+                           Vector<T>& dhpsioverpsi,
+                           const T& psiCurrent,
+                           const std::vector<T>& Coeff,
+                           const std::vector<size_t>& C2node_up,
+                           const std::vector<size_t>& C2node_dn,
+                           const ValueVector& detValues_up,
+                           const ValueVector& detValues_dn,
+                           const GradMatrix& grads_up,
+                           const GradMatrix& grads_dn,
+                           const ValueMatrix& lapls_up,
+                           const ValueMatrix& lapls_dn,
+                           const ValueMatrix& M_up,
+                           const ValueMatrix& M_dn,
+                           const ValueMatrix& Minv_up,
+                           const ValueMatrix& Minv_dn,
+                           const GradMatrix& B_grad,
+                           const ValueMatrix& B_lapl,
+                           const std::vector<int>& detData_up,
+                           const size_t N1,
+                           const size_t N2,
+                           const size_t NP1,
+                           const size_t NP2,
+                           const std::vector<std::vector<int>>& lookup_tbl) override;
+
+  void evaluateDerivativesWF(ParticleSet& P,
+                             const opt_variables_type& optvars,
+                             Vector<T>& dlogpsi,
+                             const FullRealType& psiCurrent,
+                             const std::vector<T>& Coeff,
+                             const std::vector<size_t>& C2node_up,
+                             const std::vector<size_t>& C2node_dn,
+                             const ValueVector& detValues_up,
+                             const ValueVector& detValues_dn,
+                             const ValueMatrix& M_up,
+                             const ValueMatrix& M_dn,
+                             const ValueMatrix& Minv_up,
+                             const ValueMatrix& Minv_dn,
+                             const std::vector<int>& detData_up,
+                             const std::vector<std::vector<int>>& lookup_tbl) override;
+
+  // helper function for evaluateDerivatives; evaluates the orbital rotation
+  // parameter derivatives using the table method
+  void table_method_eval(Vector<T>& dlogpsi,
+                         Vector<T>& dhpsioverpsi,
+                         const ParticleSet::ParticleLaplacian& myL_J,
+                         const ParticleSet::ParticleGradient& myG_J,
+                         const size_t nel,
+                         const size_t nmo,
+                         const T& psiCurrent,
+                         const std::vector<T>& Coeff,
+                         const std::vector<size_t>& C2node_up,
+                         const std::vector<size_t>& C2node_dn,
+                         const ValueVector& detValues_up,
+                         const ValueVector& detValues_dn,
+                         const GradMatrix& grads_up,
+                         const GradMatrix& grads_dn,
+                         const ValueMatrix& lapls_up,
+                         const ValueMatrix& lapls_dn,
+                         const ValueMatrix& M_up,
+                         const ValueMatrix& M_dn,
+                         const ValueMatrix& Minv_up,
+                         const ValueMatrix& Minv_dn,
+                         const GradMatrix& B_grad,
+                         const ValueMatrix& B_lapl,
+                         const std::vector<int>& detData_up,
+                         const size_t N1,
+                         const size_t N2,
+                         const size_t NP1,
+                         const size_t NP2,
+                         const std::vector<std::vector<int>>& lookup_tbl);
+
+  void table_method_evalWF(Vector<T>& dlogpsi,
+                           const size_t nel,
+                           const size_t nmo,
+                           const T& psiCurrent,
+                           const std::vector<T>& Coeff,
+                           const std::vector<size_t>& C2node_up,
+                           const std::vector<size_t>& C2node_dn,
+                           const ValueVector& detValues_up,
+                           const ValueVector& detValues_dn,
+                           const ValueMatrix& M_up,
+                           const ValueMatrix& M_dn,
+                           const ValueMatrix& Minv_up,
+                           const ValueMatrix& Minv_dn,
+                           const std::vector<int>& detData_up,
+                           const std::vector<std::vector<int>>& lookup_tbl);
+
+  void extractOptimizableObjectRefs(UniqueOptObjRefs& opt_obj_refs) override { opt_obj_refs.push_back(*this); }
+
+  void checkInVariablesExclusive(opt_variables_type& active) override
+  {
+    if (this->myVars.size())
+      active.insertFrom(this->myVars);
+  }
+
+  void checkOutVariables(const opt_variables_type& active) override { this->myVars.getIndex(active); }
+
+  /// reset (defined out of line; presumably updates the rotation from the values in active - confirm in the .cpp)
+  void resetParametersExclusive(const opt_variables_type& active) override;
+
+  void writeVariationalParameters(hdf_archive& hout) override;
+
+  void readVariationalParameters(hdf_archive& hin) override;
+
+  //*********************************************************************************
+  // the following functions simply call Phi's corresponding functions (some assert psi.size() <= OrbitalSetSize first)
+  void setOrbitalSetSize(int norbs) override { Phi->setOrbitalSetSize(norbs); }
+
+  void checkObject() const override { Phi->checkObject(); }
+
+  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override
+  {
+    assert(psi.size() <= this->OrbitalSetSize);
+    Phi->evaluateValue(P, iat, psi);
+  }
+
+  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override
+  {
+    assert(psi.size() <= this->OrbitalSetSize);
+    Phi->evaluateVGL(P, iat, psi, dpsi, d2psi);
+  }
+
+  void evaluateDetRatios(const VirtualParticleSet& VP,
+                         ValueVector& psi,
+                         const ValueVector& psiinv,
+                         std::vector<T>& ratios) override
+  {
+    Phi->evaluateDetRatios(VP, psi, psiinv, ratios);
+  }
+
+  void evaluateDerivRatios(const VirtualParticleSet& VP,
+                           const opt_variables_type& optvars,
+                           ValueVector& psi,
+                           const ValueVector& psiinv,
+                           std::vector<T>& ratios,
+                           Matrix<T>& dratios,
+                           int FirstIndex,
+                           int LastIndex) override;
+
+  void evaluateVGH(const ParticleSet& P,
+                   int iat,
+                   ValueVector& psi,
+                   GradVector& dpsi,
+                   HessVector& grad_grad_psi) override
+  {
+    assert(psi.size() <= this->OrbitalSetSize);
+    Phi->evaluateVGH(P, iat, psi, dpsi, grad_grad_psi);
+  }
+
+  void evaluateVGHGH(const ParticleSet& P,
+                     int iat,
+                     ValueVector& psi,
+                     GradVector& dpsi,
+                     HessVector& grad_grad_psi,
+                     GGGVector& grad_grad_grad_psi) override
+  {
+    Phi->evaluateVGHGH(P, iat, psi, dpsi, grad_grad_psi, grad_grad_grad_psi);
+  }
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override
+  {
+    Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet);
+  }
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& grad_grad_logdet) override
+  {
+    Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, grad_grad_logdet);
+  }
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& grad_grad_logdet,
+                            GGGMatrix& grad_grad_grad_logdet) override
+  {
+    Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet);
+  }
+
+  void evaluateGradSource(const ParticleSet& P,
+                          int first,
+                          int last,
+                          const ParticleSet& source,
+                          int iat_src,
+                          GradMatrix& grad_phi) override
+  {
+    Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi);
+  }
+
+  void evaluateGradSource(const ParticleSet& P,
+                          int first,
+                          int last,
+                          const ParticleSet& source,
+                          int iat_src,
+                          GradMatrix& grad_phi,
+                          HessMatrix& grad_grad_phi,
+                          GradMatrix& grad_lapl_phi) override
+  {
+    Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi, grad_grad_phi, grad_lapl_phi);
+  }
+
+  //  void evaluateThirdDeriv(const ParticleSet& P, int first, int last,
+  //  GGGMatrix& grad_grad_grad_logdet) {Phi->evaluateThridDeriv(P, first,
+  //  last, grad_grad_grad_logdet); }
+
+  /// Use history list (false) or global rotation (true)
+  void set_use_global_rotation(bool use_global_rotation) { use_global_rot_ = use_global_rotation; }
+
+  void mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                            const RefVectorWithLeader<const VirtualParticleSet>& vp_list,
+                            const RefVector<ValueVector>& psi_list,
+                            const std::vector<const T*>& invRow_ptr_list,
+                            std::vector<std::vector<T>>& ratios_list) const override;
+
+  void mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                        const RefVectorWithLeader<ParticleSet>& P_list,
+                        int iat,
+                        const RefVector<ValueVector>& psi_v_list) const override;
+
+  void mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                      const RefVectorWithLeader<ParticleSet>& P_list,
+                      int iat,
+                      const RefVector<ValueVector>& psi_v_list,
+                      const RefVector<GradVector>& dpsi_v_list,
+                      const RefVector<ValueVector>& d2psi_v_list) const override;
+
+  void mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                              const RefVectorWithLeader<ParticleSet>& P_list,
+                              int iat,
+                              const RefVector<ValueVector>& psi_v_list,
+                              const RefVector<GradVector>& dpsi_v_list,
+                              const RefVector<ValueVector>& d2psi_v_list,
+                              OffloadMatrix<QMCTraits::ComplexType>& mw_dspin) const override;
+
+  void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                      const RefVectorWithLeader<ParticleSet>& P_list,
+                                      int iat,
+                                      const std::vector<const T*>& invRow_ptr_list,
+                                      OffloadMWVGLArray& phi_vgl_v,
+                                      std::vector<T>& ratios,
+                                      std::vector<GradType>& grads) const override;
+
+  void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                              const RefVectorWithLeader<ParticleSet>& P_list,
+                                              int iat,
+                                              const std::vector<const T*>& invRow_ptr_list,
+                                              OffloadMWVGLArray& phi_vgl_v,
+                                              std::vector<T>& ratios,
+                                              std::vector<GradType>& grads,
+                                              std::vector<T>& spingrads) const override;
+
+  void mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                               const RefVectorWithLeader<ParticleSet>& P_list,
+                               int first,
+                               int last,
+                               const RefVector<ValueMatrix>& logdet_list,
+                               const RefVector<GradMatrix>& dlogdet_list,
+                               const RefVector<ValueMatrix>& d2logdet_list) const override;
+
+  void createResource(ResourceCollection& collection) const override;
+
+  void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const override;
+
+  void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const override;
+
+private:
+  /// true if SPO parameters (orbital rotation parameters) have been supplied
+  /// by input
+  bool params_supplied;
+  /// list of supplied orbital rotation parameters
+  std::vector<RealType> params;
+
+  /// Full set of rotation matrix parameters for use in global rotation method
+  opt_variables_type myVarsFull;
+
+  /// timer for apply_rotation
+  NewTimer& apply_rotation_timer_;
+
+  /// List of previously applied parameters
+  std::vector<std::vector<RealType>> history_params_;
+
+  static RefVectorWithLeader<SPOSetT<T>> extractPhiRefList(const RefVectorWithLeader<SPOSetT<T>>& spo_list);
+
+  /// Use global rotation or history list
+  bool use_global_rot_ = true;
+
+  friend opt_variables_type& testing::getMyVarsFull(RotatedSPOsT<double>& rot);
+  friend opt_variables_type& testing::getMyVarsFull(RotatedSPOsT<float>& rot);
+  friend std::vector<std::vector<double>>& testing::getHistoryParams(RotatedSPOsT<double>& rot);
+  friend std::vector<std::vector<float>>& testing::getHistoryParams(RotatedSPOsT<float>& rot);
+};
+
+} // namespace qmcplusplus
+
+#endif
diff --git a/src/QMCWaveFunctions/SPOSetBuilderT.cpp b/src/QMCWaveFunctions/SPOSetBuilderT.cpp
new file mode 100644
index 0000000000..c682d6a77a
--- /dev/null
+++ b/src/QMCWaveFunctions/SPOSetBuilderT.cpp
@@ -0,0 +1,187 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#include "SPOSetBuilderT.h"
+#include "OhmmsData/AttributeSet.h"
+#include <Message/UniformCommunicateError.h>
+
+#ifndef QMC_COMPLEX
+#include "QMCWaveFunctions/RotatedSPOsT.h"
+#endif
+
+namespace qmcplusplus
+{
+/** Construct a builder for SPO sets of the given type.
+ *
+ * The builder starts in legacy mode and makes sure one SPOSetInfo entry exists in `states`.
+ *
+ * @param type_name stored in type_name_ and returned by getTypeName()
+ * @param comm communicator handed to MPIObjectBase
+ */
+template<typename T>
+SPOSetBuilderT<T>::SPOSetBuilderT(const std::string& type_name, Communicate* comm)
+    : MPIObjectBase(comm), legacy(true), type_name_(type_name)
+{
+  // a single set of states is the declared default of reserve_states()
+  reserve_states(1);
+}
+
+/** Grow `states` until it holds at least `nsets` entries.
+ *
+ * Existing entries are kept; nothing is added when `states` already has `nsets` or more entries.
+ *
+ * @param nsets minimum number of SPOSetInfo entries required
+ */
+template<typename T>
+void SPOSetBuilderT<T>::reserve_states(int nsets)
+{
+  // append default-constructed SPOSetInfo objects for the missing sets only
+  for (int have = static_cast<int>(states.size()); have < nsets; ++have)
+    states.push_back(std::make_unique<SPOSetInfo>());
+}
+
+/** Fallback for SPOSet requests that are not handled through the legacy xml path.
+ *
+ * This base implementation has no way to build an SPOSet from `input_info`; it reports
+ * that through the communicator's barrier_and_abort(). Builders that support such requests
+ * are expected to override this overload.
+ *
+ * @param cur xml node of the sposet request (unused here)
+ * @param input_info parsed request information (unused here)
+ * @return nullptr; only reached if barrier_and_abort() returns
+ */
+template<typename T>
+std::unique_ptr<SPOSetT<T>> SPOSetBuilderT<T>::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input_info)
+{
+  myComm->barrier_and_abort("SPOSetBuilderT::createSPOSet(cur,input_info) has not been implemented");
+  return nullptr;
+}
+
+/** Build an SPOSet from the xml node `cur`.
+ *
+ * Reads the "id"/"name" and "optimize" attributes, prints a summary, and dispatches to
+ * createSPOSetFromXML() (legacy builder receiving a legacy request) or to
+ * createSPOSet(cur, input_info). When optimize is "rotation" or "yes" (deprecated input
+ * style), the result is wrapped in a RotatedSPOsT and the content of every "opt_vars"
+ * child element is passed on as rotation parameters.
+ *
+ * Failures (missing name, failed creation, unsupported rotation, complex build with
+ * rotation requested, UniformCommunicateError from the concrete builder) are reported
+ * through myComm->barrier_and_abort().
+ *
+ * @param cur xml node describing the sposet
+ * @return the newly created SPOSet, wrapped for orbital rotation when requested
+ */
+template<typename T>
+std::unique_ptr<SPOSetT<T>> SPOSetBuilderT<T>::createSPOSet(xmlNodePtr cur)
+{
+  std::string spo_object_name;
+  std::string optimize("no");
+
+  OhmmsAttributeSet attrib;
+  // "id" and "name" both fill the same variable
+  attrib.add(spo_object_name, "id");
+  attrib.add(spo_object_name, "name");
+  attrib.add(optimize, "optimize");
+  attrib.put(cur);
+
+  app_summary() << std::endl;
+  app_summary() << "     Single particle orbitals (SPO)" << std::endl;
+  app_summary() << "     ------------------------------" << std::endl;
+  app_summary() << "      Name: " << spo_object_name << "   Type: " << type_name_
+                << "   Builder class name: " << ClassName << std::endl;
+  app_summary() << std::endl;
+
+  if (spo_object_name.empty())
+    myComm->barrier_and_abort("SPOSet object \"name\" attribute not given in the input!");
+
+  // read specialized sposet construction requests
+  //   and translate them into a set of orbital indices
+  SPOSetInputInfo input_info(cur);
+
+  // process general sposet construction requests
+  //   and preserve legacy interface
+  std::unique_ptr<SPOSetT<T>> sposet;
+
+  try
+  {
+    if (legacy && input_info.legacy_request)
+      sposet = createSPOSetFromXML(cur);
+    else
+      sposet = createSPOSet(cur, input_info);
+  }
+  catch (const UniformCommunicateError& ue)
+  {
+    myComm->barrier_and_abort(ue.what());
+  }
+
+  if (!sposet)
+    myComm->barrier_and_abort("SPOSetBuilderT::createSPOSet sposet creation failed");
+
+  if (optimize == "rotation" || optimize == "yes")
+  {
+#ifdef QMC_COMPLEX
+    // abort through the communicator, as createRotatedSPOSet() does, instead of a raw abort()
+    myComm->barrier_and_abort("Orbital optimization via rotation doesn't support complex wavefunction yet.");
+#else
+    app_warning() << "Specifying orbital rotation via optimize tag is deprecated. Use the rotated_spo element instead"
+                  << std::endl;
+
+    auto& sposet_ref = *sposet;
+    app_log() << "  SPOSet " << sposet_ref.getName() << " is optimizable\n";
+    // verify rotation support before touching the orbital parameters (same order as createRotatedSPOSet)
+    if (!sposet_ref.isRotationSupported())
+      myComm->barrier_and_abort("Orbital rotation not supported with '" + sposet_ref.getName() + "' of type '" +
+                                sposet_ref.getClassName() + "'.");
+    sposet_ref.storeParamsBeforeRotation();
+
+    // create sposet with rotation
+    auto rot_spo = std::make_unique<RotatedSPOsT<T>>(sposet_ref.getName(), std::move(sposet));
+    // forward user supplied rotation parameters, if any
+    processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) {
+      if (cname == "opt_vars")
+      {
+        std::vector<RealType> params;
+        putContent(params, element);
+        rot_spo->setRotationParameters(params);
+      }
+    });
+    sposet = std::move(rot_spo);
+#endif
+  }
+
+  if (sposet->getName().empty())
+    app_warning() << "SPOSet object doesn't have a name." << std::endl;
+  if (!spo_object_name.empty() && sposet->getName() != spo_object_name)
+    app_warning() << "SPOSet object name mismatched! input name: " << spo_object_name
+                  << "   object name: " << sposet->getName() << std::endl;
+
+  sposet->checkObject();
+  return sposet;
+}
+
+/** Build a rotated SPO set from a node that holds an "sposet" child element.
+ *
+ * Attributes read from `cur`: "name" (name given to the rotated set) and "method"
+ * ("global" or "history"). The wrapped set is created from the "sposet" child via
+ * createSPOSet(); "opt_vars" children supply rotation parameters. Complex builds
+ * (QMC_COMPLEX) are rejected through barrier_and_abort().
+ *
+ * @param cur xml node of the rotated SPO request
+ * @return the RotatedSPOsT wrapping the inner SPOSet
+ */
+template<typename T>
+std::unique_ptr<SPOSetT<T>> SPOSetBuilderT<T>::createRotatedSPOSet(xmlNodePtr cur)
+{
+  std::string rotated_name;
+  std::string rotation_method;
+  OhmmsAttributeSet attributes;
+  attributes.add(rotated_name, "name");
+  attributes.add(rotation_method, "method", {"global", "history"});
+  attributes.put(cur);
+
+#ifdef QMC_COMPLEX
+  myComm->barrier_and_abort("Orbital optimization via rotation doesn't support complex wavefunctions yet.");
+  return nullptr;
+#else
+  // first pass over the children: build the SPOSet that will be rotated
+  std::unique_ptr<SPOSetT<T>> inner_spo;
+  processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) {
+    if (cname != "sposet")
+      return;
+    inner_spo = createSPOSet(element);
+  });
+
+  if (!inner_spo)
+    myComm->barrier_and_abort("Rotated SPO needs an SPOset");
+
+  if (!inner_spo->isRotationSupported())
+    myComm->barrier_and_abort("Orbital rotation not supported with '" + inner_spo->getName() + "' of type '" +
+                              inner_spo->getClassName() + "'.");
+
+  inner_spo->storeParamsBeforeRotation();
+  auto rotated = std::make_unique<RotatedSPOsT<T>>(rotated_name, std::move(inner_spo));
+
+  // only "history" switches the global rotation off
+  const bool use_history = (rotation_method == "history");
+  if (use_history)
+    rotated->set_use_global_rotation(false);
+
+  // second pass over the children: hand over supplied rotation parameters
+  processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) {
+    if (cname != "opt_vars")
+      return;
+    std::vector<RealType> supplied_params;
+    putContent(supplied_params, element);
+    rotated->setRotationParameters(supplied_params);
+  });
+  return rotated;
+#endif
+}
+// Explicit instantiations of SPOSetBuilderT for the real and complex value types;
+// the member definitions above are emitted in this translation unit for these types.
+template class SPOSetBuilderT<double>;
+template class SPOSetBuilderT<float>;
+template class SPOSetBuilderT<std::complex<double>>;
+template class SPOSetBuilderT<std::complex<float>>;
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/SPOSetBuilderT.h b/src/QMCWaveFunctions/SPOSetBuilderT.h
new file mode 100644
index 0000000000..060451a94d
--- /dev/null
+++ b/src/QMCWaveFunctions/SPOSetBuilderT.h
@@ -0,0 +1,93 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
+//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+/** @file SPOSetBuilderT.h
+ * @brief Declaration of a base class of SPOSet Builders
+ */
+#ifndef QMCPLUSPLUS_SPOSET_BUILDERT_H
+#define QMCPLUSPLUS_SPOSET_BUILDERT_H
+
+#include <memory>
+#include <vector>
+#include <string>
+#include "Message/MPIObjectBase.h"
+#include "QMCWaveFunctions/SPOSetInfo.h"
+#include "QMCWaveFunctions/SPOSetInputInfo.h"
+#include "QMCWaveFunctions/SPOSetT.h"
+#include "hdf/hdf_archive.h"
+
+namespace qmcplusplus
+{
+/** base class for the real SPOSet builder
+ *
+ * \warning {
+ * We have not quite figured out how to use real/complex efficiently.
+ * There are three cases we have to deal with
+ * - real basis functions and real coefficients
+ * - real basis functions and complex coefficients
+ * - complex basis functions and complex coefficients
+ * For now, we decide to keep both real and complex basis sets and expect
+ * the user classes {\bf KNOW} what they need to use.
+ * }
+ */
+template<typename T>
+class SPOSetBuilderT : public QMCTraits, public MPIObjectBase
+{
+public:
+  using RealType   = typename SPOSetT<T>::RealType;
+  using indices_t  = std::vector<int>;
+  using energies_t = std::vector<RealType>;
+
+  /// true when the implementation only conforms to the legacy standard
+  bool legacy;
+
+  /// state info for every set of states available in the basis
+  std::vector<std::unique_ptr<SPOSetInfo>> states;
+
+  /** @param type_name type name of the SPO objects built by this builder
+   *  @param comm communicator passed on to MPIObjectBase
+   */
+  SPOSetBuilderT(const std::string& type_name, Communicate* comm);
+  virtual ~SPOSetBuilderT() = default;
+
+  /// make sure at least nsets sets of states exist (usually one, several for e.g. spin dependent einspline)
+  void reserve_states(int nsets = 1);
+
+  /// allow modification of the state information of set `index` (index is not range checked)
+  void modify_states(int index = 0) { states[index]->modify(); }
+
+  /// clear the state information of set `index` (index is not range checked)
+  void clear_states(int index = 0) { states[index]->clear(); }
+
+  /// create an sposet from xml and hand ownership of it to the caller
+  std::unique_ptr<SPOSetT<T>> createSPOSet(xmlNodePtr cur);
+
+  /// create an orbital rotation transformation from xml and hand ownership of it to the caller
+  std::unique_ptr<SPOSetT<T>> createRotatedSPOSet(xmlNodePtr cur);
+
+  /// type name given at construction
+  const std::string& getTypeName() const { return type_name_; }
+
+protected:
+  /// legacy path: create an sposet directly from xml; must be provided by every concrete builder
+  virtual std::unique_ptr<SPOSetT<T>> createSPOSetFromXML(xmlNodePtr cur) = 0;
+
+  /// general path: create an sposet from a parsed xml request
+  virtual std::unique_ptr<SPOSetT<T>> createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input_info);
+
+  /// type name of the SPO objects built by this builder.
+  const std::string type_name_;
+};
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/SPOSetInfo.h b/src/QMCWaveFunctions/SPOSetInfo.h
index 9653dc73f8..8ee31c909d 100644
--- a/src/QMCWaveFunctions/SPOSetInfo.h
+++ b/src/QMCWaveFunctions/SPOSetInfo.h
@@ -130,6 +130,8 @@ class SPOSetInfo
   void clear();
 
   friend class SPOSetBuilder;
+  template<typename T>
+  friend class SPOSetBuilderT;
 };
 
 
diff --git a/src/QMCWaveFunctions/SPOSetT.cpp b/src/QMCWaveFunctions/SPOSetT.cpp
new file mode 100644
index 0000000000..34c76bad82
--- /dev/null
+++ b/src/QMCWaveFunctions/SPOSetT.cpp
@@ -0,0 +1,438 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
+//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Ying Wai Li, yingwaili@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    William F. Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "SPOSetT.h"
+
+#include "CPU/SIMD/simd.hpp" // simd::dot
+
+namespace qmcplusplus
+{
+
+template<class T>
+SPOSetT<T>::SPOSetT(const std::string& my_name) : my_name_(my_name), OrbitalSetSize(0)
+{}
+
+/** Fallback that collects nothing.
+ *
+ * @throws std::logic_error when isOptimizable() is true, i.e. the concrete class
+ *         should have supplied its own implementation.
+ */
+template<class T>
+void SPOSetT<T>::extractOptimizableObjectRefs(UniqueOptObjRefs&)
+{
+  if (!isOptimizable())
+    return;
+
+  throw std::logic_error("Bug!! " + getClassName() +
+                         "::extractOptimizableObjectRefs "
+                         "must be overloaded when the SPOSet is optimizable.");
+}
+
+/** Fallback that ignores `active`.
+ *
+ * @throws std::logic_error when isOptimizable() is true, i.e. the concrete class
+ *         should have supplied its own implementation.
+ */
+template<class T>
+void SPOSetT<T>::checkOutVariables(const opt_variables_type& active)
+{
+  if (!isOptimizable())
+    return;
+
+  throw std::logic_error("Bug!! " + getClassName() +
+                         "::checkOutVariables "
+                         "must be overloaded when the SPOSet is optimizable.");
+}
+
+/** Compute one ratio per virtual particle as the dot product of the orbital values with `psiinv`.
+ *
+ * @param VP virtual particle set; every particle index in [0, VP.getTotalNum()) is visited
+ * @param psi work vector that receives the orbital values of the current virtual particle
+ * @param psiinv vector dotted with `psi`; must have the same size as `psi`
+ * @param ratios output, one entry per virtual particle (must already be large enough)
+ */
+template<class T>
+void SPOSetT<T>::evaluateDetRatios(const VirtualParticleSet& VP,
+                                   ValueVector& psi,
+                                   const ValueVector& psiinv,
+                                   std::vector<T>& ratios)
+{
+  assert(psi.size() == psiinv.size());
+  int ivp = 0;
+  while (ivp < VP.getTotalNum())
+  {
+    // psi is overwritten for every virtual particle
+    evaluateValue(VP, ivp, psi);
+    ratios[ivp] = simd::dot(psi.data(), psiinv.data(), psi.size());
+    ++ivp;
+  }
+}
+
+
+/** Multi-walker fallback: calls evaluateDetRatios() once per walker.
+ *
+ * `this` must be the leader of `spo_list`. For walker iw the raw pointer
+ * invRow_ptr_list[iw] is wrapped in a Vector<T> with the length of psi_list[iw].
+ */
+template<class T>
+void SPOSetT<T>::mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                      const RefVectorWithLeader<const VirtualParticleSet>& vp_list,
+                                      const RefVector<ValueVector>& psi_list,
+                                      const std::vector<const T*>& invRow_ptr_list,
+                                      std::vector<std::vector<T>>& ratios_list) const
+{
+  assert(this == &spo_list.getLeader());
+  const auto num_walkers = spo_list.size();
+  for (int iw = 0; iw < num_walkers; iw++)
+  {
+    ValueVector& psi = psi_list[iw];
+    // NOTE(review): presumably a non-owning view over the caller's row - confirm with the Vector class
+    Vector<T> inv_row(const_cast<T*>(invRow_ptr_list[iw]), psi.size());
+    spo_list[iw].evaluateDetRatios(vp_list[iw], psi, inv_row, ratios_list[iw]);
+  }
+}
+
+/// Fallback without an implementation: always throws std::runtime_error.
+template<class T>
+void SPOSetT<T>::evaluateVGL_spin(const ParticleSet& P,
+                                  int iat,
+                                  ValueVector& psi,
+                                  GradVector& dpsi,
+                                  ValueVector& d2psi,
+                                  ValueVector& dspin)
+{
+  const std::string message("Need specialization of SPOSet::evaluateVGL_spin");
+  throw std::runtime_error(message);
+}
+
+/** Multi-walker fallback: calls evaluateVGL() for particle `iat` on each walker in turn.
+ *
+ * `this` must be the leader of `spo_list`; the list arguments are indexed by walker.
+ */
+template<class T>
+void SPOSetT<T>::mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                const RefVectorWithLeader<ParticleSet>& P_list,
+                                int iat,
+                                const RefVector<ValueVector>& psi_v_list,
+                                const RefVector<GradVector>& dpsi_v_list,
+                                const RefVector<ValueVector>& d2psi_v_list) const
+{
+  assert(this == &spo_list.getLeader());
+  const auto num_walkers = spo_list.size();
+  for (int iw = 0; iw < num_walkers; iw++)
+  {
+    auto& walker_spo = spo_list[iw];
+    walker_spo.evaluateVGL(P_list[iw], iat, psi_v_list[iw], dpsi_v_list[iw], d2psi_v_list[iw]);
+  }
+}
+
+/** Multi-walker fallback: calls evaluateValue() for particle `iat` on each walker in turn.
+ *
+ * `this` must be the leader of `spo_list`; the list arguments are indexed by walker.
+ */
+template<class T>
+void SPOSetT<T>::mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                  const RefVectorWithLeader<ParticleSet>& P_list,
+                                  int iat,
+                                  const RefVector<ValueVector>& psi_v_list) const
+{
+  assert(this == &spo_list.getLeader());
+  const auto num_walkers = spo_list.size();
+  for (int iw = 0; iw < num_walkers; iw++)
+  {
+    auto& walker_spo = spo_list[iw];
+    walker_spo.evaluateValue(P_list[iw], iat, psi_v_list[iw]);
+  }
+}
+
+/// Fallback without an implementation: always throws std::runtime_error naming the concrete class.
+template<class T>
+void SPOSetT<T>::mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                        const RefVectorWithLeader<ParticleSet>& P_list,
+                                        int iat,
+                                        const RefVector<ValueVector>& psi_v_list,
+                                        const RefVector<GradVector>& dpsi_v_list,
+                                        const RefVector<ValueVector>& d2psi_v_list,
+                                        OffloadMatrix<ComplexType>& mw_dspin) const
+{
+  const std::string message = getClassName() + "::mw_evaluateVGLWithSpin() is not supported. \n";
+  throw std::runtime_error(message);
+}
+
+
+/** Multi-walker fallback: evaluate value/gradient/laplacian of the orbitals for particle `iat`
+ *  on every walker and derive the determinant ratio and its gradient from the inverse rows.
+ *
+ * `this` must be the leader of `spo_list`.
+ *
+ * @param spo_list SPO sets, one per walker
+ * @param P_list particle sets, one per walker
+ * @param iat particle index passed to evaluateVGL()
+ * @param invRow_ptr_list per walker pointer to the row dotted with the orbital values/gradients
+ * @param phi_vgl_v [DIM_VGL, walker, orbital] array; row 0 receives the values, rows 1..DIM the
+ *                  gradient components and row DIM+1 the laplacians; updateTo() is called at the end
+ * @param ratios output, dot(invRow, values) per walker
+ * @param grads output, dot(invRow, gradients) / ratio per walker (no guard against a zero ratio)
+ */
+template<class T>
+void SPOSetT<T>::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                                const RefVectorWithLeader<ParticleSet>& P_list,
+                                                int iat,
+                                                const std::vector<const T*>& invRow_ptr_list,
+                                                OffloadMWVGLArray& phi_vgl_v,
+                                                std::vector<T>& ratios,
+                                                std::vector<GradType>& grads) const
+{
+  assert(this == &spo_list.getLeader());
+  assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL);
+  assert(phi_vgl_v.size(1) == spo_list.size());
+  const size_t nw             = spo_list.size();
+  const size_t norb_requested = phi_vgl_v.size(2);
+  // the laplacian follows the DIM gradient rows; equals 4 for DIM == 3
+  const size_t lapl_row = DIM + 1;
+  // scratch for the array-of-structs gradients, reused by every walker
+  GradVector dphi_v(norb_requested);
+  for (size_t iw = 0; iw < nw; iw++)
+  {
+    // values and laplacians are written straight into phi_vgl_v
+    ValueVector phi_v(phi_vgl_v.data_at(0, iw, 0), norb_requested);
+    ValueVector d2phi_v(phi_vgl_v.data_at(lapl_row, iw, 0), norb_requested);
+    spo_list[iw].evaluateVGL(P_list[iw], iat, phi_v, dphi_v, d2phi_v);
+
+    ratios[iw] = simd::dot(invRow_ptr_list[iw], phi_v.data(), norb_requested);
+    grads[iw]  = simd::dot(invRow_ptr_list[iw], dphi_v.data(), norb_requested) / ratios[iw];
+
+    // transpose the array of gradients to SoA in phi_vgl_v
+    for (size_t idim = 0; idim < DIM; idim++)
+    {
+      T* phi_g = phi_vgl_v.data_at(idim + 1, iw, 0);
+      for (size_t iorb = 0; iorb < norb_requested; iorb++)
+        phi_g[iorb] = dphi_v[iorb][idim];
+    }
+  }
+  phi_vgl_v.updateTo();
+}
+
+/// Fallback without an implementation: always throws std::runtime_error naming the concrete class.
+template<class T>
+void SPOSetT<T>::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                                        const RefVectorWithLeader<ParticleSet>& P_list,
+                                                        int iat,
+                                                        const std::vector<const T*>& invRow_ptr_list,
+                                                        OffloadMWVGLArray& phi_vgl_v,
+                                                        std::vector<T>& ratios,
+                                                        std::vector<GradType>& grads,
+                                                        std::vector<T>& spingrads) const
+{
+  const std::string message =
+      "Need specialization of " + getClassName() + "::mw_evaluateVGLandDetRatioGradsWithSpin(). \n";
+  throw std::runtime_error(message);
+}
+
+/// Fallback without an implementation: always throws std::runtime_error.
+template<class T>
+void SPOSetT<T>::evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& grad_grad_grad_logdet)
+{
+  const std::string message("Need specialization of SPOSet::evaluateThirdDeriv(). \n");
+  throw std::runtime_error(message);
+}
+
+/// Fallback without an implementation: always throws std::runtime_error naming the concrete class.
+template<class T>
+void SPOSetT<T>::evaluate_notranspose_spin(const ParticleSet& P,
+                                           int first,
+                                           int last,
+                                           ValueMatrix& logdet,
+                                           GradMatrix& dlogdet,
+                                           ValueMatrix& d2logdet,
+                                           ValueMatrix& dspinlogdet)
+{
+  const std::string message = "Need specialization of " + getClassName() +
+      "::evaluate_notranspose_spin(P,iat,psi,dpsi,d2logdet, dspin_logdet) (vector quantities)\n";
+  throw std::runtime_error(message);
+}
+
+/** Multi-walker fallback: calls evaluate_notranspose() for particles [first, last) arguments
+ *  unchanged on each walker in turn.
+ *
+ * `this` must be the leader of `spo_list`; the list arguments are indexed by walker.
+ */
+template<class T>
+void SPOSetT<T>::mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                         const RefVectorWithLeader<ParticleSet>& P_list,
+                                         int first,
+                                         int last,
+                                         const RefVector<ValueMatrix>& logdet_list,
+                                         const RefVector<GradMatrix>& dlogdet_list,
+                                         const RefVector<ValueMatrix>& d2logdet_list) const
+{
+  assert(this == &spo_list.getLeader());
+  const auto num_walkers = spo_list.size();
+  for (int iw = 0; iw < num_walkers; iw++)
+  {
+    auto& walker_spo = spo_list[iw];
+    walker_spo.evaluate_notranspose(P_list[iw], first, last, logdet_list[iw], dlogdet_list[iw], d2logdet_list[iw]);
+  }
+}
+
+/// Fallback of the hessian variant without an implementation: always throws std::runtime_error.
+template<class T>
+void SPOSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                      int first,
+                                      int last,
+                                      ValueMatrix& logdet,
+                                      GradMatrix& dlogdet,
+                                      HessMatrix& grad_grad_logdet)
+{
+  const std::string message("Need specialization of SPOSet::evaluate_notranspose() for grad_grad_logdet. \n");
+  throw std::runtime_error(message);
+}
+
+/// Fallback of the third-derivative variant without an implementation: always throws std::runtime_error.
+template<class T>
+void SPOSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                      int first,
+                                      int last,
+                                      ValueMatrix& logdet,
+                                      GradMatrix& dlogdet,
+                                      HessMatrix& grad_grad_logdet,
+                                      GGGMatrix& grad_grad_grad_logdet)
+{
+  const std::string message("Need specialization of SPOSet::evaluate_notranspose() for grad_grad_grad_logdet. \n");
+  throw std::runtime_error(message);
+}
+
+/// Fallback without an implementation: always throws std::runtime_error naming the concrete class.
+template<class T>
+std::unique_ptr<SPOSetT<T>> SPOSetT<T>::makeClone() const
+{
+  const std::string message = "Missing  SPOSet::makeClone for " + getClassName();
+  throw std::runtime_error(message);
+}
+
+/** Write a short report to app_log(): the size() of the set and a "state info:" heading.
+ *
+ * @param pad text placed in front of every reported line
+ */
+template<class T>
+void SPOSetT<T>::basic_report(const std::string& pad) const
+{
+  app_log() << pad << "size = " << size() << std::endl << pad << "state info:" << std::endl;
+  // state details are currently not reported: states.report(pad+"  ");
+  app_log().flush();
+}
+
+/// Fallback without an implementation: always throws std::runtime_error naming the concrete class.
+template<class T>
+void SPOSetT<T>::evaluateVGH(const ParticleSet& P,
+                             int iat,
+                             ValueVector& psi,
+                             GradVector& dpsi,
+                             HessVector& grad_grad_psi)
+{
+  const std::string message =
+      "Need specialization of " + getClassName() + "::evaluate(P,iat,psi,dpsi,dhpsi) (vector quantities)\n";
+  throw std::runtime_error(message);
+}
+
+/// Fallback without an implementation: always throws std::runtime_error naming the concrete class.
+template<class T>
+void SPOSetT<T>::evaluateVGHGH(const ParticleSet& P,
+                               int iat,
+                               ValueVector& psi,
+                               GradVector& dpsi,
+                               HessVector& grad_grad_psi,
+                               GGGVector& grad_grad_grad_psi)
+{
+  const std::string message =
+      "Need specialization of " + getClassName() + "::evaluate(P,iat,psi,dpsi,dhpsi,dghpsi) (vector quantities)\n";
+  throw std::runtime_error(message);
+}
+
+/** Fallback that applies nothing.
+ *
+ * @throws std::logic_error when isRotationSupported() is true, i.e. the concrete class
+ *         should have supplied its own implementation.
+ */
+template<class T>
+void SPOSetT<T>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy)
+{
+  if (!isRotationSupported())
+    return;
+
+  throw std::logic_error("Bug!! " + getClassName() +
+                         "::applyRotation "
+                         "must be overloaded when the SPOSet supports rotation.");
+}
+
+/** Fallback that leaves all arguments untouched.
+ *
+ * @throws std::logic_error when isOptimizable() is true, i.e. the concrete class
+ *         should have supplied its own implementation.
+ */
+template<class T>
+void SPOSetT<T>::evaluateDerivatives(ParticleSet& P,
+                                     const opt_variables_type& optvars,
+                                     Vector<T>& dlogpsi,
+                                     Vector<T>& dhpsioverpsi,
+                                     const int& FirstIndex,
+                                     const int& LastIndex)
+{
+  if (!isOptimizable())
+    return;
+
+  throw std::logic_error("Bug!! " + getClassName() +
+                         "::evaluateDerivatives "
+                         "must be overloaded when the SPOSet is optimizable.");
+}
+
+/** Fallback that leaves all arguments untouched.
+ *
+ * @throws std::logic_error when isOptimizable() is true, i.e. the concrete class
+ *         should have supplied its own implementation.
+ */
+template<class T>
+void SPOSetT<T>::evaluateDerivativesWF(ParticleSet& P,
+                                       const opt_variables_type& optvars,
+                                       Vector<T>& dlogpsi,
+                                       int FirstIndex,
+                                       int LastIndex)
+{
+  if (!isOptimizable())
+    return;
+
+  throw std::logic_error("Bug!! " + getClassName() +
+                         "::evaluateDerivativesWF "
+                         "must be overloaded when the SPOSet is optimizable.");
+}
+
+/** Fallback that only fills `ratios` through evaluateDetRatios(); `dratios` is not touched.
+ *
+ * @throws std::logic_error after the ratios were computed when isOptimizable() is true,
+ *         i.e. the concrete class should have supplied its own implementation.
+ */
+template<class T>
+void SPOSetT<T>::evaluateDerivRatios(const VirtualParticleSet& VP,
+                                     const opt_variables_type& optvars,
+                                     ValueVector& psi,
+                                     const ValueVector& psiinv,
+                                     std::vector<T>& ratios,
+                                     Matrix<T>& dratios,
+                                     int FirstIndex,
+                                     int LastIndex)
+{
+  // Match the fallback in WaveFunctionComponent that evaluates just the ratios
+  evaluateDetRatios(VP, psi, psiinv, ratios);
+
+  if (!isOptimizable())
+    return;
+
+  throw std::logic_error("Bug!! " + getClassName() +
+                         "::evaluateDerivRatios "
+                         "must be overloaded when the SPOSet is optimizable.");
+}
+
+/** Fallback of the overload taking determinant data (coefficients, determinant values,
+ *  M/Minv matrices, lookup table); it leaves all arguments untouched.
+ *
+ * @throws std::logic_error when isOptimizable() is true, i.e. the concrete class
+ *         should have supplied its own implementation.
+ */
+template<class T>
+void SPOSetT<T>::evaluateDerivatives(ParticleSet& P,
+                                     const opt_variables_type& optvars,
+                                     Vector<T>& dlogpsi,
+                                     Vector<T>& dhpsioverpsi,
+                                     const T& psiCurrent,
+                                     const std::vector<T>& Coeff,
+                                     const std::vector<size_t>& C2node_up,
+                                     const std::vector<size_t>& C2node_dn,
+                                     const ValueVector& detValues_up,
+                                     const ValueVector& detValues_dn,
+                                     const GradMatrix& grads_up,
+                                     const GradMatrix& grads_dn,
+                                     const ValueMatrix& lapls_up,
+                                     const ValueMatrix& lapls_dn,
+                                     const ValueMatrix& M_up,
+                                     const ValueMatrix& M_dn,
+                                     const ValueMatrix& Minv_up,
+                                     const ValueMatrix& Minv_dn,
+                                     const GradMatrix& B_grad,
+                                     const ValueMatrix& B_lapl,
+                                     const std::vector<int>& detData_up,
+                                     const size_t N1,
+                                     const size_t N2,
+                                     const size_t NP1,
+                                     const size_t NP2,
+                                     const std::vector<std::vector<int>>& lookup_tbl)
+{
+  if (!isOptimizable())
+    return;
+
+  throw std::logic_error("Bug!! " + getClassName() +
+                         "::evaluateDerivatives "
+                         "must be overloaded when the SPOSet is optimizable.");
+}
+
+/** Fallback of the overload taking determinant data (coefficients, determinant values,
+ *  M/Minv matrices, lookup table); it leaves all arguments untouched.
+ *
+ * @throws std::logic_error when isOptimizable() is true, i.e. the concrete class
+ *         should have supplied its own implementation.
+ */
+template<class T>
+void SPOSetT<T>::evaluateDerivativesWF(ParticleSet& P,
+                                       const opt_variables_type& optvars,
+                                       Vector<T>& dlogpsi,
+                                       const typename QTFull::ValueType& psiCurrent,
+                                       const std::vector<T>& Coeff,
+                                       const std::vector<size_t>& C2node_up,
+                                       const std::vector<size_t>& C2node_dn,
+                                       const ValueVector& detValues_up,
+                                       const ValueVector& detValues_dn,
+                                       const ValueMatrix& M_up,
+                                       const ValueMatrix& M_dn,
+                                       const ValueMatrix& Minv_up,
+                                       const ValueMatrix& Minv_dn,
+                                       const std::vector<int>& detData_up,
+                                       const std::vector<std::vector<int>>& lookup_tbl)
+{
+  if (!isOptimizable())
+    return;
+
+  throw std::logic_error("Bug!! " + getClassName() +
+                         "::evaluateDerivativesWF "
+                         "must be overloaded when the SPOSet is optimizable.");
+}
+
+/** Fallback that leaves `gradphi` untouched.
+ *
+ * @throws std::logic_error when hasIonDerivs() is true, i.e. the concrete class
+ *         should have supplied its own implementation.
+ */
+template<class T>
+void SPOSetT<T>::evaluateGradSource(const ParticleSet& P,
+                                    int first,
+                                    int last,
+                                    const ParticleSet& source,
+                                    int iat_src,
+                                    GradMatrix& gradphi)
+{
+  if (!hasIonDerivs())
+    return;
+
+  throw std::logic_error("Bug!! " + getClassName() +
+                         "::evaluateGradSource "
+                         "must be overloaded when the SPOSet has ion derivatives.");
+}
+
+/** Fallback of the overload with hessian and laplacian-gradient outputs; it leaves all
+ *  output arguments untouched.
+ *
+ * @throws std::logic_error when hasIonDerivs() is true, i.e. the concrete class
+ *         should have supplied its own implementation.
+ */
+template<class T>
+void SPOSetT<T>::evaluateGradSource(const ParticleSet& P,
+                                    int first,
+                                    int last,
+                                    const ParticleSet& source,
+                                    int iat_src,
+                                    GradMatrix& grad_phi,
+                                    HessMatrix& grad_grad_phi,
+                                    GradMatrix& grad_lapl_phi)
+{
+  if (!hasIonDerivs())
+    return;
+
+  throw std::logic_error("Bug!! " + getClassName() +
+                         "::evaluateGradSource "
+                         "must be overloaded when the SPOSet has ion derivatives.");
+}
+
+/** Fallback that leaves `gradphi` untouched.
+ *
+ * @throws std::logic_error when hasIonDerivs() is true, i.e. the concrete class
+ *         should have supplied its own implementation.
+ */
+template<class T>
+void SPOSetT<T>::evaluateGradSourceRow(const ParticleSet& P,
+                                       int iel,
+                                       const ParticleSet& source,
+                                       int iat_src,
+                                       GradVector& gradphi)
+{
+  if (!hasIonDerivs())
+    return;
+
+  throw std::logic_error("Bug!! " + getClassName() +
+                         "::evaluateGradSourceRow "
+                         "must be overloaded when the SPOSet has ion derivatives.");
+}
+
+/// Fallback without an implementation: always throws std::runtime_error naming the concrete class.
+template<class T>
+void SPOSetT<T>::evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi)
+{
+  const std::string message =
+      "Need specialization of " + getClassName() + "::evaluate_spin(P,iat,psi,dpsi) (vector quantities)\n";
+  throw std::runtime_error(message);
+}
+
+// Explicit instantiations of SPOSetT for the real and complex value types;
+// the member definitions above are emitted in this translation unit for these types.
+template class SPOSetT<double>;
+template class SPOSetT<float>;
+template class SPOSetT<std::complex<double>>;
+template class SPOSetT<std::complex<float>>;
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/SPOSetT.h b/src/QMCWaveFunctions/SPOSetT.h
new file mode 100644
index 0000000000..ddc14c6593
--- /dev/null
+++ b/src/QMCWaveFunctions/SPOSetT.h
@@ -0,0 +1,582 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
+//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Ying Wai Li, yingwaili@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    William F. Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef QMCPLUSPLUS_SPOSETT_H
+#define QMCPLUSPLUS_SPOSETT_H
+
+#include "OhmmsPETE/OhmmsArray.h"
+#include "Particle/ParticleSet.h"
+#include "Particle/VirtualParticleSet.h"
+#include "QMCWaveFunctions/OrbitalSetTraits.h"
+#include "OptimizableObject.h"
+#include "OMPTarget/OffloadAlignedAllocators.hpp"
+#include "DualAllocatorAliases.hpp"
+
+namespace qmcplusplus
+{
+class ResourceCollection;
+// Forward declaration so the testing::getMyVars overloads (declared friends of SPOSetT) can be declared first.
+template<class T>
+class SPOSetT;
+namespace testing
+{
+opt_variables_type& getMyVars(SPOSetT<float>& spo);
+opt_variables_type& getMyVars(SPOSetT<double>& spo);
+opt_variables_type& getMyVars(SPOSetT<std::complex<float>>& spo);
+opt_variables_type& getMyVars(SPOSetT<std::complex<double>>& spo);
+} // namespace testing
+
+
+/** base class for Single-particle orbital sets
+ *
+ * SPOSet stands for S(ingle)P(article)O(rbital)Set which contains
+ * a number of single-particle orbitals with capabilities of evaluating \f$ \psi_j({\bf r}_i)\f$
+ */
+template<class T>
+class SPOSetT : public QMCTraits
+{
+public:
+  using ValueVector       = typename OrbitalSetTraits<T>::ValueVector;
+  using ValueMatrix       = typename OrbitalSetTraits<T>::ValueMatrix;
+  using GradVector        = typename OrbitalSetTraits<T>::GradVector;
+  using GradMatrix        = typename OrbitalSetTraits<T>::GradMatrix;
+  using GradType          = TinyVector<T, DIM>;
+  using HessVector        = typename OrbitalSetTraits<T>::HessVector;
+  using HessMatrix        = typename OrbitalSetTraits<T>::HessMatrix;
+  using GGGVector         = typename OrbitalSetTraits<T>::GradHessVector;
+  using GGGMatrix         = typename OrbitalSetTraits<T>::GradHessMatrix;
+  using SPOMap            = std::map<std::string, const std::unique_ptr<const SPOSetT<T>>>;
+  using OffloadMWVGLArray = Array<T, 3, OffloadPinnedAllocator<T>>; // [VGL, walker, Orbs]
+  using OffloadMWVArray   = Array<T, 2, OffloadPinnedAllocator<T>>; // [walker, Orbs]
+  using PosType           = typename OrbitalSetTraits<T>::PosType;
+  using RealType          = typename OrbitalSetTraits<T>::RealType;
+  using ValueType         = typename OrbitalSetTraits<T>::ValueType;
+  using FullRealType      = typename OrbitalSetTraits<double>::RealType;
+  template<typename DT>
+  using OffloadMatrix = Matrix<DT, OffloadPinnedAllocator<DT>>;
+
+  /** constructor (declared with the injected-class-name; a template-id declarator is ill-formed in C++20) */
+  SPOSetT(const std::string& my_name);
+
+  /** destructor
+   *
+   * Derived class destructor needs to pay extra attention to freeing memory shared among clones of SPOSet.
+   */
+  virtual ~SPOSetT() = default;
+
+  /** return the size of the orbital set
+   * Ye: this needs to be replaced by getOrbitalSetSize();
+   */
+  inline int size() const { return OrbitalSetSize; }
+
+  /** print basic SPOSet information
+   */
+  void basic_report(const std::string& pad = "") const;
+
+  /** print SPOSet information
+   */
+  virtual void report(const std::string& pad = "") const { basic_report(pad); }
+
+
+  /** return the size of the orbitals
+   */
+  inline int getOrbitalSetSize() const { return OrbitalSetSize; }
+
+  /// Query if this SPOSet is optimizable
+  virtual bool isOptimizable() const { return false; }
+
+  /** extract underlying OptimizableObject references
+   * @param opt_obj_refs aggregated list of optimizable object references
+   */
+  virtual void extractOptimizableObjectRefs(UniqueOptObjRefs& opt_obj_refs);
+
+  /** check out variational optimizable variables
+   * @param active a super set of optimizable variables
+   */
+  virtual void checkOutVariables(const opt_variables_type& active);
+
+  /// Query if this SPOSet uses OpenMP offload
+  virtual bool isOMPoffload() const { return false; }
+
+  /** Query if this SPOSet has an explicit ion dependence. returns true if it does.
+  */
+  virtual bool hasIonDerivs() const { return false; }
+
+  /// check a few key parameters before putting the SPO into a determinant
+  virtual void checkObject() const {}
+
+  /// return true if this SPOSet can be wrapped by RotatedSPO
+  virtual bool isRotationSupported() const { return false; }
+  /// store parameters before getting destroyed by rotation.
+  virtual void storeParamsBeforeRotation() {}
+  /// apply rotation to all the orbitals
+  virtual void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy = false);
+
+  /// Parameter derivatives of the wavefunction and the Laplacian of the wavefunction
+  virtual void evaluateDerivatives(ParticleSet& P,
+                                   const opt_variables_type& optvars,
+                                   Vector<T>& dlogpsi,
+                                   Vector<T>& dhpsioverpsi,
+                                   const int& FirstIndex,
+                                   const int& LastIndex);
+
+  /// Parameter derivatives of the wavefunction
+  virtual void evaluateDerivativesWF(ParticleSet& P,
+                                     const opt_variables_type& optvars,
+                                     Vector<T>& dlogpsi,
+                                     int FirstIndex,
+                                     int LastIndex);
+
+  /** Evaluate the derivative of the optimized orbitals with respect to the parameters
+   *  this is used only for MSD, to be refined for better serving both single and multi SD
+   */
+  virtual void evaluateDerivatives(ParticleSet& P,
+                                   const opt_variables_type& optvars,
+                                   Vector<T>& dlogpsi,
+                                   Vector<T>& dhpsioverpsi,
+                                   const T& psiCurrent,
+                                   const std::vector<T>& Coeff,
+                                   const std::vector<size_t>& C2node_up,
+                                   const std::vector<size_t>& C2node_dn,
+                                   const ValueVector& detValues_up,
+                                   const ValueVector& detValues_dn,
+                                   const GradMatrix& grads_up,
+                                   const GradMatrix& grads_dn,
+                                   const ValueMatrix& lapls_up,
+                                   const ValueMatrix& lapls_dn,
+                                   const ValueMatrix& M_up,
+                                   const ValueMatrix& M_dn,
+                                   const ValueMatrix& Minv_up,
+                                   const ValueMatrix& Minv_dn,
+                                   const GradMatrix& B_grad,
+                                   const ValueMatrix& B_lapl,
+                                   const std::vector<int>& detData_up,
+                                   const size_t N1,
+                                   const size_t N2,
+                                   const size_t NP1,
+                                   const size_t NP2,
+                                   const std::vector<std::vector<int>>& lookup_tbl);
+
+  /** Evaluate the derivative of the optimized orbitals with respect to the parameters
+   *  this is used only for MSD, to be refined for better serving both single and multi SD
+   */
+  virtual void evaluateDerivativesWF(ParticleSet& P,
+                                     const opt_variables_type& optvars,
+                                     Vector<T>& dlogpsi,
+                                     const typename QTFull::ValueType& psiCurrent,
+                                     const std::vector<T>& Coeff,
+                                     const std::vector<size_t>& C2node_up,
+                                     const std::vector<size_t>& C2node_dn,
+                                     const ValueVector& detValues_up,
+                                     const ValueVector& detValues_dn,
+                                     const ValueMatrix& M_up,
+                                     const ValueMatrix& M_dn,
+                                     const ValueMatrix& Minv_up,
+                                     const ValueMatrix& Minv_dn,
+                                     const std::vector<int>& detData_up,
+                                     const std::vector<std::vector<int>>& lookup_tbl);
+
+  /** set the OrbitalSetSize
+   * @param norbs number of single-particle orbitals
+   * Ye: I prefer to remove this interface in the future. SPOSet builders need to handle the size correctly.
+   * It doesn't make sense allowing to set the value at any place in the code.
+   * @TODO make it purely virtual
+   */
+  virtual void setOrbitalSetSize(int norbs) {}
+
+  /** evaluate the values of this single-particle orbital set
+   * @param P current ParticleSet
+   * @param iat active particle
+   * @param psi values of the SPO
+   * @TODO make it purely virtual
+   */
+  virtual void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) {}
+
+  /** evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP
+   * @param VP virtual particle set
+   * @param psi values of the SPO, used as a scratch space if needed
+   * @param psiinv the row of inverse slater matrix corresponding to the particle moved virtually
+   * @param ratios return determinant ratios
+   */
+  virtual void evaluateDetRatios(const VirtualParticleSet& VP,
+                                 ValueVector& psi,
+                                 const ValueVector& psiinv,
+                                 std::vector<T>& ratios);
+
+
+  /// Determinant ratios and parameter derivatives of the wavefunction for virtual moves
+  virtual void evaluateDerivRatios(const VirtualParticleSet& VP,
+                                   const opt_variables_type& optvars,
+                                   ValueVector& psi,
+                                   const ValueVector& psiinv,
+                                   std::vector<T>& ratios,
+                                   Matrix<T>& dratios,
+                                   int FirstIndex,
+                                   int LastIndex);
+
+
+  /** evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP, of multiple walkers
+   * @param spo_list the list of SPOSet pointers in a walker batch
+   * @param vp_list a list of virtual particle sets in a walker batch
+   * @param psi_list a list of values of the SPO, used as a scratch space if needed
+   * @param invRow_ptr_list a list of pointers to the rows of inverse slater matrix corresponding to the particles moved virtually
+   * @param ratios_list a list of returning determinant ratios
+   */
+  virtual void mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                    const RefVectorWithLeader<const VirtualParticleSet>& vp_list,
+                                    const RefVector<ValueVector>& psi_list,
+                                    const std::vector<const T*>& invRow_ptr_list,
+                                    std::vector<std::vector<T>>& ratios_list) const;
+
+  /** evaluate the values, gradients and laplacians of this single-particle orbital set
+   * @param P current ParticleSet
+   * @param iat active particle
+   * @param psi values of the SPO
+   * @param dpsi gradients of the SPO
+   * @param d2psi laplacians of the SPO
+   * @TODO make this purely virtual
+   */
+  virtual void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) {}
+
+  /** evaluate the values, gradients and laplacians and spin gradient of this single-particle orbital set
+   * @param P current ParticleSet
+   * @param iat active particle
+   * @param psi values of the SPO
+   * @param dpsi gradients of the SPO
+   * @param d2psi laplacians of the SPO
+   * @param dspin spin gradients of the SPO
+   */
+  virtual void evaluateVGL_spin(const ParticleSet& P,
+                                int iat,
+                                ValueVector& psi,
+                                GradVector& dpsi,
+                                ValueVector& d2psi,
+                                ValueVector& dspin);
+
+  /** evaluate the values of this single-particle orbital set for multiple walkers
+   * @param spo_list the list of SPOSet pointers in a walker batch
+   * @param P_list the list of ParticleSet pointers in a walker batch
+   * @param iat active particle
+   * @param psi_v_list the list of value vector pointers in a walker batch
+   */
+  virtual void mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                const RefVectorWithLeader<ParticleSet>& P_list,
+                                int iat,
+                                const RefVector<ValueVector>& psi_v_list) const;
+
+  /** evaluate the values, gradients and laplacians of this single-particle orbital sets of multiple walkers
+   * @param spo_list the list of SPOSet pointers in a walker batch
+   * @param P_list the list of ParticleSet pointers in a walker batch
+   * @param iat active particle
+   * @param psi_v_list the list of value vector pointers in a walker batch
+   * @param dpsi_v_list the list of gradient vector pointers in a walker batch
+   * @param d2psi_v_list the list of laplacian vector pointers in a walker batch
+   */
+  virtual void mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                              const RefVectorWithLeader<ParticleSet>& P_list,
+                              int iat,
+                              const RefVector<ValueVector>& psi_v_list,
+                              const RefVector<GradVector>& dpsi_v_list,
+                              const RefVector<ValueVector>& d2psi_v_list) const;
+
+  /** evaluate the values, gradients and laplacians and spin gradient of this single-particle orbital sets of multiple walkers
+   * @param spo_list the list of SPOSet pointers in a walker batch
+   * @param P_list the list of ParticleSet pointers in a walker batch
+   * @param iat active particle
+   * @param psi_v_list the list of value vector pointers in a walker batch
+   * @param dpsi_v_list the list of gradient vector pointers in a walker batch
+   * @param d2psi_v_list the list of laplacian vector pointers in a walker batch
+   * @param mw_dspin is a dual matrix of spin gradients [nw][norb]
+   * Note that the device side of mw_dspin is up to date
+   */
+  virtual void mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                      const RefVectorWithLeader<ParticleSet>& P_list,
+                                      int iat,
+                                      const RefVector<ValueVector>& psi_v_list,
+                                      const RefVector<GradVector>& dpsi_v_list,
+                                      const RefVector<ValueVector>& d2psi_v_list,
+                                      OffloadMatrix<ComplexType>& mw_dspin) const;
+
+  /** evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio
+   *  and grads of multiple walkers. Device data of phi_vgl_v must be up-to-date upon return
+   * @param spo_list the list of SPOSet pointers in a walker batch
+   * @param P_list the list of ParticleSet pointers in a walker batch
+   * @param iat active particle
+   * @param invRow_ptr_list a list of pointers to the rows of inverse slater matrix
+   * @param phi_vgl_v,ratios,grads orbital values/gradients/laplacians, determinant ratios and grads of all the walkers
+   */
+  virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                              const RefVectorWithLeader<ParticleSet>& P_list,
+                                              int iat,
+                                              const std::vector<const T*>& invRow_ptr_list,
+                                              OffloadMWVGLArray& phi_vgl_v,
+                                              std::vector<T>& ratios,
+                                              std::vector<GradType>& grads) const;
+
+  /** evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio
+   *  and grads of multiple walkers. Device data of phi_vgl_v must be up-to-date upon return.
+   *  Includes spin gradients
+   * @param spo_list the list of SPOSet pointers in a walker batch
+   * @param P_list the list of ParticleSet pointers in a walker batch
+   * @param iat active particle
+   * @param invRow_ptr_list a list of pointers to the rows of inverse slater matrix
+   * @param phi_vgl_v orbital values, gradients and laplacians of all the walkers
+   * @param ratios,grads determinant ratios and spatial gradients of all walkers
+   * @param spingrads spin gradients of all walkers
+   */
+  virtual void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                                      const RefVectorWithLeader<ParticleSet>& P_list,
+                                                      int iat,
+                                                      const std::vector<const T*>& invRow_ptr_list,
+                                                      OffloadMWVGLArray& phi_vgl_v,
+                                                      std::vector<T>& ratios,
+                                                      std::vector<GradType>& grads,
+                                                      std::vector<T>& spingrads) const;
+
+  /** evaluate the values, gradients and hessians of this single-particle orbital set
+   * @param P current ParticleSet
+   * @param iat active particle
+   * @param psi values of the SPO
+   * @param dpsi gradients of the SPO
+   * @param grad_grad_psi hessians of the SPO
+   */
+  virtual void evaluateVGH(const ParticleSet& P,
+                           int iat,
+                           ValueVector& psi,
+                           GradVector& dpsi,
+                           HessVector& grad_grad_psi);
+
+  /** evaluate the values, gradients, hessians, and grad hessians of this single-particle orbital set
+   * @param P current ParticleSet
+   * @param iat active particle
+   * @param psi values of the SPO
+   * @param dpsi gradients of the SPO
+   * @param grad_grad_psi hessians of the SPO
+   * @param grad_grad_grad_psi grad hessians of the SPO
+   */
+  virtual void evaluateVGHGH(const ParticleSet& P,
+                             int iat,
+                             ValueVector& psi,
+                             GradVector& dpsi,
+                             HessVector& grad_grad_psi,
+                             GGGVector& grad_grad_grad_psi);
+
+  /** evaluate the values of this single-particle orbital set
+   * @param P current ParticleSet
+   * @param iat active particle
+   * @param psi values of the SPO
+   */
+  virtual void evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi);
+
+  /** evaluate the third derivatives of this single-particle orbital set
+   * @param P current ParticleSet
+   * @param first first particle
+   * @param last last particle
+   * @param grad_grad_grad_logdet third derivatives of the SPO
+   */
+  virtual void evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& grad_grad_grad_logdet);
+
+  /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles
+   * @param[in] P current ParticleSet
+   * @param[in] first starting index of the particles
+   * @param[in] last ending index of the particles
+   * @param[out] logdet determinant matrix to be inverted
+   * @param[out] dlogdet gradients
+   * @param[out] d2logdet laplacians
+   * @TODO make this pure virtual
+   */
+  virtual void evaluate_notranspose(const ParticleSet& P,
+                                    int first,
+                                    int last,
+                                    ValueMatrix& logdet,
+                                    GradMatrix& dlogdet,
+                                    ValueMatrix& d2logdet) {}
+
+  /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles, including the spin gradient
+   * @param P current ParticleSet
+   * @param first starting index of the particles
+   * @param last ending index of the particles
+   * @param logdet determinant matrix to be inverted
+   * @param dlogdet gradients
+   * @param d2logdet laplacians
+   * @param dspinlogdet, spin gradients
+   *
+   * default implementation will abort for all SPOSets except SpinorSet
+   *
+   */
+  virtual void evaluate_notranspose_spin(const ParticleSet& P,
+                                         int first,
+                                         int last,
+                                         ValueMatrix& logdet,
+                                         GradMatrix& dlogdet,
+                                         ValueMatrix& d2logdet,
+                                         ValueMatrix& dspinlogdet);
+
+  virtual void mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                       const RefVectorWithLeader<ParticleSet>& P_list,
+                                       int first,
+                                       int last,
+                                       const RefVector<ValueMatrix>& logdet_list,
+                                       const RefVector<GradMatrix>& dlogdet_list,
+                                       const RefVector<ValueMatrix>& d2logdet_list) const;
+
+  /** evaluate the values, gradients and hessians of this single-particle orbital for [first,last) particles
+   * @param P current ParticleSet
+   * @param first starting index of the particles
+   * @param last ending index of the particles
+   * @param logdet determinant matrix to be inverted
+   * @param dlogdet gradients
+   * @param grad_grad_logdet hessians
+   *
+   */
+  virtual void evaluate_notranspose(const ParticleSet& P,
+                                    int first,
+                                    int last,
+                                    ValueMatrix& logdet,
+                                    GradMatrix& dlogdet,
+                                    HessMatrix& grad_grad_logdet);
+
+  /** evaluate the values, gradients, hessians and third derivatives of this single-particle orbital for [first,last) particles
+   * @param P current ParticleSet
+   * @param first starting index of the particles
+   * @param last ending index of the particles
+   * @param logdet determinant matrix to be inverted
+   * @param dlogdet gradients
+   * @param grad_grad_logdet hessians
+   * @param grad_grad_grad_logdet third derivatives
+   *
+   */
+  virtual void evaluate_notranspose(const ParticleSet& P,
+                                    int first,
+                                    int last,
+                                    ValueMatrix& logdet,
+                                    GradMatrix& dlogdet,
+                                    HessMatrix& grad_grad_logdet,
+                                    GGGMatrix& grad_grad_grad_logdet);
+
+  /** evaluate the gradients of this single-particle orbital
+   *  for [first,last) target particles with respect to the given source particle
+   * @param P current ParticleSet
+   * @param first starting index of the particles
+   * @param last ending index of the particles
+   * @param source particle set containing the source particle
+   * @param iat_src source particle index
+   * @param gradphi gradients
+   */
+  virtual void evaluateGradSource(const ParticleSet& P,
+                                  int first,
+                                  int last,
+                                  const ParticleSet& source,
+                                  int iat_src,
+                                  GradMatrix& gradphi);
+
+  /** evaluate the gradients of values, gradients, laplacians of this single-particle orbital
+   *  for [first,last) target particles with respect to the given source particle
+   * @param P current ParticleSet
+   * @param first starting index of the particles
+   * @param last ending index of the particles
+   * @param source particle set containing the source particle
+   * @param iat_src source particle index
+   * @param grad_phi gradients of values
+   * @param grad_grad_phi gradients of gradients
+   * @param grad_lapl_phi gradients of laplacians
+   */
+  virtual void evaluateGradSource(const ParticleSet& P,
+                                  int first,
+                                  int last,
+                                  const ParticleSet& source,
+                                  int iat_src,
+                                  GradMatrix& grad_phi,
+                                  HessMatrix& grad_grad_phi,
+                                  GradMatrix& grad_lapl_phi);
+
+  /** @brief Returns a row of d/dR_iat phi_j(r) evaluated at position r.
+   *
+   *  @param[in] P particle set.
+   *  @param[in] iel The electron at which to evaluate phi(r_iel)
+   *  @param[in] source ion particle set.
+   *  @param[in] iat_src ion ID w.r.t. which to take derivative.
+   *  @param[in,out] gradphi Vector of d/dR_iat phi_j(r).
+   *  @return Void
+   */
+  virtual void evaluateGradSourceRow(const ParticleSet& P,
+                                     int iel,
+                                     const ParticleSet& source,
+                                     int iat_src,
+                                     GradVector& gradphi);
+
+  /** access the k point related to the given orbital */
+  virtual PosType get_k(int orb) { return PosType(); }
+
+  /** initialize a shared resource and hand it to collection
+   */
+  virtual void createResource(ResourceCollection& collection) const {}
+
+  /** acquire a shared resource from collection
+   */
+  virtual void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const {}
+
+  /** return a shared resource to collection
+   */
+  virtual void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const {}
+
+  /** make a clone of itself
+   * every derived class must implement this to have threading working correctly.
+   */
+  [[noreturn]] virtual std::unique_ptr<SPOSetT<T>> makeClone() const;
+
+  /** Used only by cusp correction in AOS LCAO.
+   * Ye: the SoA LCAO moves all this responsibility to the builder.
+   * This interface should be removed with AoS.
+   */
+  virtual bool transformSPOSet() { return true; }
+
+  /** finalize the construction of SPOSet
+   *
+   * for example, classes serving accelerators may need to transfer data from host to device
+   * after the host side objects are built.
+   */
+  virtual void finalizeConstruction() {}
+
+  /// return object name
+  const std::string& getName() const { return my_name_; }
+
+  /// @TODO make this purely virtual return class name
+  virtual std::string getClassName() const { return ""; }
+
+protected:
+  /// name of the object, unique identifier
+  const std::string my_name_;
+  ///number of Single-particle orbitals
+  IndexType OrbitalSetSize;
+  /// Optimizable variables
+  opt_variables_type myVars;
+
+  friend opt_variables_type& testing::getMyVars(SPOSetT<float>& spo);
+  friend opt_variables_type& testing::getMyVars(SPOSetT<double>& spo);
+  friend opt_variables_type& testing::getMyVars(SPOSetT<std::complex<float>>& spo);
+  friend opt_variables_type& testing::getMyVars(SPOSetT<std::complex<double>>& spo);
+};
+
+template<class T>
+using SPOSetTPtr = SPOSetT<T>*; ///< shorthand for a raw pointer to SPOSetT<T>
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/SpinorSetT.cpp b/src/QMCWaveFunctions/SpinorSetT.cpp
new file mode 100644
index 0000000000..64d7d3d6b1
--- /dev/null
+++ b/src/QMCWaveFunctions/SpinorSetT.cpp
@@ -0,0 +1,586 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2022 QMCPACK developers
+//
+// File developed by: Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories
+//                    Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories
+//
+// File created by:  Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "SpinorSetT.h"
+#include "Utilities/ResourceCollection.h"
+#include "Platforms/OMPTarget/OMPTargetMath.hpp"
+
+namespace qmcplusplus
+{
+template<class T> // scratch storage for a walker batch, used by mw_evaluateVGLandDetRatioGradsWithSpin
+struct SpinorSetT<T>::SpinorSetMultiWalkerResource : public Resource
+{ // NOTE: the copy constructor delegates to the default one, so copies (and clones) start with empty buffers
+  SpinorSetMultiWalkerResource() : Resource("SpinorSet") {}
+  SpinorSetMultiWalkerResource(const SpinorSetMultiWalkerResource&) : SpinorSetMultiWalkerResource() {}
+  std::unique_ptr<Resource> makeClone() const override { return std::make_unique<SpinorSetMultiWalkerResource>(*this); }
+  OffloadMWVGLArray up_phi_vgl_v, dn_phi_vgl_v; // [VGL, walker, Orbs] results of the up/down components
+  std::vector<T> up_ratios, dn_ratios;          // per-walker ratios of the up/down components
+  std::vector<GradType> up_grads, dn_grads;     // per-walker gradients of the up/down components
+  std::vector<RealType> spins;                  // per-walker active spin, mapped to the device for the combine step
+};
+
+template<class T> // both component pointers start out null; set_spos() installs the actual components
+SpinorSetT<T>::SpinorSetT(const std::string& my_name) : SPOSetT<T>(my_name), spo_up(), spo_dn()
+{}
+
+template<class T> // out-of-line defaulted destructor (declared `override` in the class definition)
+SpinorSetT<T>::~SpinorSetT() = default;
+
+/// Install the up/down components (ownership is taken) and size the single-particle work vectors to match.
+template<class T>
+void SpinorSetT<T>::set_spos(std::unique_ptr<SPOSetT<T>>&& up, std::unique_ptr<SPOSetT<T>>&& dn)
+{
+  // Sanity checks for the input SPO's: both must be present and have the same number of orbitals.
+  if (!up || !dn)
+    throw std::runtime_error("SpinorSet::set_spos(...):  up and down SPO components must not be null.");
+
+  const IndexType spo_size_up   = up->getOrbitalSetSize();
+  const IndexType spo_size_down = dn->getOrbitalSetSize();
+  if (spo_size_up != spo_size_down)
+    throw std::runtime_error("SpinorSet::set_spos(...):  up and down SPO components have different sizes.");
+
+  setOrbitalSetSize(spo_size_up);
+  spo_up = std::move(up);
+  spo_dn = std::move(dn);
+
+  psi_work_up.resize(this->OrbitalSetSize);
+  psi_work_down.resize(this->OrbitalSetSize);
+  dpsi_work_up.resize(this->OrbitalSetSize);
+  dpsi_work_down.resize(this->OrbitalSetSize);
+  d2psi_work_up.resize(this->OrbitalSetSize);
+  d2psi_work_down.resize(this->OrbitalSetSize);
+}
+
+template<class T>
+void SpinorSetT<T>::setOrbitalSetSize(int norbs)
+{
+  this->OrbitalSetSize = norbs; // only the counter is recorded here; set_spos() sizes the work vectors
+}
+
+/// Spinor values for particle iat: psi = e^{is} * phi_up + e^{-is} * phi_dn, where s is the particle's spin.
+template<class T>
+void SpinorSetT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+{
+  // Zero the scratch vectors, then let each component fill its own.
+  psi_work_up   = 0.0;
+  psi_work_down = 0.0;
+  spo_up->evaluateValue(P, iat, psi_work_up);
+  spo_dn->evaluateValue(P, iat, psi_work_down);
+
+  const ParticleSet::Scalar_t spin = P.activeSpin(iat);
+  const RealType cos_spin          = std::cos(spin);
+  const RealType sin_spin          = std::sin(spin);
+
+  //This is only supported in the complex build, so T is some complex number depending on the precision.
+  //phase_up = e^{+is} weights the up channel, phase_dn = e^{-is} weights the down channel.
+  T phase_up(cos_spin, sin_spin);
+  T phase_dn(cos_spin, -sin_spin);
+
+  // Linear combination of the two channels.
+  psi = phase_up * psi_work_up + phase_dn * psi_work_down;
+}
+
+/// Values, gradients and laplacians for particle iat, each combined as e^{is} * up + e^{-is} * down.
+template<class T>
+void SpinorSetT<T>::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+{
+  // Reset every scratch buffer before the component evaluations write into them.
+  psi_work_up     = 0.0;
+  dpsi_work_up    = 0.0;
+  d2psi_work_up   = 0.0;
+  psi_work_down   = 0.0;
+  dpsi_work_down  = 0.0;
+  d2psi_work_down = 0.0;
+
+  spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up);
+  spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down);
+
+  const ParticleSet::Scalar_t spin = P.activeSpin(iat);
+  const RealType cos_spin          = std::cos(spin);
+  const RealType sin_spin          = std::sin(spin);
+
+  // Spin phase factors e^{+is} and e^{-is}.
+  T phase_up(cos_spin, sin_spin);
+  T phase_dn(cos_spin, -sin_spin);
+
+  psi   = phase_up * psi_work_up + phase_dn * psi_work_down;
+  dpsi  = phase_up * dpsi_work_up + phase_dn * dpsi_work_down;
+  d2psi = phase_up * d2psi_work_up + phase_dn * d2psi_work_down;
+}
+
+/// Same as evaluateVGL, and additionally the spin derivative dspin = i * (e^{is} * up - e^{-is} * down).
+template<class T>
+void SpinorSetT<T>::evaluateVGL_spin(const ParticleSet& P,
+                                     int iat,
+                                     ValueVector& psi,
+                                     GradVector& dpsi,
+                                     ValueVector& d2psi,
+                                     ValueVector& dspin)
+{
+  psi_work_up     = 0.0;
+  dpsi_work_up    = 0.0;
+  d2psi_work_up   = 0.0;
+  psi_work_down   = 0.0;
+  dpsi_work_down  = 0.0;
+  d2psi_work_down = 0.0;
+
+  spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up);
+  spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down);
+
+  const ParticleSet::Scalar_t spin = P.activeSpin(iat);
+  const RealType cos_spin          = std::cos(spin);
+  const RealType sin_spin          = std::sin(spin);
+
+  // e^{+is}, e^{-is} and the imaginary unit.
+  T phase_up(cos_spin, sin_spin);
+  T phase_dn(cos_spin, -sin_spin);
+  T imag_unit(0, 1.0);
+
+  psi   = phase_up * psi_work_up + phase_dn * psi_work_down;
+  dpsi  = phase_up * dpsi_work_up + phase_dn * dpsi_work_down;
+  d2psi = phase_up * d2psi_work_up + phase_dn * d2psi_work_down;
+  // Differentiating e^{+is} / e^{-is} with respect to s brings down +i / -i.
+  dspin = imag_unit * (phase_up * psi_work_up - phase_dn * psi_work_down);
+}
+
+/// Multi-walker evaluateVGL_spin: fills psi/dpsi/d2psi per walker on the host and the spin gradients in mw_dspin.
+template<class T>
+void SpinorSetT<T>::mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                           const RefVectorWithLeader<ParticleSet>& P_list,
+                                           int iat,
+                                           const RefVector<ValueVector>& psi_v_list,
+                                           const RefVector<GradVector>& dpsi_v_list,
+                                           const RefVector<ValueVector>& d2psi_v_list,
+                                           OffloadMatrix<QMCTraits::ComplexType>& mw_dspin) const
+{
+  auto& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
+  assert(this == &spo_leader);
+
+  IndexType nw                    = spo_list.size();
+  auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
+  auto& up_spo_leader             = up_spo_list.getLeader();
+  auto& dn_spo_leader             = dn_spo_list.getLeader();
+  // Each walker's own work vectors receive the results of the up/down component evaluations.
+  RefVector<ValueVector> up_psi_v_list, dn_psi_v_list;
+  RefVector<GradVector> up_dpsi_v_list, dn_dpsi_v_list;
+  RefVector<ValueVector> up_d2psi_v_list, dn_d2psi_v_list;
+  for (int iw = 0; iw < nw; iw++)
+  {
+    auto& spo = spo_list.template getCastedElement<SpinorSetT<T>>(iw);
+    up_psi_v_list.push_back(spo.psi_work_up);
+    dn_psi_v_list.push_back(spo.psi_work_down);
+    up_dpsi_v_list.push_back(spo.dpsi_work_up);
+    dn_dpsi_v_list.push_back(spo.dpsi_work_down);
+    up_d2psi_v_list.push_back(spo.d2psi_work_up);
+    dn_d2psi_v_list.push_back(spo.d2psi_work_down);
+  }
+
+  up_spo_leader.mw_evaluateVGL(up_spo_list, P_list, iat, up_psi_v_list, up_dpsi_v_list, up_d2psi_v_list);
+  dn_spo_leader.mw_evaluateVGL(dn_spo_list, P_list, iat, dn_psi_v_list, dn_dpsi_v_list, dn_d2psi_v_list);
+
+  for (int iw = 0; iw < nw; iw++)
+  {
+    ParticleSet::Scalar_t s = P_list[iw].activeSpin(iat);
+    RealType coss           = std::cos(s);
+    RealType sins           = std::sin(s);
+
+    T eis(coss, sins);
+    T emis(coss, -sins);
+    T eye(0, 1.0);
+
+    psi_v_list[iw].get()   = eis * up_psi_v_list[iw].get() + emis * dn_psi_v_list[iw].get();
+    dpsi_v_list[iw].get()  = eis * up_dpsi_v_list[iw].get() + emis * dn_dpsi_v_list[iw].get();
+    d2psi_v_list[iw].get() = eis * up_d2psi_v_list[iw].get() + emis * dn_d2psi_v_list[iw].get();
+    for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++)
+      mw_dspin(iw, iorb) = eye * (eis * (up_psi_v_list[iw].get())[iorb] - emis * (dn_psi_v_list[iw].get())[iorb]);
+  }
+  //Data above is all on host, but since mw_dspin is DualMatrix we need to sync the host and device
+  mw_dspin.updateTo();
+}
+
+/// Multi-walker VGL plus determinant ratios, gradients and spin gradients; phi_vgl_v is combined on the device.
+template<class T>
+void SpinorSetT<T>::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                                           const RefVectorWithLeader<ParticleSet>& P_list,
+                                                           int iat,
+                                                           const std::vector<const T*>& invRow_ptr_list,
+                                                           OffloadMWVGLArray& phi_vgl_v,
+                                                           std::vector<T>& ratios,
+                                                           std::vector<GradType>& grads,
+                                                           std::vector<T>& spingrads) const
+{
+  auto& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
+  assert(this == &spo_leader);
+  assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL); // qualified, like the resize calls below
+  assert(phi_vgl_v.size(1) == spo_list.size());
+  const size_t nw             = spo_list.size();
+  const size_t norb_requested = phi_vgl_v.size(2);
+  // Scratch buffers come from the multi-walker resource currently held by the leader.
+  auto& mw_res       = spo_leader.mw_res_handle_.getResource();
+  auto& up_phi_vgl_v = mw_res.up_phi_vgl_v;
+  auto& dn_phi_vgl_v = mw_res.dn_phi_vgl_v;
+  auto& up_ratios    = mw_res.up_ratios;
+  auto& dn_ratios    = mw_res.dn_ratios;
+  auto& up_grads     = mw_res.up_grads;
+  auto& dn_grads     = mw_res.dn_grads;
+  auto& spins        = mw_res.spins;
+
+  up_phi_vgl_v.resize(QMCTraits::DIM_VGL, nw, norb_requested);
+  dn_phi_vgl_v.resize(QMCTraits::DIM_VGL, nw, norb_requested);
+  up_ratios.resize(nw);
+  dn_ratios.resize(nw);
+  up_grads.resize(nw);
+  dn_grads.resize(nw);
+  spins.resize(nw);
+
+  auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
+  auto& up_spo_leader             = up_spo_list.getLeader();
+  auto& dn_spo_leader             = dn_spo_list.getLeader();
+
+  up_spo_leader.mw_evaluateVGLandDetRatioGrads(up_spo_list, P_list, iat, invRow_ptr_list, up_phi_vgl_v, up_ratios,
+                                               up_grads);
+  dn_spo_leader.mw_evaluateVGLandDetRatioGrads(dn_spo_list, P_list, iat, invRow_ptr_list, dn_phi_vgl_v, dn_ratios,
+                                               dn_grads);
+  for (size_t iw = 0; iw < nw; iw++)
+  {
+    ParticleSet::Scalar_t s = P_list[iw].activeSpin(iat);
+    spins[iw]               = s;
+    RealType coss           = std::cos(s);
+    RealType sins           = std::sin(s);
+
+    T eis(coss, sins);
+    T emis(coss, -sins);
+    T eye(0, 1.0);
+    // Component gradients are weighted by their own ratios and normalized by the combined ratio.
+    ratios[iw]    = eis * up_ratios[iw] + emis * dn_ratios[iw];
+    grads[iw]     = (eis * up_grads[iw] * up_ratios[iw] + emis * dn_grads[iw] * dn_ratios[iw]) / ratios[iw];
+    spingrads[iw] = eye * (eis * up_ratios[iw] - emis * dn_ratios[iw]) / ratios[iw];
+  }
+
+  auto* spins_ptr = spins.data();
+  //This data lives on the device
+  auto* phi_vgl_ptr    = phi_vgl_v.data();
+  auto* up_phi_vgl_ptr = up_phi_vgl_v.data();
+  auto* dn_phi_vgl_ptr = dn_phi_vgl_v.data();
+  PRAGMA_OFFLOAD("omp target teams distribute map(to:spins_ptr[0:nw])")
+  for (int iw = 0; iw < nw; iw++)
+  {
+    RealType c, s;
+    omptarget::sincos(spins_ptr[iw], &s, &c);
+    T eis(c, s), emis(c, -s);
+    PRAGMA_OFFLOAD("omp parallel for collapse(2)")
+    for (int idim = 0; idim < QMCTraits::DIM_VGL; idim++)
+      for (int iorb = 0; iorb < norb_requested; iorb++)
+      {
+        auto offset         = idim * nw * norb_requested + iw * norb_requested + iorb;
+        phi_vgl_ptr[offset] = eis * up_phi_vgl_ptr[offset] + emis * dn_phi_vgl_ptr[offset];
+      }
+  }
+}
+
+/// Fill logdet/dlogdet/d2logdet with the spinor values, gradients and laplacians for the particles of P.
+/// The up/down channels are evaluated into work matrices and combined as e^{is} * up + e^{-is} * down.
+template<class T>
+void SpinorSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                         int first,
+                                         int last,
+                                         ValueMatrix& logdet,
+                                         GradMatrix& dlogdet,
+                                         ValueMatrix& d2logdet)
+{
+  const IndexType nelec = P.getTotalNum();
+  const IndexType norb  = this->OrbitalSetSize;
+
+  // Size the per-channel work matrices to (particles x orbitals).
+  logpsi_work_up.resize(nelec, norb);
+  dlogpsi_work_up.resize(nelec, norb);
+  d2logpsi_work_up.resize(nelec, norb);
+  logpsi_work_down.resize(nelec, norb);
+  dlogpsi_work_down.resize(nelec, norb);
+  d2logpsi_work_down.resize(nelec, norb);
+
+  spo_up->evaluate_notranspose(P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up);
+  spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down, dlogpsi_work_down, d2logpsi_work_down);
+
+  // NOTE(review): rows 0..nelec-1 are combined regardless of [first, last) -- presumably callers pass the full range.
+  for (int iat = 0; iat < nelec; iat++)
+  {
+    const ParticleSet::Scalar_t spin = P.activeSpin(iat);
+    const RealType cos_spin          = std::cos(spin);
+    const RealType sin_spin          = std::sin(spin);
+
+    // Per-particle spin phases e^{+is} and e^{-is}.
+    T phase_up(cos_spin, sin_spin);
+    T phase_dn(cos_spin, -sin_spin);
+
+    for (int iorb = 0; iorb < norb; iorb++)
+    {
+      logdet(iat, iorb)   = phase_up * logpsi_work_up(iat, iorb) + phase_dn * logpsi_work_down(iat, iorb);
+      dlogdet(iat, iorb)  = phase_up * dlogpsi_work_up(iat, iorb) + phase_dn * dlogpsi_work_down(iat, iorb);
+      d2logdet(iat, iorb) = phase_up * d2logpsi_work_up(iat, iorb) + phase_dn * d2logpsi_work_down(iat, iorb);
+    }
+  }
+}
+
+template<class T> // multi-walker counterpart of evaluate_notranspose: one logdet/dlogdet/d2logdet triple per walker
+void SpinorSetT<T>::mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                            const RefVectorWithLeader<ParticleSet>& P_list,
+                                            int first,
+                                            int last,
+                                            const RefVector<ValueMatrix>& logdet_list,
+                                            const RefVector<GradMatrix>& dlogdet_list,
+                                            const RefVector<ValueMatrix>& d2logdet_list) const
+{
+  auto& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
+  auto& P_leader   = P_list.getLeader();
+  assert(this == &spo_leader); // must be invoked on the leader of spo_list
+
+  IndexType nw    = spo_list.size();
+  IndexType nelec = P_leader.getTotalNum(); // particle count is taken from the leader ParticleSet
+
+  auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
+  auto& up_spo_leader             = up_spo_list.getLeader();
+  auto& dn_spo_leader             = dn_spo_list.getLeader();
+  // Per-walker temporaries owned by this call. They are reserved up front so that filling them does not reallocate.
+  std::vector<ValueMatrix> mw_up_logdet, mw_dn_logdet;
+  std::vector<GradMatrix> mw_up_dlogdet, mw_dn_dlogdet;
+  std::vector<ValueMatrix> mw_up_d2logdet, mw_dn_d2logdet;
+  mw_up_logdet.reserve(nw);
+  mw_dn_logdet.reserve(nw);
+  mw_up_dlogdet.reserve(nw);
+  mw_dn_dlogdet.reserve(nw);
+  mw_up_d2logdet.reserve(nw);
+  mw_dn_d2logdet.reserve(nw);
+  // Lists handed to the component SPOSets; each entry is built from the temporary of the same walker (see the loop).
+  RefVector<ValueMatrix> up_logdet_list, dn_logdet_list;
+  RefVector<GradMatrix> up_dlogdet_list, dn_dlogdet_list;
+  RefVector<ValueMatrix> up_d2logdet_list, dn_d2logdet_list;
+  up_logdet_list.reserve(nw);
+  dn_logdet_list.reserve(nw);
+  up_dlogdet_list.reserve(nw);
+  dn_dlogdet_list.reserve(nw);
+  up_d2logdet_list.reserve(nw);
+  dn_d2logdet_list.reserve(nw);
+
+  ValueMatrix tmp_val_mat(nelec, this->OrbitalSetSize);
+  GradMatrix tmp_grad_mat(nelec, this->OrbitalSetSize);
+  for (int iw = 0; iw < nw; iw++)
+  {
+    mw_up_logdet.emplace_back(tmp_val_mat); // every temporary starts as a copy of the (nelec x orbitals) template
+    up_logdet_list.emplace_back(mw_up_logdet.back());
+    mw_dn_logdet.emplace_back(tmp_val_mat);
+    dn_logdet_list.emplace_back(mw_dn_logdet.back());
+
+    mw_up_dlogdet.emplace_back(tmp_grad_mat);
+    up_dlogdet_list.emplace_back(mw_up_dlogdet.back());
+    mw_dn_dlogdet.emplace_back(tmp_grad_mat);
+    dn_dlogdet_list.emplace_back(mw_dn_dlogdet.back());
+
+    mw_up_d2logdet.emplace_back(tmp_val_mat);
+    up_d2logdet_list.emplace_back(mw_up_d2logdet.back());
+    mw_dn_d2logdet.emplace_back(tmp_val_mat);
+    dn_d2logdet_list.emplace_back(mw_dn_d2logdet.back());
+  }
+
+  up_spo_leader.mw_evaluate_notranspose(up_spo_list, P_list, first, last, up_logdet_list, up_dlogdet_list,
+                                        up_d2logdet_list);
+  dn_spo_leader.mw_evaluate_notranspose(dn_spo_list, P_list, first, last, dn_logdet_list, dn_dlogdet_list,
+                                        dn_d2logdet_list);
+  // Combine the channels per walker and particle: result = e^{is} * up + e^{-is} * down.
+  for (int iw = 0; iw < nw; iw++)
+    for (int iat = 0; iat < nelec; iat++)
+    {
+      ParticleSet::Scalar_t s = P_list[iw].activeSpin(iat);
+      RealType coss           = std::cos(s);
+      RealType sins           = std::sin(s);
+      T eis(coss, sins);
+      T emis(coss, -sins);
+
+      for (int no = 0; no < this->OrbitalSetSize; no++)
+      {
+        logdet_list[iw].get()(iat, no) =
+            eis * up_logdet_list[iw].get()(iat, no) + emis * dn_logdet_list[iw].get()(iat, no);
+        dlogdet_list[iw].get()(iat, no) =
+            eis * up_dlogdet_list[iw].get()(iat, no) + emis * dn_dlogdet_list[iw].get()(iat, no);
+        d2logdet_list[iw].get()(iat, no) =
+            eis * up_d2logdet_list[iw].get()(iat, no) + emis * dn_d2logdet_list[iw].get()(iat, no);
+      }
+    }
+}
+
+/// Like evaluate_notranspose, and additionally fills dspinlogdet with the spin derivative of each spinor,
+/// i * (e^{is} * up - e^{-is} * down).
+template<class T>
+void SpinorSetT<T>::evaluate_notranspose_spin(const ParticleSet& P,
+                                              int first,
+                                              int last,
+                                              ValueMatrix& logdet,
+                                              GradMatrix& dlogdet,
+                                              ValueMatrix& d2logdet,
+                                              ValueMatrix& dspinlogdet)
+{
+  const IndexType nelec = P.getTotalNum();
+  const IndexType norb  = this->OrbitalSetSize;
+
+  // Size the per-channel work matrices to (particles x orbitals).
+  logpsi_work_up.resize(nelec, norb);
+  dlogpsi_work_up.resize(nelec, norb);
+  d2logpsi_work_up.resize(nelec, norb);
+  logpsi_work_down.resize(nelec, norb);
+  dlogpsi_work_down.resize(nelec, norb);
+  d2logpsi_work_down.resize(nelec, norb);
+
+  spo_up->evaluate_notranspose(P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up);
+  spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down, dlogpsi_work_down, d2logpsi_work_down);
+
+  // NOTE(review): rows 0..nelec-1 are combined regardless of [first, last) -- presumably callers pass the full range.
+  for (int iat = 0; iat < nelec; iat++)
+  {
+    const ParticleSet::Scalar_t spin = P.activeSpin(iat);
+    const RealType cos_spin          = std::cos(spin);
+    const RealType sin_spin          = std::sin(spin);
+
+    // Per-particle spin phases e^{+is}, e^{-is} and the imaginary unit.
+    T phase_up(cos_spin, sin_spin);
+    T phase_dn(cos_spin, -sin_spin);
+    T imag_unit(0, 1.0);
+
+    for (int iorb = 0; iorb < norb; iorb++)
+    {
+      logdet(iat, iorb)      = phase_up * logpsi_work_up(iat, iorb) + phase_dn * logpsi_work_down(iat, iorb);
+      dlogdet(iat, iorb)     = phase_up * dlogpsi_work_up(iat, iorb) + phase_dn * dlogpsi_work_down(iat, iorb);
+      d2logdet(iat, iorb)    = phase_up * d2logpsi_work_up(iat, iorb) + phase_dn * d2logpsi_work_down(iat, iorb);
+      dspinlogdet(iat, iorb) = imag_unit * (phase_up * logpsi_work_up(iat, iorb) - phase_dn * logpsi_work_down(iat, iorb));
+    }
+  }
+}
+
+/// Spinor values (psi) and their spin derivatives (dpsi) for particle iat.
+template<class T>
+void SpinorSetT<T>::evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi)
+{
+  psi_work_up   = 0.0;
+  psi_work_down = 0.0;
+  spo_up->evaluateValue(P, iat, psi_work_up);
+  spo_dn->evaluateValue(P, iat, psi_work_down);
+
+  const ParticleSet::Scalar_t spin = P.activeSpin(iat);
+  const RealType cos_spin          = std::cos(spin);
+  const RealType sin_spin          = std::sin(spin);
+
+  // e^{+is}, e^{-is} and the imaginary unit.
+  T phase_up(cos_spin, sin_spin);
+  T phase_dn(cos_spin, -sin_spin);
+  T imag_unit(0, 1.0);
+
+  // psi = e^{is} * up + e^{-is} * down
+  psi = phase_up * psi_work_up + phase_dn * psi_work_down;
+  // dpsi = d(psi)/ds = i * (e^{is} * up - e^{-is} * down)
+  dpsi = imag_unit * (phase_up * psi_work_up - phase_dn * psi_work_down);
+}
+
+/// Gradients of the spinors with respect to source particle iat_src, combined as up * e^{is} + down * e^{-is}.
+template<class T>
+void SpinorSetT<T>::evaluateGradSource(const ParticleSet& P,
+                                       int first,
+                                       int last,
+                                       const ParticleSet& source,
+                                       int iat_src,
+                                       GradMatrix& gradphi)
+{
+  const IndexType nelec = P.getTotalNum();
+  GradMatrix grad_up(nelec, this->OrbitalSetSize);
+  GradMatrix grad_dn(nelec, this->OrbitalSetSize);
+  spo_up->evaluateGradSource(P, first, last, source, iat_src, grad_up);
+  spo_dn->evaluateGradSource(P, first, last, source, iat_src, grad_dn);
+
+  for (int iat = 0; iat < nelec; iat++)
+  {
+    const ParticleSet::Scalar_t spin = P.activeSpin(iat);
+    const RealType cos_spin          = std::cos(spin);
+    const RealType sin_spin          = std::sin(spin);
+    T phase_up(cos_spin, sin_spin);
+    T phase_dn(cos_spin, -sin_spin);
+    for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++)
+      gradphi(iat, iorb) = grad_up(iat, iorb) * phase_up + grad_dn(iat, iorb) * phase_dn;
+  }
+}
+
+template<class T> // new spinor set with the same name; both components are cloned through their own makeClone()
+std::unique_ptr<SPOSetT<T>> SpinorSetT<T>::makeClone() const
+{
+  auto spinor_copy = std::make_unique<SpinorSetT<T>>(this->my_name_);
+  auto up_copy     = spo_up->makeClone();
+  auto dn_copy     = spo_dn->makeClone();
+  spinor_copy->set_spos(std::move(up_copy), std::move(dn_copy));
+  return spinor_copy;
+}
+
+template<class T> // resources are added in the order: up component, down component, this spinor set
+void SpinorSetT<T>::createResource(ResourceCollection& collection) const
+{
+  spo_up->createResource(collection);
+  spo_dn->createResource(collection);
+  collection.addResource(std::make_unique<SpinorSetMultiWalkerResource>()); // the return value is not needed here
+}
+
+template<class T>
+void SpinorSetT<T>::acquireResource(ResourceCollection& collection,
+                                    const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+{
+  // Same order as createResource: up component, down component, then the spinor-level resource.
+  auto [up_batch, dn_batch] = extractSpinComponentRefList(spo_list);
+  up_batch.getLeader().acquireResource(collection, up_batch);
+  dn_batch.getLeader().acquireResource(collection, dn_batch);
+  // The spinor-level resource is stored in the handle of the batch leader.
+  auto& leader          = spo_list.template getCastedLeader<SpinorSetT<T>>();
+  leader.mw_res_handle_ = collection.lendResource<SpinorSetMultiWalkerResource>();
+}
+
+template<class T>
+void SpinorSetT<T>::releaseResource(ResourceCollection& collection,
+                                    const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+{
+  // Same order as acquireResource: up component, down component, then the spinor-level resource.
+  auto [up_batch, dn_batch] = extractSpinComponentRefList(spo_list);
+  up_batch.getLeader().releaseResource(collection, up_batch);
+  dn_batch.getLeader().releaseResource(collection, dn_batch);
+  // Hand back the resource held in the handle of the batch leader.
+  auto& leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
+  collection.takebackResource(leader.mw_res_handle_);
+}
+
+template<class T> // splits a batch of spinor sets into the matching batches of up (first) and down (second) components
+std::pair<RefVectorWithLeader<SPOSetT<T>>, RefVectorWithLeader<SPOSetT<T>>> SpinorSetT<T>::extractSpinComponentRefList(
+    const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+{
+  SpinorSetT<T>& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
+  const IndexType nw        = spo_list.size();
+  SPOSetT<T>& up_spo_leader = *(spo_leader.spo_up);
+  SPOSetT<T>& dn_spo_leader = *(spo_leader.spo_dn);
+  RefVectorWithLeader<SPOSetT<T>> up_spo_list(up_spo_leader);
+  RefVectorWithLeader<SPOSetT<T>> dn_spo_list(dn_spo_leader);
+  up_spo_list.reserve(nw);
+  dn_spo_list.reserve(nw);
+  for (int iw = 0; iw < nw; iw++)
+  {
+    SpinorSetT<T>& spinor = spo_list.template getCastedElement<SpinorSetT<T>>(iw);
+    up_spo_list.emplace_back(*(spinor.spo_up));
+    dn_spo_list.emplace_back(*(spinor.spo_dn));
+  }
+  return std::make_pair(std::move(up_spo_list), std::move(dn_spo_list)); // moved, so the lists are not copied
+}
+
+template class SpinorSetT<std::complex<double>>; // explicit instantiations: only complex value types are provided
+template class SpinorSetT<std::complex<float>>;
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/SpinorSetT.h b/src/QMCWaveFunctions/SpinorSetT.h
new file mode 100644
index 0000000000..fe50a256fe
--- /dev/null
+++ b/src/QMCWaveFunctions/SpinorSetT.h
@@ -0,0 +1,229 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2022 QMCPACK developers
+//
+// File developed by: Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories
+//                    Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories
+//
+// File created by:  Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_SPINORSETT_H
+#define QMCPLUSPLUS_SPINORSETT_H
+
+#include "QMCWaveFunctions/SPOSetT.h"
+#include "ResourceHandle.h"
+
+namespace qmcplusplus
+{
+/** Class for Melton & Mitas style Spinors.
+ *
+ */
+template<class T>
+class SpinorSetT : public SPOSetT<T>
+{
+public:
+  using ValueMatrix       = typename SPOSetT<T>::ValueMatrix;
+  using ValueVector       = typename SPOSetT<T>::ValueVector;
+  using GradMatrix        = typename SPOSetT<T>::GradMatrix;
+  using GradType          = typename SPOSetT<T>::GradType;
+  using GradVector        = typename SPOSetT<T>::GradVector;
+  using OffloadMWVGLArray = Array<T, 3, OffloadPinnedAllocator<T>>; // [VGL, walker, Orbs]
+  // NOTE(review): presumably the same type as SPOSetT<T>::OffloadMWVGLArray; confirm and alias that type instead.
+  template<typename DT>
+  using OffloadMatrix = typename SPOSetT<T>::template OffloadMatrix<DT>;
+  using RealType      = typename SPOSetT<T>::RealType;
+  using IndexType     = OHMMS_INDEXTYPE;
+
+  /** constructor. The up and down components are installed afterwards with set_spos(). */
+  SpinorSetT(const std::string& my_name);
+  ~SpinorSetT() override;
+
+  // The three queries below dereference spo_up/spo_dn, so set_spos() must have been called before using them.
+  std::string getClassName() const override { return "SpinorSet"; }
+  bool isOptimizable() const override { return spo_up->isOptimizable() || spo_dn->isOptimizable(); }
+  bool isOMPoffload() const override { return spo_up->isOMPoffload() || spo_dn->isOMPoffload(); }
+  bool hasIonDerivs() const override { return spo_up->hasIonDerivs() || spo_dn->hasIonDerivs(); }
+
+  //This class is initialized by separately building the up and down channels of the spinor set and
+  //then registering them.
+  void set_spos(std::unique_ptr<SPOSetT<T>>&& up, std::unique_ptr<SPOSetT<T>>&& dn);
+
+  /** set the OrbitalSetSize
+   * @param norbs number of single-particle orbitals
+   */
+  void setOrbitalSetSize(int norbs) override;
+
+  /** evaluate the values of this spinor set
+   * @param P current ParticleSet
+   * @param iat active particle
+   * @param psi values of the SPO
+   */
+  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override;
+
+  /** evaluate the values, gradients and laplacians of this single-particle orbital set
+   * @param P current ParticleSet
+   * @param iat active particle
+   * @param psi values of the SPO
+   * @param dpsi gradients of the SPO
+   * @param d2psi laplacians of the SPO
+   */
+  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override;
+
+  /** evaluate the values, gradients and laplacians of this single-particle orbital set
+   * @param P current ParticleSet
+   * @param iat active particle
+   * @param psi values of the SPO
+   * @param dpsi gradients of the SPO
+   * @param d2psi laplacians of the SPO
+   * @param dspin spin gradient of the SPO
+   */
+  void evaluateVGL_spin(const ParticleSet& P,
+                        int iat,
+                        ValueVector& psi,
+                        GradVector& dpsi,
+                        ValueVector& d2psi,
+                        ValueVector& dspin) override;
+
+  /** evaluate the values, gradients and laplacians and spin gradient of this single-particle orbital sets of multiple walkers
+   * @param spo_list the list of SPOSet pointers in a walker batch
+   * @param P_list the list of ParticleSet pointers in a walker batch
+   * @param iat active particle
+   * @param psi_v_list the list of value vector pointers in a walker batch
+   * @param dpsi_v_list the list of gradient vector pointers in a walker batch
+   * @param d2psi_v_list the list of laplacian vector pointers in a walker batch
+   * @param mw_dspin dual matrix of spin gradients. nw x num_orbitals
+   */
+  void mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                              const RefVectorWithLeader<ParticleSet>& P_list,
+                              int iat,
+                              const RefVector<ValueVector>& psi_v_list,
+                              const RefVector<GradVector>& dpsi_v_list,
+                              const RefVector<ValueVector>& d2psi_v_list,
+                              OffloadMatrix<QMCTraits::ComplexType>& mw_dspin) const override;
+
+  /** evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio
+   *  and grads (including spin gradients) of multiple walkers. Device data of phi_vgl_v must be up-to-date upon return.
+   * @param spo_list the list of SPOSet pointers in a walker batch
+   * @param P_list the list of ParticleSet pointers in a walker batch
+   * @param iat active particle
+   * @param invRow_ptr_list pointers forwarded unchanged to the up and down components (presumably one inverse-matrix row per walker)
+   * @param phi_vgl_v orbital values, gradients and laplacians of all the walkers
+   * @param ratios ratios of all walkers
+   * @param grads spatial gradients of all walkers
+   * @param spingrads spin gradients of all walkers
+   */
+  void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                              const RefVectorWithLeader<ParticleSet>& P_list,
+                                              int iat,
+                                              const std::vector<const T*>& invRow_ptr_list,
+                                              OffloadMWVGLArray& phi_vgl_v,
+                                              std::vector<T>& ratios,
+                                              std::vector<GradType>& grads,
+                                              std::vector<T>& spingrads) const override;
+
+  /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles
+   * @param P current ParticleSet
+   * @param first starting index of the particles
+   * @param last ending index of the particles
+   * @param logdet determinant matrix to be inverted
+   * @param dlogdet gradients
+   * @param d2logdet laplacians
+   *
+   */
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override;
+
+  void mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                               const RefVectorWithLeader<ParticleSet>& P_list,
+                               int first,
+                               int last,
+                               const RefVector<ValueMatrix>& logdet_list,
+                               const RefVector<GradMatrix>& dlogdet_list,
+                               const RefVector<ValueMatrix>& d2logdet_list) const override;
+
+  void evaluate_notranspose_spin(const ParticleSet& P,
+                                 int first,
+                                 int last,
+                                 ValueMatrix& logdet,
+                                 GradMatrix& dlogdet,
+                                 ValueMatrix& d2logdet,
+                                 ValueMatrix& dspinlogdet) override;
+  /** Evaluate the values and spin gradients of single particle spinors corresponding to electron iat.
+   *  @param P current particle set.
+   *  @param iat electron index.
+   *  @param psi spinor values.
+   *  @param dpsi spin gradient values. d/ds phi(r,s).
+   *
+   */
+  void evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi) override;
+
+  /** evaluate the gradients of this single-particle orbital
+   *  for [first,last) target particles with respect to the given source particle
+   * @param P current ParticleSet
+   * @param first starting index of the particles
+   * @param last ending index of the particles
+   * @param source ParticleSet that contains the source particle
+   * @param iat_src source particle index
+   * @param gradphi gradients
+   */
+  void evaluateGradSource(const ParticleSet& P,
+                          int first,
+                          int last,
+                          const ParticleSet& source,
+                          int iat_src,
+                          GradMatrix& gradphi) override;
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const override;
+
+  void createResource(ResourceCollection& collection) const override;
+
+  void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const override;
+
+  void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const override;
+
+  /// check if the multi walker resource is owned. For testing only.
+  bool isResourceOwned() const { return bool(mw_res_handle_); }
+
+private:
+  struct SpinorSetMultiWalkerResource;
+  ResourceHandle<SpinorSetMultiWalkerResource> mw_res_handle_;
+
+  std::pair<RefVectorWithLeader<SPOSetT<T>>, RefVectorWithLeader<SPOSetT<T>>> extractSpinComponentRefList(
+      const RefVectorWithLeader<SPOSetT<T>>& spo_list) const;
+
+  //Sposet for the up and down channels of our spinors.
+  std::unique_ptr<SPOSetT<T>> spo_up;
+  std::unique_ptr<SPOSetT<T>> spo_dn;
+
+  //temporary arrays for holding the values of the up and down channels respectively.
+  ValueVector psi_work_up;
+  ValueVector psi_work_down;
+
+  //temporary arrays for holding the gradients of the up and down channels respectively.
+  GradVector dpsi_work_up;
+  GradVector dpsi_work_down;
+
+  //temporary arrays for holding the laplacians of the up and down channels respectively.
+  ValueVector d2psi_work_up;
+  ValueVector d2psi_work_down;
+
+  //Same as above, but these are the full matrices containing all spinor/particle combinations.
+  ValueMatrix logpsi_work_up;
+  ValueMatrix logpsi_work_down;
+
+  GradMatrix dlogpsi_work_up;
+  GradMatrix dlogpsi_work_down;
+
+  ValueMatrix d2logpsi_work_up;
+  ValueMatrix d2logpsi_work_down;
+};
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/tests/CMakeLists.txt b/src/QMCWaveFunctions/tests/CMakeLists.txt
index ec066f8735..b414f0158b 100644
--- a/src/QMCWaveFunctions/tests/CMakeLists.txt
+++ b/src/QMCWaveFunctions/tests/CMakeLists.txt
@@ -111,6 +111,7 @@ set(SPOSET_SRC
     test_hybridrep.cpp
     test_pw.cpp
     test_ConstantSPOSet.cpp
+    test_ConstantSPOSetT.cpp
     ${MO_SRCS})
 if(NiO_a16_H5_FOUND)
   set(SPOSET_SRC ${SPOSET_SRC} test_einset_NiO_a16.cpp)
@@ -139,7 +140,7 @@ set(DETERMINANT_SRC
     test_ci_configuration.cpp
     test_multi_slater_determinant.cpp)
 
-add_library(sposets_for_testing FakeSPO.cpp ConstantSPOSet.cpp)
+add_library(sposets_for_testing FakeSPOT.cpp FakeSPO.cpp ConstantSPOSet.cpp ConstantSPOSetT.cpp)
 target_include_directories(sposets_for_testing PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
 target_link_libraries(sposets_for_testing PUBLIC qmcwfs)
 
diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp b/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp
new file mode 100644
index 0000000000..49e5070241
--- /dev/null
+++ b/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp
@@ -0,0 +1,124 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2023 Raymond Clay and QMCPACK developers.
+//
+// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories
+//
+// File created by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "ConstantSPOSetT.h"
+
+namespace qmcplusplus
+{
+
+template<class T>
+ConstantSPOSetT<T>::ConstantSPOSetT(const std::string& my_name, const int nparticles, const int norbitals)
+    : SPOSetT<T>(my_name), numparticles_(nparticles)
+{
+  // Record the orbital count, then give each reference table its numparticles_ x OrbitalSetSize shape and zero it.
+  this->OrbitalSetSize = norbitals;
+  ref_psi_.resize(numparticles_, this->OrbitalSetSize);
+  ref_psi_ = 0.0;
+  ref_egrad_.resize(numparticles_, this->OrbitalSetSize);
+  ref_egrad_ = 0.0;
+  ref_elapl_.resize(numparticles_, this->OrbitalSetSize);
+  ref_elapl_ = 0.0;
+}
+
+template<class T>
+std::unique_ptr<SPOSetT<T>> ConstantSPOSetT<T>::makeClone() const
+{ // Build an equally sized ConstantSPOSetT under the same name, then hand it the three reference tables.
+  auto duplicate = std::make_unique<ConstantSPOSetT<T>>(this->my_name_, numparticles_, this->OrbitalSetSize);
+  duplicate->setRefELapls(ref_elapl_);
+  duplicate->setRefEGrads(ref_egrad_);
+  duplicate->setRefVals(ref_psi_);
+  return duplicate;
+}
+
+template<class T>
+void ConstantSPOSetT<T>::checkOutVariables(const opt_variables_type& active)
+{ // Unsupported on this fixture: the argument is ignored and the call always aborts.
+  APP_ABORT("ConstantSPOSet should not call checkOutVariables");
+}
+
+template<class T>
+void ConstantSPOSetT<T>::setOrbitalSetSize(int norbs)
+{ // Always aborts; norbs is ignored (the constructor is what assigns OrbitalSetSize).
+  APP_ABORT("ConstantSPOSet should not call setOrbitalSetSize()");
+}
+
+template<class T>
+void ConstantSPOSetT<T>::setRefVals(const ValueMatrix& vals)
+{ // Assigns vals to the value table; the asserts require numparticles_ rows and OrbitalSetSize columns.
+  assert(vals.cols() == this->OrbitalSetSize);
+  assert(vals.rows() == numparticles_);
+  ref_psi_ = vals;
+}
+
+template<class T>
+void ConstantSPOSetT<T>::setRefEGrads(const GradMatrix& grads)
+{ // Assigns grads to the gradient table; the asserts require numparticles_ rows and OrbitalSetSize columns.
+  assert(grads.cols() == this->OrbitalSetSize);
+  assert(grads.rows() == numparticles_);
+  ref_egrad_ = grads;
+}
+
+template<class T>
+void ConstantSPOSetT<T>::setRefELapls(const ValueMatrix& lapls)
+{ // Assigns lapls to the laplacian table; the asserts require numparticles_ rows and OrbitalSetSize columns.
+  assert(lapls.cols() == this->OrbitalSetSize);
+  assert(lapls.rows() == numparticles_);
+  ref_elapl_ = lapls;
+}
+
+template<class T>
+void ConstantSPOSetT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+{
+  const auto* const virtual_set = dynamic_cast<const VirtualParticleSet*>(&P);
+  const int row = virtual_set ? virtual_set->refPtcl : iat; // a VirtualParticleSet is looked up by its refPtcl
+  assert(psi.size() == this->OrbitalSetSize);
+  for (int orb = 0; orb < this->OrbitalSetSize; ++orb)
+    psi[orb] = ref_psi_(row, orb);
+}
+
+template<class T>
+void ConstantSPOSetT<T>::evaluateVGL(const ParticleSet& P,
+                                     int iat,
+                                     ValueVector& psi,
+                                     GradVector& dpsi,
+                                     ValueVector& d2psi)
+{
+  for (int orb = 0; orb < this->OrbitalSetSize; ++orb)
+  { // copies entry (iat, orb) of each reference table into the outputs; P is never read
+    d2psi[orb] = ref_elapl_(iat, orb);
+    dpsi[orb]  = ref_egrad_(iat, orb);
+    psi[orb]   = ref_psi_(iat, orb);
+  }
+}
+
+template<class T>
+void ConstantSPOSetT<T>::evaluate_notranspose(const ParticleSet& P,
+                                              int first,
+                                              int last,
+                                              ValueMatrix& logdet,
+                                              GradMatrix& dlogdet,
+                                              ValueMatrix& d2logdet)
+{
+  for (int iat = first; iat < last; ++iat)
+  { // output row (iat - first) of each matrix is handed to evaluateVGL for particle iat
+    ValueVector value_row(logdet[iat - first], logdet.cols());
+    GradVector grad_row(dlogdet[iat - first], dlogdet.cols());
+    ValueVector lapl_row(d2logdet[iat - first], d2logdet.cols());
+    evaluateVGL(P, iat, value_row, grad_row, lapl_row);
+  }
+}
+
+template class ConstantSPOSetT<float>; // explicit instantiations: the member definitions live in this .cpp
+template class ConstantSPOSetT<double>;
+template class ConstantSPOSetT<std::complex<float>>;
+template class ConstantSPOSetT<std::complex<double>>;
+
+} //namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSetT.h b/src/QMCWaveFunctions/tests/ConstantSPOSetT.h
new file mode 100644
index 0000000000..483136360a
--- /dev/null
+++ b/src/QMCWaveFunctions/tests/ConstantSPOSetT.h
@@ -0,0 +1,93 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2023 Raymond Clay and QMCPACK developers.
+//
+// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories
+//
+// File created by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef QMCPLUSPLUS_CONSTANTSPOSETT_H
+#define QMCPLUSPLUS_CONSTANTSPOSETT_H
+
+#include "QMCWaveFunctions/SPOSetT.h"
+
+namespace qmcplusplus
+{
+/** Constant SPOSet for testing purposes.  Fixed N_elec x N_orb matrices store the orbital values, gradients and
+ *  laplacians; they are read back through standard SPOSet calls like evaluateValue, evaluateVGL, etc.
+ *  Exists to provide deterministic and known output to objects requiring SPOSet evaluations.
+ */
+template<class T>
+class ConstantSPOSetT : public SPOSetT<T>
+{
+public:
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+
+  ConstantSPOSetT(const std::string& my_name) = delete;
+
+  //Constructor needs number of particles and number of orbitals.  This is the minimum
+  //amount of information needed to sanely construct all data members and perform size
+  //checks later.
+  ConstantSPOSetT(const std::string& my_name, const int nparticles, const int norbitals);
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const final;
+
+  std::string getClassName() const final { return "ConstantSPOSet"; }
+
+  void checkOutVariables(const opt_variables_type& active) final; ///< always aborts
+
+  void setOrbitalSetSize(int norbs) final; ///< always aborts; the size is set by the constructor
+
+  /**
+  * @brief Setter method to set \phi_j(r_i). Stores input matrix in ref_psi_.
+  * @param vals Nelec x Norb ValueType matrix of \phi_j(r_i)
+  * @return void
+  */
+  void setRefVals(const ValueMatrix& vals);
+  /**
+  * @brief Setter method to set \nabla_i \phi_j(r_i). Stores input matrix in ref_egrad_.
+  * @param grads Nelec x Norb GradType matrix of \nabla_i \phi_j(r_i)
+  * @return void
+  */
+  void setRefEGrads(const GradMatrix& grads);
+  /**
+  * @brief Setter method to set \nabla^2_i \phi_j(r_i). Stores input matrix in ref_elapl_.
+  * @param lapls Nelec x Norb ValueType matrix of \nabla^2_i \phi_j(r_i)
+  * @return void
+  */
+  void setRefELapls(const ValueMatrix& lapls);
+
+  /// Fills psi with the stored values of particle iat (of the reference particle for a VirtualParticleSet).
+  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) final;
+
+  /// Fills psi, dpsi and d2psi with the stored values, gradients and laplacians of particle iat.
+  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final;
+
+  /// Runs evaluateVGL for particles first..last-1, writing to rows 0..last-first-1 of the output matrices.
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) final;
+
+private:
+  /// Row count of the reference matrices.  To ensure consistent array sizing and enforcement, we agree at
+  /// construction how large these matrices will be.  The column count norb is stored in SPOSet::OrbitalSetSize.
+  const int numparticles_;
+
+  //Value, electron gradient, and electron laplacian at "reference configuration".
+  //i.e. before any attempted moves.
+  ValueMatrix ref_psi_;
+  GradMatrix ref_egrad_;
+  ValueMatrix ref_elapl_;
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/tests/FakeSPOT.cpp b/src/QMCWaveFunctions/tests/FakeSPOT.cpp
new file mode 100644
index 0000000000..fcf1637682
--- /dev/null
+++ b/src/QMCWaveFunctions/tests/FakeSPOT.cpp
@@ -0,0 +1,160 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2020 QMCPACK developers.
+//
+// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
+//
+// File created by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "FakeSPOT.h"
+
+namespace qmcplusplus
+{
+// Loads the hard-coded 3- and 4-orbital reference tables that the evaluate* methods of this
+// fixture hand back.
+template<class T>
+FakeSPOT<T>::FakeSPOT() : SPOSetT<T>("one_FakeSPO")
+{
+  // The numbers are kept in plain double tables and copied entry by entry, so every element
+  // still goes through a double -> T assignment.
+
+  // 3-orbital values read by evaluateValue and evaluate_notranspose.
+  const double a_ref[3][3] = {
+      {2.3, 4.5, 2.6},
+      {0.5, 8.5, 3.3},
+      {1.8, 4.4, 4.9},
+  };
+
+  a.resize(3, 3);
+  for (int row = 0; row < 3; ++row)
+    for (int col = 0; col < 3; ++col)
+      a(row, col) = a_ref[row][col];
+
+  // 3-orbital values read by evaluateVGL.
+  const double v_ref[3] = {1.9, 2.0, 3.1};
+  v.resize(3);
+  for (int col = 0; col < 3; ++col)
+    v[col] = v_ref[col];
+
+  // 4-orbital values read by evaluateValue and evaluate_notranspose.
+  const double a2_ref[4][4] = {
+      {2.3, 4.5, 2.6, 1.2},
+      {0.5, 8.5, 3.3, 0.3},
+      {1.8, 4.4, 4.9, 2.8},
+      {0.8, 4.1, 3.2, 1.1},
+  };
+
+  a2.resize(4, 4);
+  for (int row = 0; row < 4; ++row)
+    for (int col = 0; col < 4; ++col)
+      a2(row, col) = a2_ref[row][col];
+
+  // 4-orbital values read by evaluateVGL.  Entry (3, 0) is never assigned and keeps whatever
+  // resize() left there, so the 0.0 in the last table row is only a placeholder that the copy
+  // loop skips.
+  // NOTE(review): confirm that leaving v2(3, 0) unassigned is intended.
+  const double v2_ref[4][4] = {
+      {3.2, 0.5, 5.9, 3.7},
+      {0.3, 1.4, 3.9, 8.2},
+      {3.3, 5.4, 4.9, 2.2},
+      {0.0, 5.4, 4.9, 2.2},
+  };
+
+  v2.resize(4, 4);
+  for (int row = 0; row < 4; ++row)
+    for (int col = (row == 3) ? 1 : 0; col < 4; ++col)
+      v2(row, col) = v2_ref[row][col];
+
+  // Gradients shared by both orbital counts; the 3-orbital paths index only the first
+  // three entries.
+  gv.resize(4);
+  gv[0] = GradType(1.0, 0.0, 0.1);
+  gv[1] = GradType(1.0, 2.0, 0.1);
+  gv[2] = GradType(2.0, 1.0, 0.1);
+  gv[3] = GradType(0.4, 0.3, 0.1);
+}
+template<class T>
+std::unique_ptr<SPOSetT<T>> FakeSPOT<T>::makeClone() const
+{ // The clone is copy-constructed from *this.
+  return std::make_unique<FakeSPOT<T>>(*this);
+}
+
+template<class T>
+void FakeSPOT<T>::setOrbitalSetSize(int norbs)
+{ // Stores norbs unchecked; the table-based evaluate paths in this file only act for 3 and 4.
+  this->OrbitalSetSize = norbs;
+}
+
+template<class T>
+void FakeSPOT<T>::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi)
+{
+  if (iat < 0)
+    for (int k = 0; k < psi.size(); ++k)
+      psi[k] = 1.2 * k - k * k;
+  else if (this->OrbitalSetSize == 3 || this->OrbitalSetSize == 4)
+  { // only the 3- and 4-orbital tables exist; any other size leaves psi untouched
+    auto& table = (this->OrbitalSetSize == 3) ? a : a2;
+    for (int k = 0; k < this->OrbitalSetSize; ++k)
+      psi[k] = table(iat, k);
+  }
+}
+
+template<class T>
+void FakeSPOT<T>::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+{
+  // Only the 3- and 4-orbital configurations are populated; any other size leaves the outputs
+  // untouched.  d2psi and P are never accessed.
+  const int norb = this->OrbitalSetSize;
+  if (norb != 3 && norb != 4)
+    return;
+
+  for (int orb = 0; orb < norb; ++orb)
+  {
+    // The 3-orbital values do not depend on iat; the 4-orbital values come from row iat of v2.
+    if (norb == 3)
+      psi[orb] = v[orb];
+    else
+      psi[orb] = v2(iat, orb);
+    // Both sizes read the same gradient table.
+    dpsi[orb] = gv[orb];
+  }
+}
+
+template<class T>
+void FakeSPOT<T>::evaluate_notranspose(const ParticleSet& P,
+                                       int first,
+                                       int last,
+                                       ValueMatrix& logdet,
+                                       GradMatrix& dlogdet,
+                                       ValueMatrix& d2logdet)
+{
+  // P, first, last and d2logdet are not used: the whole norb x norb block is always filled.
+  // Sizes other than 3 and 4 leave every output untouched.
+
+  const int norb = this->OrbitalSetSize;
+  if (norb != 3 && norb != 4)
+    return;
+
+  // Value table matching the configured size.
+  auto& table = (norb == 3) ? a : a2;
+
+  for (int i = 0; i < norb; ++i)
+    for (int j = 0; j < norb; ++j)
+    {
+      // Values land transposed relative to the table; gradients are written at [i][j] as
+      // gv[j] + GradType(i).
+      logdet(j, i)  = table(i, j);
+      dlogdet[i][j] = gv[j] + GradType(i);
+    }
+}
+
+// Explicit instantiations for real and complex value types, in single and double precision.
+template class FakeSPOT<double>;
+template class FakeSPOT<float>;
+template class FakeSPOT<std::complex<double>>;
+template class FakeSPOT<std::complex<float>>;
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/tests/FakeSPOT.h b/src/QMCWaveFunctions/tests/FakeSPOT.h
new file mode 100644
index 0000000000..dfa6689bd6
--- /dev/null
+++ b/src/QMCWaveFunctions/tests/FakeSPOT.h
@@ -0,0 +1,62 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2020 QMCPACK developers.
+//
+// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
+//
+// File created by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef QMCPLUSPLUS_FAKESPOTT_H
+#define QMCPLUSPLUS_FAKESPOTT_H
+
+#include "QMCWaveFunctions/SPOSetT.h"
+
+namespace qmcplusplus
+{
+/// SPOSet stand-in for tests: serves hard-coded tables for OrbitalSetSize 3 or 4 (see FakeSPOT.cpp).
+template<class T>
+class FakeSPOT : public SPOSetT<T>
+{
+public:
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using GradType    = typename SPOSetT<T>::GradType;
+
+  Matrix<T> a;   ///< 3x3 values used when OrbitalSetSize == 3
+  Matrix<T> a2;  ///< 4x4 values used when OrbitalSetSize == 4
+  Vector<T> v;   ///< 3 values returned by evaluateVGL when OrbitalSetSize == 3
+  Matrix<T> v2;  ///< 4x4 values returned by evaluateVGL when OrbitalSetSize == 4
+  GradVector gv; ///< 4 gradients read by evaluateVGL and evaluate_notranspose
+
+  FakeSPOT();
+  ~FakeSPOT() override = default;
+
+  std::string getClassName() const override { return "FakeSPO"; }
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const override;
+
+  /// Does nothing.
+  virtual void report() {}
+
+  void setOrbitalSetSize(int norbs) override;
+
+  void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override;
+
+  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override;
+
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override;
+};
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/tests/test_ConstantSPOSetT.cpp b/src/QMCWaveFunctions/tests/test_ConstantSPOSetT.cpp
new file mode 100644
index 0000000000..87425bbb91
--- /dev/null
+++ b/src/QMCWaveFunctions/tests/test_ConstantSPOSetT.cpp
@@ -0,0 +1,136 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2023 QMCPACK developers.
+//
+// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories
+//
+// File created by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#include "catch.hpp"
+#include "Configuration.h"
+#include "QMCWaveFunctions/WaveFunctionTypes.hpp"
+#include "QMCWaveFunctions/tests/ConstantSPOSetT.h"
+#include "Utilities/for_testing/checkMatrix.hpp"
+namespace qmcplusplus
+{
+//Ray:  Figure out how to template me on value type.
+TEST_CASE("ConstantSPOSetT", "[wavefunction]")
+{
+  //For now, do a small square case.
+  const int nelec   = 2;
+  const int norb    = 2;
+  using WF          = WaveFunctionTypes<QMCTraits::ValueType, QMCTraits::FullPrecValueType>;
+  using Value       = WF::Value;
+  using Grad        = WF::Grad;
+  using ValueVector = Vector<Value>;
+  using GradVector  = Vector<Grad>;
+  using ValueMatrix = Matrix<Value>;
+  using GradMatrix  = Matrix<Grad>;
+
+  ValueVector row0{Value(0.92387953), Value(0.92387953)};
+  ValueVector row1{Value(0.29131988), Value(0.81078057)};
+
+  GradVector grow0{Grad({-2.22222, -1.11111, 0.33333}), Grad({8.795388, -0.816057, -0.9238793})};
+  GradVector grow1{Grad({2.22222, 1.11111, -0.33333}), Grad({-8.795388, 0.816057, 0.9238793})};
+
+  ValueVector lrow0{Value(-0.2234545), Value(0.72340234)};
+  ValueVector lrow1{Value(-12.291810), Value(6.879057)};
+
+  ValueMatrix spomat;
+  GradMatrix gradspomat;
+  ValueMatrix laplspomat;
+
+  spomat.resize(nelec, norb);
+  gradspomat.resize(nelec, norb);
+  laplspomat.resize(nelec, norb);
+
+  for (int iorb = 0; iorb < norb; iorb++)
+  {
+    spomat(0, iorb) = row0[iorb];
+    spomat(1, iorb) = row1[iorb];
+
+    gradspomat(0, iorb) = grow0[iorb];
+    gradspomat(1, iorb) = grow1[iorb];
+
+    laplspomat(0, iorb) = lrow0[iorb];
+    laplspomat(1, iorb) = lrow1[iorb];
+  }
+
+  const SimulationCell simulation_cell;
+  ParticleSet elec(simulation_cell);
+
+  elec.create({nelec});
+
+  ValueVector psiV = {0.0, 0.0};
+  ValueVector psiL = {0.0, 0.0};
+  GradVector psiG;
+  psiG.resize(norb);
+
+  //Build the sposet and load the reference values, gradients and laplacians.
+  auto sposet = std::make_unique<ConstantSPOSetT<Value>>("constant_spo", nelec, norb);
+  sposet->setRefVals(spomat);
+  sposet->setRefEGrads(gradspomat);
+  sposet->setRefELapls(laplspomat);
+
+  sposet->evaluateValue(elec, 0, psiV);
+  CHECK(psiV[0] == row0[0]);
+  CHECK(psiV[1] == row0[1]);
+
+  psiV = 0.0;
+  sposet->evaluateValue(elec, 1, psiV);
+  CHECK(psiV[0] == row1[0]);
+  CHECK(psiV[1] == row1[1]);
+
+  psiV = 0.0;
+  sposet->evaluateVGL(elec, 1, psiV, psiG, psiL);
+  for (int iorb = 0; iorb < norb; iorb++)
+  {
+    CHECK(psiV[iorb] == row1[iorb]);
+    CHECK(psiL[iorb] == lrow1[iorb]);
+
+    for (int idim = 0; idim < OHMMS_DIM; idim++)
+      CHECK(psiG[iorb][idim] == grow1[iorb][idim]);
+  }
+  //Test of evaluate_notranspose.
+  ValueMatrix phimat, lphimat;
+  GradMatrix gphimat;
+  phimat.resize(nelec, norb);
+  gphimat.resize(nelec, norb);
+  lphimat.resize(nelec, norb);
+
+  //Gradients are compared component by component against the reference gradient matrix.
+  auto check_grads = [&] {
+    for (int iat = 0; iat < nelec; iat++)
+      for (int iorb = 0; iorb < norb; iorb++)
+        for (int idim = 0; idim < OHMMS_DIM; idim++)
+          CHECK(gphimat(iat, iorb)[idim] == gradspomat(iat, iorb)[idim]);
+  };
+
+  const int first_index = 0; //Only 2 electrons in this case.
+  const int last_index  = 2;
+  sposet->evaluate_notranspose(elec, first_index, last_index, phimat, gphimat, lphimat);
+
+  //checkMatrix only reports through its return value, so the result flag has to be asserted.
+  CHECK(checkMatrix(phimat, spomat).result);
+  CHECK(checkMatrix(lphimat, laplspomat).result);
+  check_grads();
+
+  //Test of makeClone()
+  auto sposet_vgl2 = sposet->makeClone();
+  phimat           = 0.0;
+  gphimat          = 0.0;
+  lphimat          = 0.0;
+  sposet_vgl2->evaluate_notranspose(elec, first_index, last_index, phimat, gphimat, lphimat);
+  CHECK(checkMatrix(phimat, spomat).result);
+  CHECK(checkMatrix(lphimat, laplspomat).result);
+  check_grads();
+
+  //Lastly, check if name is correct.
+  std::string myname = sposet_vgl2->getClassName();
+  std::string targetstring("ConstantSPOSet");
+  CHECK(myname == targetstring);
+}
+} // namespace qmcplusplus