diff --git a/src/Containers/OhmmsPETE/TinyVectorOps.h b/src/Containers/OhmmsPETE/TinyVectorOps.h
index dbdee0ae009..94c1ec8d4d5 100644
--- a/src/Containers/OhmmsPETE/TinyVectorOps.h
+++ b/src/Containers/OhmmsPETE/TinyVectorOps.h
@@ -355,11 +355,11 @@ struct OTDot<TinyVector<T1, 4>, TinyVector<T2, 4>>
 };
 
 /** specialization for real-complex TinyVector */
-template<class T1>
-struct OTDot<TinyVector<T1, 3>, TinyVector<std::complex<T1>, 3>>
+template<class T1, class T2>
+struct OTDot<TinyVector<T1, 3>, TinyVector<std::complex<T2>, 3>>
 {
   using Type_t = T1;
-  inline static Type_t apply(const TinyVector<T1, 3>& lhs, const TinyVector<std::complex<T1>, 3>& rhs)
+  inline static Type_t apply(const TinyVector<T1, 3>& lhs, const TinyVector<std::complex<T2>, 3>& rhs)
   {
     return lhs[0] * rhs[0].real() + lhs[1] * rhs[1].real() + lhs[2] * rhs[2].real();
   }
diff --git a/src/Numerics/OneDimGridFactory.cpp b/src/Numerics/OneDimGridFactory.cpp
index 16a17ec9b30..bc90a9f5053 100644
--- a/src/Numerics/OneDimGridFactory.cpp
+++ b/src/Numerics/OneDimGridFactory.cpp
@@ -13,19 +13,21 @@
 
 
 #include "OneDimGridFactory.h"
+#include "Configuration.h"
 #include "OhmmsData/AttributeSet.h"
 #include "Message/UniformCommunicateError.h"
 
 namespace qmcplusplus
 {
-std::unique_ptr<OneDimGridFactory::GridType> OneDimGridFactory::createGrid(xmlNodePtr cur)
+template <typename T>
+std::unique_ptr<typename OneDimGridFactory<T>::GridType> OneDimGridFactory<T>::createGrid(xmlNodePtr cur)
 {
   std::unique_ptr<GridType> agrid;
   RealType ri     = 1e-5;
   RealType rf     = 100.0;
   RealType ascale = -1.0e0;
   RealType astep  = 1.25e-2;
-  IndexType npts  = 1001;
+  QMCTraits::IndexType npts  = 1001;
   std::string gridType("log");
   std::string gridID("invalid");
   OhmmsAttributeSet radAttrib;
@@ -74,4 +76,7 @@ std::unique_ptr<OneDimGridFactory::GridType> OneDimGridFactory::createGrid(xmlNo
   }
   return agrid;
 }
+
+template struct OneDimGridFactory<double>;
+template struct OneDimGridFactory<float>;
 } // namespace qmcplusplus
diff --git a/src/Numerics/OneDimGridFactory.h b/src/Numerics/OneDimGridFactory.h
index 6365db25aa6..d27b1fb9041 100644
--- a/src/Numerics/OneDimGridFactory.h
+++ b/src/Numerics/OneDimGridFactory.h
@@ -14,15 +14,17 @@
 
 #ifndef QMCPLUSPLUS_ONEDIMGRIDFACTORY_H
 #define QMCPLUSPLUS_ONEDIMGRIDFACTORY_H
-#include "Configuration.h"
 #include "Numerics/OneDimGridFunctor.h"
+#include "Numerics/LibxmlNumericIO.h"
 
 namespace qmcplusplus
 {
 /** Factory class using Singleton pattern
  */
-struct OneDimGridFactory : public QMCTraits
+template <typename T>
+struct OneDimGridFactory
 {
+  using RealType = T;
   ///typedef of the one-dimensional grid
   using GridType = OneDimGridBase<RealType>;
 
diff --git a/src/Numerics/SoaCartesianTensor.h b/src/Numerics/SoaCartesianTensor.h
index 21fa7f52bf1..540ab826b0d 100644
--- a/src/Numerics/SoaCartesianTensor.h
+++ b/src/Numerics/SoaCartesianTensor.h
@@ -37,7 +37,7 @@ namespace qmcplusplus
 template<class T>
 struct SoaCartesianTensor
 {
-  using value_type = T;
+  using ValueType = T;
   using ggg_type   = TinyVector<Tensor<T, 3>, 3>;
 
   ///maximum angular momentum
diff --git a/src/Numerics/SoaSphericalTensor.h b/src/Numerics/SoaSphericalTensor.h
index 56c638b42e6..c5e4f3e1ae1 100644
--- a/src/Numerics/SoaSphericalTensor.h
+++ b/src/Numerics/SoaSphericalTensor.h
@@ -37,6 +37,8 @@ namespace qmcplusplus
 template<typename T>
 struct SoaSphericalTensor
 {
+  using ValueType = T;
+
   ///maximum angular momentum for the center
   int Lmax;
   /// Normalization factors
diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt
index 9d6eace715f..f584084bf74 100644
--- a/src/QMCWaveFunctions/CMakeLists.txt
+++ b/src/QMCWaveFunctions/CMakeLists.txt
@@ -41,10 +41,11 @@ set(WFBASE_SRCS
     HarmonicOscillator/SHOSetBuilder.cpp
     HarmonicOscillator/SHOSetBuilderT.cpp
     ExampleHeBuilder.cpp
-    ExampleHeComponent.cpp)
+    ExampleHeComponent.cpp
+    RotatedSPOsT.cpp)
 
 if(NOT QMC_COMPLEX)
-  set(WFBASE_SRCS ${WFBASE_SRCS} RotatedSPOs.cpp RotatedSPOsT.cpp)
+  set(WFBASE_SRCS ${WFBASE_SRCS} RotatedSPOs.cpp)
 endif(NOT QMC_COMPLEX)
 
 if(QMC_COMPLEX)
@@ -85,18 +86,21 @@ if(OHMMS_DIM MATCHES 3)
       LCAO/LCAOrbitalBuilderT.cpp
       LCAO/MultiQuinticSpline1D.cpp
       LCAO/AOBasisBuilder.cpp
-      LCAO/SoaLocalizedBasisSet.cpp)
+      LCAO/AOBasisBuilderT.cpp
+      LCAO/SoaLocalizedBasisSet.cpp
+      LCAO/SoaLocalizedBasisSetT.cpp
+      LCAO/LCAOSpinorBuilderT.cpp
+      LCAO/LCAOrbitalSetWithCorrectionT.cpp
+      LCAO/CuspCorrectionConstructionT.cpp
+      LCAO/SoaCuspCorrectionT.cpp)
   if(QMC_COMPLEX)
-    set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOSpinorBuilder.cpp LCAO/LCAOSpinorBuilder.cpp)
+    set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOSpinorBuilder.cpp)
   else(QMC_COMPLEX)
     #LCAO cusp correction is not ready for complex
     set(FERMION_SRCS ${FERMION_SRCS}
         LCAO/LCAOrbitalSetWithCorrection.cpp
-        LCAO/LCAOrbitalSetWithCorrectionT.cpp
 	    LCAO/CuspCorrectionConstruction.cpp
-	    LCAO/CuspCorrectionConstructionT.cpp
-        LCAO/SoaCuspCorrection.cpp
-        LCAO/SoaCuspCorrectionT.cpp)
+        LCAO/SoaCuspCorrection.cpp)
   endif(QMC_COMPLEX)
 
   if(HAVE_EINSPLINE)
diff --git a/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp
new file mode 100644
index 00000000000..022d6db4a50
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp
@@ -0,0 +1,923 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2020 QMCPACK developers.
+//
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
+// Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
+//                    Illinois at Urbana-Champaign Jaron T. Krogel,
+//                    krogeljt@ornl.gov, Oak Ridge National Laboratory Mark A.
+//                    Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore
+//                    National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "AOBasisBuilderT.h"
+
+#include "MultiFunctorAdapter.h"
+#include "MultiQuinticSpline1D.h"
+#include "Numerics/SoaCartesianTensor.h"
+#include "Numerics/SoaSphericalTensor.h"
+#include "OhmmsData/AttributeSet.h"
+#include "RadialOrbitalSetBuilder.h"
+#include "SoaAtomicBasisSetT.h"
+#include "Utilities/ProgressReportEngine.h"
+
+namespace qmcplusplus
+{
+template <typename COT>
+AOBasisBuilderT<COT>::AOBasisBuilderT(
+    const std::string& eName, Communicate* comm) :
+    MPIObjectBase(comm),
+    addsignforM(false),
+    expandlm(GAUSSIAN_EXPAND),
+    Morder("gaussian"),
+    sph("default"),
+    basisType("Numerical"),
+    elementType(eName),
+    Normalized("yes")
+{
+    // mmorales: for "Cartesian Gaussian", m is an integer that maps
+    //           the component to Gamess notation, see
+    //           Numerics/CartesianTensor.h
+    nlms_id["n"] = q_n;
+    nlms_id["l"] = q_l;
+    nlms_id["m"] = q_m;
+    nlms_id["s"] = q_s;
+}
+
+template <class COT>
+bool
+AOBasisBuilderT<COT>::put(xmlNodePtr cur)
+{
+    ReportEngine PRE("AtomicBasisBuilder", "put(xmlNodePtr)");
+    // Register valid attributes attributes
+    OhmmsAttributeSet aAttrib;
+    aAttrib.add(basisType, "type");
+    aAttrib.add(sph, "angular");
+    aAttrib.add(addsignforM, "expM");
+    aAttrib.add(Morder, "expandYlm");
+    aAttrib.add(Normalized, "normalized");
+    aAttrib.put(cur);
+    PRE.echo(cur);
+    if (sph == "spherical")
+        addsignforM = 1; // include (-1)^m
+
+    if (Morder == "gaussian")
+        expandlm = GAUSSIAN_EXPAND;
+    else if (Morder == "natural")
+        expandlm = NATURAL_EXPAND;
+    else if (Morder == "no")
+        expandlm = DONOT_EXPAND;
+    else if (Morder == "pyscf") {
+        expandlm = MOD_NATURAL_EXPAND;
+        addsignforM = 1;
+        if (sph != "spherical") {
+            myComm->barrier_and_abort(
+                " Error: expandYlm='pyscf' only compatible with "
+                "angular='spherical'. Aborting.\n");
+        }
+    }
+
+    if (sph == "cartesian" || Morder == "Gamess") {
+        expandlm = CARTESIAN_EXPAND;
+        addsignforM = 0;
+    }
+
+    if (Morder == "Dirac") {
+        expandlm = DIRAC_CARTESIAN_EXPAND;
+        addsignforM = 0;
+        if (sph != "cartesian")
+            myComm->barrier_and_abort(
+                " Error: expandYlm='Dirac' only compatible with "
+                "angular='cartesian'. Aborting\n");
+    }
+
+    // Numerical basis is a special case
+    if (basisType == "Numerical")
+        myComm->barrier_and_abort(
+            "Purely numerical atomic orbitals are not supported any longer.");
+
+    return true;
+}
+
+template <class COT>
+bool
+AOBasisBuilderT<COT>::putH5(hdf_archive& hin)
+{
+    ReportEngine PRE("AtomicBasisBuilder", "putH5(hin)");
+    std::string CenterID, basisName;
+
+    if (myComm->rank() == 0) {
+        hin.read(sph, "angular");
+        hin.read(CenterID, "elementType");
+        hin.read(Normalized, "normalized");
+        hin.read(Morder, "expandYlm");
+        hin.read(basisName, "name");
+    }
+
+    myComm->bcast(sph);
+    myComm->bcast(Morder);
+    myComm->bcast(CenterID);
+    myComm->bcast(Normalized);
+    myComm->bcast(basisName);
+    myComm->bcast(basisType);
+    myComm->bcast(addsignforM);
+
+    if (sph == "spherical")
+        addsignforM = 1; // include (-1)^m
+
+    if (Morder == "gaussian")
+        expandlm = GAUSSIAN_EXPAND;
+    else if (Morder == "natural")
+        expandlm = NATURAL_EXPAND;
+    else if (Morder == "no")
+        expandlm = DONOT_EXPAND;
+    else if (Morder == "pyscf") {
+        expandlm = MOD_NATURAL_EXPAND;
+        addsignforM = 1;
+        if (sph != "spherical") {
+            myComm->barrier_and_abort(
+                " Error: expandYlm='pyscf' only compatible with "
+                "angular='spherical'. Aborting.\n");
+        }
+    }
+
+    if (sph == "cartesian" || Morder == "Gamess") {
+        expandlm = CARTESIAN_EXPAND;
+        addsignforM = 0;
+    }
+
+    if (Morder == "Dirac") {
+        expandlm = DIRAC_CARTESIAN_EXPAND;
+        addsignforM = 0;
+        if (sph != "cartesian")
+            myComm->barrier_and_abort(
+                " Error: expandYlm='Dirac' only compatible with "
+                "angular='cartesian'. Aborting\n");
+    }
+    app_log() << R"(<input node="atomicBasisSet" name=")" << basisName
+              << "\" expandYlm=\"" << Morder << "\" angular=\"" << sph
+              << "\" elementType=\"" << CenterID << "\" normalized=\""
+              << Normalized << "\" type=\"" << basisType << "\" expM=\""
+              << addsignforM << "\" />" << std::endl;
+
+    return true;
+}
+
+template <typename COT>
+std::unique_ptr<COT>
+AOBasisBuilderT<COT>::createAOSet(xmlNodePtr cur)
+{
+    ReportEngine PRE("AtomicBasisBuilder", "createAOSet(xmlNodePtr)");
+    app_log() << "  AO BasisSet for " << elementType << "\n";
+
+    if (expandlm != CARTESIAN_EXPAND) {
+        if (addsignforM)
+            app_log() << "   Spherical Harmonics contain (-1)^m factor"
+                      << std::endl;
+        else
+            app_log() << "   Spherical Harmonics  DO NOT contain (-1)^m factor"
+                      << std::endl;
+    }
+
+    switch (expandlm) {
+    case (GAUSSIAN_EXPAND):
+        app_log() << "   Angular momentum m expanded according to Gaussian"
+                  << std::endl;
+        break;
+    case (NATURAL_EXPAND):
+        app_log() << "   Angular momentum m expanded as -l, ... ,l"
+                  << std::endl;
+        break;
+    case (MOD_NATURAL_EXPAND):
+        app_log() << "   Angular momentum m expanded as -l, ... ,l, with the "
+                     "exception of L=1 (1,-1,0)"
+                  << std::endl;
+        break;
+    case (CARTESIAN_EXPAND):
+        app_log() << "   Angular momentum expanded in cartesian functions x^lx "
+                     "y^ly z^lz according to Gamess"
+                  << std::endl;
+        break;
+    case (DIRAC_CARTESIAN_EXPAND):
+        app_log() << "   Angular momentum expanded in cartesian functions in "
+                     "DIRAC ordering"
+                  << std::endl;
+        break;
+    default:
+        app_log() << "   Angular momentum m is explicitly given." << std::endl;
+    }
+
+    QuantumNumberType nlms;
+    std::string rnl;
+    int Lmax(0); // maxmimum angular momentum of this center
+    int num(0); // the number of localized basis functions of this center
+    // process the basic property: maximun angular momentum, the number of basis
+    // functions to be added
+    std::vector<xmlNodePtr> radGroup;
+    xmlNodePtr cur1 = cur->xmlChildrenNode;
+    xmlNodePtr gptr = 0;
+    while (cur1 != NULL) {
+        std::string cname1((const char*)(cur1->name));
+        if (cname1 == "basisGroup") {
+            radGroup.push_back(cur1);
+            const int l = std::stoi(getXMLAttributeValue(cur1, "l"));
+            Lmax = std::max(Lmax, l);
+            // expect that only Rnl is given
+            if (expandlm == CARTESIAN_EXPAND ||
+                expandlm == DIRAC_CARTESIAN_EXPAND)
+                num += (l + 1) * (l + 2) / 2;
+            else if (expandlm)
+                num += 2 * l + 1;
+            else
+                num++;
+        }
+        else if (cname1 == "grid") {
+            gptr = cur1;
+        }
+        cur1 = cur1->next;
+    }
+
+    // create a new set of atomic orbitals sharing a center with (Lmax, num)
+    // if(addsignforM) the basis function has (-1)^m sqrt(2)Re(Ylm)
+    auto aos = std::make_unique<COT>(Lmax, addsignforM);
+    aos->LM.resize(num);
+    aos->NL.resize(num);
+
+    // Now, add distinct Radial Orbitals and (l,m) channels
+    RadialOrbitalSetBuilder<COT> radFuncBuilder(myComm, *aos);
+    radFuncBuilder.Normalized = (Normalized == "yes");
+    radFuncBuilder.addGrid(
+        gptr, basisType); // assign a radial grid for the new center
+    std::vector<xmlNodePtr>::iterator it(radGroup.begin());
+    std::vector<xmlNodePtr>::iterator it_end(radGroup.end());
+    std::vector<int> all_nl;
+    while (it != it_end) {
+        cur1 = (*it);
+        xmlAttrPtr att = cur1->properties;
+        while (att != NULL) {
+            std::string aname((const char*)(att->name));
+            if (aname == "rid" || aname == "id")
+            // accept id/rid
+            {
+                rnl = (const char*)(att->children->content);
+            }
+            else {
+                std::map<std::string, int>::iterator iit = nlms_id.find(aname);
+                if (iit != nlms_id.end())
+                // valid for n,l,m,s
+                {
+                    nlms[(*iit).second] =
+                        atoi((const char*)(att->children->content));
+                }
+            }
+            att = att->next;
+        }
+        // add Ylm channels
+        app_log() << "   R(n,l,m,s) " << nlms[0] << " " << nlms[1] << " "
+                  << nlms[2] << " " << nlms[3] << std::endl;
+        std::map<std::string, int>::iterator rnl_it = RnlID.find(rnl);
+        if (rnl_it == RnlID.end()) {
+            int nl = aos->RnlID.size();
+            if (radFuncBuilder.addRadialOrbital(cur1, basisType, nlms))
+                RnlID[rnl] = nl;
+            all_nl.push_back(nl);
+        }
+        else {
+            all_nl.push_back((*rnl_it).second);
+        }
+        ++it;
+    }
+
+    if (expandYlm(aos.get(), all_nl, expandlm) != num)
+        myComm->barrier_and_abort(
+            "expandYlm doesn't match the number of basis.");
+    radFuncBuilder.finalize();
+    // aos->Rmax can be set small
+    // aos->setRmax(0);
+    aos->setBasisSetSize(-1);
+    app_log() << "   Maximum Angular Momentum  = " << aos->Ylm.lmax()
+              << std::endl
+              << "   Number of Radial functors = " << aos->RnlID.size()
+              << std::endl
+              << "   Basis size                = " << aos->getBasisSetSize()
+              << "\n\n";
+    return aos;
+}
+
+template <typename COT>
+std::unique_ptr<COT>
+AOBasisBuilderT<COT>::createAOSetH5(hdf_archive& hin)
+{
+    ReportEngine PRE("AOBasisBuilderT:", "createAOSetH5(std::string)");
+    app_log() << "  AO BasisSet for " << elementType << "\n";
+
+    if (expandlm != CARTESIAN_EXPAND) {
+        if (addsignforM)
+            app_log() << "   Spherical Harmonics contain (-1)^m factor"
+                      << std::endl;
+        else
+            app_log() << "   Spherical Harmonics  DO NOT contain (-1)^m factor"
+                      << std::endl;
+    }
+
+    switch (expandlm) {
+    case (GAUSSIAN_EXPAND):
+        app_log() << "   Angular momentum m expanded according to Gaussian"
+                  << std::endl;
+        break;
+    case (NATURAL_EXPAND):
+        app_log() << "   Angular momentum m expanded as -l, ... ,l"
+                  << std::endl;
+        break;
+    case (MOD_NATURAL_EXPAND):
+        app_log() << "   Angular momentum m expanded as -l, ... ,l, with the "
+                     "exception of L=1 (1,-1,0)"
+                  << std::endl;
+        break;
+    case (CARTESIAN_EXPAND):
+        app_log() << "   Angular momentum expanded in cartesian functions x^lx "
+                     "y^ly z^lz according to Gamess"
+                  << std::endl;
+        break;
+    case (DIRAC_CARTESIAN_EXPAND):
+        app_log() << "   Angular momentum expanded in cartesian functions in "
+                     "DIRAC ordering"
+                  << std::endl;
+        break;
+    default:
+        app_log() << "   Angular momentum m is explicitly given." << std::endl;
+    }
+
+    QuantumNumberType nlms;
+    std::string rnl;
+    int Lmax(0); // maxmimum angular momentum of this center
+    int num(0); // the number of localized basis functions of this center
+
+    int numbasisgroups(0);
+    if (myComm->rank() == 0) {
+        if (!hin.readEntry(numbasisgroups, "NbBasisGroups"))
+            PRE.error(
+                "Could not read NbBasisGroups in H5; Probably Corrupt H5 file",
+                true);
+    }
+    myComm->bcast(numbasisgroups);
+
+    for (int i = 0; i < numbasisgroups; i++) {
+        std::string basisGroupID = "basisGroup" + std::to_string(i);
+        int l(0);
+        if (myComm->rank() == 0) {
+            hin.push(basisGroupID);
+            hin.read(l, "l");
+            hin.pop();
+        }
+        myComm->bcast(l);
+
+        Lmax = std::max(Lmax, l);
+        // expect that only Rnl is given
+        if (expandlm == CARTESIAN_EXPAND || expandlm == DIRAC_CARTESIAN_EXPAND)
+            num += (l + 1) * (l + 2) / 2;
+        else if (expandlm)
+            num += 2 * l + 1;
+        else
+            num++;
+    }
+
+    // create a new set of atomic orbitals sharing a center with (Lmax, num)
+    // if(addsignforM) the basis function has (-1)^m sqrt(2)Re(Ylm)
+    auto aos = std::make_unique<COT>(Lmax, addsignforM);
+    aos->LM.resize(num);
+    aos->NL.resize(num);
+
+    // Now, add distinct Radial Orbitals and (l,m) channels
+    RadialOrbitalSetBuilder<COT> radFuncBuilder(myComm, *aos);
+    radFuncBuilder.Normalized = (Normalized == "yes");
+    radFuncBuilder.addGridH5(hin); // assign a radial grid for the new center
+    std::vector<int> all_nl;
+    for (int i = 0; i < numbasisgroups; i++) {
+        std::string basisGroupID = "basisGroup" + std::to_string(i);
+        if (myComm->rank() == 0) {
+            hin.push(basisGroupID);
+            hin.read(rnl, "rid");
+            hin.read(nlms[0], "n");
+            hin.read(nlms[1], "l");
+        }
+        myComm->bcast(rnl);
+        myComm->bcast(nlms[0]);
+        myComm->bcast(nlms[1]);
+
+        // add Ylm channels
+        app_log() << "   R(n,l,m,s) " << nlms[0] << " " << nlms[1] << " "
+                  << nlms[2] << " " << nlms[3] << std::endl;
+        std::map<std::string, int>::iterator rnl_it = RnlID.find(rnl);
+        if (rnl_it == RnlID.end()) {
+            int nl = aos->RnlID.size();
+            if (radFuncBuilder.addRadialOrbitalH5(hin, basisType, nlms))
+                RnlID[rnl] = nl;
+            all_nl.push_back(nl);
+        }
+        else {
+            all_nl.push_back((*rnl_it).second);
+        }
+
+        if (myComm->rank() == 0)
+            hin.pop();
+    }
+
+    if (expandYlm(aos.get(), all_nl, expandlm) != num)
+        myComm->barrier_and_abort(
+            "expandYlm doesn't match the number of basis.");
+    radFuncBuilder.finalize();
+    // aos->Rmax can be set small
+    // aos->setRmax(0);
+    aos->setBasisSetSize(-1);
+    app_log() << "   Maximum Angular Momentum  = " << aos->Ylm.lmax()
+              << std::endl
+              << "   Number of Radial functors = " << aos->RnlID.size()
+              << std::endl
+              << "   Basis size                = " << aos->getBasisSetSize()
+              << "\n\n";
+    return aos;
+}
+
+template <typename COT>
+int
+AOBasisBuilderT<COT>::expandYlm(
+    COT* aos, std::vector<int>& all_nl, int expandlm)
+{
+    int num = 0;
+    if (expandlm == GAUSSIAN_EXPAND) {
+        app_log() << "Expanding Ylm according to Gaussian98" << std::endl;
+        for (int nl = 0; nl < aos->RnlID.size(); nl++) {
+            int l = aos->RnlID[nl][q_l];
+            app_log() << "Adding " << 2 * l + 1
+                      << " spherical orbitals for l= " << l << std::endl;
+            switch (l) {
+            case (0):
+                aos->LM[num] = aos->Ylm.index(0, 0);
+                aos->NL[num] = nl;
+                num++;
+                break;
+            case (1): // px(1),py(-1),pz(0)
+                aos->LM[num] = aos->Ylm.index(1, 1);
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = aos->Ylm.index(1, -1);
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = aos->Ylm.index(1, 0);
+                aos->NL[num] = nl;
+                num++;
+                break;
+            default: // 0,1,-1,2,-2,...,l,-l
+                aos->LM[num] = aos->Ylm.index(l, 0);
+                aos->NL[num] = nl;
+                num++;
+                for (int tm = 1; tm <= l; tm++) {
+                    aos->LM[num] = aos->Ylm.index(l, tm);
+                    aos->NL[num] = nl;
+                    num++;
+                    aos->LM[num] = aos->Ylm.index(l, -tm);
+                    aos->NL[num] = nl;
+                    num++;
+                }
+                break;
+            }
+        }
+    }
+    else if (expandlm == MOD_NATURAL_EXPAND) {
+        app_log()
+            << "Expanding Ylm as L=1 as (1,-1,0) and L>1 as -l,-l+1,...,l-1,l"
+            << std::endl;
+        for (int nl = 0; nl < aos->RnlID.size(); nl++) {
+            int l = aos->RnlID[nl][q_l];
+            app_log() << "   Adding " << 2 * l + 1 << " spherical orbitals"
+                      << std::endl;
+            if (l == 1) {
+                // px(1),py(-1),pz(0)
+                aos->LM[num] = aos->Ylm.index(1, 1);
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = aos->Ylm.index(1, -1);
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = aos->Ylm.index(1, 0);
+                aos->NL[num] = nl;
+                num++;
+            }
+            else {
+                for (int tm = -l; tm <= l; tm++, num++) {
+                    aos->LM[num] = aos->Ylm.index(l, tm);
+                    aos->NL[num] = nl;
+                }
+            }
+        }
+    }
+    else if (expandlm == NATURAL_EXPAND) {
+        app_log() << "Expanding Ylm as -l,-l+1,...,l-1,l" << std::endl;
+        for (int nl = 0; nl < aos->RnlID.size(); nl++) {
+            int l = aos->RnlID[nl][q_l];
+            app_log() << "   Adding " << 2 * l + 1 << " spherical orbitals"
+                      << std::endl;
+            for (int tm = -l; tm <= l; tm++, num++) {
+                aos->LM[num] = aos->Ylm.index(l, tm);
+                aos->NL[num] = nl;
+            }
+        }
+    }
+    else if (expandlm == CARTESIAN_EXPAND) {
+        app_log() << "Expanding Ylm (angular function) according to Gamess "
+                     "using cartesian gaussians"
+                  << std::endl;
+        for (int nl = 0; nl < aos->RnlID.size(); nl++) {
+            int l = aos->RnlID[nl][q_l];
+            app_log() << "Adding " << (l + 1) * (l + 2) / 2
+                      << " cartesian gaussian orbitals for l= " << l
+                      << std::endl;
+            int nbefore = 0;
+            for (int i = 0; i < l; i++)
+                nbefore += (i + 1) * (i + 2) / 2;
+            for (int i = 0; i < (l + 1) * (l + 2) / 2; i++) {
+                aos->LM[num] = nbefore + i;
+                aos->NL[num] = nl;
+                num++;
+            }
+        }
+    }
+    else if (expandlm == DIRAC_CARTESIAN_EXPAND) {
+        app_log() << "Expanding Ylm (angular function) according to DIRAC "
+                     "using cartesian gaussians"
+                  << std::endl;
+        for (int nl = 0; nl < aos->RnlID.size(); nl++) {
+            int l = aos->RnlID[nl][q_l];
+            app_log() << "Adding " << (l + 1) * (l + 2) / 2
+                      << " cartesian gaussian orbitals for l= " << l
+                      << std::endl;
+            int nbefore = 0;
+            for (int i = 0; i < l; i++)
+                nbefore += (i + 1) * (i + 2) / 2;
+            switch (l) {
+            case (0):
+                aos->LM[num] = nbefore + 0;
+                aos->NL[num] = nl;
+                num++;
+                break;
+            case (1):
+                aos->LM[num] = nbefore + 0;
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 1;
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 2;
+                aos->NL[num] = nl;
+                num++;
+                break;
+            case (2):
+                aos->LM[num] = nbefore + 0; // xx
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 3; // xy
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 4; // xz
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 1; // yy
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 5; // yz
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 2; // zz
+                aos->NL[num] = nl;
+                num++;
+                break;
+            case (3):
+                aos->LM[num] = nbefore + 0; // xxx
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 3; // xxy
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 4; // xxz
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 5; // xyy
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 9; // xyz
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 7; // xzz
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 1; // yyy
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 6; // yyz
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 8; // yzz
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 2; // zzz
+                aos->NL[num] = nl;
+                num++;
+                break;
+            case (4):
+                aos->LM[num] = nbefore + 0; // 400
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 3; // 310
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 4; // 301
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 9; // 220
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 12; // 211
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 10; // 202
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 5; // 130
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 13; // 121
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 14; // 112
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 7; // 103
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 1; // 040
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 6; // 031
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 11; // 022
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 8; // 013
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 2; // 004
+                aos->NL[num] = nl;
+                num++;
+                break;
+            case (5):
+                aos->LM[num] = nbefore + 0; // 500
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 3; // 410
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 4; // 401
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 9; // 320
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 15; // 311
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 10; // 302
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 11; // 230
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 18; // 221
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 19; // 212
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 13; // 203
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 5; // 140
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 16; // 131
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 20; // 122
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 17; // 113
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 7; // 104
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 1; // 050
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 6; // 041
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 12; // 032
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 14; // 023
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 8; // 014
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 2; // 005
+                aos->NL[num] = nl;
+                num++;
+                break;
+            case (6):
+                aos->LM[num] = nbefore + 0; // 600
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 3; // 510
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 4; // 501
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 9; // 420
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 15; // 411
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 10; // 402
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 18; // 330
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 21; // 321
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 22; // 312
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 19; // 303
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 11; // 240
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 23; // 231
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 27; // 222
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 25; // 213
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 13; // 204
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 5; // 150
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 16; // 141
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 24; // 132
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 26; // 123
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 17; // 114
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 7; // 105
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 1; // 060
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 6; // 051
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 12; // 042
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 20; // 033
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 14; // 024
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 8; // 015
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 2; // 006
+                aos->NL[num] = nl;
+                num++;
+                break;
+            default:
+                myComm->barrier_and_abort(
+                    "Cartesian Tensor only defined up to Lmax=6. Aborting\n");
+                break;
+            }
+        }
+    }
+    else {
+        for (int ind = 0; ind < all_nl.size(); ind++) {
+            int nl = all_nl[ind];
+            int l = aos->RnlID[nl][q_l];
+            int m = aos->RnlID[nl][q_m];
+            // assign the index for real Spherical Harmonic with (l,m)
+            aos->LM[num] = aos->Ylm.index(l, m);
+            // assign the index for radial orbital with (n,l)
+            aos->NL[num] = nl;
+            // increment number of basis functions
+            num++;
+        }
+    }
+    return num;
+}
+
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<double>,
+    SoaCartesianTensor<double>, double>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<double>,
+    SoaCartesianTensor<double>, std::complex<double>>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<float>,
+    SoaCartesianTensor<float>, float>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<float>,
+    SoaCartesianTensor<float>, std::complex<float>>>;
+
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<double>,
+    SoaSphericalTensor<double>, double>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<double>,
+    SoaSphericalTensor<double>, std::complex<double>>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<float>,
+    SoaSphericalTensor<float>, float>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<float>,
+    SoaSphericalTensor<float>, std::complex<float>>>;
+
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaCartesianTensor<double>, double>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaCartesianTensor<double>, std::complex<double>>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaCartesianTensor<float>, float>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaCartesianTensor<float>, std::complex<float>>>;
+
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaSphericalTensor<double>, double>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaSphericalTensor<double>, std::complex<double>>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaSphericalTensor<float>, float>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaSphericalTensor<float>, std::complex<float>>>;
+
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaCartesianTensor<double>, double>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaCartesianTensor<double>, std::complex<double>>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<
+    MultiFunctorAdapter<SlaterCombo<float>>, SoaCartesianTensor<float>, float>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
+        SoaCartesianTensor<float>, std::complex<float>>>;
+
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaSphericalTensor<double>, double>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaSphericalTensor<double>, std::complex<double>>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<
+    MultiFunctorAdapter<SlaterCombo<float>>, SoaSphericalTensor<float>, float>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
+        SoaSphericalTensor<float>, std::complex<float>>>;
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.h b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.h
new file mode 100644
index 00000000000..144b2b4dc9a
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.h
@@ -0,0 +1,75 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2020 QMCPACK developers.
+//
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef QMCPLUSPLUS_ATOMICORBITALBUILDERT_H
+#define QMCPLUSPLUS_ATOMICORBITALBUILDERT_H
+
+
+#include "Message/MPIObjectBase.h"
+#include "hdf/hdf_archive.h"
+#include "QMCWaveFunctions/SPOSet.h"
+
+namespace qmcplusplus
+{
+/** atomic basisset builder
+   * @tparam COT, CenteredOrbitalType = SoaAtomicBasisSet<RF,SH>
+   *
+   * Reimplement AtomiSPOSetBuilder.h
+   */
+template<typename COT>
+class AOBasisBuilderT : public MPIObjectBase
+{
+public:
+  enum
+  {
+    DONOT_EXPAND    = 0,
+    GAUSSIAN_EXPAND = 1,
+    NATURAL_EXPAND,
+    CARTESIAN_EXPAND,
+    MOD_NATURAL_EXPAND,
+    DIRAC_CARTESIAN_EXPAND
+  };
+
+private:
+  bool addsignforM;
+  int expandlm;
+  std::string Morder;
+  std::string sph;
+  std::string basisType;
+  std::string elementType;
+  std::string Normalized;
+
+  ///map for the radial orbitals
+  std::map<std::string, int> RnlID;
+
+  ///map for (n,l,m,s) to its quantum number index
+  std::map<std::string, int> nlms_id;
+
+public:
+  AOBasisBuilderT(const std::string& eName, Communicate* comm);
+
+  bool put(xmlNodePtr cur);
+  bool putH5(hdf_archive& hin);
+
+  SPOSet* createSPOSetFromXML(xmlNodePtr cur) { return 0; }
+
+  std::unique_ptr<COT> createAOSet(xmlNodePtr cur);
+  std::unique_ptr<COT> createAOSetH5(hdf_archive& hin);
+
+  int expandYlm(COT* aos, std::vector<int>& all_nl, int expandlm = DONOT_EXPAND);
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp
index 4e5a3fd2b0f..e35acdad775 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp
+++ b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp
@@ -1,205 +1,208 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2020 QMCPACK developers.
 //
-// File developed by: Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories
+// File developed by: Cody A. Melton, cmelton@sandia.gov, Sandia National
+// Laboratories
 //
-// File created by: Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories
+// File created by: Cody A. Melton, cmelton@sandia.gov, Sandia National
+// Laboratories
 //////////////////////////////////////////////////////////////////////////////////////
 
-#include "LCAOSpinorBuilder.h"
-#include "QMCWaveFunctions/SpinorSet.h"
+#include "LCAOSpinorBuilderT.h"
+
+#include "Message/CommOperators.h"
 #include "OhmmsData/AttributeSet.h"
+#include "QMCWaveFunctions/SpinorSetT.h"
 #include "Utilities/ProgressReportEngine.h"
 #include "hdf/hdf_archive.h"
-#include "Message/CommOperators.h"
 
 namespace qmcplusplus
 {
-template<class T>
-LCAOSpinorBuilderT<T>::LCAOSpinorBuilderT(ParticleSet& els, ParticleSet& ions, Communicate* comm, xmlNodePtr cur)
-    : LCAOrbitalBuilder(els, ions, comm, cur)
+template <class T>
+LCAOSpinorBuilderT<T>::LCAOSpinorBuilderT(
+    ParticleSet& els, ParticleSet& ions, Communicate* comm, xmlNodePtr cur) :
+    LCAOrbitalBuilderT<T>(els, ions, comm, cur)
 {
-  ClassName = "LCAOSpinorBuilder";
+    this->ClassName = "LCAOSpinorBuilder";
 
-  if (h5_path == "")
-    myComm->barrier_and_abort("LCAOSpinorBuilder only works with href");
+    if (this->h5_path == "")
+        this->myComm->barrier_and_abort(
+            "LCAOSpinorBuilder only works with href");
 }
 
-template<class T>
-std::unique_ptr<SPOSetT<T>> LCAOSpinorBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
+template <class T>
+std::unique_ptr<SPOSetT<T>>
+LCAOSpinorBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
 {
-  ReportEngine PRE(ClassName, "createSPO(xmlNodePtr)");
-  std::string spo_name(""), optimize("no");
-  std::string basisset_name("LCAOBSet");
-  OhmmsAttributeSet spoAttrib;
-  spoAttrib.add(spo_name, "name");
-  spoAttrib.add(optimize, "optimize");
-  spoAttrib.add(basisset_name, "basisset");
-  spoAttrib.put(cur);
-
-  BasisSet_t* myBasisSet = nullptr;
-  if (basisset_map_.find(basisset_name) == basisset_map_.end())
-    myComm->barrier_and_abort("basisset \"" + basisset_name + "\" cannot be found\n");
-  else
-    myBasisSet = basisset_map_[basisset_name].get();
-
-  if (optimize == "yes")
-    app_log() << "  SPOSet " << spo_name << " is optimizable\n";
-
-  std::unique_ptr<LCAOrbitalSet> upspo =
-      std::make_unique<LCAOrbitalSet>(spo_name + "_up", std::unique_ptr<BasisSet_t>(myBasisSet->makeClone()));
-  std::unique_ptr<LCAOrbitalSet> dnspo =
-      std::make_unique<LCAOrbitalSet>(spo_name + "_dn", std::unique_ptr<BasisSet_t>(myBasisSet->makeClone()));
-
-  loadMO(*upspo, *dnspo, cur);
-
-  //create spinor and register up/dn
-  auto spinor_set = std::make_unique<SpinorSet>(spo_name);
-  spinor_set->set_spos(std::move(upspo), std::move(dnspo));
-  return spinor_set;
+    ReportEngine PRE(this->ClassName, "createSPO(xmlNodePtr)");
+    std::string spo_name(""), optimize("no");
+    std::string basisset_name("LCAOBSet");
+    OhmmsAttributeSet spoAttrib;
+    spoAttrib.add(spo_name, "name");
+    spoAttrib.add(optimize, "optimize");
+    spoAttrib.add(basisset_name, "basisset");
+    spoAttrib.put(cur);
+
+    BasisSet_t* myBasisSet = nullptr;
+    if (this->basisset_map_.find(basisset_name) == this->basisset_map_.end())
+        this->myComm->barrier_and_abort(
+            "basisset \"" + basisset_name + "\" cannot be found\n");
+    else
+        myBasisSet = this->basisset_map_[basisset_name].get();
+
+    if (optimize == "yes")
+        app_log() << "  SPOSet " << spo_name << " is optimizable\n";
+
+    auto upspo = std::make_unique<LCAOrbitalSetT<T>>(
+        spo_name + "_up", std::unique_ptr<BasisSet_t>(myBasisSet->makeClone()));
+    auto dnspo = std::make_unique<LCAOrbitalSetT<T>>(
+        spo_name + "_dn", std::unique_ptr<BasisSet_t>(myBasisSet->makeClone()));
+
+    loadMO(*upspo, *dnspo, cur);
+
+    // create spinor and register up/dn
+    auto spinor_set = std::make_unique<SpinorSetT<T>>(spo_name);
+    spinor_set->set_spos(std::move(upspo), std::move(dnspo));
+    return spinor_set;
 }
 
-template<class T>
-bool LCAOSpinorBuilderT<T>::loadMO(LCAOrbitalSetT<T>& up, LCAOrbitalSetT<T>& dn, xmlNodePtr cur)
+template <class T>
+bool
+LCAOSpinorBuilderT<T>::loadMO(
+    LCAOrbitalSetT<T>& up, LCAOrbitalSetT<T>& dn, xmlNodePtr cur)
 {
-  bool PBC = false;
-  int norb = up.getBasisSetSize();
-  std::string debugc("no");
-  OhmmsAttributeSet aAttrib;
-  aAttrib.add(norb, "size");
-  aAttrib.add(debugc, "debug");
-  aAttrib.put(cur);
-
-  up.setOrbitalSetSize(norb);
-  dn.setOrbitalSetSize(norb);
-
-  xmlNodePtr occ_ptr = nullptr;
-  cur                = cur->xmlChildrenNode;
-  while (cur != nullptr)
-  {
-    std::string cname((const char*)(cur->name));
-    if (cname == "occupation")
-    {
-      occ_ptr = cur;
+    bool PBC = false;
+    int norb = up.getBasisSetSize();
+    std::string debugc("no");
+    OhmmsAttributeSet aAttrib;
+    aAttrib.add(norb, "size");
+    aAttrib.add(debugc, "debug");
+    aAttrib.put(cur);
+
+    up.setOrbitalSetSize(norb);
+    dn.setOrbitalSetSize(norb);
+
+    xmlNodePtr occ_ptr = nullptr;
+    cur = cur->xmlChildrenNode;
+    while (cur != nullptr) {
+        std::string cname((const char*)(cur->name));
+        if (cname == "occupation") {
+            occ_ptr = cur;
+        }
+        cur = cur->next;
     }
-    cur = cur->next;
-  }
-
-  hdf_archive hin(myComm);
-  if (myComm->rank() == 0)
-  {
-    if (!hin.open(h5_path, H5F_ACC_RDONLY))
-      myComm->barrier_and_abort("LCAOSpinorBuilder::loadMO missing or incorrect path to H5 file.");
-    hin.push("PBC");
-    PBC = false;
-    hin.read(PBC, "PBC");
-    hin.close();
-  }
-  myComm->bcast(PBC);
-  if (PBC)
-    myComm->barrier_and_abort("LCAOSpinorBuilder::loadMO lcao spinors not implemented in PBC");
-
-  bool success = putFromH5(up, dn, occ_ptr);
-
-
-  if (debugc == "yes")
-  {
-    app_log() << "UP:  Single-particle orbital coefficients dims=" << up.C->rows() << " x " << up.C->cols()
-              << std::endl;
-    app_log() << *up.C << std::endl;
-    app_log() << "DN:  Single-particle orbital coefficients dims=" << dn.C->rows() << " x " << dn.C->cols()
-              << std::endl;
-    app_log() << *dn.C << std::endl;
-  }
-  return success;
-}
 
-template<class T>
-bool LCAOSpinorBuilderT<T>::putFromH5(LCAOrbitalSetT<T>& up, LCAOrbitalSetT<T>& dn, xmlNodePtr occ_ptr)
-{
-  if (up.getBasisSetSize() == 0 || dn.getBasisSetSize() == 0)
-  {
-    myComm->barrier_and_abort("LCASpinorBuilder::loadMO  detected ZERO BasisSetSize");
-    return false;
-  }
-
-  bool success = true;
-  hdf_archive hin(myComm);
-  if (myComm->rank() == 0)
-  {
-    istd::string setname = "/Super_Twist/eigenset_0";
-    readRealMatrixFromH5(hin, setname, upReal);
-    setname += "_imag";
-    readRealMatrixFromH5(hin, setname, upImag);
-
-    af(!hin.open(h5_path, H5F_ACC_RDONLY))
-        myComm->barrier_and_abort("LCAOSpinorBuilder::putFromH5 missing or incorrect path to H5 file");
-
-    Matrix<RealType> upReal;
-    Matrix<RealType> upImag;
-    ssert(upReal.rows() == upImag.rows());
-    assert(upReal.cols() == upImag.cols());
-
-    Matrix<ValueType> upTemp(upReal.rows(), upReal.cols());
-    for (int i = 0; i < upTemp.rows(); i++)
-    {
-      for (int j = 0; j < upTemp.cols(); j++)
-      {
-        upTemp[i][j] = ValueType(upReal[i][j], upImag[i][j]);
-      }
+    hdf_archive hin(this->myComm);
+    if (this->myComm->rank() == 0) {
+        if (!hin.open(this->h5_path, H5F_ACC_RDONLY))
+            this->myComm->barrier_and_abort("LCAOSpinorBuilder::loadMO missing "
+                                            "or incorrect path to H5 file.");
+        hin.push("PBC");
+        PBC = false;
+        hin.read(PBC, "PBC");
+        hin.close();
     }
-
-    Matrix<RealType> dnReal;
-    Matrix<RealType> dnImag;
-    setname = "/Super_Twist/eigenset_1";
-    readRealMatrixFromH5(hin, setname, dnReal);
-    setname += "_imag";
-    readRealMatrixFromH5(hin, setname, dnImag);
-
-    assert(dnReal.rows() == dnImag.rows());
-    assert(dnReal.cols() == dnImag.cols());
-
-    Matrix<ValueType> dnTemp(dnReal.rows(), dnReal.cols());
-    for (int i = 0; i < dnTemp.rows(); i++)
-    {
-      for (int j = 0; j < dnTemp.cols(); j++)
-      {
-        dnTemp[i][j] = ValueType(dnReal[i][j], dnImag[i][j]);
-      }
+    this->myComm->bcast(PBC);
+    if (PBC)
+        this->myComm->barrier_and_abort(
+            "LCAOSpinorBuilder::loadMO lcao spinors not implemented in PBC");
+
+    bool success = putFromH5(up, dn, occ_ptr);
+
+    if (debugc == "yes") {
+        app_log() << "UP:  Single-particle orbital coefficients dims="
+                  << up.C->rows() << " x " << up.C->cols() << std::endl;
+        app_log() << *up.C << std::endl;
+        app_log() << "DN:  Single-particle orbital coefficients dims="
+                  << dn.C->rows() << " x " << dn.C->cols() << std::endl;
+        app_log() << *dn.C << std::endl;
     }
+    return success;
+}
 
-    assert(upReal.rows() == dnReal.rows());
-    assert(upReal.cols() == dnReal.cols());
-
-    Occ.resize(upReal.rows());
-    success = putOccupation(up, occ_ptr);
-
-    int norbs = up.getOrbitalSetSize();
-
-    int n = 0, i = 0;
-    while (i < norbs)
-    {
-      if (Occ[n] > 0.0)
-      {
-        std::copy(upTemp[n], upTemp[n + 1], (*up.C)[i]);
-        std::copy(dnTemp[n], dnTemp[n + 1], (*dn.C)[i]);
-        i++;
-      }
-      n++;
+template <class T>
+bool
+LCAOSpinorBuilderT<T>::putFromH5(
+    LCAOrbitalSetT<T>& up, LCAOrbitalSetT<T>& dn, xmlNodePtr occ_ptr)
+{
+    if (up.getBasisSetSize() == 0 || dn.getBasisSetSize() == 0) {
+        this->myComm->barrier_and_abort(
+            "LCASpinorBuilder::loadMO  detected ZERO BasisSetSize");
+        return false;
     }
 
-    hin.close();
-  }
+    bool success = true;
+    hdf_archive hin(this->myComm);
+    if (this->myComm->rank() == 0) {
+        Matrix<RealType> upReal;
+        Matrix<RealType> upImag;
+        std::string setname = "/Super_Twist/eigenset_0";
+        this->readRealMatrixFromH5(hin, setname, upReal);
+        setname += "_imag";
+        this->readRealMatrixFromH5(hin, setname, upImag);
+
+        if (!hin.open(this->h5_path, H5F_ACC_RDONLY))
+            this->myComm->barrier_and_abort(
+                "LCAOSpinorBuilder::putFromH5 missing or "
+                "incorrect path to H5 file");
+
+        assert(upReal.rows() == upImag.rows());
+        assert(upReal.cols() == upImag.cols());
+
+        Matrix<ValueType> upTemp(upReal.rows(), upReal.cols());
+        for (int i = 0; i < upTemp.rows(); i++) {
+            for (int j = 0; j < upTemp.cols(); j++) {
+                upTemp[i][j] = ValueType(upReal[i][j], upImag[i][j]);
+            }
+        }
+
+        Matrix<RealType> dnReal;
+        Matrix<RealType> dnImag;
+        setname = "/Super_Twist/eigenset_1";
+        this->readRealMatrixFromH5(hin, setname, dnReal);
+        setname += "_imag";
+        this->readRealMatrixFromH5(hin, setname, dnImag);
+
+        assert(dnReal.rows() == dnImag.rows());
+        assert(dnReal.cols() == dnImag.cols());
+
+        Matrix<ValueType> dnTemp(dnReal.rows(), dnReal.cols());
+        for (int i = 0; i < dnTemp.rows(); i++) {
+            for (int j = 0; j < dnTemp.cols(); j++) {
+                dnTemp[i][j] = ValueType(dnReal[i][j], dnImag[i][j]);
+            }
+        }
+
+        assert(upReal.rows() == dnReal.rows());
+        assert(upReal.cols() == dnReal.cols());
+
+        this->Occ.resize(upReal.rows());
+        success = this->putOccupation(up, occ_ptr);
+
+        int norbs = up.getOrbitalSetSize();
+
+        int n = 0, i = 0;
+        while (i < norbs) {
+            if (this->Occ[n] > 0.0) {
+                std::copy(upTemp[n], upTemp[n + 1], (*up.C)[i]);
+                std::copy(dnTemp[n], dnTemp[n + 1], (*dn.C)[i]);
+                i++;
+            }
+            n++;
+        }
+
+        hin.close();
+    }
 
 #ifdef HAVE_MPI
-  myComm->comm.broadcast_n(up.C->data(), up.C->size());
-  myComm->comm.broadcast_n(dn.C->data(), dn.C->size());
+    this->myComm->comm.broadcast_n(up.C->data(), up.C->size());
+    this->myComm->comm.broadcast_n(dn.C->data(), dn.C->size());
 #endif
 
-  return success;
+    return success;
 }
 
 template class LCAOSpinorBuilderT<std::complex<double>>;
diff --git a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h
index 62b40b43b11..6bf902c3166 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h
+++ b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h
@@ -10,8 +10,8 @@
 //////////////////////////////////////////////////////////////////////////////////////
 
 
-#ifndef QMCPLUSPLUS_SOA_LCAO_SPINOR_BUILDER_H
-#define QMCPLUSPLUS_SOA_LCAO_SPINOR_BUILDER_H
+#ifndef QMCPLUSPLUS_SOA_LCAO_SPINOR_BUILDERT_H
+#define QMCPLUSPLUS_SOA_LCAO_SPINOR_BUILDERT_H
 
 #include "QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h"
 
@@ -27,6 +27,10 @@ template<class T>
 class LCAOSpinorBuilderT : public LCAOrbitalBuilderT<T>
 {
 public:
+    using BasisSet_t = typename LCAOrbitalBuilderT<T>::BasisSet_t;
+    using RealType = typename LCAOrbitalBuilderT<T>::RealType;
+    using ValueType = typename LCAOrbitalBuilderT<T>::ValueType;
+
   /** constructor
      * \param els reference to the electrons
      * \param ions reference to the ions
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp
index 4e1e4f6bd1b..f35d56efd16 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp
@@ -19,22 +19,20 @@
 
 #include "LCAOrbitalBuilderT.h"
 
-#include "AOBasisBuilder.h"
+#include "AOBasisBuilderT.h"
+#include "CPU/math.hpp"
+#include "CuspCorrectionConstructionT.h"
 #include "LCAOrbitalSetT.h"
+#include "LCAOrbitalSetWithCorrectionT.h"
+#include "Message/CommOperators.h"
 #include "MultiFunctorAdapter.h"
 #include "MultiQuinticSpline1D.h"
 #include "Numerics/SoaCartesianTensor.h"
 #include "Numerics/SoaSphericalTensor.h"
 #include "OhmmsData/AttributeSet.h"
 #include "QMCWaveFunctions/SPOSetT.h"
-#include "SoaAtomicBasisSet.h"
-#include "SoaLocalizedBasisSet.h"
-#if !defined(QMC_COMPLEX)
-#include "CuspCorrectionConstructionT.h"
-#include "LCAOrbitalSetWithCorrectionT.h"
-#endif
-#include "CPU/math.hpp"
-#include "Message/CommOperators.h"
+#include "SoaAtomicBasisSetT.h"
+#include "SoaLocalizedBasisSetT.h"
 #include "Utilities/ProgressReportEngine.h"
 #include "hdf/hdf_archive.h"
 
@@ -61,8 +59,8 @@ struct ao_traits<T, ORBT, 0, 0>
 {
     using radial_type = MultiQuinticSpline1D<T>;
     using angular_type = SoaCartesianTensor<T>;
-    using ao_type = SoaAtomicBasisSet<radial_type, angular_type>;
-    using basis_type = SoaLocalizedBasisSet<ao_type, ORBT>;
+    using ao_type = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
+    using basis_type = SoaLocalizedBasisSetT<ao_type, ORBT>;
 };
 
 /** specialization for numerical-spherical AO */
@@ -71,8 +69,8 @@ struct ao_traits<T, ORBT, 0, 1>
 {
     using radial_type = MultiQuinticSpline1D<T>;
     using angular_type = SoaSphericalTensor<T>;
-    using ao_type = SoaAtomicBasisSet<radial_type, angular_type>;
-    using basis_type = SoaLocalizedBasisSet<ao_type, ORBT>;
+    using ao_type = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
+    using basis_type = SoaLocalizedBasisSetT<ao_type, ORBT>;
 };
 
 /** specialization for GTO-cartesian AO */
@@ -81,8 +79,8 @@ struct ao_traits<T, ORBT, 1, 0>
 {
     using radial_type = MultiFunctorAdapter<GaussianCombo<T>>;
     using angular_type = SoaCartesianTensor<T>;
-    using ao_type = SoaAtomicBasisSet<radial_type, angular_type>;
-    using basis_type = SoaLocalizedBasisSet<ao_type, ORBT>;
+    using ao_type = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
+    using basis_type = SoaLocalizedBasisSetT<ao_type, ORBT>;
 };
 
 /** specialization for GTO-cartesian AO */
@@ -91,8 +89,8 @@ struct ao_traits<T, ORBT, 1, 1>
 {
     using radial_type = MultiFunctorAdapter<GaussianCombo<T>>;
     using angular_type = SoaSphericalTensor<T>;
-    using ao_type = SoaAtomicBasisSet<radial_type, angular_type>;
-    using basis_type = SoaLocalizedBasisSet<ao_type, ORBT>;
+    using ao_type = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
+    using basis_type = SoaLocalizedBasisSetT<ao_type, ORBT>;
 };
 
 /** specialization for STO-spherical AO */
@@ -101,8 +99,8 @@ struct ao_traits<T, ORBT, 2, 1>
 {
     using radial_type = MultiFunctorAdapter<SlaterCombo<T>>;
     using angular_type = SoaSphericalTensor<T>;
-    using ao_type = SoaAtomicBasisSet<radial_type, angular_type>;
-    using basis_type = SoaLocalizedBasisSet<ao_type, ORBT>;
+    using ao_type = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
+    using basis_type = SoaLocalizedBasisSetT<ao_type, ORBT>;
 };
 
 inline bool
@@ -243,7 +241,7 @@ LCAOrbitalBuilderT<T>::loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent)
     /** process atomicBasisSet per ion species */
     switch (radialOrbType) {
     case (0): // numerical
-        app_log() << "  LCAO: SoaAtomicBasisSet<MultiQuintic," << ylm << ">"
+        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiQuintic," << ylm << ">"
                   << std::endl;
         if (ylm)
             myBasisSet = createBasisSet<0, 1>(cur);
@@ -251,7 +249,7 @@ LCAOrbitalBuilderT<T>::loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent)
             myBasisSet = createBasisSet<0, 0>(cur);
         break;
     case (1): // gto
-        app_log() << "  LCAO: SoaAtomicBasisSet<MultiGTO," << ylm << ">"
+        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiGTO," << ylm << ">"
                   << std::endl;
         if (ylm)
             myBasisSet = createBasisSet<1, 1>(cur);
@@ -259,12 +257,12 @@ LCAOrbitalBuilderT<T>::loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent)
             myBasisSet = createBasisSet<1, 0>(cur);
         break;
     case (2): // sto
-        app_log() << "  LCAO: SoaAtomicBasisSet<MultiSTO," << ylm << ">"
+        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiSTO," << ylm << ">"
                   << std::endl;
         myBasisSet = createBasisSet<2, 1>(cur);
         break;
     default:
-        PRE.error("Cannot construct SoaAtomicBasisSet<ROT,YLM>.", true);
+        PRE.error("Cannot construct SoaAtomicBasisSetT<ROT,YLM>.", true);
         break;
     }
 
@@ -312,7 +310,7 @@ LCAOrbitalBuilderT<T>::loadBasisSetFromH5(xmlNodePtr parent)
     /** process atomicBasisSet per ion species */
     switch (radialOrbType) {
     case (0): // numerical
-        app_log() << "  LCAO: SoaAtomicBasisSet<MultiQuintic," << ylm << ">"
+        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiQuintic," << ylm << ">"
                   << std::endl;
         if (ylm)
             myBasisSet = createBasisSetH5<0, 1>();
@@ -320,7 +318,7 @@ LCAOrbitalBuilderT<T>::loadBasisSetFromH5(xmlNodePtr parent)
             myBasisSet = createBasisSetH5<0, 0>();
         break;
     case (1): // gto
-        app_log() << "  LCAO: SoaAtomicBasisSet<MultiGTO," << ylm << ">"
+        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiGTO," << ylm << ">"
                   << std::endl;
         if (ylm)
             myBasisSet = createBasisSetH5<1, 1>();
@@ -328,12 +326,12 @@ LCAOrbitalBuilderT<T>::loadBasisSetFromH5(xmlNodePtr parent)
             myBasisSet = createBasisSetH5<1, 0>();
         break;
     case (2): // sto
-        app_log() << "  LCAO: SoaAtomicBasisSet<MultiSTO," << ylm << ">"
+        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiSTO," << ylm << ">"
                   << std::endl;
         myBasisSet = createBasisSetH5<2, 1>();
         break;
     default:
-        PRE.error("Cannot construct SoaAtomicBasisSet<ROT,YLM>.", true);
+        PRE.error("Cannot construct SoaAtomicBasisSetT<ROT,YLM>.", true);
         break;
     }
     return std::unique_ptr<BasisSet_t>(myBasisSet);
@@ -374,7 +372,7 @@ LCAOrbitalBuilderT<T>::createBasisSet(xmlNodePtr cur)
             auto it = std::find(
                 ao_built_centers.begin(), ao_built_centers.end(), elementType);
             if (it == ao_built_centers.end()) {
-                AOBasisBuilder<ao_type> any(elementType, this->myComm);
+                AOBasisBuilderT<ao_type> any(elementType, this->myComm);
                 any.put(cur);
                 auto aoBasis = any.createAOSet(cur);
                 if (aoBasis) {
@@ -453,7 +451,7 @@ LCAOrbitalBuilderT<T>::createBasisSetH5()
         auto it = std::find(
             ao_built_centers.begin(), ao_built_centers.end(), elementType);
         if (it == ao_built_centers.end()) {
-            AOBasisBuilder<ao_type> any(elementType, this->myComm);
+            AOBasisBuilderT<ao_type> any(elementType, this->myComm);
             any.putH5(hin);
             auto aoBasis = any.createAOSetH5(hin);
             if (aoBasis) {
@@ -478,6 +476,174 @@ LCAOrbitalBuilderT<T>::createBasisSetH5()
     return mBasisSet;
 }
 
+template <>
+std::unique_ptr<SPOSetT<double>>
+LCAOrbitalBuilderT<double>::createWithCuspCorrection(xmlNodePtr cur,
+    const std::string& spo_name, std::string cusp_file,
+    std::unique_ptr<BasisSet_t>&& myBasisSet)
+{
+    app_summary() << "        Using cusp correction." << std::endl;
+    std::unique_ptr<SPOSetT<double>> sposet;
+    {
+        auto lcwc = std::make_unique<LCAOrbitalSetWithCorrectionT<double>>(
+            spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet));
+        loadMO(lcwc->lcao, cur);
+        lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize());
+        sposet = std::move(lcwc);
+    }
+
+    // Create a temporary particle set to use for cusp initialization.
+    // The particle coordinates left at the end are unsuitable for further
+    // computations. The coordinates get set to nuclear positions, which
+    // leads to zero e-N distance, which causes a NaN in SoaAtomicBasisSet.h
+    // This problem only appears when the electron positions are specified
+    // in the input. The random particle placement step executes after this
+    // part of the code, overwriting the leftover positions from the cusp
+    // initialization.
+    ParticleSet tmp_targetPtcl(targetPtcl);
+
+    const int num_centers = sourcePtcl.getTotalNum();
+    auto& lcwc = dynamic_cast<LCAOrbitalSetWithCorrectionT<double>&>(*sposet);
+
+    const int orbital_set_size = lcwc.getOrbitalSetSize();
+    Matrix<CuspCorrectionParametersT<double>> info(
+        num_centers, orbital_set_size);
+
+    // set a default file name if not given
+    if (cusp_file.empty())
+        cusp_file = spo_name + ".cuspInfo.xml";
+
+    bool file_exists(
+        this->myComm->rank() == 0 && std::ifstream(cusp_file).good());
+    this->myComm->bcast(file_exists);
+    app_log() << "  Cusp correction file " << cusp_file
+              << (file_exists ? " exits." : " doesn't exist.") << std::endl;
+
+    // validate file if it exists
+    if (file_exists) {
+        bool valid = 0;
+        if (this->myComm->rank() == 0)
+            valid = CuspCorrectionConstructionT<double>::readCuspInfo(
+                cusp_file, spo_name, orbital_set_size, info);
+        this->myComm->bcast(valid);
+        if (!valid)
+            this->myComm->barrier_and_abort(
+                "Invalid cusp correction file " + cusp_file);
+#ifdef HAVE_MPI
+        for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++)
+            for (int center_idx = 0; center_idx < num_centers; center_idx++)
+                CuspCorrectionConstructionT<double>::broadcastCuspInfo(
+                    info(center_idx, orb_idx), *this->myComm, 0);
+#endif
+    }
+    else {
+        CuspCorrectionConstructionT<double>::generateCuspInfo(info,
+            tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name, *this->myComm);
+        if (this->myComm->rank() == 0)
+            CuspCorrectionConstructionT<double>::saveCusp(
+                cusp_file, info, spo_name);
+    }
+
+    CuspCorrectionConstructionT<double>::applyCuspCorrection(
+        info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp, spo_name);
+
+    return sposet;
+}
+
+template <>
+std::unique_ptr<SPOSetT<float>>
+LCAOrbitalBuilderT<float>::createWithCuspCorrection(xmlNodePtr cur,
+    const std::string& spo_name, std::string cusp_file,
+    std::unique_ptr<BasisSet_t>&& myBasisSet)
+{
+    app_summary() << "        Using cusp correction." << std::endl;
+    std::unique_ptr<SPOSetT<float>> sposet;
+    {
+        auto lcwc = std::make_unique<LCAOrbitalSetWithCorrectionT<float>>(
+            spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet));
+        loadMO(lcwc->lcao, cur);
+        lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize());
+        sposet = std::move(lcwc);
+    }
+
+    // Create a temporary particle set to use for cusp initialization.
+    // The particle coordinates left at the end are unsuitable for further
+    // computations. The coordinates get set to nuclear positions, which
+    // leads to zero e-N distance, which causes a NaN in SoaAtomicBasisSet.h
+    // This problem only appears when the electron positions are specified
+    // in the input. The random particle placement step executes after this
+    // part of the code, overwriting the leftover positions from the cusp
+    // initialization.
+    ParticleSet tmp_targetPtcl(targetPtcl);
+
+    const int num_centers = sourcePtcl.getTotalNum();
+    auto& lcwc = dynamic_cast<LCAOrbitalSetWithCorrectionT<float>&>(*sposet);
+
+    const int orbital_set_size = lcwc.getOrbitalSetSize();
+    Matrix<CuspCorrectionParametersT<float>> info(
+        num_centers, orbital_set_size);
+
+    // set a default file name if not given
+    if (cusp_file.empty())
+        cusp_file = spo_name + ".cuspInfo.xml";
+
+    bool file_exists(
+        this->myComm->rank() == 0 && std::ifstream(cusp_file).good());
+    this->myComm->bcast(file_exists);
+    app_log() << "  Cusp correction file " << cusp_file
+              << (file_exists ? " exits." : " doesn't exist.") << std::endl;
+
+    // validate file if it exists
+    if (file_exists) {
+        bool valid = 0;
+        if (this->myComm->rank() == 0)
+            valid = CuspCorrectionConstructionT<float>::readCuspInfo(
+                cusp_file, spo_name, orbital_set_size, info);
+        this->myComm->bcast(valid);
+        if (!valid)
+            this->myComm->barrier_and_abort(
+                "Invalid cusp correction file " + cusp_file);
+#ifdef HAVE_MPI
+        for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++)
+            for (int center_idx = 0; center_idx < num_centers; center_idx++)
+                CuspCorrectionConstructionT<float>::broadcastCuspInfo(
+                    info(center_idx, orb_idx), *this->myComm, 0);
+#endif
+    }
+    else {
+        CuspCorrectionConstructionT<float>::generateCuspInfo(info,
+            tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name, *this->myComm);
+        if (this->myComm->rank() == 0)
+            CuspCorrectionConstructionT<float>::saveCusp(
+                cusp_file, info, spo_name);
+    }
+
+    CuspCorrectionConstructionT<float>::applyCuspCorrection(
+        info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp, spo_name);
+
+    return sposet;
+}
+
+template <>
+std::unique_ptr<SPOSetT<std::complex<double>>>
+LCAOrbitalBuilderT<std::complex<double>>::createWithCuspCorrection(
+    xmlNodePtr, const std::string&, std::string, std::unique_ptr<BasisSet_t>&&)
+{
+    this->myComm->barrier_and_abort(
+        "LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not "
+        "supported on complex LCAO.");
+}
+
+template <>
+std::unique_ptr<SPOSetT<std::complex<float>>>
+LCAOrbitalBuilderT<std::complex<float>>::createWithCuspCorrection(
+    xmlNodePtr, const std::string&, std::string, std::unique_ptr<BasisSet_t>&&)
+{
+    this->myComm->barrier_and_abort(
+        "LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not "
+        "supported on complex LCAO.");
+}
+
 template <typename T>
 std::unique_ptr<SPOSetT<T>>
 LCAOrbitalBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
@@ -501,18 +667,8 @@ LCAOrbitalBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
 
     std::unique_ptr<SPOSetT<T>> sposet;
     if (doCuspCorrection) {
-#if defined(QMC_COMPLEX)
-        this->myComm->barrier_and_abort(
-            "LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not "
-            "supported on complex LCAO.");
-#else
-        app_summary() << "        Using cusp correction." << std::endl;
-        auto lcwc = std::make_unique<LCAOrbitalSetWithCorrectionT<T>>(
-            spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet));
-        loadMO(lcwc->lcao, cur);
-        lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize());
-        sposet = std::move(lcwc);
-#endif
+        createWithCuspCorrection(
+            cur, spo_name, cusp_file, std::move(myBasisSet));
     }
     else {
         auto lcos = std::make_unique<LCAOrbitalSetT<T>>(
@@ -521,65 +677,6 @@ LCAOrbitalBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
         sposet = std::move(lcos);
     }
 
-#if !defined(QMC_COMPLEX)
-    if (doCuspCorrection) {
-        // Create a temporary particle set to use for cusp initialization.
-        // The particle coordinates left at the end are unsuitable for further
-        // computations. The coordinates get set to nuclear positions, which
-        // leads to zero e-N distance, which causes a NaN in SoaAtomicBasisSet.h
-        // This problem only appears when the electron positions are specified
-        // in the input. The random particle placement step executes after this
-        // part of the code, overwriting the leftover positions from the cusp
-        // initialization.
-        ParticleSet tmp_targetPtcl(targetPtcl);
-
-        const int num_centers = sourcePtcl.getTotalNum();
-        auto& lcwc = dynamic_cast<LCAOrbitalSetWithCorrectionT<T>&>(*sposet);
-
-        const int orbital_set_size = lcwc.getOrbitalSetSize();
-        Matrix<CuspCorrectionParametersT<T>> info(
-            num_centers, orbital_set_size);
-
-        // set a default file name if not given
-        if (cusp_file.empty())
-            cusp_file = spo_name + ".cuspInfo.xml";
-
-        bool file_exists(
-            this->myComm->rank() == 0 && std::ifstream(cusp_file).good());
-        this->myComm->bcast(file_exists);
-        app_log() << "  Cusp correction file " << cusp_file
-                  << (file_exists ? " exits." : " doesn't exist.") << std::endl;
-
-        // validate file if it exists
-        if (file_exists) {
-            bool valid = 0;
-            if (this->myComm->rank() == 0)
-                valid = CuspCorrectionConstructionT<T>::readCuspInfo(
-                    cusp_file, spo_name, orbital_set_size, info);
-            this->myComm->bcast(valid);
-            if (!valid)
-                this->myComm->barrier_and_abort(
-                    "Invalid cusp correction file " + cusp_file);
-#ifdef HAVE_MPI
-            for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++)
-                for (int center_idx = 0; center_idx < num_centers; center_idx++)
-                    CuspCorrectionConstructionT<T>::broadcastCuspInfo(
-                        info(center_idx, orb_idx), *this->myComm, 0);
-#endif
-        }
-        else {
-            CuspCorrectionConstructionT<T>::generateCuspInfo(info,
-                tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name, *this->myComm);
-            if (this->myComm->rank() == 0)
-                CuspCorrectionConstructionT<T>::saveCusp(
-                    cusp_file, info, spo_name);
-        }
-
-        CuspCorrectionConstructionT<T>::applyCuspCorrection(
-            info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp, spo_name);
-    }
-#endif
-
     return sposet;
 }
 
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h
index a746326df7d..83386951d28 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h
@@ -125,6 +125,10 @@ class LCAOrbitalBuilderT : public SPOSetBuilderT<T>
         Matrix<RealType>& Creal) const;
 
 private:
+    /// enable cusp correction
+    std::unique_ptr<SPOSetT<T>>
+    createWithCuspCorrection(xmlNodePtr cur, const std::string& spo_name,
+        std::string cusp_file, std::unique_ptr<BasisSet_t>&& myBasisSet);
     /// load a basis set from XML input
     std::unique_ptr<BasisSet_t>
     loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent);
diff --git a/src/QMCWaveFunctions/LCAO/MultiFunctorAdapter.h b/src/QMCWaveFunctions/LCAO/MultiFunctorAdapter.h
index 110491c006d..ee9ecde7fef 100644
--- a/src/QMCWaveFunctions/LCAO/MultiFunctorAdapter.h
+++ b/src/QMCWaveFunctions/LCAO/MultiFunctorAdapter.h
@@ -21,6 +21,7 @@
 #include "hdf/hdf_archive.h"
 #include "LCAO/MultiQuinticSpline1D.h"
 #include "LCAO/SoaAtomicBasisSet.h"
+#include "LCAO/SoaAtomicBasisSetT.h"
 
 namespace qmcplusplus
 {
@@ -145,5 +146,60 @@ class RadialOrbitalSetBuilder<SoaAtomicBasisSet<MultiFunctorAdapter<FN>, SH>> :
     m_orbitals.setRmax(0); //set Rmax
   }
 };
+
+template<typename FN, typename SH, typename ORBT>
+class RadialOrbitalSetBuilder<SoaAtomicBasisSetT<MultiFunctorAdapter<FN>, SH, ORBT>> : public MPIObjectBase
+{
+public:
+  using COT             = SoaAtomicBasisSetT<MultiFunctorAdapter<FN>, SH, ORBT>;
+  using RadialOrbital_t = MultiFunctorAdapter<FN>;
+  using single_type     = typename RadialOrbital_t::single_type;
+
+  ///true, if the RadialOrbitalType is normalized
+  bool Normalized;
+  ///orbitals to build
+  COT& m_orbitals;
+
+  ///constructor
+  RadialOrbitalSetBuilder(Communicate* comm, COT& aos) : MPIObjectBase(comm), Normalized(true), m_orbitals(aos) {}
+
+  ///implement functions used by AOBasisBuilder
+  bool addGrid(xmlNodePtr cur, const std::string& rad_type) { return true; }
+  bool addGridH5(hdf_archive& hin) { return true; }
+  bool openNumericalBasisH5(xmlNodePtr cur) { return true; }
+  bool put(xmlNodePtr cur)
+  {
+    const std::string a(lowerCase(getXMLAttributeValue(cur, "normalized")));
+    if (a == "no")
+      Normalized = false;
+    return true;
+  }
+
+  bool addRadialOrbital(xmlNodePtr cur, const std::string& rad_type, const QuantumNumberType& nlms)
+  {
+    auto radorb = std::make_unique<single_type>(nlms[q_l], Normalized);
+    radorb->putBasisGroup(cur);
+
+    m_orbitals.RnlID.push_back(nlms);
+    m_orbitals.MultiRnl.Rnl.push_back(std::move(radorb));
+    return true;
+  }
+
+  bool addRadialOrbitalH5(hdf_archive& hin, const std::string& rad_type, const QuantumNumberType& nlms)
+  {
+    auto radorb = std::make_unique<single_type>(nlms[q_l], Normalized);
+    radorb->putBasisGroupH5(hin, *myComm);
+
+    m_orbitals.RnlID.push_back(nlms);
+    m_orbitals.MultiRnl.Rnl.push_back(std::move(radorb));
+
+    return true;
+  }
+
+  void finalize()
+  {
+    m_orbitals.setRmax(0); //set Rmax
+  }
+};
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCWaveFunctions/LCAO/RadialOrbitalSetBuilder.h b/src/QMCWaveFunctions/LCAO/RadialOrbitalSetBuilder.h
index 71e36230bd7..4d03b3d652c 100644
--- a/src/QMCWaveFunctions/LCAO/RadialOrbitalSetBuilder.h
+++ b/src/QMCWaveFunctions/LCAO/RadialOrbitalSetBuilder.h
@@ -166,7 +166,7 @@ bool RadialOrbitalSetBuilder<COT>::addGrid(xmlNodePtr cur, const std::string& ra
     hin.pop();
   }
   else
-    input_grid = OneDimGridFactory::createGrid(cur);
+    input_grid = OneDimGridFactory<RealType>::createGrid(cur);
 
   //set zero to use std::max
   m_rcut_safe = 0;
diff --git a/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h b/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h
new file mode 100644
index 00000000000..1f1bc53d5ee
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h
@@ -0,0 +1,775 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by:
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+/** @file SoaAtomicBasisSetT.h
+ */
+#ifndef QMCPLUSPLUS_SOA_SPHERICALORBITAL_BASISSETT_H
+#define QMCPLUSPLUS_SOA_SPHERICALORBITAL_BASISSETT_H
+
+#include "CPU/math.hpp"
+#include "OptimizableObject.h"
+
+namespace qmcplusplus
+{
+template <typename T>
+struct CorrectPhaseFunctor
+{
+    const TinyVector<double, 3>& superTwist;
+
+    template <typename PosType>
+    T
+    operator()(PosType Tv) const
+    {
+        return 1.0;
+    }
+};
+
+template <typename T>
+struct CorrectPhaseFunctor<std::complex<T>>
+{
+    const TinyVector<double, 3>& superTwist;
+
+    template <typename PosType>
+    std::complex<T>
+    operator()(PosType Tv) const
+    {
+        T phasearg = superTwist[0] * Tv[0] + superTwist[1] * Tv[1] +
+            superTwist[2] * Tv[2];
+        T s, c;
+        qmcplusplus::sincos(-phasearg, &s, &c);
+        return {c, s};
+    };
+};
+
+/* A basis set for a center type
+ *
+ * @tparam ROT : radial function type, e.g.,NGFunctor<RealType>
+ * @tparam SH : spherical or carteisan Harmonics for (l,m) expansion
+ *
+ * \f$ \phi_{n,l,m}({\bf r})=R_{n,l}(r) Y_{l,m}(\theta) \f$
+ */
+template <typename ROT, typename SH, typename ORBT>
+struct SoaAtomicBasisSetT
+{
+    using RadialOrbital_t = ROT;
+    using RealType = typename ROT::RealType;
+    using GridType = typename ROT::GridType;
+    using ValueType = ORBT;
+
+    /// size of the basis set
+    int BasisSetSize;
+    /// Number of Cell images for the evaluation of the orbital with PBC. If No
+    /// PBC, should be 0;
+    TinyVector<int, 3> PBCImages;
+    /// Coordinates of SuperTwist
+    TinyVector<double, 3> SuperTwist;
+    /// Phase Factor array
+    std::vector<ValueType> periodic_image_phase_factors;
+    /// maximum radius of this center
+    RealType Rmax;
+    /// spherical harmonics
+    SH Ylm;
+    /// radial orbitals
+    ROT MultiRnl;
+    /// index of the corresponding real Spherical Harmonic with quantum numbers
+    /// \f$ (l,m) \f$
+    aligned_vector<int> LM;
+    /**index of the corresponding radial orbital with quantum numbers \f$ (n,l)
+     * \f$ */
+    aligned_vector<int> NL;
+    /// container for the quantum-numbers
+    std::vector<QuantumNumberType> RnlID;
+    /// temporary storage
+    VectorSoaContainer<RealType, 4> tempS;
+
+    /// the constructor
+    explicit SoaAtomicBasisSetT(int lmax, bool addsignforM = false) :
+        Ylm(lmax, addsignforM)
+    {
+    }
+
+    void
+    checkInVariables(opt_variables_type& active)
+    {
+        // for(size_t nl=0; nl<Rnl.size(); nl++)
+        //   Rnl[nl]->checkInVariables(active);
+    }
+
+    void
+    checkOutVariables(const opt_variables_type& active)
+    {
+        // for(size_t nl=0; nl<Rnl.size(); nl++)
+        //   Rnl[nl]->checkOutVariables(active);
+    }
+
+    void
+    resetParameters(const opt_variables_type& active)
+    {
+        // for(size_t nl=0; nl<Rnl.size(); nl++)
+        //   Rnl[nl]->resetParameters(active);
+    }
+
+    /** return the number of basis functions
+     */
+    inline int
+    getBasisSetSize() const
+    {
+        //=NL.size();
+        return BasisSetSize;
+    }
+
+    /** Set the number of periodic image for the evaluation of the orbitals and
+     * the phase factor. In the case of Non-PBC, PBCImages=(1,1,1),
+     * SuperTwist(0,0,0) and the PhaseFactor=1.
+     */
+    void
+    setPBCParams(const TinyVector<int, 3>& pbc_images,
+        const TinyVector<double, 3> supertwist,
+        const std::vector<ValueType>& PeriodicImagePhaseFactors)
+    {
+        PBCImages = pbc_images;
+        periodic_image_phase_factors = PeriodicImagePhaseFactors;
+        SuperTwist = supertwist;
+    }
+
+    /** implement a BasisSetBase virtual function
+     *
+     * Set Rmax and BasisSetSize
+     * @todo Should be able to overwrite Rmax to be much smaller than the
+     * maximum grid
+     */
+    inline void
+    setBasisSetSize(int n)
+    {
+        BasisSetSize = LM.size();
+        tempS.resize(std::max(Ylm.size(), RnlID.size()));
+    }
+
+    /** Set Rmax */
+    template <typename RealType>
+    inline void
+    setRmax(RealType rmax)
+    {
+        Rmax = (rmax > 0) ? rmax : MultiRnl.rmax();
+    }
+
+    /// set the current offset
+    inline void
+    setCenter(int c, int offset)
+    {
+    }
+
+    /// Sets a boolean vector for S-type orbitals.  Used for cusp correction.
+    void
+    queryOrbitalsForSType(std::vector<bool>& s_orbitals) const
+    {
+        for (int i = 0; i < BasisSetSize; i++) {
+            s_orbitals[i] = (RnlID[NL[i]][1] == 0);
+        }
+    }
+
+    /** evaluate VGL
+     */
+    template <typename LAT, typename PosType, typename VGL>
+    inline void
+    evaluateVGL(const LAT& lattice, const RealType r, const PosType& dr,
+        const size_t offset, VGL& vgl, PosType Tv)
+    {
+        int TransX, TransY, TransZ;
+
+        PosType dr_new;
+        RealType r_new;
+        // RealType psi_new, dpsi_x_new, dpsi_y_new, dpsi_z_new,d2psi_new;
+
+        const ValueType correctphase =
+            CorrectPhaseFunctor<ValueType>{SuperTwist}(Tv);
+
+        constexpr RealType cone(1);
+        constexpr RealType ctwo(2);
+
+        // one can assert the alignment
+        RealType* restrict phi = tempS.data(0);
+        RealType* restrict dphi = tempS.data(1);
+        RealType* restrict d2phi = tempS.data(2);
+
+        // V,Gx,Gy,Gz,L
+        auto* restrict psi = vgl.data(0) + offset;
+        const RealType* restrict ylm_v = Ylm[0]; // value
+        auto* restrict dpsi_x = vgl.data(1) + offset;
+        const RealType* restrict ylm_x = Ylm[1]; // gradX
+        auto* restrict dpsi_y = vgl.data(2) + offset;
+        const RealType* restrict ylm_y = Ylm[2]; // gradY
+        auto* restrict dpsi_z = vgl.data(3) + offset;
+        const RealType* restrict ylm_z = Ylm[3]; // gradZ
+        auto* restrict d2psi = vgl.data(4) + offset;
+        const RealType* restrict ylm_l = Ylm[4]; // lap
+
+        for (size_t ib = 0; ib < BasisSetSize; ++ib) {
+            psi[ib] = 0;
+            dpsi_x[ib] = 0;
+            dpsi_y[ib] = 0;
+            dpsi_z[ib] = 0;
+            d2psi[ib] = 0;
+        }
+        // Phase_idx (iter) needs to be initialized at -1 as it has to be
+        // incremented first to comply with the if statement (r_new >=Rmax)
+        int iter = -1;
+        for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X
+        {
+            // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+            TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
+            for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y
+            {
+                // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+                TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
+                for (int k = 0; k <= PBCImages[2];
+                     k++) // loop Translation over Z
+                {
+                    // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+                    TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
+
+                    dr_new[0] = dr[0] +
+                        (TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) +
+                            TransZ * lattice.R(2, 0));
+                    dr_new[1] = dr[1] +
+                        (TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) +
+                            TransZ * lattice.R(2, 1));
+                    dr_new[2] = dr[2] +
+                        (TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) +
+                            TransZ * lattice.R(2, 2));
+
+                    r_new = std::sqrt(dot(dr_new, dr_new));
+
+                    iter++;
+                    if (r_new >= Rmax)
+                        continue;
+
+                    // SIGN Change!!
+                    const RealType x = -dr_new[0], y = -dr_new[1],
+                                   z = -dr_new[2];
+                    Ylm.evaluateVGL(x, y, z);
+
+                    MultiRnl.evaluate(r_new, phi, dphi, d2phi);
+
+                    const RealType rinv = cone / r_new;
+
+                    /// Phase for PBC containing the phase for the nearest image
+                    /// displacement and the correction due to the Distance
+                    /// table.
+                    const ValueType Phase =
+                        periodic_image_phase_factors[iter] * correctphase;
+
+                    for (size_t ib = 0; ib < BasisSetSize; ++ib) {
+                        const int nl(NL[ib]);
+                        const int lm(LM[ib]);
+                        const RealType drnloverr = rinv * dphi[nl];
+                        const RealType ang = ylm_v[lm];
+                        const RealType gr_x = drnloverr * x;
+                        const RealType gr_y = drnloverr * y;
+                        const RealType gr_z = drnloverr * z;
+                        const RealType ang_x = ylm_x[lm];
+                        const RealType ang_y = ylm_y[lm];
+                        const RealType ang_z = ylm_z[lm];
+                        const RealType vr = phi[nl];
+
+                        psi[ib] += ang * vr * Phase;
+                        dpsi_x[ib] += (ang * gr_x + vr * ang_x) * Phase;
+                        dpsi_y[ib] += (ang * gr_y + vr * ang_y) * Phase;
+                        dpsi_z[ib] += (ang * gr_z + vr * ang_z) * Phase;
+                        d2psi[ib] += (ang * (ctwo * drnloverr + d2phi[nl]) +
+                                         ctwo *
+                                             (gr_x * ang_x + gr_y * ang_y +
+                                                 gr_z * ang_z) +
+                                         vr * ylm_l[lm]) *
+                            Phase;
+                    }
+                }
+            }
+        }
+    }
+
+    template <typename LAT, typename PosType, typename VGH>
+    inline void
+    evaluateVGH(const LAT& lattice, const RealType r, const PosType& dr,
+        const size_t offset, VGH& vgh)
+    {
+        int TransX, TransY, TransZ;
+
+        PosType dr_new;
+        RealType r_new;
+
+        constexpr RealType cone(1);
+
+        // one can assert the alignment
+        RealType* restrict phi = tempS.data(0);
+        RealType* restrict dphi = tempS.data(1);
+        RealType* restrict d2phi = tempS.data(2);
+
+        // V,Gx,Gy,Gz,L
+        auto* restrict psi = vgh.data(0) + offset;
+        const RealType* restrict ylm_v = Ylm[0]; // value
+        auto* restrict dpsi_x = vgh.data(1) + offset;
+        const RealType* restrict ylm_x = Ylm[1]; // gradX
+        auto* restrict dpsi_y = vgh.data(2) + offset;
+        const RealType* restrict ylm_y = Ylm[2]; // gradY
+        auto* restrict dpsi_z = vgh.data(3) + offset;
+        const RealType* restrict ylm_z = Ylm[3]; // gradZ
+
+        auto* restrict dhpsi_xx = vgh.data(4) + offset;
+        const RealType* restrict ylm_xx = Ylm[4];
+        auto* restrict dhpsi_xy = vgh.data(5) + offset;
+        const RealType* restrict ylm_xy = Ylm[5];
+        auto* restrict dhpsi_xz = vgh.data(6) + offset;
+        const RealType* restrict ylm_xz = Ylm[6];
+        auto* restrict dhpsi_yy = vgh.data(7) + offset;
+        const RealType* restrict ylm_yy = Ylm[7];
+        auto* restrict dhpsi_yz = vgh.data(8) + offset;
+        const RealType* restrict ylm_yz = Ylm[8];
+        auto* restrict dhpsi_zz = vgh.data(9) + offset;
+        const RealType* restrict ylm_zz = Ylm[9];
+
+        for (size_t ib = 0; ib < BasisSetSize; ++ib) {
+            psi[ib] = 0;
+            dpsi_x[ib] = 0;
+            dpsi_y[ib] = 0;
+            dpsi_z[ib] = 0;
+            dhpsi_xx[ib] = 0;
+            dhpsi_xy[ib] = 0;
+            dhpsi_xz[ib] = 0;
+            dhpsi_yy[ib] = 0;
+            dhpsi_yz[ib] = 0;
+            dhpsi_zz[ib] = 0;
+            //      d2psi[ib]  = 0;
+        }
+
+        for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X
+        {
+            // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+            TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
+            for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y
+            {
+                // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+                TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
+                for (int k = 0; k <= PBCImages[2];
+                     k++) // loop Translation over Z
+                {
+                    // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+                    TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
+                    dr_new[0] = dr[0] + TransX * lattice.R(0, 0) +
+                        TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0);
+                    dr_new[1] = dr[1] + TransX * lattice.R(0, 1) +
+                        TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1);
+                    dr_new[2] = dr[2] + TransX * lattice.R(0, 2) +
+                        TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2);
+                    r_new = std::sqrt(dot(dr_new, dr_new));
+
+                    // const size_t ib_max=NL.size();
+                    if (r_new >= Rmax)
+                        continue;
+
+                    // SIGN Change!!
+                    const RealType x = -dr_new[0], y = -dr_new[1],
+                                   z = -dr_new[2];
+                    Ylm.evaluateVGH(x, y, z);
+
+                    MultiRnl.evaluate(r_new, phi, dphi, d2phi);
+
+                    const RealType rinv = cone / r_new;
+
+                    for (size_t ib = 0; ib < BasisSetSize; ++ib) {
+                        const int nl(NL[ib]);
+                        const int lm(LM[ib]);
+                        const RealType drnloverr = rinv * dphi[nl];
+                        const RealType ang = ylm_v[lm];
+                        const RealType gr_x = drnloverr * x;
+                        const RealType gr_y = drnloverr * y;
+                        const RealType gr_z = drnloverr * z;
+
+                        // The non-strictly diagonal term in \partial_i
+                        // \partial_j R_{nl} is
+                        //  \frac{x_i x_j}{r^2}\left(\frac{\partial^2
+                        //  R_{nl}}{\partial r^2} - \frac{1}{r}\frac{\partial
+                        //  R_{nl}}{\partial r}) To save recomputation, I
+                        //  evaluate everything except the x_i*x_j term once,
+                        //  and store it in gr2_tmp.  The full term is obtained
+                        //  by x_i*x_j*gr2_tmp.
+                        const RealType gr2_tmp =
+                            rinv * rinv * (d2phi[nl] - drnloverr);
+                        const RealType gr_xx = x * x * gr2_tmp + drnloverr;
+                        const RealType gr_xy = x * y * gr2_tmp;
+                        const RealType gr_xz = x * z * gr2_tmp;
+                        const RealType gr_yy = y * y * gr2_tmp + drnloverr;
+                        const RealType gr_yz = y * z * gr2_tmp;
+                        const RealType gr_zz = z * z * gr2_tmp + drnloverr;
+
+                        const RealType ang_x = ylm_x[lm];
+                        const RealType ang_y = ylm_y[lm];
+                        const RealType ang_z = ylm_z[lm];
+                        const RealType ang_xx = ylm_xx[lm];
+                        const RealType ang_xy = ylm_xy[lm];
+                        const RealType ang_xz = ylm_xz[lm];
+                        const RealType ang_yy = ylm_yy[lm];
+                        const RealType ang_yz = ylm_yz[lm];
+                        const RealType ang_zz = ylm_zz[lm];
+
+                        const RealType vr = phi[nl];
+
+                        psi[ib] += ang * vr;
+                        dpsi_x[ib] += ang * gr_x + vr * ang_x;
+                        dpsi_y[ib] += ang * gr_y + vr * ang_y;
+                        dpsi_z[ib] += ang * gr_z + vr * ang_z;
+
+                        // \partial_i \partial_j (R*Y) = Y \partial_i \partial_j
+                        // R + R \partial_i \partial_j Y
+                        //                             + (\partial_i R)
+                        //                             (\partial_j Y) +
+                        //                             (\partial_j R)(\partial_i
+                        //                             Y)
+                        dhpsi_xx[ib] +=
+                            gr_xx * ang + ang_xx * vr + 2.0 * gr_x * ang_x;
+                        dhpsi_xy[ib] += gr_xy * ang + ang_xy * vr +
+                            gr_x * ang_y + gr_y * ang_x;
+                        dhpsi_xz[ib] += gr_xz * ang + ang_xz * vr +
+                            gr_x * ang_z + gr_z * ang_x;
+                        dhpsi_yy[ib] +=
+                            gr_yy * ang + ang_yy * vr + 2.0 * gr_y * ang_y;
+                        dhpsi_yz[ib] += gr_yz * ang + ang_yz * vr +
+                            gr_y * ang_z + gr_z * ang_y;
+                        dhpsi_zz[ib] +=
+                            gr_zz * ang + ang_zz * vr + 2.0 * gr_z * ang_z;
+                    }
+                }
+            }
+        }
+    }
+
+    template <typename LAT, typename PosType, typename VGHGH>
+    inline void
+    evaluateVGHGH(const LAT& lattice, const RealType r, const PosType& dr,
+        const size_t offset, VGHGH& vghgh)
+    {
+        int TransX, TransY, TransZ;
+
+        PosType dr_new;
+        RealType r_new;
+
+        constexpr RealType cone(1);
+
+        // one can assert the alignment
+        RealType* restrict phi = tempS.data(0);
+        RealType* restrict dphi = tempS.data(1);
+        RealType* restrict d2phi = tempS.data(2);
+        RealType* restrict d3phi = tempS.data(3);
+
+        // V,Gx,Gy,Gz,L
+        auto* restrict psi = vghgh.data(0) + offset;
+        const RealType* restrict ylm_v = Ylm[0]; // value
+        auto* restrict dpsi_x = vghgh.data(1) + offset;
+        const RealType* restrict ylm_x = Ylm[1]; // gradX
+        auto* restrict dpsi_y = vghgh.data(2) + offset;
+        const RealType* restrict ylm_y = Ylm[2]; // gradY
+        auto* restrict dpsi_z = vghgh.data(3) + offset;
+        const RealType* restrict ylm_z = Ylm[3]; // gradZ
+
+        auto* restrict dhpsi_xx = vghgh.data(4) + offset;
+        const RealType* restrict ylm_xx = Ylm[4];
+        auto* restrict dhpsi_xy = vghgh.data(5) + offset;
+        const RealType* restrict ylm_xy = Ylm[5];
+        auto* restrict dhpsi_xz = vghgh.data(6) + offset;
+        const RealType* restrict ylm_xz = Ylm[6];
+        auto* restrict dhpsi_yy = vghgh.data(7) + offset;
+        const RealType* restrict ylm_yy = Ylm[7];
+        auto* restrict dhpsi_yz = vghgh.data(8) + offset;
+        const RealType* restrict ylm_yz = Ylm[8];
+        auto* restrict dhpsi_zz = vghgh.data(9) + offset;
+        const RealType* restrict ylm_zz = Ylm[9];
+
+        auto* restrict dghpsi_xxx = vghgh.data(10) + offset;
+        const RealType* restrict ylm_xxx = Ylm[10];
+        auto* restrict dghpsi_xxy = vghgh.data(11) + offset;
+        const RealType* restrict ylm_xxy = Ylm[11];
+        auto* restrict dghpsi_xxz = vghgh.data(12) + offset;
+        const RealType* restrict ylm_xxz = Ylm[12];
+        auto* restrict dghpsi_xyy = vghgh.data(13) + offset;
+        const RealType* restrict ylm_xyy = Ylm[13];
+        auto* restrict dghpsi_xyz = vghgh.data(14) + offset;
+        const RealType* restrict ylm_xyz = Ylm[14];
+        auto* restrict dghpsi_xzz = vghgh.data(15) + offset;
+        const RealType* restrict ylm_xzz = Ylm[15];
+        auto* restrict dghpsi_yyy = vghgh.data(16) + offset;
+        const RealType* restrict ylm_yyy = Ylm[16];
+        auto* restrict dghpsi_yyz = vghgh.data(17) + offset;
+        const RealType* restrict ylm_yyz = Ylm[17];
+        auto* restrict dghpsi_yzz = vghgh.data(18) + offset;
+        const RealType* restrict ylm_yzz = Ylm[18];
+        auto* restrict dghpsi_zzz = vghgh.data(19) + offset;
+        const RealType* restrict ylm_zzz = Ylm[19];
+
+        for (size_t ib = 0; ib < BasisSetSize; ++ib) {
+            psi[ib] = 0;
+
+            dpsi_x[ib] = 0;
+            dpsi_y[ib] = 0;
+            dpsi_z[ib] = 0;
+
+            dhpsi_xx[ib] = 0;
+            dhpsi_xy[ib] = 0;
+            dhpsi_xz[ib] = 0;
+            dhpsi_yy[ib] = 0;
+            dhpsi_yz[ib] = 0;
+            dhpsi_zz[ib] = 0;
+
+            dghpsi_xxx[ib] = 0;
+            dghpsi_xxy[ib] = 0;
+            dghpsi_xxz[ib] = 0;
+            dghpsi_xyy[ib] = 0;
+            dghpsi_xyz[ib] = 0;
+            dghpsi_xzz[ib] = 0;
+            dghpsi_yyy[ib] = 0;
+            dghpsi_yyz[ib] = 0;
+            dghpsi_yzz[ib] = 0;
+            dghpsi_zzz[ib] = 0;
+        }
+
+        for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X
+        {
+            // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+            TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
+            for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y
+            {
+                // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+                TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
+                for (int k = 0; k <= PBCImages[2];
+                     k++) // loop Translation over Z
+                {
+                    // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+                    TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
+                    dr_new[0] = dr[0] + TransX * lattice.R(0, 0) +
+                        TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0);
+                    dr_new[1] = dr[1] + TransX * lattice.R(0, 1) +
+                        TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1);
+                    dr_new[2] = dr[2] + TransX * lattice.R(0, 2) +
+                        TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2);
+                    r_new = std::sqrt(dot(dr_new, dr_new));
+
+                    // const size_t ib_max=NL.size();
+                    if (r_new >= Rmax)
+                        continue;
+
+                    // SIGN Change!!
+                    const RealType x = -dr_new[0], y = -dr_new[1],
+                                   z = -dr_new[2];
+                    Ylm.evaluateVGHGH(x, y, z);
+
+                    MultiRnl.evaluate(r_new, phi, dphi, d2phi, d3phi);
+
+                    const RealType rinv = cone / r_new;
+                    const RealType xu = x * rinv, yu = y * rinv, zu = z * rinv;
+                    for (size_t ib = 0; ib < BasisSetSize; ++ib) {
+                        const int nl(NL[ib]);
+                        const int lm(LM[ib]);
+                        const RealType drnloverr = rinv * dphi[nl];
+                        const RealType ang = ylm_v[lm];
+                        const RealType gr_x = drnloverr * x;
+                        const RealType gr_y = drnloverr * y;
+                        const RealType gr_z = drnloverr * z;
+
+                        // The non-strictly diagonal term in \partial_i
+                        // \partial_j R_{nl} is
+                        //  \frac{x_i x_j}{r^2}\left(\frac{\partial^2
+                        //  R_{nl}}{\partial r^2} - \frac{1}{r}\frac{\partial
+                        //  R_{nl}}{\partial r}) To save recomputation, I
+                        //  evaluate everything except the x_i*x_j term once,
+                        //  and store it in gr2_tmp.  The full term is obtained
+                        //  by x_i*x_j*gr2_tmp.  This is p(r) in the notes.
+                        const RealType gr2_tmp = rinv * (d2phi[nl] - drnloverr);
+
+                        const RealType gr_xx = x * xu * gr2_tmp + drnloverr;
+                        const RealType gr_xy = x * yu * gr2_tmp;
+                        const RealType gr_xz = x * zu * gr2_tmp;
+                        const RealType gr_yy = y * yu * gr2_tmp + drnloverr;
+                        const RealType gr_yz = y * zu * gr2_tmp;
+                        const RealType gr_zz = z * zu * gr2_tmp + drnloverr;
+
+                        // This is q(r) in the notes.
+                        const RealType gr3_tmp = d3phi[nl] - 3.0 * gr2_tmp;
+
+                        const RealType gr_xxx =
+                            xu * xu * xu * gr3_tmp + gr2_tmp * (3. * xu);
+                        const RealType gr_xxy =
+                            xu * xu * yu * gr3_tmp + gr2_tmp * yu;
+                        const RealType gr_xxz =
+                            xu * xu * zu * gr3_tmp + gr2_tmp * zu;
+                        const RealType gr_xyy =
+                            xu * yu * yu * gr3_tmp + gr2_tmp * xu;
+                        const RealType gr_xyz = xu * yu * zu * gr3_tmp;
+                        const RealType gr_xzz =
+                            xu * zu * zu * gr3_tmp + gr2_tmp * xu;
+                        const RealType gr_yyy =
+                            yu * yu * yu * gr3_tmp + gr2_tmp * (3. * yu);
+                        const RealType gr_yyz =
+                            yu * yu * zu * gr3_tmp + gr2_tmp * zu;
+                        const RealType gr_yzz =
+                            yu * zu * zu * gr3_tmp + gr2_tmp * yu;
+                        const RealType gr_zzz =
+                            zu * zu * zu * gr3_tmp + gr2_tmp * (3. * zu);
+
+                        // Angular derivatives up to third
+                        const RealType ang_x = ylm_x[lm];
+                        const RealType ang_y = ylm_y[lm];
+                        const RealType ang_z = ylm_z[lm];
+
+                        const RealType ang_xx = ylm_xx[lm];
+                        const RealType ang_xy = ylm_xy[lm];
+                        const RealType ang_xz = ylm_xz[lm];
+                        const RealType ang_yy = ylm_yy[lm];
+                        const RealType ang_yz = ylm_yz[lm];
+                        const RealType ang_zz = ylm_zz[lm];
+
+                        const RealType ang_xxx = ylm_xxx[lm];
+                        const RealType ang_xxy = ylm_xxy[lm];
+                        const RealType ang_xxz = ylm_xxz[lm];
+                        const RealType ang_xyy = ylm_xyy[lm];
+                        const RealType ang_xyz = ylm_xyz[lm];
+                        const RealType ang_xzz = ylm_xzz[lm];
+                        const RealType ang_yyy = ylm_yyy[lm];
+                        const RealType ang_yyz = ylm_yyz[lm];
+                        const RealType ang_yzz = ylm_yzz[lm];
+                        const RealType ang_zzz = ylm_zzz[lm];
+
+                        const RealType vr = phi[nl];
+
+                        psi[ib] += ang * vr;
+                        dpsi_x[ib] += ang * gr_x + vr * ang_x;
+                        dpsi_y[ib] += ang * gr_y + vr * ang_y;
+                        dpsi_z[ib] += ang * gr_z + vr * ang_z;
+
+                        // \partial_i \partial_j (R*Y) = Y \partial_i \partial_j
+                        // R + R \partial_i \partial_j Y
+                        //                             + (\partial_i R)
+                        //                             (\partial_j Y) +
+                        //                             (\partial_j R)(\partial_i
+                        //                             Y)
+                        dhpsi_xx[ib] +=
+                            gr_xx * ang + ang_xx * vr + 2.0 * gr_x * ang_x;
+                        dhpsi_xy[ib] += gr_xy * ang + ang_xy * vr +
+                            gr_x * ang_y + gr_y * ang_x;
+                        dhpsi_xz[ib] += gr_xz * ang + ang_xz * vr +
+                            gr_x * ang_z + gr_z * ang_x;
+                        dhpsi_yy[ib] +=
+                            gr_yy * ang + ang_yy * vr + 2.0 * gr_y * ang_y;
+                        dhpsi_yz[ib] += gr_yz * ang + ang_yz * vr +
+                            gr_y * ang_z + gr_z * ang_y;
+                        dhpsi_zz[ib] +=
+                            gr_zz * ang + ang_zz * vr + 2.0 * gr_z * ang_z;
+
+                        dghpsi_xxx[ib] += gr_xxx * ang + vr * ang_xxx +
+                            3.0 * gr_xx * ang_x + 3.0 * gr_x * ang_xx;
+                        dghpsi_xxy[ib] += gr_xxy * ang + vr * ang_xxy +
+                            gr_xx * ang_y + ang_xx * gr_y +
+                            2.0 * gr_xy * ang_x + 2.0 * ang_xy * gr_x;
+                        dghpsi_xxz[ib] += gr_xxz * ang + vr * ang_xxz +
+                            gr_xx * ang_z + ang_xx * gr_z +
+                            2.0 * gr_xz * ang_x + 2.0 * ang_xz * gr_x;
+                        dghpsi_xyy[ib] += gr_xyy * ang + vr * ang_xyy +
+                            gr_yy * ang_x + ang_yy * gr_x +
+                            2.0 * gr_xy * ang_y + 2.0 * ang_xy * gr_y;
+                        dghpsi_xyz[ib] += gr_xyz * ang + vr * ang_xyz +
+                            gr_xy * ang_z + ang_xy * gr_z + gr_yz * ang_x +
+                            ang_yz * gr_x + gr_xz * ang_y + ang_xz * gr_y;
+                        dghpsi_xzz[ib] += gr_xzz * ang + vr * ang_xzz +
+                            gr_zz * ang_x + ang_zz * gr_x +
+                            2.0 * gr_xz * ang_z + 2.0 * ang_xz * gr_z;
+                        dghpsi_yyy[ib] += gr_yyy * ang + vr * ang_yyy +
+                            3.0 * gr_yy * ang_y + 3.0 * gr_y * ang_yy;
+                        dghpsi_yyz[ib] += gr_yyz * ang + vr * ang_yyz +
+                            gr_yy * ang_z + ang_yy * gr_z +
+                            2.0 * gr_yz * ang_y + 2.0 * ang_yz * gr_y;
+                        dghpsi_yzz[ib] += gr_yzz * ang + vr * ang_yzz +
+                            gr_zz * ang_y + ang_zz * gr_y +
+                            2.0 * gr_yz * ang_z + 2.0 * ang_yz * gr_z;
+                        dghpsi_zzz[ib] += gr_zzz * ang + vr * ang_zzz +
+                            3.0 * gr_zz * ang_z + 3.0 * gr_z * ang_zz;
+                    }
+                }
+            }
+        }
+    }
+
+    /** evaluate V
+     */
+    template <typename LAT, typename PosType, typename VT>
+    inline void
+    evaluateV(const LAT& lattice, const RealType r, const PosType& dr,
+        VT* restrict psi, PosType Tv)
+    {
+        int TransX, TransY, TransZ;
+
+        PosType dr_new;
+        RealType r_new;
+
+        const ValueType correctphase =
+            CorrectPhaseFunctor<ValueType>{SuperTwist}(Tv);
+
+        RealType* restrict ylm_v = tempS.data(0);
+        RealType* restrict phi_r = tempS.data(1);
+
+        for (size_t ib = 0; ib < BasisSetSize; ++ib)
+            psi[ib] = 0;
+        // Phase_idx (iter) needs to be initialized at -1 as it has to be
+        // incremented first to comply with the if statement (r_new >=Rmax)
+        int iter = -1;
+        for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X
+        {
+            // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+            TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
+            for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y
+            {
+                // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+                TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
+                for (int k = 0; k <= PBCImages[2];
+                     k++) // loop Translation over Z
+                {
+                    // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+                    TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
+
+                    dr_new[0] = dr[0] +
+                        (TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) +
+                            TransZ * lattice.R(2, 0));
+                    dr_new[1] = dr[1] +
+                        (TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) +
+                            TransZ * lattice.R(2, 1));
+                    dr_new[2] = dr[2] +
+                        (TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) +
+                            TransZ * lattice.R(2, 2));
+
+                    r_new = std::sqrt(dot(dr_new, dr_new));
+                    iter++;
+                    if (r_new >= Rmax)
+                        continue;
+
+                    Ylm.evaluateV(-dr_new[0], -dr_new[1], -dr_new[2], ylm_v);
+                    MultiRnl.evaluate(r_new, phi_r);
+                    /// Phase for PBC containing the phase for the nearest image
+                    /// displacement and the correction due to the Distance
+                    /// table.
+                    const ValueType Phase =
+                        periodic_image_phase_factors[iter] * correctphase;
+                    for (size_t ib = 0; ib < BasisSetSize; ++ib)
+                        psi[ib] += ylm_v[LM[ib]] * phi_r[NL[ib]] * Phase;
+                }
+            }
+        }
+    }
+};
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp
new file mode 100644
index 00000000000..78318edda3b
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp
@@ -0,0 +1,466 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by:
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "SoaLocalizedBasisSetT.h"
+
+#include "MultiFunctorAdapter.h"
+#include "MultiQuinticSpline1D.h"
+#include "Numerics/SoaCartesianTensor.h"
+#include "Numerics/SoaSphericalTensor.h"
+#include "Particle/DistanceTable.h"
+#include "SoaAtomicBasisSetT.h"
+
+#include <memory>
+
+namespace qmcplusplus
+{
+template <class COT, typename ORBT>
+SoaLocalizedBasisSetT<COT, ORBT>::SoaLocalizedBasisSetT(
+    ParticleSet& ions, ParticleSet& els) :
+    ions_(ions),
+    myTableIndex(els.addTable(ions,
+        DTModes::NEED_FULL_TABLE_ANYTIME |
+            DTModes::NEED_VP_FULL_TABLE_ON_HOST)),
+    SuperTwist(0.0)
+{
+    NumCenters = ions.getTotalNum();
+    NumTargets = els.getTotalNum();
+    LOBasisSet.resize(ions.getSpeciesSet().getTotalNum());
+    BasisOffset.resize(NumCenters + 1);
+    BasisSetSize = 0;
+}
+
+template <class COT, typename ORBT>
+SoaLocalizedBasisSetT<COT, ORBT>::SoaLocalizedBasisSetT(
+    const SoaLocalizedBasisSetT& a) :
+    SoaBasisSetBase<ORBT>(a),
+    NumCenters(a.NumCenters),
+    NumTargets(a.NumTargets),
+    ions_(a.ions_),
+    myTableIndex(a.myTableIndex),
+    SuperTwist(a.SuperTwist),
+    BasisOffset(a.BasisOffset)
+{
+    LOBasisSet.reserve(a.LOBasisSet.size());
+    for (auto& elem : a.LOBasisSet)
+        LOBasisSet.push_back(std::make_unique<COT>(*elem));
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::setPBCParams(
+    const TinyVector<int, 3>& PBCImages, const TinyVector<double, 3> Sup_Twist,
+    const std::vector<ORBT>& phase_factor)
+{
+    for (int i = 0; i < LOBasisSet.size(); ++i)
+        LOBasisSet[i]->setPBCParams(PBCImages, Sup_Twist, phase_factor);
+
+    SuperTwist = Sup_Twist;
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::setBasisSetSize(int nbs)
+{
+    const auto& IonID(ions_.GroupID);
+    if (BasisSetSize > 0 && nbs == BasisSetSize)
+        return;
+
+    if (auto& mapping = ions_.get_map_storage_to_input(); mapping.empty()) {
+        // evaluate the total basis dimension and offset for each center
+        BasisOffset[0] = 0;
+        for (int c = 0; c < NumCenters; c++)
+            BasisOffset[c + 1] =
+                BasisOffset[c] + LOBasisSet[IonID[c]]->getBasisSetSize();
+        BasisSetSize = BasisOffset[NumCenters];
+    }
+    else {
+        // when particles are reordered due to grouping, AOs need to restore the
+        // input order to match MOs.
+        std::vector<int> map_input_to_storage(mapping.size());
+        for (int c = 0; c < NumCenters; c++)
+            map_input_to_storage[mapping[c]] = c;
+
+        std::vector<size_t> basis_offset_input_order(BasisOffset.size(), 0);
+        for (int c = 0; c < NumCenters; c++)
+            basis_offset_input_order[c + 1] = basis_offset_input_order[c] +
+                LOBasisSet[IonID[map_input_to_storage[c]]]->getBasisSetSize();
+
+        for (int c = 0; c < NumCenters; c++)
+            BasisOffset[c] = basis_offset_input_order[mapping[c]];
+
+        BasisSetSize = basis_offset_input_order[NumCenters];
+    }
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::queryOrbitalsForSType(
+    const std::vector<bool>& corrCenter, std::vector<bool>& is_s_orbital) const
+{
+    const auto& IonID(ions_.GroupID);
+    for (int c = 0; c < NumCenters; c++) {
+        int idx = BasisOffset[c];
+        int bss = LOBasisSet[IonID[c]]->BasisSetSize;
+        std::vector<bool> local_is_s_orbital(bss);
+        LOBasisSet[IonID[c]]->queryOrbitalsForSType(local_is_s_orbital);
+        for (int k = 0; k < bss; k++) {
+            if (corrCenter[c]) {
+                is_s_orbital[idx++] = local_is_s_orbital[k];
+            }
+            else {
+                is_s_orbital[idx++] = false;
+            }
+        }
+    }
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::evaluateVGL(
+    const ParticleSet& P, int iat, vgl_type& vgl)
+{
+    const auto& IonID(ions_.GroupID);
+    const auto& coordR = P.activeR(iat);
+    const auto& d_table = P.getDistTableAB(myTableIndex);
+    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
+                                                    d_table.getDistRow(iat);
+    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
+                                                     d_table.getDisplRow(iat);
+
+    PosType Tv;
+    for (int c = 0; c < NumCenters; c++) {
+        Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0];
+        Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1];
+        Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2];
+        LOBasisSet[IonID[c]]->evaluateVGL(
+            P.getLattice(), dist[c], displ[c], BasisOffset[c], vgl, Tv);
+    }
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::mw_evaluateVGL(
+    const RefVectorWithLeader<ParticleSet>& P_list, int iat,
+    OffloadMWVGLArray& vgl_v)
+{
+    for (size_t iw = 0; iw < P_list.size(); iw++) {
+        const auto& IonID(ions_.GroupID);
+        const auto& coordR = P_list[iw].activeR(iat);
+        const auto& d_table = P_list[iw].getDistTableAB(myTableIndex);
+        const auto& dist = (P_list[iw].getActivePtcl() == iat) ?
+            d_table.getTempDists() :
+            d_table.getDistRow(iat);
+        const auto& displ = (P_list[iw].getActivePtcl() == iat) ?
+            d_table.getTempDispls() :
+            d_table.getDisplRow(iat);
+
+        PosType Tv;
+
+        // number of walkers * BasisSetSize
+        auto stride = vgl_v.size(1) * BasisSetSize;
+        assert(BasisSetSize == vgl_v.size(2));
+        vgl_type vgl_iw(vgl_v.data_at(0, iw, 0), BasisSetSize, stride);
+
+        for (int c = 0; c < NumCenters; c++) {
+            Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0];
+            Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1];
+            Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2];
+            LOBasisSet[IonID[c]]->evaluateVGL(P_list[iw].getLattice(), dist[c],
+                displ[c], BasisOffset[c], vgl_iw, Tv);
+        }
+    }
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::evaluateVGH(
+    const ParticleSet& P, int iat, vgh_type& vgh)
+{
+    const auto& IonID(ions_.GroupID);
+    const auto& d_table = P.getDistTableAB(myTableIndex);
+    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
+                                                    d_table.getDistRow(iat);
+    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
+                                                     d_table.getDisplRow(iat);
+    for (int c = 0; c < NumCenters; c++) {
+        LOBasisSet[IonID[c]]->evaluateVGH(
+            P.getLattice(), dist[c], displ[c], BasisOffset[c], vgh);
+    }
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::evaluateVGHGH(
+    const ParticleSet& P, int iat, vghgh_type& vghgh)
+{
+    // APP_ABORT("SoaLocalizedBasisSetT::evaluateVGH() not implemented\n");
+
+    const auto& IonID(ions_.GroupID);
+    const auto& d_table = P.getDistTableAB(myTableIndex);
+    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
+                                                    d_table.getDistRow(iat);
+    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
+                                                     d_table.getDisplRow(iat);
+    for (int c = 0; c < NumCenters; c++) {
+        LOBasisSet[IonID[c]]->evaluateVGHGH(
+            P.getLattice(), dist[c], displ[c], BasisOffset[c], vghgh);
+    }
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::evaluateV(
+    const ParticleSet& P, int iat, ORBT* restrict vals)
+{
+    const auto& IonID(ions_.GroupID);
+    const auto& coordR = P.activeR(iat);
+    const auto& d_table = P.getDistTableAB(myTableIndex);
+    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
+                                                    d_table.getDistRow(iat);
+    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
+                                                     d_table.getDisplRow(iat);
+
+    PosType Tv;
+    for (int c = 0; c < NumCenters; c++) {
+        Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0];
+        Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1];
+        Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2];
+        LOBasisSet[IonID[c]]->evaluateV(
+            P.getLattice(), dist[c], displ[c], vals + BasisOffset[c], Tv);
+    }
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::mw_evaluateValue(
+    const RefVectorWithLeader<ParticleSet>& P_list, int iat, OffloadMWVArray& v)
+{
+    for (size_t iw = 0; iw < P_list.size(); iw++)
+        evaluateV(P_list[iw], iat, v.data_at(iw, 0));
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::evaluateGradSourceV(const ParticleSet& P,
+    int iat, const ParticleSet& ions, int jion, vgl_type& vgl)
+{
+    // We need to zero out the temporary array vgl.
+    auto* restrict gx = vgl.data(1);
+    auto* restrict gy = vgl.data(2);
+    auto* restrict gz = vgl.data(3);
+
+    for (int ib = 0; ib < BasisSetSize; ib++) {
+        gx[ib] = 0;
+        gy[ib] = 0;
+        gz[ib] = 0;
+    }
+
+    const auto& IonID(ions_.GroupID);
+    const auto& d_table = P.getDistTableAB(myTableIndex);
+    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
+                                                    d_table.getDistRow(iat);
+    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
+                                                     d_table.getDisplRow(iat);
+
+    PosType Tv;
+    Tv[0] = Tv[1] = Tv[2] = 0;
+    // Since LCAO's are written only in terms of (r-R), ionic derivatives only
+    // exist for the atomic center that we wish to take derivatives of.
+    // Moreover, we can obtain an ion derivative by multiplying an electron
+    // derivative by -1.0.  Handling this sign is left to LCAOrbitalSet.  For
+    // now, just note this is the electron VGL function.
+    LOBasisSet[IonID[jion]]->evaluateVGL(
+        P.getLattice(), dist[jion], displ[jion], BasisOffset[jion], vgl, Tv);
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::evaluateGradSourceVGL(const ParticleSet& P,
+    int iat, const ParticleSet& ions, int jion, vghgh_type& vghgh)
+{
+    // We need to zero out the temporary array vghgh.
+    auto* restrict gx = vghgh.data(1);
+    auto* restrict gy = vghgh.data(2);
+    auto* restrict gz = vghgh.data(3);
+
+    auto* restrict hxx = vghgh.data(4);
+    auto* restrict hxy = vghgh.data(5);
+    auto* restrict hxz = vghgh.data(6);
+    auto* restrict hyy = vghgh.data(7);
+    auto* restrict hyz = vghgh.data(8);
+    auto* restrict hzz = vghgh.data(9);
+
+    auto* restrict gxxx = vghgh.data(10);
+    auto* restrict gxxy = vghgh.data(11);
+    auto* restrict gxxz = vghgh.data(12);
+    auto* restrict gxyy = vghgh.data(13);
+    auto* restrict gxyz = vghgh.data(14);
+    auto* restrict gxzz = vghgh.data(15);
+    auto* restrict gyyy = vghgh.data(16);
+    auto* restrict gyyz = vghgh.data(17);
+    auto* restrict gyzz = vghgh.data(18);
+    auto* restrict gzzz = vghgh.data(19);
+
+    for (int ib = 0; ib < BasisSetSize; ib++) {
+        gx[ib] = 0;
+        gy[ib] = 0;
+        gz[ib] = 0;
+
+        hxx[ib] = 0;
+        hxy[ib] = 0;
+        hxz[ib] = 0;
+        hyy[ib] = 0;
+        hyz[ib] = 0;
+        hzz[ib] = 0;
+
+        gxxx[ib] = 0;
+        gxxy[ib] = 0;
+        gxxz[ib] = 0;
+        gxyy[ib] = 0;
+        gxyz[ib] = 0;
+        gxzz[ib] = 0;
+        gyyy[ib] = 0;
+        gyyz[ib] = 0;
+        gyzz[ib] = 0;
+        gzzz[ib] = 0;
+    }
+
+    // Since jion is indexed on the source ions not the ions_ the distinction
+    // between ions_ and ions is extremely important.
+    const auto& IonID(ions.GroupID);
+    const auto& d_table = P.getDistTableAB(myTableIndex);
+    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
+                                                    d_table.getDistRow(iat);
+    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
+                                                     d_table.getDisplRow(iat);
+
+    // Since LCAO's are written only in terms of (r-R), ionic derivatives only
+    // exist for the atomic center that we wish to take derivatives of.
+    // Moreover, we can obtain an ion derivative by multiplying an electron
+    // derivative by -1.0.  Handling this sign is left to LCAOrbitalSet.  For
+    // now, just note this is the electron VGL function.
+
+    LOBasisSet[IonID[jion]]->evaluateVGHGH(
+        P.getLattice(), dist[jion], displ[jion], BasisOffset[jion], vghgh);
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::add(int icenter, std::unique_ptr<COT> aos)
+{
+    LOBasisSet[icenter] = std::move(aos);
+}
+
+// TODO: this should be redone with template template parameters
+
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaCartesianTensor<double>,
+        double>,
+    double>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaCartesianTensor<double>,
+        std::complex<double>>,
+    std::complex<double>>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaCartesianTensor<float>,
+        float>,
+    float>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaCartesianTensor<float>,
+        std::complex<float>>,
+    std::complex<float>>;
+
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaSphericalTensor<double>,
+        double>,
+    double>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaSphericalTensor<double>,
+        std::complex<double>>,
+    std::complex<double>>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaSphericalTensor<float>,
+        float>,
+    float>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaSphericalTensor<float>,
+        std::complex<float>>,
+    std::complex<float>>;
+
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaCartesianTensor<double>, double>,
+    double>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaCartesianTensor<double>, std::complex<double>>,
+    std::complex<double>>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaCartesianTensor<float>, float>,
+    float>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaCartesianTensor<float>, std::complex<float>>,
+    std::complex<float>>;
+
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaSphericalTensor<double>, double>,
+    double>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaSphericalTensor<double>, std::complex<double>>,
+    std::complex<double>>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaSphericalTensor<float>, float>,
+    float>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaSphericalTensor<float>, std::complex<float>>,
+    std::complex<float>>;
+
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaCartesianTensor<double>, double>,
+    double>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaCartesianTensor<double>, std::complex<double>>,
+    std::complex<double>>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
+        SoaCartesianTensor<float>, float>,
+    float>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
+        SoaCartesianTensor<float>, std::complex<float>>,
+    std::complex<float>>;
+
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaSphericalTensor<double>, double>,
+    double>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaSphericalTensor<double>, std::complex<double>>,
+    std::complex<double>>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
+        SoaSphericalTensor<float>, float>,
+    float>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
+        SoaSphericalTensor<float>, std::complex<float>>,
+    std::complex<float>>;
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h
new file mode 100644
index 00000000000..d0c446fae0d
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h
@@ -0,0 +1,170 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by:
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+/** @file SoaLocalizedBasisSetT.h
+ * @brief A derived class from BasisSetBase
+ *
+ * This is intended as a replacement for MolecularWaveFunctionComponent and
+ * any other localized basis set.
+ */
+#ifndef QMCPLUSPLUS_SOA_LOCALIZEDBASISSETT_H
+#define QMCPLUSPLUS_SOA_LOCALIZEDBASISSETT_H
+
+#include <memory>
+#include "QMCWaveFunctions/BasisSetBase.h"
+#include "OMPTarget/OffloadAlignedAllocators.hpp"
+
+namespace qmcplusplus
+{
+/** A localized basis set derived from SoaBasisSetBase<ORBT>
+ *
+ * This class performs the evaluation of the basis functions and their
+ * derivatives for each of the N-particles in a configuration.
+ * The template parameter COT denotes Centered-Orbital-Type which provides
+ * a set of localized orbitals associated with a center.
+ * The template parameter ORBT denotes the orbital value return type
+ */
+template<class COT, typename ORBT>
+class SoaLocalizedBasisSetT : public SoaBasisSetBase<ORBT>
+{
+public:
+  using RealType  = typename COT::RealType;
+  using BaseType  = SoaBasisSetBase<ORBT>;
+  using ValueType = ORBT;
+
+  using vgl_type          = typename BaseType::vgl_type;
+  using vgh_type          = typename BaseType::vgh_type;
+  using vghgh_type        = typename BaseType::vghgh_type;
+  using PosType           = typename ParticleSet::PosType;
+  using OffloadMWVGLArray = typename BaseType::OffloadMWVGLArray;
+  using OffloadMWVArray   = typename BaseType::OffloadMWVArray;
+
+  using BaseType::BasisSetSize;
+
+  ///number of centers, e.g., ions
+  size_t NumCenters;
+  ///number of quantum particles
+  size_t NumTargets;
+  ///ion particle set
+  const ParticleSet& ions_;
+  ///number of quantum particles
+  const int myTableIndex;
+  ///Global Coordinate of Supertwist read from HDF5
+  PosType SuperTwist;
+
+
+  /** container to store the offsets of the basis functions for each center
+   * Due to potential reordering of ions, offsets can be in any order.
+   */
+  std::vector<size_t> BasisOffset;
+
+  /** container of the unique pointers to the Atomic Orbitals
+   *
+   * size of LOBasisSet = number  of unique centers
+   */
+  std::vector<std::unique_ptr<COT>> LOBasisSet;
+
+  /** constructor
+   * @param ions ionic system
+   * @param els electronic system
+   */
+  SoaLocalizedBasisSetT(ParticleSet& ions, ParticleSet& els);
+
+  /** copy constructor */
+  SoaLocalizedBasisSetT(const SoaLocalizedBasisSetT& a);
+
+  /** makeClone */
+  BaseType* makeClone() const override { return new SoaLocalizedBasisSetT<COT, ORBT>(*this); }
+
+  /** set Number of periodic Images to evaluate the orbitals. 
+      Set to 0 for non-PBC, and set manually in the input.
+      Passes the pre-computed phase factor for evaluation of complex wavefunction. If WF is real Phase_factor is real and equals 1 if gamma or -1 if non-Gamma.  
+  */
+  void setPBCParams(const TinyVector<int, 3>& PBCImages,
+                    const TinyVector<double, 3> Sup_Twist,
+                    const std::vector<ORBT>& phase_factor);
+
+  /** set BasisSetSize and allocate mVGL container
+   */
+  void setBasisSetSize(int nbs) override;
+
+  /**  Determine which orbitals are S-type.  Used by cusp correction.
+    */
+  void queryOrbitalsForSType(const std::vector<bool>& corrCenter, std::vector<bool>& is_s_orbital) const override;
+
+  /** compute VGL 
+   * @param P quantum particleset
+   * @param iat active particle
+   * @param vgl Matrix(5,BasisSetSize)
+   * @param trialMove if true, use getTempDists()/getTempDispls()
+   */
+  void evaluateVGL(const ParticleSet& P, int iat, vgl_type& vgl) override;
+
+  /** compute V using packed array with all walkers 
+   * @param P_list list of quantum particleset (one for each walker)
+   * @param iat active particle
+   * @param v   Array(n_walkers, BasisSetSize)
+   */
+  void mw_evaluateValue(const RefVectorWithLeader<ParticleSet>& P_list, int iat, OffloadMWVArray& v) override;
+
+  /** compute VGL using packed array with all walkers 
+   * @param P_list list of quantum particleset (one for each walker)
+   * @param iat active particle
+   * @param vgl   Array(n_walkers, 5, BasisSetSize)
+   */
+  void mw_evaluateVGL(const RefVectorWithLeader<ParticleSet>& P_list, int iat, OffloadMWVGLArray& vgl) override;
+
+  /** compute VGH 
+   * @param P quantum particleset
+   * @param iat active particle
+   * @param vgl Matrix(10,BasisSetSize)
+   * @param trialMove if true, use getTempDists()/getTempDispls()
+   */
+  void evaluateVGH(const ParticleSet& P, int iat, vgh_type& vgh) override;
+
+  /** compute VGHGH 
+   * @param P quantum particleset
+   * @param iat active particle
+   * @param vghgh Matrix(20,BasisSetSize)
+   * @param trialMove if true, use getTempDists()/getTempDispls()
+   */
+  void evaluateVGHGH(const ParticleSet& P, int iat, vghgh_type& vghgh) override;
+
+  /** compute values for the iat-paricle move
+   *
+   * Always uses getTempDists() and getTempDispls()
+   * Tv is a translation vector; In PBC, in order to reduce the number
+   * of images that need to be summed over when generating the AO the 
+   * nearest image displacement, dr, is used. Tv corresponds to the 
+   * translation that takes the 'general displacement' (displacement
+   * between ion position and electron position) to the nearest image 
+   * displacement. We need to keep track of Tv because it must be add
+   * as a phase factor, i.e., exp(i*k*Tv).
+   */
+  void evaluateV(const ParticleSet& P, int iat, ORBT* restrict vals) override;
+
+  void evaluateGradSourceV(const ParticleSet& P, int iat, const ParticleSet& ions, int jion, vgl_type& vgl) override;
+
+  void evaluateGradSourceVGL(const ParticleSet& P,
+                             int iat,
+                             const ParticleSet& ions,
+                             int jion,
+                             vghgh_type& vghgh) override;
+
+  /** add a new set of Centered Atomic Orbitals
+   * @param icenter the index of the center
+   * @param aos a set of Centered Atomic Orbitals
+   */
+  void add(int icenter, std::unique_ptr<COT> aos);
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/RotatedSPOsT.cpp b/src/QMCWaveFunctions/RotatedSPOsT.cpp
index f76150ec2aa..916dfd4888c 100644
--- a/src/QMCWaveFunctions/RotatedSPOsT.cpp
+++ b/src/QMCWaveFunctions/RotatedSPOsT.cpp
@@ -21,1022 +21,1014 @@
 
 namespace qmcplusplus
 {
-template<typename T>
-RotatedSPOsT<T>::RotatedSPOsT(const std::string& my_name, std::unique_ptr<SPOSetT<T>>&& spos)
-    : SPOSetT<T>(my_name), OptimizableObject(my_name), Phi(std::move(spos)), nel_major_(0), params_supplied(false)
+template <typename T>
+RotatedSPOsT<T>::RotatedSPOsT(
+    const std::string& my_name, std::unique_ptr<SPOSetT<T>>&& spos) :
+    SPOSetT<T>(my_name),
+    OptimizableObject(my_name),
+    Phi(std::move(spos)),
+    nel_major_(0),
+    params_supplied(false)
 {
-  this->OrbitalSetSize = Phi->getOrbitalSetSize();
+    this->OrbitalSetSize = Phi->getOrbitalSetSize();
 }
 
-template<typename T>
+template <typename T>
 RotatedSPOsT<T>::~RotatedSPOsT()
-{}
+{
+}
 
-template<typename T>
-void RotatedSPOsT<T>::setRotationParameters(const std::vector<RealType>& param_list)
+template <typename T>
+void
+RotatedSPOsT<T>::setRotationParameters(const std::vector<RealType>& param_list)
 {
-  params          = param_list;
-  params_supplied = true;
+    params = param_list;
+    params_supplied = true;
 }
 
-template<typename T>
-void RotatedSPOsT<T>::createRotationIndices(int nel, int nmo, RotationIndices& rot_indices)
+template <typename T>
+void
+RotatedSPOsT<T>::createRotationIndices(
+    int nel, int nmo, RotationIndices& rot_indices)
 {
-  for (int i = 0; i < nel; i++)
-    for (int j = nel; j < nmo; j++)
-      rot_indices.emplace_back(i, j);
+    for (int i = 0; i < nel; i++)
+        for (int j = nel; j < nmo; j++)
+            rot_indices.emplace_back(i, j);
 }
 
-template<typename T>
-void RotatedSPOsT<T>::createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices)
+template <typename T>
+void
+RotatedSPOsT<T>::createRotationIndicesFull(
+    int nel, int nmo, RotationIndices& rot_indices)
 {
-  rot_indices.reserve(nmo * (nmo - 1) / 2);
-
-  // start with core-active rotations - put them at the beginning of the list
-  // so it matches the other list of rotation indices
-  for (int i = 0; i < nel; i++)
-    for (int j = nel; j < nmo; j++)
-      rot_indices.emplace_back(i, j);
-
-  // Add core-core rotations - put them at the end of the list
-  for (int i = 0; i < nel; i++)
-    for (int j = i + 1; j < nel; j++)
-      rot_indices.emplace_back(i, j);
-
-  // Add active-active rotations - put them at the end of the list
-  for (int i = nel; i < nmo; i++)
-    for (int j = i + 1; j < nmo; j++)
-      rot_indices.emplace_back(i, j);
+    rot_indices.reserve(nmo * (nmo - 1) / 2);
+
+    // start with core-active rotations - put them at the beginning of the list
+    // so it matches the other list of rotation indices
+    for (int i = 0; i < nel; i++)
+        for (int j = nel; j < nmo; j++)
+            rot_indices.emplace_back(i, j);
+
+    // Add core-core rotations - put them at the end of the list
+    for (int i = 0; i < nel; i++)
+        for (int j = i + 1; j < nel; j++)
+            rot_indices.emplace_back(i, j);
+
+    // Add active-active rotations - put them at the end of the list
+    for (int i = nel; i < nmo; i++)
+        for (int j = i + 1; j < nmo; j++)
+            rot_indices.emplace_back(i, j);
 }
 
-template<typename T>
-void RotatedSPOsT<T>::constructAntiSymmetricMatrix(const RotationIndices& rot_indices,
-                                                   const std::vector<RealType>& param,
-                                                   ValueMatrix& rot_mat)
+template <typename T>
+void
+RotatedSPOsT<T>::constructAntiSymmetricMatrix(
+    const RotationIndices& rot_indices, const std::vector<RealType>& param,
+    ValueMatrix& rot_mat)
 {
-  assert(rot_indices.size() == param.size());
-  // Assumes rot_mat is of the correct size
+    assert(rot_indices.size() == param.size());
+    // Assumes rot_mat is of the correct size
 
-  rot_mat = 0.0;
+    rot_mat = 0.0;
 
-  for (int i = 0; i < rot_indices.size(); i++)
-  {
-    const int p      = rot_indices[i].first;
-    const int q      = rot_indices[i].second;
-    const RealType x = param[i];
+    for (int i = 0; i < rot_indices.size(); i++) {
+        const int p = rot_indices[i].first;
+        const int q = rot_indices[i].second;
+        const RealType x = param[i];
 
-    rot_mat[q][p] = x;
-    rot_mat[p][q] = -x;
-  }
+        rot_mat[q][p] = x;
+        rot_mat[p][q] = -x;
+    }
 }
 
-template<typename T>
-void RotatedSPOsT<T>::extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices,
-                                                           const ValueMatrix& rot_mat,
-                                                           std::vector<RealType>& param)
+template <typename T>
+void
+RotatedSPOsT<T>::extractParamsFromAntiSymmetricMatrix(
+    const RotationIndices& rot_indices, const ValueMatrix& rot_mat,
+    std::vector<RealType>& param)
 {
-  assert(rot_indices.size() == param.size());
-  // Assumes rot_mat is of the correct size
-
-  for (int i = 0; i < rot_indices.size(); i++)
-  {
-    const int p = rot_indices[i].first;
-    const int q = rot_indices[i].second;
-    param[i]    = rot_mat[q][p];
-  }
+    assert(rot_indices.size() == param.size());
+    // Assumes rot_mat is of the correct size
+
+    for (int i = 0; i < rot_indices.size(); i++) {
+        const int p = rot_indices[i].first;
+        const int q = rot_indices[i].second;
+        param[i] = rot_mat[q][p];
+    }
 }
 
-template<typename T>
-void RotatedSPOsT<T>::resetParametersExclusive(const opt_variables_type& active)
+template <typename T>
+void
+RotatedSPOsT<T>::resetParametersExclusive(const opt_variables_type& active)
 {
-  std::vector<RealType> delta_param(m_act_rot_inds.size());
-
-  size_t psize = m_act_rot_inds.size();
-
-  if (use_global_rot_)
-  {
-    psize = m_full_rot_inds.size();
-    assert(psize >= m_act_rot_inds.size());
-  }
-
-  std::vector<RealType> old_param(psize);
-  std::vector<RealType> new_param(psize);
-
-  for (int i = 0; i < m_act_rot_inds.size(); i++)
-  {
-    int loc         = this->myVars.where(i);
-    delta_param[i]  = active[loc] - this->myVars[i];
-    this->myVars[i] = active[loc];
-  }
-
-  if (use_global_rot_)
-  {
-    for (int i = 0; i < m_full_rot_inds.size(); i++)
-      old_param[i] = myVarsFull[i];
-
-    applyDeltaRotation(delta_param, old_param, new_param);
-
-    // Save the the params
-    for (int i = 0; i < m_full_rot_inds.size(); i++)
-      myVarsFull[i] = new_param[i];
-  }
-  else
-  {
-    apply_rotation(delta_param, false);
-
-    // Save the parameters in the history list
-    history_params_.push_back(delta_param);
-  }
+    std::vector<RealType> delta_param(m_act_rot_inds.size());
+
+    size_t psize = m_act_rot_inds.size();
+
+    if (use_global_rot_) {
+        psize = m_full_rot_inds.size();
+        assert(psize >= m_act_rot_inds.size());
+    }
+
+    std::vector<RealType> old_param(psize);
+    std::vector<RealType> new_param(psize);
+
+    for (int i = 0; i < m_act_rot_inds.size(); i++) {
+        int loc = this->myVars.where(i);
+        // FIXME extra call to real()
+        delta_param[i] = real(active[loc] - this->myVars[i]);
+        this->myVars[i] = active[loc];
+    }
+
+    if (use_global_rot_) {
+        for (int i = 0; i < m_full_rot_inds.size(); i++) {
+            // FIXME extra call to real()
+            old_param[i] = real(myVarsFull[i]);
+        }
+
+        applyDeltaRotation(delta_param, old_param, new_param);
+
+        // Save the the params
+        for (int i = 0; i < m_full_rot_inds.size(); i++)
+            myVarsFull[i] = new_param[i];
+    }
+    else {
+        apply_rotation(delta_param, false);
+
+        // Save the parameters in the history list
+        history_params_.push_back(delta_param);
+    }
 }
 
-template<typename T>
-void RotatedSPOsT<T>::writeVariationalParameters(hdf_archive& hout)
+template <typename T>
+void
+RotatedSPOsT<T>::writeVariationalParameters(hdf_archive& hout)
 {
-  hout.push("RotatedSPOsT");
-  if (use_global_rot_)
-  {
-    hout.push("rotation_global");
-    std::string rot_global_name = std::string("rotation_global_") + SPOSetT<T>::getName();
-
-    int nparam_full = myVarsFull.size();
-    std::vector<RealType> full_params(nparam_full);
-    for (int i = 0; i < nparam_full; i++)
-      full_params[i] = myVarsFull[i];
-
-    hout.write(full_params, rot_global_name);
-    hout.pop();
-  }
-  else
-  {
-    hout.push("rotation_history");
-    size_t rows = history_params_.size();
-    size_t cols = 0;
-    if (rows > 0)
-      cols = history_params_[0].size();
-
-    Matrix<RealType> tmp(rows, cols);
-    for (size_t i = 0; i < rows; i++)
-      for (size_t j = 0; j < cols; j++)
-        tmp(i, j) = history_params_[i][j];
-
-    std::string rot_hist_name = std::string("rotation_history_") + SPOSetT<T>::getName();
-    hout.write(tmp, rot_hist_name);
-    hout.pop();
-  }
+    hout.push("RotatedSPOsT");
+    if (use_global_rot_) {
+        hout.push("rotation_global");
+        std::string rot_global_name =
+            std::string("rotation_global_") + SPOSetT<T>::getName();
+
+        int nparam_full = myVarsFull.size();
+        std::vector<RealType> full_params(nparam_full);
+        for (int i = 0; i < nparam_full; i++) {
+            // FIXME extra call to real()
+            full_params[i] = real(myVarsFull[i]);
+        }
 
-  // Save myVars in order to restore object state exactly
-  //  The values aren't meaningful, but they need to match those saved in
-  //  VariableSet
-  hout.push("rotation_params");
-  std::string rot_params_name = std::string("rotation_params_") + SPOSetT<T>::getName();
+        hout.write(full_params, rot_global_name);
+        hout.pop();
+    }
+    else {
+        hout.push("rotation_history");
+        size_t rows = history_params_.size();
+        size_t cols = 0;
+        if (rows > 0)
+            cols = history_params_[0].size();
+
+        Matrix<RealType> tmp(rows, cols);
+        for (size_t i = 0; i < rows; i++)
+            for (size_t j = 0; j < cols; j++)
+                tmp(i, j) = history_params_[i][j];
+
+        std::string rot_hist_name =
+            std::string("rotation_history_") + SPOSetT<T>::getName();
+        hout.write(tmp, rot_hist_name);
+        hout.pop();
+    }
 
-  int nparam = this->myVars.size();
-  std::vector<RealType> params(nparam);
-  for (int i = 0; i < nparam; i++)
-    params[i] = this->myVars[i];
+    // Save myVars in order to restore object state exactly
+    //  The values aren't meaningful, but they need to match those saved in
+    //  VariableSet
+    hout.push("rotation_params");
+    std::string rot_params_name =
+        std::string("rotation_params_") + SPOSetT<T>::getName();
+
+    int nparam = this->myVars.size();
+    std::vector<RealType> params(nparam);
+    for (int i = 0; i < nparam; i++) {
+        // FIXME extra call to real()
+        params[i] = real(this->myVars[i]);
+    }
 
-  hout.write(params, rot_params_name);
-  hout.pop();
+    hout.write(params, rot_params_name);
+    hout.pop();
 
-  hout.pop();
+    hout.pop();
 }
 
-template<typename T>
-void RotatedSPOsT<T>::readVariationalParameters(hdf_archive& hin)
+template <typename T>
+void
+RotatedSPOsT<T>::readVariationalParameters(hdf_archive& hin)
 {
-  hin.push("RotatedSPOsT", false);
+    hin.push("RotatedSPOsT", false);
+
+    bool grp_hist_exists = hin.is_group("rotation_history");
+    bool grp_global_exists = hin.is_group("rotation_global");
+    if (!grp_hist_exists && !grp_global_exists)
+        app_warning() << "Rotation parameters not found in VP file";
+
+    if (grp_global_exists) {
+        hin.push("rotation_global", false);
+        std::string rot_global_name =
+            std::string("rotation_global_") + SPOSetT<T>::getName();
+
+        std::vector<int> sizes(1);
+        if (!hin.getShape<RealType>(rot_global_name, sizes))
+            throw std::runtime_error(
+                "Failed to read rotation_global in VP file");
+
+        int nparam_full_actual = sizes[0];
+        int nparam_full = myVarsFull.size();
+
+        if (nparam_full != nparam_full_actual) {
+            std::ostringstream tmp_err;
+            tmp_err << "Expected number of full rotation parameters ("
+                    << nparam_full << ") does not match number in file ("
+                    << nparam_full_actual << ")";
+            throw std::runtime_error(tmp_err.str());
+        }
+        std::vector<RealType> full_params(nparam_full);
+        hin.read(full_params, rot_global_name);
+        for (int i = 0; i < nparam_full; i++)
+            myVarsFull[i] = full_params[i];
 
-  bool grp_hist_exists   = hin.is_group("rotation_history");
-  bool grp_global_exists = hin.is_group("rotation_global");
-  if (!grp_hist_exists && !grp_global_exists)
-    app_warning() << "Rotation parameters not found in VP file";
+        hin.pop();
 
-  if (grp_global_exists)
-  {
-    hin.push("rotation_global", false);
-    std::string rot_global_name = std::string("rotation_global_") + SPOSetT<T>::getName();
+        applyFullRotation(full_params, true);
+    }
+    else if (grp_hist_exists) {
+        hin.push("rotation_history", false);
+        std::string rot_hist_name =
+            std::string("rotation_history_") + SPOSetT<T>::getName();
+        std::vector<int> sizes(2);
+        if (!hin.getShape<RealType>(rot_hist_name, sizes))
+            throw std::runtime_error(
+                "Failed to read rotation history in VP file");
+
+        int rows = sizes[0];
+        int cols = sizes[1];
+        history_params_.resize(rows);
+        Matrix<RealType> tmp(rows, cols);
+        hin.read(tmp, rot_hist_name);
+        for (size_t i = 0; i < rows; i++) {
+            history_params_[i].resize(cols);
+            for (size_t j = 0; j < cols; j++)
+                history_params_[i][j] = tmp(i, j);
+        }
 
-    std::vector<int> sizes(1);
-    if (!hin.getShape<RealType>(rot_global_name, sizes))
-      throw std::runtime_error("Failed to read rotation_global in VP file");
-
-    int nparam_full_actual = sizes[0];
-    int nparam_full        = myVarsFull.size();
-
-    if (nparam_full != nparam_full_actual)
-    {
-      std::ostringstream tmp_err;
-      tmp_err << "Expected number of full rotation parameters (" << nparam_full << ") does not match number in file ("
-              << nparam_full_actual << ")";
-      throw std::runtime_error(tmp_err.str());
+        hin.pop();
+
+        applyRotationHistory();
     }
-    std::vector<RealType> full_params(nparam_full);
-    hin.read(full_params, rot_global_name);
-    for (int i = 0; i < nparam_full; i++)
-      myVarsFull[i] = full_params[i];
 
-    hin.pop();
+    hin.push("rotation_params", false);
+    std::string rot_param_name =
+        std::string("rotation_params_") + SPOSetT<T>::getName();
 
-    applyFullRotation(full_params, true);
-  }
-  else if (grp_hist_exists)
-  {
-    hin.push("rotation_history", false);
-    std::string rot_hist_name = std::string("rotation_history_") + SPOSetT<T>::getName();
-    std::vector<int> sizes(2);
-    if (!hin.getShape<RealType>(rot_hist_name, sizes))
-      throw std::runtime_error("Failed to read rotation history in VP file");
-
-    int rows = sizes[0];
-    int cols = sizes[1];
-    history_params_.resize(rows);
-    Matrix<RealType> tmp(rows, cols);
-    hin.read(tmp, rot_hist_name);
-    for (size_t i = 0; i < rows; i++)
-    {
-      history_params_[i].resize(cols);
-      for (size_t j = 0; j < cols; j++)
-        history_params_[i][j] = tmp(i, j);
+    std::vector<int> sizes(1);
+    if (!hin.getShape<RealType>(rot_param_name, sizes))
+        throw std::runtime_error("Failed to read rotation_params in VP file");
+
+    int nparam_actual = sizes[0];
+    int nparam = this->myVars.size();
+    if (nparam != nparam_actual) {
+        std::ostringstream tmp_err;
+        tmp_err << "Expected number of rotation parameters (" << nparam
+                << ") does not match number in file (" << nparam_actual << ")";
+        throw std::runtime_error(tmp_err.str());
     }
 
+    std::vector<RealType> params(nparam);
+    hin.read(params, rot_param_name);
+    for (int i = 0; i < nparam; i++)
+        this->myVars[i] = params[i];
+
     hin.pop();
 
-    applyRotationHistory();
-  }
+    hin.pop();
+}
 
-  hin.push("rotation_params", false);
-  std::string rot_param_name = std::string("rotation_params_") + SPOSetT<T>::getName();
+template <typename T>
+void
+RotatedSPOsT<T>::buildOptVariables(const size_t nel)
+{
+#if !defined(QMC_COMPLEX)
+    /* Only rebuild optimized variables if more after-rotation orbitals are
+     * needed Consider ROHF, there is only one set of SPO for both spin up and
+     * down Nup > Ndown. nel_major_ will be set Nup.
+     *
+     * Use the size of myVars as a flag to avoid building the rotation
+     * parameters again when a clone is made (the DiracDeterminant constructor
+     * calls buildOptVariables)
+     */
+    if (nel > nel_major_ && this->myVars.size() == 0) {
+        nel_major_ = nel;
 
-  std::vector<int> sizes(1);
-  if (!hin.getShape<RealType>(rot_param_name, sizes))
-    throw std::runtime_error("Failed to read rotation_params in VP file");
+        const size_t nmo = Phi->getOrbitalSetSize();
 
-  int nparam_actual = sizes[0];
-  int nparam        = this->myVars.size();
-  if (nparam != nparam_actual)
-  {
-    std::ostringstream tmp_err;
-    tmp_err << "Expected number of rotation parameters (" << nparam << ") does not match number in file ("
-            << nparam_actual << ")";
-    throw std::runtime_error(tmp_err.str());
-  }
+        // create active rotation parameter indices
+        RotationIndices created_m_act_rot_inds;
 
-  std::vector<RealType> params(nparam);
-  hin.read(params, rot_param_name);
-  for (int i = 0; i < nparam; i++)
-    this->myVars[i] = params[i];
+        RotationIndices created_full_rot_inds;
+        if (use_global_rot_)
+            createRotationIndicesFull(nel, nmo, created_full_rot_inds);
 
-  hin.pop();
+        createRotationIndices(nel, nmo, created_m_act_rot_inds);
 
-  hin.pop();
+        buildOptVariables(created_m_act_rot_inds, created_full_rot_inds);
+    }
+#endif
 }
 
-template<typename T>
-void RotatedSPOsT<T>::buildOptVariables(const size_t nel)
+template <typename T>
+void
+RotatedSPOsT<T>::buildOptVariables(
+    const RotationIndices& rotations, const RotationIndices& full_rotations)
 {
 #if !defined(QMC_COMPLEX)
-  /* Only rebuild optimized variables if more after-rotation orbitals are
-	 * needed Consider ROHF, there is only one set of SPO for both spin up and
-	 * down Nup > Ndown. nel_major_ will be set Nup.
-	 *
-	 * Use the size of myVars as a flag to avoid building the rotation
-	 * parameters again when a clone is made (the DiracDeterminant constructor
-	 * calls buildOptVariables)
-	 */
-  if (nel > nel_major_ && this->myVars.size() == 0)
-  {
-    nel_major_ = nel;
-
     const size_t nmo = Phi->getOrbitalSetSize();
 
-    // create active rotation parameter indices
-    RotationIndices created_m_act_rot_inds;
+    // create active rotations
+    m_act_rot_inds = rotations;
 
-    RotationIndices created_full_rot_inds;
     if (use_global_rot_)
-      createRotationIndicesFull(nel, nmo, created_full_rot_inds);
+        m_full_rot_inds = full_rotations;
 
-    createRotationIndices(nel, nmo, created_m_act_rot_inds);
+    if (use_global_rot_)
+        app_log() << "Orbital rotation using global rotation" << std::endl;
+    else
+        app_log() << "Orbital rotation using history" << std::endl;
 
-    buildOptVariables(created_m_act_rot_inds, created_full_rot_inds);
-  }
-#endif
-}
+    // This will add the orbital rotation parameters to myVars
+    // and will also read in initial parameter values supplied in input file
+    int p, q;
+    int nparams_active = m_act_rot_inds.size();
 
-template<typename T>
-void RotatedSPOsT<T>::buildOptVariables(const RotationIndices& rotations, const RotationIndices& full_rotations)
-{
-#if !defined(QMC_COMPLEX)
-  const size_t nmo = Phi->getOrbitalSetSize();
-
-  // create active rotations
-  m_act_rot_inds = rotations;
-
-  if (use_global_rot_)
-    m_full_rot_inds = full_rotations;
-
-  if (use_global_rot_)
-    app_log() << "Orbital rotation using global rotation" << std::endl;
-  else
-    app_log() << "Orbital rotation using history" << std::endl;
-
-  // This will add the orbital rotation parameters to myVars
-  // and will also read in initial parameter values supplied in input file
-  int p, q;
-  int nparams_active = m_act_rot_inds.size();
-
-  app_log() << "nparams_active: " << nparams_active << " params2.size(): " << params.size() << std::endl;
-  if (params_supplied)
-    if (nparams_active != params.size())
-      throw std::runtime_error("The number of supplied orbital rotation parameters does not "
-                               "match number prdouced by the slater "
-                               "expansion. \n");
-
-  this->myVars.clear();
-  for (int i = 0; i < nparams_active; i++)
-  {
-    p = m_act_rot_inds[i].first;
-    q = m_act_rot_inds[i].second;
-    std::stringstream sstr;
-    sstr << SPOSetT<T>::getName() << "_orb_rot_" << (p < 10 ? "0" : "") << (p < 100 ? "0" : "") << (p < 1000 ? "0" : "")
-         << p << "_" << (q < 10 ? "0" : "") << (q < 100 ? "0" : "") << (q < 1000 ? "0" : "") << q;
-
-    // If the user input parameters, use those. Otherwise, initialize the
-    // parameters to zero
+    app_log() << "nparams_active: " << nparams_active
+              << " params2.size(): " << params.size() << std::endl;
     if (params_supplied)
-    {
-      this->myVars.insert(sstr.str(), params[i]);
+        if (nparams_active != params.size())
+            throw std::runtime_error(
+                "The number of supplied orbital rotation parameters does not "
+                "match number prdouced by the slater "
+                "expansion. \n");
+
+    this->myVars.clear();
+    for (int i = 0; i < nparams_active; i++) {
+        p = m_act_rot_inds[i].first;
+        q = m_act_rot_inds[i].second;
+        std::stringstream sstr;
+        sstr << SPOSetT<T>::getName() << "_orb_rot_" << (p < 10 ? "0" : "")
+             << (p < 100 ? "0" : "") << (p < 1000 ? "0" : "") << p << "_"
+             << (q < 10 ? "0" : "") << (q < 100 ? "0" : "")
+             << (q < 1000 ? "0" : "") << q;
+
+        // If the user input parameters, use those. Otherwise, initialize the
+        // parameters to zero
+        if (params_supplied) {
+            this->myVars.insert(sstr.str(), params[i]);
+        }
+        else {
+            this->myVars.insert(sstr.str(), 0.0);
+        }
     }
-    else
-    {
-      this->myVars.insert(sstr.str(), 0.0);
+
+    if (use_global_rot_) {
+        myVarsFull.clear();
+        for (int i = 0; i < m_full_rot_inds.size(); i++) {
+            p = m_full_rot_inds[i].first;
+            q = m_full_rot_inds[i].second;
+            std::stringstream sstr;
+            sstr << SPOSetT<T>::getName() << "_orb_rot_" << (p < 10 ? "0" : "")
+                 << (p < 100 ? "0" : "") << (p < 1000 ? "0" : "") << p << "_"
+                 << (q < 10 ? "0" : "") << (q < 100 ? "0" : "")
+                 << (q < 1000 ? "0" : "") << q;
+
+            if (params_supplied && i < m_act_rot_inds.size())
+                myVarsFull.insert(sstr.str(), params[i]);
+            else
+                myVarsFull.insert(sstr.str(), 0.0);
+        }
     }
-  }
-
-  if (use_global_rot_)
-  {
-    myVarsFull.clear();
-    for (int i = 0; i < m_full_rot_inds.size(); i++)
-    {
-      p = m_full_rot_inds[i].first;
-      q = m_full_rot_inds[i].second;
-      std::stringstream sstr;
-      sstr << SPOSetT<T>::getName() << "_orb_rot_" << (p < 10 ? "0" : "") << (p < 100 ? "0" : "")
-           << (p < 1000 ? "0" : "") << p << "_" << (q < 10 ? "0" : "") << (q < 100 ? "0" : "") << (q < 1000 ? "0" : "")
-           << q;
-
-      if (params_supplied && i < m_act_rot_inds.size())
-        myVarsFull.insert(sstr.str(), params[i]);
-      else
-        myVarsFull.insert(sstr.str(), 0.0);
+
+    // Printing the parameters
+    if (true) {
+        app_log() << std::string(16, ' ') << "Parameter name"
+                  << std::string(15, ' ') << "Value\n";
+        this->myVars.print(app_log());
+    }
+
+    if (params_supplied) {
+        std::vector<RealType> param(m_act_rot_inds.size());
+        for (int i = 0; i < m_act_rot_inds.size(); i++)
+            param[i] = this->myVars[i];
+        apply_rotation(param, false);
     }
-  }
-
-  // Printing the parameters
-  if (true)
-  {
-    app_log() << std::string(16, ' ') << "Parameter name" << std::string(15, ' ') << "Value\n";
-    this->myVars.print(app_log());
-  }
-
-  if (params_supplied)
-  {
-    std::vector<RealType> param(m_act_rot_inds.size());
-    for (int i = 0; i < m_act_rot_inds.size(); i++)
-      param[i] = this->myVars[i];
-    apply_rotation(param, false);
-  }
 #endif
 }
 
-template<typename T>
-void RotatedSPOsT<T>::apply_rotation(const std::vector<RealType>& param, bool use_stored_copy)
+template <typename T>
+void
+RotatedSPOsT<T>::apply_rotation(
+    const std::vector<RealType>& param, bool use_stored_copy)
 {
-  assert(param.size() == m_act_rot_inds.size());
+    assert(param.size() == m_act_rot_inds.size());
 
-  const size_t nmo = Phi->getOrbitalSetSize();
-  ValueMatrix rot_mat(nmo, nmo);
+    const size_t nmo = Phi->getOrbitalSetSize();
+    ValueMatrix rot_mat(nmo, nmo);
 
-  constructAntiSymmetricMatrix(m_act_rot_inds, param, rot_mat);
+    constructAntiSymmetricMatrix(m_act_rot_inds, param, rot_mat);
 
-  /*
-	  rot_mat is now an anti-hermitian matrix. Now we convert
-	  it into a unitary matrix via rot_mat = exp(-rot_mat).
-	  Finally, apply unitary matrix to orbs.
-	*/
-  exponentiate_antisym_matrix(rot_mat);
-  Phi->applyRotation(rot_mat, use_stored_copy);
+    /*
+        rot_mat is now an anti-hermitian matrix. Now we convert
+        it into a unitary matrix via rot_mat = exp(-rot_mat).
+        Finally, apply unitary matrix to orbs.
+      */
+    exponentiate_antisym_matrix(rot_mat);
+    Phi->applyRotation(rot_mat, use_stored_copy);
 }
 
-template<typename T>
-void RotatedSPOsT<T>::applyDeltaRotation(const std::vector<RealType>& delta_param,
-                                         const std::vector<RealType>& old_param,
-                                         std::vector<RealType>& new_param)
+template <typename T>
+void
+RotatedSPOsT<T>::applyDeltaRotation(const std::vector<RealType>& delta_param,
+    const std::vector<RealType>& old_param, std::vector<RealType>& new_param)
 {
-  const size_t nmo = Phi->getOrbitalSetSize();
-  ValueMatrix new_rot_mat(nmo, nmo);
-  constructDeltaRotation(delta_param, old_param, m_act_rot_inds, m_full_rot_inds, new_param, new_rot_mat);
+    const size_t nmo = Phi->getOrbitalSetSize();
+    ValueMatrix new_rot_mat(nmo, nmo);
+    constructDeltaRotation(delta_param, old_param, m_act_rot_inds,
+        m_full_rot_inds, new_param, new_rot_mat);
 
-  Phi->applyRotation(new_rot_mat, true);
+    Phi->applyRotation(new_rot_mat, true);
 }
 
-template<typename T>
-void RotatedSPOsT<T>::constructDeltaRotation(const std::vector<RealType>& delta_param,
-                                             const std::vector<RealType>& old_param,
-                                             const RotationIndices& act_rot_inds,
-                                             const RotationIndices& full_rot_inds,
-                                             std::vector<RealType>& new_param,
-                                             ValueMatrix& new_rot_mat)
+template <typename T>
+void
+RotatedSPOsT<T>::constructDeltaRotation(
+    const std::vector<RealType>& delta_param,
+    const std::vector<RealType>& old_param, const RotationIndices& act_rot_inds,
+    const RotationIndices& full_rot_inds, std::vector<RealType>& new_param,
+    ValueMatrix& new_rot_mat)
 {
-  assert(delta_param.size() == act_rot_inds.size());
-  assert(old_param.size() == full_rot_inds.size());
-  assert(new_param.size() == full_rot_inds.size());
+    assert(delta_param.size() == act_rot_inds.size());
+    assert(old_param.size() == full_rot_inds.size());
+    assert(new_param.size() == full_rot_inds.size());
 
-  const size_t nmo = new_rot_mat.rows();
-  assert(new_rot_mat.rows() == new_rot_mat.cols());
+    const size_t nmo = new_rot_mat.rows();
+    assert(new_rot_mat.rows() == new_rot_mat.cols());
 
-  ValueMatrix old_rot_mat(nmo, nmo);
+    ValueMatrix old_rot_mat(nmo, nmo);
 
-  constructAntiSymmetricMatrix(full_rot_inds, old_param, old_rot_mat);
-  exponentiate_antisym_matrix(old_rot_mat);
+    constructAntiSymmetricMatrix(full_rot_inds, old_param, old_rot_mat);
+    exponentiate_antisym_matrix(old_rot_mat);
 
-  ValueMatrix delta_rot_mat(nmo, nmo);
+    ValueMatrix delta_rot_mat(nmo, nmo);
 
-  constructAntiSymmetricMatrix(act_rot_inds, delta_param, delta_rot_mat);
-  exponentiate_antisym_matrix(delta_rot_mat);
+    constructAntiSymmetricMatrix(act_rot_inds, delta_param, delta_rot_mat);
+    exponentiate_antisym_matrix(delta_rot_mat);
 
-  // Apply delta rotation to old rotation.
-  BLAS::gemm('N', 'N', nmo, nmo, nmo, 1.0, delta_rot_mat.data(), nmo, old_rot_mat.data(), nmo, 0.0, new_rot_mat.data(),
-             nmo);
+    // Apply delta rotation to old rotation.
+    BLAS::gemm('N', 'N', nmo, nmo, nmo, 1.0, delta_rot_mat.data(), nmo,
+        old_rot_mat.data(), nmo, 0.0, new_rot_mat.data(), nmo);
 
-  ValueMatrix log_rot_mat(nmo, nmo);
-  log_antisym_matrix(new_rot_mat, log_rot_mat);
-  extractParamsFromAntiSymmetricMatrix(full_rot_inds, log_rot_mat, new_param);
+    ValueMatrix log_rot_mat(nmo, nmo);
+    log_antisym_matrix(new_rot_mat, log_rot_mat);
+    extractParamsFromAntiSymmetricMatrix(full_rot_inds, log_rot_mat, new_param);
 }
 
-template<typename T>
-void RotatedSPOsT<T>::applyFullRotation(const std::vector<RealType>& full_param, bool use_stored_copy)
+template <typename T>
+void
+RotatedSPOsT<T>::applyFullRotation(
+    const std::vector<RealType>& full_param, bool use_stored_copy)
 {
-  assert(full_param.size() == m_full_rot_inds.size());
-
-  const size_t nmo = Phi->getOrbitalSetSize();
-  ValueMatrix rot_mat(nmo, nmo);
-  rot_mat = T(0);
+    assert(full_param.size() == m_full_rot_inds.size());
 
-  constructAntiSymmetricMatrix(m_full_rot_inds, full_param, rot_mat);
-
-  /*
-	  rot_mat is now an anti-hermitian matrix. Now we convert
-	  it into a unitary matrix via rot_mat = exp(-rot_mat).
-	  Finally, apply unitary matrix to orbs.
-	*/
-  exponentiate_antisym_matrix(rot_mat);
-  Phi->applyRotation(rot_mat, use_stored_copy);
+    const size_t nmo = Phi->getOrbitalSetSize();
+    ValueMatrix rot_mat(nmo, nmo);
+    rot_mat = T(0);
+
+    constructAntiSymmetricMatrix(m_full_rot_inds, full_param, rot_mat);
+
+    /*
+        rot_mat is now an anti-hermitian matrix. Now we convert
+        it into a unitary matrix via rot_mat = exp(-rot_mat).
+        Finally, apply unitary matrix to orbs.
+      */
+    exponentiate_antisym_matrix(rot_mat);
+    Phi->applyRotation(rot_mat, use_stored_copy);
 }
 
-template<typename T>
-void RotatedSPOsT<T>::applyRotationHistory()
+template <typename T>
+void
+RotatedSPOsT<T>::applyRotationHistory()
 {
-  for (auto delta_param : history_params_)
-  {
-    apply_rotation(delta_param, false);
-  }
+    for (auto delta_param : history_params_) {
+        apply_rotation(delta_param, false);
+    }
 }
 
 // compute exponential of a real, antisymmetric matrix by diagonalizing and
 // exponentiating eigenvalues
-template<typename T>
-void RotatedSPOsT<T>::exponentiate_antisym_matrix(ValueMatrix& mat)
+template <typename T>
+void
+RotatedSPOsT<T>::exponentiate_antisym_matrix(ValueMatrix& mat)
 {
-  const int n = mat.rows();
-  std::vector<std::complex<RealType>> mat_h(n * n, 0);
-  std::vector<RealType> eval(n, 0);
-  std::vector<std::complex<RealType>> work(2 * n, 0);
-  std::vector<RealType> rwork(3 * n, 0);
-  std::vector<std::complex<RealType>> mat_d(n * n, 0);
-  std::vector<std::complex<RealType>> mat_t(n * n, 0);
-  // exponentiating e^X = e^iY (Y hermitian)
-  // i(-iX) = X, so -iX is hermitian
-  // diagonalize -iX = UDU^T, exponentiate e^iD, and return U e^iD U^T
-  // construct hermitian analogue of mat by multiplying by -i
-  for (int i = 0; i < n; ++i)
-  {
-    for (int j = i; j < n; ++j)
-    {
-      mat_h[i + n * j] = std::complex<RealType>(0, -1.0 * mat[j][i]);
-      mat_h[j + n * i] = std::complex<RealType>(0, 1.0 * mat[j][i]);
+    const int n = mat.rows();
+    std::vector<std::complex<RealType>> mat_h(n * n, 0);
+    std::vector<RealType> eval(n, 0);
+    std::vector<std::complex<RealType>> work(2 * n, 0);
+    std::vector<RealType> rwork(3 * n, 0);
+    std::vector<std::complex<RealType>> mat_d(n * n, 0);
+    std::vector<std::complex<RealType>> mat_t(n * n, 0);
+    // exponentiating e^X = e^iY (Y hermitian)
+    // i(-iX) = X, so -iX is hermitian
+    // diagonalize -iX = UDU^T, exponentiate e^iD, and return U e^iD U^T
+    // construct hermitian analogue of mat by multiplying by -i
+    for (int i = 0; i < n; ++i) {
+        for (int j = i; j < n; ++j) {
+            mat_h[i + n * j] = std::complex<RealType>(0, -1.0 * mat[j][i]);
+            mat_h[j + n * i] = std::complex<RealType>(0, 1.0 * mat[j][i]);
+        }
     }
-  }
-  // diagonalize the matrix
-  char JOBZ('V');
-  char UPLO('U');
-  int N(n);
-  int LDA(n);
-  int LWORK(2 * n);
-  int info = 0;
-  LAPACK::heev(JOBZ, UPLO, N, &mat_h.at(0), LDA, &eval.at(0), &work.at(0), LWORK, &rwork.at(0), info);
-  if (info != 0)
-  {
-    std::ostringstream msg;
-    msg << "heev failed with info = " << info << " in RotatedSPOsT::exponentiate_antisym_matrix";
-    throw std::runtime_error(msg.str());
-  }
-  // iterate through diagonal matrix, exponentiate terms
-  for (int i = 0; i < n; ++i)
-  {
-    for (int j = 0; j < n; ++j)
-    {
-      mat_d[i + j * n] = (i == j) ? std::exp(std::complex<RealType>(0.0, eval[i])) : std::complex<RealType>(0.0, 0.0);
+    // diagonalize the matrix
+    char JOBZ('V');
+    char UPLO('U');
+    int N(n);
+    int LDA(n);
+    int LWORK(2 * n);
+    int info = 0;
+    LAPACK::heev(JOBZ, UPLO, N, &mat_h.at(0), LDA, &eval.at(0), &work.at(0),
+        LWORK, &rwork.at(0), info);
+    if (info != 0) {
+        std::ostringstream msg;
+        msg << "heev failed with info = " << info
+            << " in RotatedSPOsT::exponentiate_antisym_matrix";
+        throw std::runtime_error(msg.str());
     }
-  }
-  // perform matrix multiplication
-  // assume row major
-  BLAS::gemm('N', 'C', n, n, n, std::complex<RealType>(1.0, 0), &mat_d.at(0), n, &mat_h.at(0), n,
-             std::complex<RealType>(0.0, 0.0), &mat_t.at(0), n);
-  BLAS::gemm('N', 'N', n, n, n, std::complex<RealType>(1.0, 0), &mat_h.at(0), n, &mat_t.at(0), n,
-             std::complex<RealType>(0.0, 0.0), &mat_d.at(0), n);
-  for (int i = 0; i < n; ++i)
-    for (int j = 0; j < n; ++j)
-    {
-      if (mat_d[i + n * j].imag() > 1e-12)
-      {
-        app_log() << "warning: large imaginary value in orbital "
-                     "rotation matrix: (i,j) = ("
-                  << i << "," << j << "), im = " << mat_d[i + n * j].imag() << std::endl;
-      }
-      mat[j][i] = mat_d[i + n * j].real();
+    // iterate through diagonal matrix, exponentiate terms
+    for (int i = 0; i < n; ++i) {
+        for (int j = 0; j < n; ++j) {
+            mat_d[i + j * n] = (i == j) ?
+                std::exp(std::complex<RealType>(0.0, eval[i])) :
+                std::complex<RealType>(0.0, 0.0);
+        }
     }
+    // perform matrix multiplication
+    // assume row major
+    BLAS::gemm('N', 'C', n, n, n, std::complex<RealType>(1.0, 0), &mat_d.at(0),
+        n, &mat_h.at(0), n, std::complex<RealType>(0.0, 0.0), &mat_t.at(0), n);
+    BLAS::gemm('N', 'N', n, n, n, std::complex<RealType>(1.0, 0), &mat_h.at(0),
+        n, &mat_t.at(0), n, std::complex<RealType>(0.0, 0.0), &mat_d.at(0), n);
+    for (int i = 0; i < n; ++i)
+        for (int j = 0; j < n; ++j) {
+            if (mat_d[i + n * j].imag() > 1e-12) {
+                app_log() << "warning: large imaginary value in orbital "
+                             "rotation matrix: (i,j) = ("
+                          << i << "," << j
+                          << "), im = " << mat_d[i + n * j].imag() << std::endl;
+            }
+            mat[j][i] = mat_d[i + n * j].real();
+        }
 }
 
-template<typename T>
-void RotatedSPOsT<T>::log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output)
+template <typename T>
+void
+RotatedSPOsT<T>::log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output)
 {
-  const int n = mat.rows();
-  std::vector<RealType> mat_h(n * n, 0);
-  std::vector<RealType> eval_r(n, 0);
-  std::vector<RealType> eval_i(n, 0);
-  std::vector<RealType> mat_l(n * n, 0);
-  std::vector<RealType> work(4 * n, 0);
-
-  std::vector<std::complex<RealType>> mat_cd(n * n, 0);
-  std::vector<std::complex<RealType>> mat_cl(n * n, 0);
-  std::vector<std::complex<RealType>> mat_ch(n * n, 0);
-
-  for (int i = 0; i < n; ++i)
-    for (int j = 0; j < n; ++j)
-      mat_h[i + n * j] = mat[i][j];
-
-  // diagonalize the matrix
-  char JOBL('V');
-  char JOBR('N');
-  int N(n);
-  int LDA(n);
-  int LWORK(4 * n);
-  int info = 0;
-  LAPACK::geev(&JOBL, &JOBR, &N, &mat_h.at(0), &LDA, &eval_r.at(0), &eval_i.at(0), &mat_l.at(0), &LDA, nullptr, &LDA,
-               &work.at(0), &LWORK, &info);
-  if (info != 0)
-  {
-    std::ostringstream msg;
-    msg << "heev failed with info = " << info << " in RotatedSPOsT::log_antisym_matrix";
-    throw std::runtime_error(msg.str());
-  }
-
-  // iterate through diagonal matrix, take log
-  for (int i = 0; i < n; ++i)
-  {
-    for (int j = 0; j < n; ++j)
-    {
-      auto tmp = (i == j) ? std::log(std::complex<RealType>(eval_r[i], eval_i[i])) : std::complex<RealType>(0.0, 0.0);
-      mat_cd[i + j * n] = tmp;
-
-      if (eval_i[j] > 0.0)
-      {
-        mat_cl[i + j * n]       = std::complex<RealType>(mat_l[i + j * n], mat_l[i + (j + 1) * n]);
-        mat_cl[i + (j + 1) * n] = std::complex<RealType>(mat_l[i + j * n], -mat_l[i + (j + 1) * n]);
-      }
-      else if (!(eval_i[j] < 0.0))
-      {
-        mat_cl[i + j * n] = std::complex<RealType>(mat_l[i + j * n], 0.0);
-      }
+    const int n = mat.rows();
+    std::vector<RealType> mat_h(n * n, 0);
+    std::vector<RealType> eval_r(n, 0);
+    std::vector<RealType> eval_i(n, 0);
+    std::vector<RealType> mat_l(n * n, 0);
+    std::vector<RealType> work(4 * n, 0);
+
+    std::vector<std::complex<RealType>> mat_cd(n * n, 0);
+    std::vector<std::complex<RealType>> mat_cl(n * n, 0);
+    std::vector<std::complex<RealType>> mat_ch(n * n, 0);
+
+    for (int i = 0; i < n; ++i)
+        for (int j = 0; j < n; ++j)
+            mat_h[i + n * j] = mat[i][j];
+
+    // diagonalize the matrix
+    char JOBL('V');
+    char JOBR('N');
+    int N(n);
+    int LDA(n);
+    int LWORK(4 * n);
+    int info = 0;
+    LAPACK::geev(&JOBL, &JOBR, &N, &mat_h.at(0), &LDA, &eval_r.at(0),
+        &eval_i.at(0), &mat_l.at(0), &LDA, nullptr, &LDA, &work.at(0), &LWORK,
+        &info);
+    if (info != 0) {
+        std::ostringstream msg;
+        msg << "heev failed with info = " << info
+            << " in RotatedSPOsT::log_antisym_matrix";
+        throw std::runtime_error(msg.str());
     }
-  }
-
-  RealType one(1.0);
-  RealType zero(0.0);
-  BLAS::gemm('N', 'N', n, n, n, one, &mat_cl.at(0), n, &mat_cd.at(0), n, zero, &mat_ch.at(0), n);
-  BLAS::gemm('N', 'C', n, n, n, one, &mat_ch.at(0), n, &mat_cl.at(0), n, zero, &mat_cd.at(0), n);
-
-  for (int i = 0; i < n; ++i)
-    for (int j = 0; j < n; ++j)
-    {
-      if (mat_cd[i + n * j].imag() > 1e-12)
-      {
-        app_log() << "warning: large imaginary value in antisymmetric "
-                     "matrix: (i,j) = ("
-                  << i << "," << j << "), im = " << mat_cd[i + n * j].imag() << std::endl;
-      }
-      output[i][j] = mat_cd[i + n * j].real();
+
+    // iterate through diagonal matrix, take log
+    for (int i = 0; i < n; ++i) {
+        for (int j = 0; j < n; ++j) {
+            auto tmp = (i == j) ?
+                std::log(std::complex<RealType>(eval_r[i], eval_i[i])) :
+                std::complex<RealType>(0.0, 0.0);
+            mat_cd[i + j * n] = tmp;
+
+            if (eval_i[j] > 0.0) {
+                mat_cl[i + j * n] = std::complex<RealType>(
+                    mat_l[i + j * n], mat_l[i + (j + 1) * n]);
+                mat_cl[i + (j + 1) * n] = std::complex<RealType>(
+                    mat_l[i + j * n], -mat_l[i + (j + 1) * n]);
+            }
+            else if (!(eval_i[j] < 0.0)) {
+                mat_cl[i + j * n] =
+                    std::complex<RealType>(mat_l[i + j * n], 0.0);
+            }
+        }
     }
+
+    RealType one(1.0);
+    RealType zero(0.0);
+    BLAS::gemm('N', 'N', n, n, n, one, &mat_cl.at(0), n, &mat_cd.at(0), n, zero,
+        &mat_ch.at(0), n);
+    BLAS::gemm('N', 'C', n, n, n, one, &mat_ch.at(0), n, &mat_cl.at(0), n, zero,
+        &mat_cd.at(0), n);
+
+    for (int i = 0; i < n; ++i)
+        for (int j = 0; j < n; ++j) {
+            if (mat_cd[i + n * j].imag() > 1e-12) {
+                app_log() << "warning: large imaginary value in antisymmetric "
+                             "matrix: (i,j) = ("
+                          << i << "," << j
+                          << "), im = " << mat_cd[i + n * j].imag()
+                          << std::endl;
+            }
+            output[i][j] = mat_cd[i + n * j].real();
+        }
 }
 
-template<typename T>
-void RotatedSPOsT<T>::evaluateDerivRatios(const VirtualParticleSet& VP,
-                                          const opt_variables_type& optvars,
-                                          ValueVector& psi,
-                                          const ValueVector& psiinv,
-                                          std::vector<T>& ratios,
-                                          Matrix<T>& dratios,
-                                          int FirstIndex,
-                                          int LastIndex)
+template <typename T>
+void
+RotatedSPOsT<T>::evaluateDerivRatios(const VirtualParticleSet& VP,
+    const opt_variables_type& optvars, ValueVector& psi,
+    const ValueVector& psiinv, std::vector<T>& ratios, Matrix<T>& dratios,
+    int FirstIndex, int LastIndex)
 {
-  Phi->evaluateDetRatios(VP, psi, psiinv, ratios);
+    Phi->evaluateDetRatios(VP, psi, psiinv, ratios);
 
-  const size_t nel = LastIndex - FirstIndex;
-  const size_t nmo = Phi->getOrbitalSetSize();
+    const size_t nel = LastIndex - FirstIndex;
+    const size_t nmo = Phi->getOrbitalSetSize();
 
-  psiM_inv.resize(nel, nel);
-  psiM_all.resize(nel, nmo);
-  dpsiM_all.resize(nel, nmo);
-  d2psiM_all.resize(nel, nmo);
+    psiM_inv.resize(nel, nel);
+    psiM_all.resize(nel, nmo);
+    dpsiM_all.resize(nel, nmo);
+    d2psiM_all.resize(nel, nmo);
 
-  psiM_inv   = 0;
-  psiM_all   = 0;
-  dpsiM_all  = 0;
-  d2psiM_all = 0;
+    psiM_inv = 0;
+    psiM_all = 0;
+    dpsiM_all = 0;
+    d2psiM_all = 0;
 
-  const ParticleSet& P = VP.getRefPS();
-  int iel              = VP.refPtcl;
+    const ParticleSet& P = VP.getRefPS();
+    int iel = VP.refPtcl;
+
+    Phi->evaluate_notranspose(
+        P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
+
+    for (int i = 0; i < nel; i++)
+        for (int j = 0; j < nel; j++)
+            psiM_inv(i, j) = psiM_all(i, j);
+
+    Invert(psiM_inv.data(), nel, nel);
+
+    const T* const A(psiM_all.data());
+    const T* const Ainv(psiM_inv.data());
+    ValueMatrix T_orig;
+    T_orig.resize(nel, nmo);
 
-  Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
+    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0),
+        T_orig.data(), nmo);
 
-  for (int i = 0; i < nel; i++)
-    for (int j = 0; j < nel; j++)
-      psiM_inv(i, j) = psiM_all(i, j);
+    ValueMatrix T_mat;
+    T_mat.resize(nel, nmo);
 
-  Invert(psiM_inv.data(), nel, nel);
+    ValueVector tmp_psi;
+    tmp_psi.resize(nmo);
 
-  const T* const A(psiM_all.data());
-  const T* const Ainv(psiM_inv.data());
-  ValueMatrix T_orig;
-  T_orig.resize(nel, nmo);
+    for (int iat = 0; iat < VP.getTotalNum(); iat++) {
+        Phi->evaluateValue(VP, iat, tmp_psi);
 
-  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_orig.data(), nmo);
+        for (int j = 0; j < nmo; j++)
+            psiM_all(iel - FirstIndex, j) = tmp_psi[j];
 
-  ValueMatrix T_mat;
-  T_mat.resize(nel, nmo);
+        for (int i = 0; i < nel; i++)
+            for (int j = 0; j < nel; j++)
+                psiM_inv(i, j) = psiM_all(i, j);
 
-  ValueVector tmp_psi;
-  tmp_psi.resize(nmo);
+        Invert(psiM_inv.data(), nel, nel);
 
-  for (int iat = 0; iat < VP.getTotalNum(); iat++)
-  {
-    Phi->evaluateValue(VP, iat, tmp_psi);
+        const T* const A(psiM_all.data());
+        const T* const Ainv(psiM_inv.data());
 
-    for (int j = 0; j < nmo; j++)
-      psiM_all(iel - FirstIndex, j) = tmp_psi[j];
+        // The matrix A is rectangular.  Ainv is the inverse of the square part
+        // of the matrix. The multiply of Ainv and the square part of A is just
+        // the identity. This multiply could be reduced to Ainv and the
+        // non-square part of A.
+        BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0),
+            T_mat.data(), nmo);
+
+        for (int i = 0; i < m_act_rot_inds.size(); i++) {
+            int kk = this->myVars.where(i);
+            if (kk >= 0) {
+                const int p = m_act_rot_inds.at(i).first;
+                const int q = m_act_rot_inds.at(i).second;
+                dratios(iat, kk) = T_mat(p, q) -
+                    T_orig(p, q); // dratio size is (nknot, num_vars)
+            }
+        }
+    }
+}
+
+template <typename T>
+void
+RotatedSPOsT<T>::evaluateDerivativesWF(ParticleSet& P,
+    const opt_variables_type& optvars, Vector<T>& dlogpsi, int FirstIndex,
+    int LastIndex)
+{
+    const size_t nel = LastIndex - FirstIndex;
+    const size_t nmo = Phi->getOrbitalSetSize();
+
+    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1
+
+    psiM_inv.resize(nel, nel);
+    psiM_all.resize(nel, nmo);
+    dpsiM_all.resize(nel, nmo);
+    d2psiM_all.resize(nel, nmo);
+
+    psiM_inv = 0;
+    psiM_all = 0;
+    dpsiM_all = 0;
+    d2psiM_all = 0;
+
+    Phi->evaluate_notranspose(
+        P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
 
     for (int i = 0; i < nel; i++)
-      for (int j = 0; j < nel; j++)
-        psiM_inv(i, j) = psiM_all(i, j);
+        for (int j = 0; j < nel; j++)
+            psiM_inv(i, j) = psiM_all(i, j);
 
     Invert(psiM_inv.data(), nel, nel);
 
+    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2
     const T* const A(psiM_all.data());
     const T* const Ainv(psiM_inv.data());
-
-    // The matrix A is rectangular.  Ainv is the inverse of the square part
-    // of the matrix. The multiply of Ainv and the square part of A is just
-    // the identity. This multiply could be reduced to Ainv and the
-    // non-square part of A.
-    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_mat.data(), nmo);
-
-    for (int i = 0; i < m_act_rot_inds.size(); i++)
-    {
-      int kk = this->myVars.where(i);
-      if (kk >= 0)
-      {
-        const int p      = m_act_rot_inds.at(i).first;
-        const int q      = m_act_rot_inds.at(i).second;
-        dratios(iat, kk) = T_mat(p, q) - T_orig(p, q); // dratio size is (nknot, num_vars)
-      }
+    ValueMatrix T_mat;
+    T_mat.resize(nel, nmo);
+
+    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0),
+        T_mat.data(), nmo);
+
+    for (int i = 0; i < m_act_rot_inds.size(); i++) {
+        int kk = this->myVars.where(i);
+        if (kk >= 0) {
+            const int p = m_act_rot_inds.at(i).first;
+            const int q = m_act_rot_inds.at(i).second;
+            dlogpsi[kk] = T_mat(p, q);
+        }
     }
-  }
 }
 
-template<typename T>
-void RotatedSPOsT<T>::evaluateDerivativesWF(ParticleSet& P,
-                                            const opt_variables_type& optvars,
-                                            Vector<T>& dlogpsi,
-                                            int FirstIndex,
-                                            int LastIndex)
+template <typename T>
+void
+RotatedSPOsT<T>::evaluateDerivatives(ParticleSet& P,
+    const opt_variables_type& optvars, Vector<T>& dlogpsi,
+    Vector<T>& dhpsioverpsi, const int& FirstIndex, const int& LastIndex)
 {
-  const size_t nel = LastIndex - FirstIndex;
-  const size_t nmo = Phi->getOrbitalSetSize();
-
-  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1
+    const size_t nel = LastIndex - FirstIndex;
+    const size_t nmo = Phi->getOrbitalSetSize();
 
-  psiM_inv.resize(nel, nel);
-  psiM_all.resize(nel, nmo);
-  dpsiM_all.resize(nel, nmo);
-  d2psiM_all.resize(nel, nmo);
+    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1
+    myG_temp.resize(nel);
+    myG_J.resize(nel);
+    myL_temp.resize(nel);
+    myL_J.resize(nel);
 
-  psiM_inv   = 0;
-  psiM_all   = 0;
-  dpsiM_all  = 0;
-  d2psiM_all = 0;
+    myG_temp = 0;
+    myG_J = 0;
+    myL_temp = 0;
+    myL_J = 0;
 
-  Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
+    Bbar.resize(nel, nmo);
+    psiM_inv.resize(nel, nel);
+    psiM_all.resize(nel, nmo);
+    dpsiM_all.resize(nel, nmo);
+    d2psiM_all.resize(nel, nmo);
 
-  for (int i = 0; i < nel; i++)
-    for (int j = 0; j < nel; j++)
-      psiM_inv(i, j) = psiM_all(i, j);
+    Bbar = 0;
+    psiM_inv = 0;
+    psiM_all = 0;
+    dpsiM_all = 0;
+    d2psiM_all = 0;
 
-  Invert(psiM_inv.data(), nel, nel);
+    Phi->evaluate_notranspose(
+        P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
 
-  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2
-  const T* const A(psiM_all.data());
-  const T* const Ainv(psiM_inv.data());
-  ValueMatrix T_mat;
-  T_mat.resize(nel, nmo);
+    for (int i = 0; i < nel; i++)
+        for (int j = 0; j < nel; j++)
+            psiM_inv(i, j) = psiM_all(i, j);
 
-  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_mat.data(), nmo);
+    Invert(psiM_inv.data(), nel, nel);
 
-  for (int i = 0; i < m_act_rot_inds.size(); i++)
-  {
-    int kk = this->myVars.where(i);
-    if (kk >= 0)
-    {
-      const int p = m_act_rot_inds.at(i).first;
-      const int q = m_act_rot_inds.at(i).second;
-      dlogpsi[kk] = T_mat(p, q);
+    // current value of Gradient and Laplacian
+    //  gradient components
+    for (int a = 0; a < nel; a++)
+        for (int i = 0; i < nel; i++)
+            for (int k = 0; k < 3; k++)
+                myG_temp[a][k] += psiM_inv(i, a) * dpsiM_all(a, i)[k];
+    // laplacian components
+    for (int a = 0; a < nel; a++) {
+        for (int i = 0; i < nel; i++)
+            myL_temp[a] += psiM_inv(i, a) * d2psiM_all(a, i);
     }
-  }
-}
 
-template<typename T>
-void RotatedSPOsT<T>::evaluateDerivatives(ParticleSet& P,
-                                          const opt_variables_type& optvars,
-                                          Vector<T>& dlogpsi,
-                                          Vector<T>& dhpsioverpsi,
-                                          const int& FirstIndex,
-                                          const int& LastIndex)
-{
-  const size_t nel = LastIndex - FirstIndex;
-  const size_t nmo = Phi->getOrbitalSetSize();
-
-  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1
-  myG_temp.resize(nel);
-  myG_J.resize(nel);
-  myL_temp.resize(nel);
-  myL_J.resize(nel);
-
-  myG_temp = 0;
-  myG_J    = 0;
-  myL_temp = 0;
-  myL_J    = 0;
-
-  Bbar.resize(nel, nmo);
-  psiM_inv.resize(nel, nel);
-  psiM_all.resize(nel, nmo);
-  dpsiM_all.resize(nel, nmo);
-  d2psiM_all.resize(nel, nmo);
-
-  Bbar       = 0;
-  psiM_inv   = 0;
-  psiM_all   = 0;
-  dpsiM_all  = 0;
-  d2psiM_all = 0;
-
-  Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
-
-  for (int i = 0; i < nel; i++)
-    for (int j = 0; j < nel; j++)
-      psiM_inv(i, j) = psiM_all(i, j);
-
-  Invert(psiM_inv.data(), nel, nel);
-
-  // current value of Gradient and Laplacian
-  //  gradient components
-  for (int a = 0; a < nel; a++)
-    for (int i = 0; i < nel; i++)
-      for (int k = 0; k < 3; k++)
-        myG_temp[a][k] += psiM_inv(i, a) * dpsiM_all(a, i)[k];
-  // laplacian components
-  for (int a = 0; a < nel; a++)
-  {
+    // calculation of myG_J which will be used to represent
+    // \frac{\nabla\psi_{J}}{\psi_{J}} calculation of myL_J will be used to
+    // represent \frac{\nabla^2\psi_{J}}{\psi_{J}} IMPORTANT NOTE:  The value of
+    // P.L holds \nabla^2 ln[\psi] but we need  \frac{\nabla^2 \psi}{\psi} and
+    // this is what myL_J will hold
+    for (int a = 0, iat = FirstIndex; a < nel; a++, iat++) {
+        myG_J[a] = (P.G[iat] - myG_temp[a]);
+        myL_J[a] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[a]);
+    }
+    // possibly replace wit BLAS calls
     for (int i = 0; i < nel; i++)
-      myL_temp[a] += psiM_inv(i, a) * d2psiM_all(a, i);
-  }
-
-  // calculation of myG_J which will be used to represent
-  // \frac{\nabla\psi_{J}}{\psi_{J}} calculation of myL_J will be used to
-  // represent \frac{\nabla^2\psi_{J}}{\psi_{J}} IMPORTANT NOTE:  The value of
-  // P.L holds \nabla^2 ln[\psi] but we need  \frac{\nabla^2 \psi}{\psi} and
-  // this is what myL_J will hold
-  for (int a = 0, iat = FirstIndex; a < nel; a++, iat++)
-  {
-    myG_J[a] = (P.G[iat] - myG_temp[a]);
-    myL_J[a] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[a]);
-  }
-  // possibly replace wit BLAS calls
-  for (int i = 0; i < nel; i++)
-    for (int j = 0; j < nmo; j++)
-      Bbar(i, j) = d2psiM_all(i, j) + 2 * dot(myG_J[i], dpsiM_all(i, j)) + myL_J[i] * psiM_all(i, j);
-
-  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2
-  const T* const A(psiM_all.data());
-  const T* const Ainv(psiM_inv.data());
-  const T* const B(Bbar.data());
-  ValueMatrix T_mat;
-  ValueMatrix Y1;
-  ValueMatrix Y2;
-  ValueMatrix Y3;
-  ValueMatrix Y4;
-  T_mat.resize(nel, nmo);
-  Y1.resize(nel, nel);
-  Y2.resize(nel, nmo);
-  Y3.resize(nel, nmo);
-  Y4.resize(nel, nmo);
-
-  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_mat.data(), nmo);
-  BLAS::gemm('N', 'N', nel, nel, nel, T(1.0), B, nmo, Ainv, nel, T(0.0), Y1.data(), nel);
-  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), T_mat.data(), nmo, Y1.data(), nel, T(0.0), Y2.data(), nmo);
-  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), B, nmo, Ainv, nel, T(0.0), Y3.data(), nmo);
-
-  // possibly replace with BLAS call
-  Y4 = Y3 - Y2;
-
-  for (int i = 0; i < m_act_rot_inds.size(); i++)
-  {
-    int kk = this->myVars.where(i);
-    if (kk >= 0)
-    {
-      const int p = m_act_rot_inds.at(i).first;
-      const int q = m_act_rot_inds.at(i).second;
-      dlogpsi[kk] += T_mat(p, q);
-      dhpsioverpsi[kk] += T(-0.5) * Y4(p, q);
+        for (int j = 0; j < nmo; j++)
+            // FIXME Remove calls to real() when ParticleSet has been refactored
+            Bbar(i, j) = d2psiM_all(i, j) +
+                real(2.0 * dot(myG_J[i], dpsiM_all(i, j))) +
+                real(myL_J[i]) * psiM_all(i, j);
+
+    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2
+    const T* const A(psiM_all.data());
+    const T* const Ainv(psiM_inv.data());
+    const T* const B(Bbar.data());
+    ValueMatrix T_mat;
+    ValueMatrix Y1;
+    ValueMatrix Y2;
+    ValueMatrix Y3;
+    ValueMatrix Y4;
+    T_mat.resize(nel, nmo);
+    Y1.resize(nel, nel);
+    Y2.resize(nel, nmo);
+    Y3.resize(nel, nmo);
+    Y4.resize(nel, nmo);
+
+    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0),
+        T_mat.data(), nmo);
+    BLAS::gemm('N', 'N', nel, nel, nel, T(1.0), B, nmo, Ainv, nel, T(0.0),
+        Y1.data(), nel);
+    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), T_mat.data(), nmo, Y1.data(),
+        nel, T(0.0), Y2.data(), nmo);
+    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), B, nmo, Ainv, nel, T(0.0),
+        Y3.data(), nmo);
+
+    // possibly replace with BLAS call
+    Y4 = Y3 - Y2;
+
+    for (int i = 0; i < m_act_rot_inds.size(); i++) {
+        int kk = this->myVars.where(i);
+        if (kk >= 0) {
+            const int p = m_act_rot_inds.at(i).first;
+            const int q = m_act_rot_inds.at(i).second;
+            dlogpsi[kk] += T_mat(p, q);
+            dhpsioverpsi[kk] += T(-0.5) * Y4(p, q);
+        }
     }
-  }
 }
 
-template<typename T>
-void RotatedSPOsT<T>::evaluateDerivatives(ParticleSet& P,
-                                          const opt_variables_type& optvars,
-                                          Vector<T>& dlogpsi,
-                                          Vector<T>& dhpsioverpsi,
-                                          const T& psiCurrent,
-                                          const std::vector<T>& Coeff,
-                                          const std::vector<size_t>& C2node_up,
-                                          const std::vector<size_t>& C2node_dn,
-                                          const ValueVector& detValues_up,
-                                          const ValueVector& detValues_dn,
-                                          const GradMatrix& grads_up,
-                                          const GradMatrix& grads_dn,
-                                          const ValueMatrix& lapls_up,
-                                          const ValueMatrix& lapls_dn,
-                                          const ValueMatrix& M_up,
-                                          const ValueMatrix& M_dn,
-                                          const ValueMatrix& Minv_up,
-                                          const ValueMatrix& Minv_dn,
-                                          const GradMatrix& B_grad,
-                                          const ValueMatrix& B_lapl,
-                                          const std::vector<int>& detData_up,
-                                          const size_t N1,
-                                          const size_t N2,
-                                          const size_t NP1,
-                                          const size_t NP2,
-                                          const std::vector<std::vector<int>>& lookup_tbl)
+template <typename T>
+void
+RotatedSPOsT<T>::evaluateDerivatives(ParticleSet& P,
+    const opt_variables_type& optvars, Vector<T>& dlogpsi,
+    Vector<T>& dhpsioverpsi, const T& psiCurrent, const std::vector<T>& Coeff,
+    const std::vector<size_t>& C2node_up, const std::vector<size_t>& C2node_dn,
+    const ValueVector& detValues_up, const ValueVector& detValues_dn,
+    const GradMatrix& grads_up, const GradMatrix& grads_dn,
+    const ValueMatrix& lapls_up, const ValueMatrix& lapls_dn,
+    const ValueMatrix& M_up, const ValueMatrix& M_dn,
+    const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn,
+    const GradMatrix& B_grad, const ValueMatrix& B_lapl,
+    const std::vector<int>& detData_up, const size_t N1, const size_t N2,
+    const size_t NP1, const size_t NP2,
+    const std::vector<std::vector<int>>& lookup_tbl)
 {
-  bool recalculate(false);
-  for (int k = 0; k < this->myVars.size(); ++k)
-  {
-    int kk = this->myVars.where(k);
-    if (kk < 0)
-      continue;
-    if (optvars.recompute(kk))
-      recalculate = true;
-  }
-  if (recalculate)
-  {
-    ParticleSet::ParticleGradient myG_temp, myG_J;
-    ParticleSet::ParticleLaplacian myL_temp, myL_J;
-    const int NP = P.getTotalNum();
-    myG_temp.resize(NP);
-    myG_temp = 0.0;
-    myL_temp.resize(NP);
-    myL_temp = 0.0;
-    myG_J.resize(NP);
-    myG_J = 0.0;
-    myL_J.resize(NP);
-    myL_J            = 0.0;
-    const size_t nmo = Phi->getOrbitalSetSize();
-    const size_t nel = P.last(0) - P.first(0);
-
-    const T* restrict C_p = Coeff.data();
-    for (int i = 0; i < Coeff.size(); i++)
-    {
-      const size_t upC = C2node_up[i];
-      const size_t dnC = C2node_dn[i];
-      const T tmp1     = C_p[i] * detValues_dn[dnC];
-      const T tmp2     = C_p[i] * detValues_up[upC];
-      for (size_t k = 0, j = N1; k < NP1; k++, j++)
-      {
-        myG_temp[j] += tmp1 * grads_up(upC, k);
-        myL_temp[j] += tmp1 * lapls_up(upC, k);
-      }
-      for (size_t k = 0, j = N2; k < NP2; k++, j++)
-      {
-        myG_temp[j] += tmp2 * grads_dn(dnC, k);
-        myL_temp[j] += tmp2 * lapls_dn(dnC, k);
-      }
+    bool recalculate(false);
+    for (int k = 0; k < this->myVars.size(); ++k) {
+        int kk = this->myVars.where(k);
+        if (kk < 0)
+            continue;
+        if (optvars.recompute(kk))
+            recalculate = true;
     }
+    if (recalculate) {
+        ParticleSet::ParticleGradient myG_temp, myG_J;
+        ParticleSet::ParticleLaplacian myL_temp, myL_J;
+        const int NP = P.getTotalNum();
+        myG_temp.resize(NP);
+        myG_temp = 0.0;
+        myL_temp.resize(NP);
+        myL_temp = 0.0;
+        myG_J.resize(NP);
+        myG_J = 0.0;
+        myL_J.resize(NP);
+        myL_J = 0.0;
+        const size_t nmo = Phi->getOrbitalSetSize();
+        const size_t nel = P.last(0) - P.first(0);
+
+        const T* restrict C_p = Coeff.data();
+        for (int i = 0; i < Coeff.size(); i++) {
+            const size_t upC = C2node_up[i];
+            const size_t dnC = C2node_dn[i];
+            const T tmp1 = C_p[i] * detValues_dn[dnC];
+            const T tmp2 = C_p[i] * detValues_up[upC];
+            for (size_t k = 0, j = N1; k < NP1; k++, j++) {
+                myG_temp[j] += tmp1 * grads_up(upC, k);
+                myL_temp[j] += tmp1 * lapls_up(upC, k);
+            }
+            for (size_t k = 0, j = N2; k < NP2; k++, j++) {
+                myG_temp[j] += tmp2 * grads_dn(dnC, k);
+                myL_temp[j] += tmp2 * lapls_dn(dnC, k);
+            }
+        }
 
-    myG_temp *= (1 / psiCurrent);
-    myL_temp *= (1 / psiCurrent);
+        myG_temp *= (1 / psiCurrent);
+        myL_temp *= (1 / psiCurrent);
+
+        // calculation of myG_J which will be used to represent
+        // \frac{\nabla\psi_{J}}{\psi_{J}} calculation of myL_J will be used to
+        // represent \frac{\nabla^2\psi_{J}}{\psi_{J}} IMPORTANT NOTE:  The
+        // value of P.L holds \nabla^2 ln[\psi] but we need  \frac{\nabla^2
+        // \psi}{\psi} and this is what myL_J will hold
+        for (int iat = 0; iat < (myL_temp.size()); iat++) {
+            myG_J[iat] = (P.G[iat] - myG_temp[iat]);
+            myL_J[iat] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[iat]);
+        }
 
-    // calculation of myG_J which will be used to represent
-    // \frac{\nabla\psi_{J}}{\psi_{J}} calculation of myL_J will be used to
-    // represent \frac{\nabla^2\psi_{J}}{\psi_{J}} IMPORTANT NOTE:  The
-    // value of P.L holds \nabla^2 ln[\psi] but we need  \frac{\nabla^2
-    // \psi}{\psi} and this is what myL_J will hold
-    for (int iat = 0; iat < (myL_temp.size()); iat++)
-    {
-      myG_J[iat] = (P.G[iat] - myG_temp[iat]);
-      myL_J[iat] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[iat]);
+        table_method_eval(dlogpsi, dhpsioverpsi, myL_J, myG_J, nel, nmo,
+            psiCurrent, Coeff, C2node_up, C2node_dn, detValues_up, detValues_dn,
+            grads_up, grads_dn, lapls_up, lapls_dn, M_up, M_dn, Minv_up,
+            Minv_dn, B_grad, B_lapl, detData_up, N1, N2, NP1, NP2, lookup_tbl);
     }
-
-    table_method_eval(dlogpsi, dhpsioverpsi, myL_J, myG_J, nel, nmo, psiCurrent, Coeff, C2node_up, C2node_dn,
-                      detValues_up, detValues_dn, grads_up, grads_dn, lapls_up, lapls_dn, M_up, M_dn, Minv_up, Minv_dn,
-                      B_grad, B_lapl, detData_up, N1, N2, NP1, NP2, lookup_tbl);
-  }
 }
 
-template<typename T>
-void RotatedSPOsT<T>::evaluateDerivativesWF(ParticleSet& P,
-                                            const opt_variables_type& optvars,
-                                            Vector<ValueType>& dlogpsi,
-                                            const ValueType& psiCurrent,
-                                            const std::vector<ValueType>& Coeff,
-                                            const std::vector<size_t>& C2node_up,
-                                            const std::vector<size_t>& C2node_dn,
-                                            const ValueVector& detValues_up,
-                                            const ValueVector& detValues_dn,
-                                            const ValueMatrix& M_up,
-                                            const ValueMatrix& M_dn,
-                                            const ValueMatrix& Minv_up,
-                                            const ValueMatrix& Minv_dn,
-                                            const std::vector<int>& detData_up,
-                                            const std::vector<std::vector<int>>& lookup_tbl)
+template <typename T>
+void
+RotatedSPOsT<T>::evaluateDerivativesWF(ParticleSet& P,
+    const opt_variables_type& optvars, Vector<ValueType>& dlogpsi,
+    const ValueType& psiCurrent, const std::vector<ValueType>& Coeff,
+    const std::vector<size_t>& C2node_up, const std::vector<size_t>& C2node_dn,
+    const ValueVector& detValues_up, const ValueVector& detValues_dn,
+    const ValueMatrix& M_up, const ValueMatrix& M_dn,
+    const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn,
+    const std::vector<int>& detData_up,
+    const std::vector<std::vector<int>>& lookup_tbl)
 {
-  bool recalculate(false);
-  for (int k = 0; k < this->myVars.size(); ++k)
-  {
-    int kk = this->myVars.where(k);
-    if (kk < 0)
-      continue;
-    if (optvars.recompute(kk))
-      recalculate = true;
-  }
-  if (recalculate)
-  {
-    const size_t nmo = Phi->getOrbitalSetSize();
-    const size_t nel = P.last(0) - P.first(0);
+    bool recalculate(false);
+    for (int k = 0; k < this->myVars.size(); ++k) {
+        int kk = this->myVars.where(k);
+        if (kk < 0)
+            continue;
+        if (optvars.recompute(kk))
+            recalculate = true;
+    }
+    if (recalculate) {
+        const size_t nmo = Phi->getOrbitalSetSize();
+        const size_t nel = P.last(0) - P.first(0);
 
-    table_method_evalWF(dlogpsi, nel, nmo, psiCurrent, Coeff, C2node_up, C2node_dn, detValues_up, detValues_dn, M_up,
-                        M_dn, Minv_up, Minv_dn, detData_up, lookup_tbl);
-  }
+        table_method_evalWF(dlogpsi, nel, nmo, psiCurrent, Coeff, C2node_up,
+            C2node_dn, detValues_up, detValues_dn, M_up, M_dn, Minv_up, Minv_dn,
+            detData_up, lookup_tbl);
+    }
 }
 
-template<typename T>
-void RotatedSPOsT<T>::table_method_eval(Vector<T>& dlogpsi,
-                                        Vector<T>& dhpsioverpsi,
-                                        const ParticleSet::ParticleLaplacian& myL_J,
-                                        const ParticleSet::ParticleGradient& myG_J,
-                                        const size_t nel,
-                                        const size_t nmo,
-                                        const T& psiCurrent,
-                                        const std::vector<T>& Coeff,
-                                        const std::vector<size_t>& C2node_up,
-                                        const std::vector<size_t>& C2node_dn,
-                                        const ValueVector& detValues_up,
-                                        const ValueVector& detValues_dn,
-                                        const GradMatrix& grads_up,
-                                        const GradMatrix& grads_dn,
-                                        const ValueMatrix& lapls_up,
-                                        const ValueMatrix& lapls_dn,
-                                        const ValueMatrix& M_up,
-                                        const ValueMatrix& M_dn,
-                                        const ValueMatrix& Minv_up,
-                                        const ValueMatrix& Minv_dn,
-                                        const GradMatrix& B_grad,
-                                        const ValueMatrix& B_lapl,
-                                        const std::vector<int>& detData_up,
-                                        const size_t N1,
-                                        const size_t N2,
-                                        const size_t NP1,
-                                        const size_t NP2,
-                                        const std::vector<std::vector<int>>& lookup_tbl)
+template <typename T>
+void
+RotatedSPOsT<T>::table_method_eval(Vector<T>& dlogpsi, Vector<T>& dhpsioverpsi,
+    const ParticleSet::ParticleLaplacian& myL_J,
+    const ParticleSet::ParticleGradient& myG_J, const size_t nel,
+    const size_t nmo, const T& psiCurrent, const std::vector<T>& Coeff,
+    const std::vector<size_t>& C2node_up, const std::vector<size_t>& C2node_dn,
+    const ValueVector& detValues_up, const ValueVector& detValues_dn,
+    const GradMatrix& grads_up, const GradMatrix& grads_dn,
+    const ValueMatrix& lapls_up, const ValueMatrix& lapls_dn,
+    const ValueMatrix& M_up, const ValueMatrix& M_dn,
+    const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn,
+    const GradMatrix& B_grad, const ValueMatrix& B_lapl,
+    const std::vector<int>& detData_up, const size_t N1, const size_t N2,
+    const size_t NP1, const size_t NP2,
+    const std::vector<std::vector<int>>& lookup_tbl)
 /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 GUIDE TO THE MATICES BEING BUILT
 ----------------------------------------------
@@ -1045,8 +1037,8 @@ determiant the table method is employed to calculate the contributions to the
 parameter derivatives (dhpsioverpsi/dlogpsi)
 
   loop through unquie determinants
-	loop through parameters
-	  evaluate contributaion to dlogpsi and dhpsioverpsi
+    loop through parameters
+      evaluate contributaion to dlogpsi and dhpsioverpsi
 \noindent
 
   BLAS GUIDE  for matrix multiplication of  [  alpha * A.B + beta * C = C ]
@@ -1062,16 +1054,16 @@ This notation is inspired by http://dx.doi.org/10.1063/1.4948778
 \newline
 \hfill\break
 $
-	A_{i,j}=\phi_j(r_{i}) \\
-	T = A^{-1} \widetilde{A} \\
-	B_{i,j} =\nabla^2 \phi_{j}(r_i) + \frac{\nabla_{i}J}{J} \cdot \nabla
+    A_{i,j}=\phi_j(r_{i}) \\
+    T = A^{-1} \widetilde{A} \\
+    B_{i,j} =\nabla^2 \phi_{j}(r_i) + \frac{\nabla_{i}J}{J} \cdot \nabla
 \phi_{j}(r_{i})  + \frac{\nabla^2_i J}{J} \phi_{j}(r_{i}) \\
-	\hat{O_{I}} = \hat{O}D_{I} \\
-	D_{I}=det(A_{I}) \newline
-	\psi_{MS} = \sum_{I=0} C_{I} D_{I\uparrow}D_{I\downarrow} \\
-	\Psi_{total} = \psi_{J}\psi_{MS} \\
-	\alpha_{I} = P^{T}_{I}TQ_{I} \\
-	M_{I} = P^{T}_{I} \widetilde{M} Q_{I} = P^{T}_{I} (A^{-1}\widetilde{B} -
+    \hat{O_{I}} = \hat{O}D_{I} \\
+    D_{I}=det(A_{I}) \newline
+    \psi_{MS} = \sum_{I=0} C_{I} D_{I\uparrow}D_{I\downarrow} \\
+    \Psi_{total} = \psi_{J}\psi_{MS} \\
+    \alpha_{I} = P^{T}_{I}TQ_{I} \\
+    M_{I} = P^{T}_{I} \widetilde{M} Q_{I} = P^{T}_{I} (A^{-1}\widetilde{B} -
 A^{-1} B A^{-1}\widetilde{A} )Q_{I} \\
 $
 \newline
@@ -1090,10 +1082,10 @@ Tr[\alpha_{I}^{-1}M_{I}]*det(\alpha_{I}) \\
 Below is a translation of the shorthand I use to represent matrices independent
 of ``excitation matrix". \newline \hfill\break
 $
-	Y_{1} =  A^{-1}B   \\
-	Y_{2} = A^{-1}BA^{-1}\widetilde{A} \\
-	Y_{3} = A^{-1}\widetilde{B} \\
-	Y_{4} = \widetilde{M} = (A^{-1}\widetilde{B} - A^{-1} B A^{-1}\widetilde{A}
+    Y_{1} =  A^{-1}B   \\
+    Y_{2} = A^{-1}BA^{-1}\widetilde{A} \\
+    Y_{3} = A^{-1}\widetilde{B} \\
+    Y_{4} = \widetilde{M} = (A^{-1}\widetilde{B} - A^{-1} B A^{-1}\widetilde{A}
 )\\
 $
 \newline
@@ -1106,14 +1098,14 @@ reference matrix is always $A_{0}$ and is always the Hartree Fock Matrix.
 \newline
 \hfill\break
 $
-	Y_{5} = TQ \\
-	Y_{6} = (P^{T}TQ)^{-1} = \alpha_{I}^{-1}\\
-	Y_{7} = \alpha_{I}^{-1} P^{T} \\
-	Y_{11} = \widetilde{M}Q \\
-	Y_{23} = P^{T}\widetilde{M}Q \\
-	Y_{24} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q \\
-	Y_{25} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q\alpha_{I}^{-1} \\
-	Y_{26} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q\alpha_{I}^{-1}P^{T}\\
+    Y_{5} = TQ \\
+    Y_{6} = (P^{T}TQ)^{-1} = \alpha_{I}^{-1}\\
+    Y_{7} = \alpha_{I}^{-1} P^{T} \\
+    Y_{11} = \widetilde{M}Q \\
+    Y_{23} = P^{T}\widetilde{M}Q \\
+    Y_{24} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q \\
+    Y_{25} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q\alpha_{I}^{-1} \\
+    Y_{26} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q\alpha_{I}^{-1}P^{T}\\
 $
 \newline
 So far you will notice that I have not included up or down arrows to specify
@@ -1125,14 +1117,14 @@ derivatives. Of course the down spin expression can be retrieved by swapping the
 up and down arrows. I have dubbed any expression with lowercase p prefix as a
 "precursor" to an expression actually used... \newline \hfill\break
 $
-	\dot{C_{I}} = C_{I}*det(A_{I\downarrow})\\
-	\ddot{C_{I}} = C_{I}*\hat{O}det(A_{I\downarrow}) \\
-	pK1 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) Tr[\alpha_{I}^{-1}M_{I}]
+    \dot{C_{I}} = C_{I}*det(A_{I\downarrow})\\
+    \ddot{C_{I}} = C_{I}*\hat{O}det(A_{I\downarrow}) \\
+    pK1 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) Tr[\alpha_{I}^{-1}M_{I}]
 (Q\alpha_{I}^{-1}P^{T}) \\
-	pK2 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}) \\
-	pK3 = \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}) \\
-	pK4 = \sum_{I=1} \dot{C_{I}} det(A_{I}) (Q\alpha_{I}^{-1}P^{T}) \\
-	pK5 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1} M_{I}
+    pK2 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}) \\
+    pK3 = \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}) \\
+    pK4 = \sum_{I=1} \dot{C_{I}} det(A_{I}) (Q\alpha_{I}^{-1}P^{T}) \\
+    pK5 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1} M_{I}
 \alpha_{I}^{-1}P^{T}) \\
 $
 \newline
@@ -1140,34 +1132,34 @@ Now these p matrices will be used to make various expressions via BLAS commands.
 \newline
 \hfill\break
 $
-	K1T = const0^{-1}*pK1.T =const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+    K1T = const0^{-1}*pK1.T =const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
 Tr[\alpha_{I}^{-1}M_{I}] (Q\alpha_{I}^{-1}P^{T}T) \\
-	TK1T = T.K1T = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+    TK1T = T.K1T = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
 Tr[\alpha_{I}^{-1}M_{I}] (TQ\alpha_{I}^{-1}P^{T}T)\\ \\
-	K2AiB = const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+    K2AiB = const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
 (Q\alpha_{I}^{-1}P^{T}A^{-1}\widetilde{B})\\
-	TK2AiB = T.K2AiB = const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+    TK2AiB = T.K2AiB = const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
 (TQ\alpha_{I}^{-1}P^{T}A^{-1}\widetilde{B})\\
-	K2XA =  const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+    K2XA =  const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
 (Q\alpha_{I}^{-1}P^{T}X\widetilde{A})\\
-	TK2XA = T.K2XA = const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+    TK2XA = T.K2XA = const0^{-1}  \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
 (TQ\alpha_{I}^{-1}P^{T}X\widetilde{A})\\ \\
-	K2T = \frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
+    K2T = \frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}} det(\alpha_{I})
 (Q\alpha_{I}^{-1}P^{T}T) \\
-	TK2T = T.K2T =\frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}}
+    TK2T = T.K2T =\frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}}
 det(\alpha_{I}) (TQ\alpha_{I}^{-1}P^{T}T) \\
-	MK2T = \frac{const0}{const1} Y_{4}.K2T= const0^{-1}  \sum_{I=1} \dot{C_{I}}
+    MK2T = \frac{const0}{const1} Y_{4}.K2T= const0^{-1}  \sum_{I=1} \dot{C_{I}}
 det(\alpha_{I}) (\widetilde{M}Q\alpha_{I}^{-1}P^{T}T)\\ \\
-	K3T = const0^{-1}  \sum_{I=1} \ddot{C_{I}} det(\alpha_{I})
+    K3T = const0^{-1}  \sum_{I=1} \ddot{C_{I}} det(\alpha_{I})
 (Q\alpha_{I}^{-1}P^{T}T) \\
-	TK3T = T.K3T  = const0^{-1}  \sum_{I=1} \ddot{C_{I}} det(\alpha_{I})
+    TK3T = T.K3T  = const0^{-1}  \sum_{I=1} \ddot{C_{I}} det(\alpha_{I})
 (TQ\alpha_{I}^{-1}P^{T}T)\\ \\
-	K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (Q\alpha_{I}^{-1}P^{T}T) \\
-	TK4T = T.K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (TQ\alpha_{I}^{-1}P^{T}T)
+    K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (Q\alpha_{I}^{-1}P^{T}T) \\
+    TK4T = T.K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (TQ\alpha_{I}^{-1}P^{T}T)
 \\ \\
-	K5T =  const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}
+    K5T =  const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}
 M_{I} \alpha_{I}^{-1}P^{T} T)  \\
-	TK5T = T.K5T  = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (T Q\alpha_{I}^{-1}
+    TK5T = T.K5T  = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (T Q\alpha_{I}^{-1}
 M_{I} \alpha_{I}^{-1}P^{T} T)  \\
 $
 \newline
@@ -1187,500 +1179,524 @@ to each element will be called B_bar
 $
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
 {
-  ValueMatrix Table;
-  ValueMatrix Bbar;
-  ValueMatrix Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y11, Y23, Y24, Y25, Y26;
-  ValueMatrix pK1, K1T, TK1T, pK2, K2AiB, TK2AiB, K2XA, TK2XA, K2T, TK2T, MK2T, pK3, K3T, TK3T, pK5, K5T, TK5T;
-
-  Table.resize(nel, nmo);
-
-  Bbar.resize(nel, nmo);
-
-  Y1.resize(nel, nel);
-  Y2.resize(nel, nmo);
-  Y3.resize(nel, nmo);
-  Y4.resize(nel, nmo);
-
-  pK1.resize(nmo, nel);
-  K1T.resize(nmo, nmo);
-  TK1T.resize(nel, nmo);
-
-  pK2.resize(nmo, nel);
-  K2AiB.resize(nmo, nmo);
-  TK2AiB.resize(nel, nmo);
-  K2XA.resize(nmo, nmo);
-  TK2XA.resize(nel, nmo);
-  K2T.resize(nmo, nmo);
-  TK2T.resize(nel, nmo);
-  MK2T.resize(nel, nmo);
-
-  pK3.resize(nmo, nel);
-  K3T.resize(nmo, nmo);
-  TK3T.resize(nel, nmo);
-
-  pK5.resize(nmo, nel);
-  K5T.resize(nmo, nmo);
-  TK5T.resize(nel, nmo);
-
-  const int parameters_size(m_act_rot_inds.size());
-  const int parameter_start_index(0);
-
-  const size_t num_unique_up_dets(detValues_up.size());
-  const size_t num_unique_dn_dets(detValues_dn.size());
-
-  const T* restrict cptr = Coeff.data();
-  const size_t nc        = Coeff.size();
-  const size_t* restrict upC(C2node_up.data());
-  const size_t* restrict dnC(C2node_dn.data());
-  // B_grad holds the gradient operator
-  // B_lapl holds the laplacian operator
-  // B_bar will hold our special O operator
-
-  const int offset1(N1);
-  const int offset2(N2);
-  const int NPother(NP2);
-
-  T* T_(Table.data());
-
-  // possibly replace wit BLAS calls
-  for (int i = 0; i < nel; i++)
-    for (int j = 0; j < nmo; j++)
-      Bbar(i, j) = B_lapl(i, j) + 2 * dot(myG_J[i + offset1], B_grad(i, j)) + myL_J[i + offset1] * M_up(i, j);
-
-  const T* restrict B(Bbar.data());
-  const T* restrict A(M_up.data());
-  const T* restrict Ainv(Minv_up.data());
-  // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF
-  // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR
-  // THIS CASE
-  //  The T matrix should be calculated and stored for use
-  //  T = A^{-1} \widetilde A
-  // REMINDER: that the ValueMatrix "matrix" stores data in a row major order
-  // and that BLAS commands assume column major
-  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, RealType(0.0), T_, nmo);
-
-  BLAS::gemm('N', 'N', nel, nel, nel, T(1.0), B, nmo, Ainv, nel, RealType(0.0), Y1.data(), nel);
-  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), T_, nmo, Y1.data(), nel, RealType(0.0), Y2.data(), nmo);
-  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), B, nmo, Ainv, nel, RealType(0.0), Y3.data(), nmo);
-
-  // possibly replace with BLAS call
-  Y4 = Y3 - Y2;
-
-  // Need to create the constants: (Oi, const0, const1, const2)to take
-  // advantage of minimal BLAS commands; Oi is the special operator applied to
-  // the slater matrix "A subscript i" from the total CI expansion \hat{O_{i}}
-  //= \hat{O}D_{i} with D_{i}=det(A_{i}) and Multi-Slater component defined as
-  //\sum_{i=0} C_{i} D_{i\uparrow}D_{i\downarrow}
-  std::vector<RealType> Oi(num_unique_dn_dets);
-
-  for (int index = 0; index < num_unique_dn_dets; index++)
-    for (int iat = 0; iat < NPother; iat++)
-      Oi[index] += lapls_dn(index, iat) + 2 * dot(grads_dn(index, iat), myG_J[offset2 + iat]) +
-          myL_J[offset2 + iat] * detValues_dn[index];
-
-  // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1}
-  // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow}) const1 =
-  // C_{0}*\hat{O} det(A_{0\downarrow})+\sum_{i=1}
-  // C_{i}*\hat{O}det(A_{i\downarrow})* det(\alpha_{i\uparrow}) const2 =
-  // \sum_{i=1} C_{i}*det(A_{i\downarrow})*
-  // Tr[\alpha_{i}^{-1}M_{i}]*det(\alpha_{i})
-  RealType const0(0.0), const1(0.0), const2(0.0);
-  for (size_t i = 0; i < nc; ++i)
-  {
-    const RealType c  = cptr[i];
-    const size_t up   = upC[i];
-    const size_t down = dnC[i];
-
-    const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]);
-    const1 += c * Oi[down] * (detValues_up[up] / detValues_up[0]);
-  }
-
-  std::fill(pK1.begin(), pK1.end(), 0.0);
-  std::fill(pK2.begin(), pK2.end(), 0.0);
-  std::fill(pK3.begin(), pK3.end(), 0.0);
-  std::fill(pK5.begin(), pK5.end(), 0.0);
-
-  // Now we are going to loop through all unique determinants.
-  // The few lines above are for the reference matrix contribution.
-  // Although I start the loop below from index 0, the loop only performs
-  // actions when the index is >= 1 the detData object contains all the
-  // information about the P^T and Q matrices (projection matrices) needed in
-  // the table method
-  const int* restrict data_it = detData_up.data();
-  for (int index = 0, datum = 0; index < num_unique_up_dets; index++)
-  {
-    const int k = data_it[datum];
-
-    if (k == 0)
-    {
-      datum += 3 * k + 1;
+    ValueMatrix Table;
+    ValueMatrix Bbar;
+    ValueMatrix Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y11, Y23, Y24, Y25, Y26;
+    ValueMatrix pK1, K1T, TK1T, pK2, K2AiB, TK2AiB, K2XA, TK2XA, K2T, TK2T,
+        MK2T, pK3, K3T, TK3T, pK5, K5T, TK5T;
+
+    Table.resize(nel, nmo);
+
+    Bbar.resize(nel, nmo);
+
+    Y1.resize(nel, nel);
+    Y2.resize(nel, nmo);
+    Y3.resize(nel, nmo);
+    Y4.resize(nel, nmo);
+
+    pK1.resize(nmo, nel);
+    K1T.resize(nmo, nmo);
+    TK1T.resize(nel, nmo);
+
+    pK2.resize(nmo, nel);
+    K2AiB.resize(nmo, nmo);
+    TK2AiB.resize(nel, nmo);
+    K2XA.resize(nmo, nmo);
+    TK2XA.resize(nel, nmo);
+    K2T.resize(nmo, nmo);
+    TK2T.resize(nel, nmo);
+    MK2T.resize(nel, nmo);
+
+    pK3.resize(nmo, nel);
+    K3T.resize(nmo, nmo);
+    TK3T.resize(nel, nmo);
+
+    pK5.resize(nmo, nel);
+    K5T.resize(nmo, nmo);
+    TK5T.resize(nel, nmo);
+
+    const int parameters_size(m_act_rot_inds.size());
+    const int parameter_start_index(0);
+
+    const size_t num_unique_up_dets(detValues_up.size());
+    const size_t num_unique_dn_dets(detValues_dn.size());
+
+    const T* restrict cptr = Coeff.data();
+    const size_t nc = Coeff.size();
+    const size_t* restrict upC(C2node_up.data());
+    const size_t* restrict dnC(C2node_dn.data());
+    // B_grad holds the gradient operator
+    // B_lapl holds the laplacian operator
+    // B_bar will hold our special O operator
+
+    const int offset1(N1);
+    const int offset2(N2);
+    const int NPother(NP2);
+
+    T* T_(Table.data());
+
+    // possibly replace wit BLAS calls
+    for (int i = 0; i < nel; i++)
+        for (int j = 0; j < nmo; j++)
+            // FIXME Remove calls to real() when ParticleSet has been refactored
+            Bbar(i, j) = B_lapl(i, j) +
+                real(2.0 * dot(myG_J[i + offset1], B_grad(i, j))) +
+                real(myL_J[i + offset1]) * M_up(i, j);
+
+    const T* restrict B(Bbar.data());
+    const T* restrict A(M_up.data());
+    const T* restrict Ainv(Minv_up.data());
+    // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF
+    // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR
+    // THIS CASE
+    //  The T matrix should be calculated and stored for use
+    //  T = A^{-1} \widetilde A
+    // REMINDER: that the ValueMatrix "matrix" stores data in a row major order
+    // and that BLAS commands assume column major
+    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel,
+        RealType(0.0), T_, nmo);
+
+    BLAS::gemm('N', 'N', nel, nel, nel, T(1.0), B, nmo, Ainv, nel,
+        RealType(0.0), Y1.data(), nel);
+    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), T_, nmo, Y1.data(), nel,
+        RealType(0.0), Y2.data(), nmo);
+    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), B, nmo, Ainv, nel,
+        RealType(0.0), Y3.data(), nmo);
+
+    // possibly replace with BLAS call
+    Y4 = Y3 - Y2;
+
+    // Need to create the constants: (Oi, const0, const1, const2)to take
+    // advantage of minimal BLAS commands; Oi is the special operator applied to
+    // the slater matrix "A subscript i" from the total CI expansion \hat{O_{i}}
+    //= \hat{O}D_{i} with D_{i}=det(A_{i}) and Multi-Slater component defined as
+    //\sum_{i=0} C_{i} D_{i\uparrow}D_{i\downarrow}
+    std::vector<RealType> Oi(num_unique_dn_dets);
+
+    for (int index = 0; index < num_unique_dn_dets; index++)
+        for (int iat = 0; iat < NPother; iat++)
+            // FIXME Remove call to real() when ParticleSet has been refactored
+            Oi[index] +=
+                real(lapls_dn(index, iat) +
+                    2.0 * dot(grads_dn(index, iat), myG_J[offset2 + iat])) +
+                real(myL_J[offset2 + iat]) * detValues_dn[index];
+
+    // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1}
+    // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow}) const1 =
+    // C_{0}*\hat{O} det(A_{0\downarrow})+\sum_{i=1}
+    // C_{i}*\hat{O}det(A_{i\downarrow})* det(\alpha_{i\uparrow}) const2 =
+    // \sum_{i=1} C_{i}*det(A_{i\downarrow})*
+    // Tr[\alpha_{i}^{-1}M_{i}]*det(\alpha_{i})
+    RealType const0(0.0), const1(0.0), const2(0.0);
+    for (size_t i = 0; i < nc; ++i) {
+        const RealType c = cptr[i];
+        const size_t up = upC[i];
+        const size_t down = dnC[i];
+
+        const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]);
+        const1 += c * Oi[down] * (detValues_up[up] / detValues_up[0]);
     }
 
-    else
-    {
-      // Number of rows and cols of P^T
-      const int prows = k;
-      const int pcols = nel;
-      // Number of rows and cols of Q
-      const int qrows = nmo;
-      const int qcols = k;
-
-      Y5.resize(nel, k);
-      Y6.resize(k, k);
-
-      // Any matrix multiplication of P^T or Q is simply a projection
-      // Explicit matrix multiplication can be avoided; instead column or
-      // row copying can be done BlAS::copy(size of col/row being copied,
-      //            Matrix pointer + place to begin copying,
-      //            storage spacing (number of elements btw next row/col
-      //            element), Pointer to resultant matrix + place to begin
-      //            pasting, storage spacing of resultant matrix)
-      // For example the next 4 lines is the matrix multiplication of T*Q
-      // = Y5
-      std::fill(Y5.begin(), Y5.end(), 0.0);
-      for (int i = 0; i < k; i++)
-      {
-        BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo, Y5.data() + i, k);
-      }
-
-      std::fill(Y6.begin(), Y6.end(), 0.0);
-      for (int i = 0; i < k; i++)
-      {
-        BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, (Y6.data() + i * k), 1);
-      }
-
-      Vector<T> WS;
-      Vector<IndexType> Piv;
-      WS.resize(k);
-      Piv.resize(k);
-      std::complex<RealType> logdet = 0.0;
-      InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet);
-
-      Y11.resize(nel, k);
-      Y23.resize(k, k);
-      Y24.resize(k, k);
-      Y25.resize(k, k);
-      Y26.resize(k, nel);
-
-      std::fill(Y11.begin(), Y11.end(), 0.0);
-      for (int i = 0; i < k; i++)
-      {
-        BLAS::copy(nel, Y4.data() + (data_it[datum + 1 + k + i]), nmo, Y11.data() + i, k);
-      }
-
-      std::fill(Y23.begin(), Y23.end(), 0.0);
-      for (int i = 0; i < k; i++)
-      {
-        BLAS::copy(k, Y11.data() + (data_it[datum + 1 + i]) * k, 1, (Y23.data() + i * k), 1);
-      }
-
-      BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y23.data(), k, Y6.data(), k, RealType(0.0), Y24.data(), k);
-      BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y6.data(), k, Y24.data(), k, RealType(0.0), Y25.data(), k);
-
-      Y26.resize(k, nel);
-
-      std::fill(Y26.begin(), Y26.end(), 0.0);
-      for (int i = 0; i < k; i++)
-      {
-        BLAS::copy(k, Y25.data() + i, k, Y26.data() + (data_it[datum + 1 + i]), nel);
-      }
-
-      Y7.resize(k, nel);
-
-      std::fill(Y7.begin(), Y7.end(), 0.0);
-      for (int i = 0; i < k; i++)
-      {
-        BLAS::copy(k, Y6.data() + i, k, Y7.data() + (data_it[datum + 1 + i]), nel);
-      }
-
-      // c_Tr_AlphaI_MI is a constant contributing to constant const2
-      // c_Tr_AlphaI_MI = Tr[\alpha_{I}^{-1}(P^{T}\widetilde{M} Q)]
-      RealType c_Tr_AlphaI_MI = 0.0;
-      for (int i = 0; i < k; i++)
-      {
-        c_Tr_AlphaI_MI += Y24(i, i);
-      }
-
-      for (int p = 0; p < lookup_tbl[index].size(); p++)
-      {
-        // el_p is the element position that contains information about
-        // the CI coefficient, and det up/dn values associated with the
-        // current unique determinant
-        const int el_p(lookup_tbl[index][p]);
-        const RealType c  = cptr[el_p];
-        const size_t up   = upC[el_p];
-        const size_t down = dnC[el_p];
-
-        const RealType alpha_1(c * detValues_dn[down] * detValues_up[up] / detValues_up[0] * c_Tr_AlphaI_MI);
-        const RealType alpha_2(c * detValues_dn[down] * detValues_up[up] / detValues_up[0]);
-        const RealType alpha_3(c * Oi[down] * detValues_up[up] / detValues_up[0]);
-
-        const2 += alpha_1;
-
-        for (int i = 0; i < k; i++)
-        {
-          BLAS::axpy(nel, alpha_1, Y7.data() + i * nel, 1, pK1.data() + (data_it[datum + 1 + k + i]) * nel, 1);
-          BLAS::axpy(nel, alpha_2, Y7.data() + i * nel, 1, pK2.data() + (data_it[datum + 1 + k + i]) * nel, 1);
-          BLAS::axpy(nel, alpha_3, Y7.data() + i * nel, 1, pK3.data() + (data_it[datum + 1 + k + i]) * nel, 1);
-          BLAS::axpy(nel, alpha_2, Y26.data() + i * nel, 1, pK5.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+    std::fill(pK1.begin(), pK1.end(), 0.0);
+    std::fill(pK2.begin(), pK2.end(), 0.0);
+    std::fill(pK3.begin(), pK3.end(), 0.0);
+    std::fill(pK5.begin(), pK5.end(), 0.0);
+
+    // Now we are going to loop through all unique determinants.
+    // The few lines above are for the reference matrix contribution.
+    // Although I start the loop below from index 0, the loop only performs
+    // actions when the index is >= 1 the detData object contains all the
+    // information about the P^T and Q matrices (projection matrices) needed in
+    // the table method
+    const int* restrict data_it = detData_up.data();
+    for (int index = 0, datum = 0; index < num_unique_up_dets; index++) {
+        const int k = data_it[datum];
+
+        if (k == 0) {
+            datum += 3 * k + 1;
+        }
+
+        else {
+            // Number of rows and cols of P^T
+            const int prows = k;
+            const int pcols = nel;
+            // Number of rows and cols of Q
+            const int qrows = nmo;
+            const int qcols = k;
+
+            Y5.resize(nel, k);
+            Y6.resize(k, k);
+
+            // Any matrix multiplication of P^T or Q is simply a projection
+            // Explicit matrix multiplication can be avoided; instead column or
+            // row copying can be done BlAS::copy(size of col/row being copied,
+            //            Matrix pointer + place to begin copying,
+            //            storage spacing (number of elements btw next row/col
+            //            element), Pointer to resultant matrix + place to begin
+            //            pasting, storage spacing of resultant matrix)
+            // For example the next 4 lines is the matrix multiplication of T*Q
+            // = Y5
+            std::fill(Y5.begin(), Y5.end(), 0.0);
+            for (int i = 0; i < k; i++) {
+                BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo,
+                    Y5.data() + i, k);
+            }
+
+            std::fill(Y6.begin(), Y6.end(), 0.0);
+            for (int i = 0; i < k; i++) {
+                BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1,
+                    (Y6.data() + i * k), 1);
+            }
+
+            Vector<T> WS;
+            Vector<IndexType> Piv;
+            WS.resize(k);
+            Piv.resize(k);
+            std::complex<RealType> logdet = 0.0;
+            InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet);
+
+            Y11.resize(nel, k);
+            Y23.resize(k, k);
+            Y24.resize(k, k);
+            Y25.resize(k, k);
+            Y26.resize(k, nel);
+
+            std::fill(Y11.begin(), Y11.end(), 0.0);
+            for (int i = 0; i < k; i++) {
+                BLAS::copy(nel, Y4.data() + (data_it[datum + 1 + k + i]), nmo,
+                    Y11.data() + i, k);
+            }
+
+            std::fill(Y23.begin(), Y23.end(), 0.0);
+            for (int i = 0; i < k; i++) {
+                BLAS::copy(k, Y11.data() + (data_it[datum + 1 + i]) * k, 1,
+                    (Y23.data() + i * k), 1);
+            }
+
+            BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y23.data(), k,
+                Y6.data(), k, RealType(0.0), Y24.data(), k);
+            BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y6.data(), k,
+                Y24.data(), k, RealType(0.0), Y25.data(), k);
+
+            Y26.resize(k, nel);
+
+            std::fill(Y26.begin(), Y26.end(), 0.0);
+            for (int i = 0; i < k; i++) {
+                BLAS::copy(k, Y25.data() + i, k,
+                    Y26.data() + (data_it[datum + 1 + i]), nel);
+            }
+
+            Y7.resize(k, nel);
+
+            std::fill(Y7.begin(), Y7.end(), 0.0);
+            for (int i = 0; i < k; i++) {
+                BLAS::copy(k, Y6.data() + i, k,
+                    Y7.data() + (data_it[datum + 1 + i]), nel);
+            }
+
+            // c_Tr_AlphaI_MI is a constant contributing to constant const2
+            // c_Tr_AlphaI_MI = Tr[\alpha_{I}^{-1}(P^{T}\widetilde{M} Q)]
+            RealType c_Tr_AlphaI_MI = 0.0;
+            for (int i = 0; i < k; i++) {
+                c_Tr_AlphaI_MI += Y24(i, i);
+            }
+
+            for (int p = 0; p < lookup_tbl[index].size(); p++) {
+                // el_p is the element position that contains information about
+                // the CI coefficient, and det up/dn values associated with the
+                // current unique determinant
+                const int el_p(lookup_tbl[index][p]);
+                const RealType c = cptr[el_p];
+                const size_t up = upC[el_p];
+                const size_t down = dnC[el_p];
+
+                const RealType alpha_1(c * detValues_dn[down] *
+                    detValues_up[up] / detValues_up[0] * c_Tr_AlphaI_MI);
+                const RealType alpha_2(c * detValues_dn[down] *
+                    detValues_up[up] / detValues_up[0]);
+                const RealType alpha_3(
+                    c * Oi[down] * detValues_up[up] / detValues_up[0]);
+
+                const2 += alpha_1;
+
+                for (int i = 0; i < k; i++) {
+                    BLAS::axpy(nel, alpha_1, Y7.data() + i * nel, 1,
+                        pK1.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+                    BLAS::axpy(nel, alpha_2, Y7.data() + i * nel, 1,
+                        pK2.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+                    BLAS::axpy(nel, alpha_3, Y7.data() + i * nel, 1,
+                        pK3.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+                    BLAS::axpy(nel, alpha_2, Y26.data() + i * nel, 1,
+                        pK5.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+                }
+            }
+            datum += 3 * k + 1;
         }
-      }
-      datum += 3 * k + 1;
     }
-  }
-
-  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK1.data(), nel, RealType(0.0), K1T.data(), nmo);
-  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K1T.data(), nmo, T_, nmo, RealType(0.0), TK1T.data(), nmo);
-
-  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y3.data(), nmo, pK2.data(), nel, RealType(0.0), K2AiB.data(), nmo);
-  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2AiB.data(), nmo, T_, nmo, RealType(0.0), TK2AiB.data(), nmo);
-  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y2.data(), nmo, pK2.data(), nel, RealType(0.0), K2XA.data(), nmo);
-  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2XA.data(), nmo, T_, nmo, RealType(0.0), TK2XA.data(), nmo);
-
-  BLAS::gemm('N', 'N', nmo, nmo, nel, const1 / (const0 * const0), T_, nmo, pK2.data(), nel, RealType(0.0), K2T.data(),
-             nmo);
-  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2T.data(), nmo, T_, nmo, RealType(0.0), TK2T.data(), nmo);
-  BLAS::gemm('N', 'N', nmo, nel, nmo, const0 / const1, K2T.data(), nmo, Y4.data(), nmo, RealType(0.0), MK2T.data(),
-             nmo);
-
-  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK3.data(), nel, RealType(0.0), K3T.data(), nmo);
-  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K3T.data(), nmo, T_, nmo, RealType(0.0), TK3T.data(), nmo);
-
-  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK5.data(), nel, RealType(0.0), K5T.data(), nmo);
-  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K5T.data(), nmo, T_, nmo, RealType(0.0), TK5T.data(), nmo);
-
-  for (int mu = 0, k = parameter_start_index; k < (parameter_start_index + parameters_size); k++, mu++)
-  {
-    int kk = this->myVars.where(k);
-    if (kk >= 0)
-    {
-      const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second);
-      if (i <= nel - 1 && j > nel - 1)
-      {
-        dhpsioverpsi[kk] +=
-            T(-0.5 * Y4(i, j) -
-              0.5 *
-                  (-K5T(i, j) + K5T(j, i) + TK5T(i, j) + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) - K2XA(i, j) +
-                   K2XA(j, i) + TK2XA(i, j) - MK2T(i, j) + K1T(i, j) - K1T(j, i) - TK1T(i, j) -
-                   const2 / const1 * K2T(i, j) + const2 / const1 * K2T(j, i) + const2 / const1 * TK2T(i, j) +
-                   K3T(i, j) - K3T(j, i) - TK3T(i, j) - K2T(i, j) + K2T(j, i) + TK2T(i, j)));
-      }
-      else if (i <= nel - 1 && j <= nel - 1)
-      {
-        dhpsioverpsi[kk] +=
-            T(-0.5 * (Y4(i, j) - Y4(j, i)) -
-              0.5 *
-                  (-K5T(i, j) + K5T(j, i) + TK5T(i, j) - TK5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) +
-                   TK2AiB(j, i) - K2XA(i, j) + K2XA(j, i) + TK2XA(i, j) - TK2XA(j, i) - MK2T(i, j) + MK2T(j, i) +
-                   K1T(i, j) - K1T(j, i) - TK1T(i, j) + TK1T(j, i) - const2 / const1 * K2T(i, j) +
-                   const2 / const1 * K2T(j, i) + const2 / const1 * TK2T(i, j) - const2 / const1 * TK2T(j, i) +
-                   K3T(i, j) - K3T(j, i) - TK3T(i, j) + TK3T(j, i) - K2T(i, j) + K2T(j, i) + TK2T(i, j) - TK2T(j, i)));
-      }
-      else
-      {
-        dhpsioverpsi[kk] += T(-0.5 *
-                              (-K5T(i, j) + K5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - K2XA(i, j) + K2XA(j, i)
-
-                               + K1T(i, j) - K1T(j, i) - const2 / const1 * K2T(i, j) + const2 / const1 * K2T(j, i) +
-                               K3T(i, j) - K3T(j, i) - K2T(i, j) + K2T(j, i)));
-      }
+
+    BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK1.data(), nel,
+        RealType(0.0), K1T.data(), nmo);
+    BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K1T.data(), nmo, T_, nmo,
+        RealType(0.0), TK1T.data(), nmo);
+
+    BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y3.data(), nmo,
+        pK2.data(), nel, RealType(0.0), K2AiB.data(), nmo);
+    BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2AiB.data(), nmo, T_,
+        nmo, RealType(0.0), TK2AiB.data(), nmo);
+    BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y2.data(), nmo,
+        pK2.data(), nel, RealType(0.0), K2XA.data(), nmo);
+    BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2XA.data(), nmo, T_,
+        nmo, RealType(0.0), TK2XA.data(), nmo);
+
+    BLAS::gemm('N', 'N', nmo, nmo, nel, const1 / (const0 * const0), T_, nmo,
+        pK2.data(), nel, RealType(0.0), K2T.data(), nmo);
+    BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2T.data(), nmo, T_, nmo,
+        RealType(0.0), TK2T.data(), nmo);
+    BLAS::gemm('N', 'N', nmo, nel, nmo, const0 / const1, K2T.data(), nmo,
+        Y4.data(), nmo, RealType(0.0), MK2T.data(), nmo);
+
+    BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK3.data(), nel,
+        RealType(0.0), K3T.data(), nmo);
+    BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K3T.data(), nmo, T_, nmo,
+        RealType(0.0), TK3T.data(), nmo);
+
+    BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK5.data(), nel,
+        RealType(0.0), K5T.data(), nmo);
+    BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K5T.data(), nmo, T_, nmo,
+        RealType(0.0), TK5T.data(), nmo);
+
+    for (int mu = 0, k = parameter_start_index;
+         k < (parameter_start_index + parameters_size); k++, mu++) {
+        int kk = this->myVars.where(k);
+        if (kk >= 0) {
+            const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second);
+            if (i <= nel - 1 && j > nel - 1) {
+                dhpsioverpsi[kk] += T(-0.5 * Y4(i, j) -
+                    0.5 *
+                        (-K5T(i, j) + K5T(j, i) + TK5T(i, j) + K2AiB(i, j) -
+                            K2AiB(j, i) - TK2AiB(i, j) - K2XA(i, j) +
+                            K2XA(j, i) + TK2XA(i, j) - MK2T(i, j) + K1T(i, j) -
+                            K1T(j, i) - TK1T(i, j) -
+                            const2 / const1 * K2T(i, j) +
+                            const2 / const1 * K2T(j, i) +
+                            const2 / const1 * TK2T(i, j) + K3T(i, j) -
+                            K3T(j, i) - TK3T(i, j) - K2T(i, j) + K2T(j, i) +
+                            TK2T(i, j)));
+            }
+            else if (i <= nel - 1 && j <= nel - 1) {
+                dhpsioverpsi[kk] += T(-0.5 * (Y4(i, j) - Y4(j, i)) -
+                    0.5 *
+                        (-K5T(i, j) + K5T(j, i) + TK5T(i, j) - TK5T(j, i) +
+                            K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) +
+                            TK2AiB(j, i) - K2XA(i, j) + K2XA(j, i) +
+                            TK2XA(i, j) - TK2XA(j, i) - MK2T(i, j) +
+                            MK2T(j, i) + K1T(i, j) - K1T(j, i) - TK1T(i, j) +
+                            TK1T(j, i) - const2 / const1 * K2T(i, j) +
+                            const2 / const1 * K2T(j, i) +
+                            const2 / const1 * TK2T(i, j) -
+                            const2 / const1 * TK2T(j, i) + K3T(i, j) -
+                            K3T(j, i) - TK3T(i, j) + TK3T(j, i) - K2T(i, j) +
+                            K2T(j, i) + TK2T(i, j) - TK2T(j, i)));
+            }
+            else {
+                dhpsioverpsi[kk] += T(-0.5 *
+                    (-K5T(i, j) + K5T(j, i) + K2AiB(i, j) - K2AiB(j, i) -
+                        K2XA(i, j) + K2XA(j, i)
+
+                        + K1T(i, j) - K1T(j, i) - const2 / const1 * K2T(i, j) +
+                        const2 / const1 * K2T(j, i) + K3T(i, j) - K3T(j, i) -
+                        K2T(i, j) + K2T(j, i)));
+            }
+        }
     }
-  }
 }
 
-template<typename T>
-void RotatedSPOsT<T>::table_method_evalWF(Vector<T>& dlogpsi,
-                                          const size_t nel,
-                                          const size_t nmo,
-                                          const T& psiCurrent,
-                                          const std::vector<T>& Coeff,
-                                          const std::vector<size_t>& C2node_up,
-                                          const std::vector<size_t>& C2node_dn,
-                                          const ValueVector& detValues_up,
-                                          const ValueVector& detValues_dn,
-                                          const ValueMatrix& M_up,
-                                          const ValueMatrix& M_dn,
-                                          const ValueMatrix& Minv_up,
-                                          const ValueMatrix& Minv_dn,
-                                          const std::vector<int>& detData_up,
-                                          const std::vector<std::vector<int>>& lookup_tbl)
+template <typename T>
+void
+RotatedSPOsT<T>::table_method_evalWF(Vector<T>& dlogpsi, const size_t nel,
+    const size_t nmo, const T& psiCurrent, const std::vector<T>& Coeff,
+    const std::vector<size_t>& C2node_up, const std::vector<size_t>& C2node_dn,
+    const ValueVector& detValues_up, const ValueVector& detValues_dn,
+    const ValueMatrix& M_up, const ValueMatrix& M_dn,
+    const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn,
+    const std::vector<int>& detData_up,
+    const std::vector<std::vector<int>>& lookup_tbl)
 {
-  ValueMatrix Table;
-  ValueMatrix Y5, Y6, Y7;
-  ValueMatrix pK4, K4T, TK4T;
-
-  Table.resize(nel, nmo);
-
-  Bbar.resize(nel, nmo);
-
-  pK4.resize(nmo, nel);
-  K4T.resize(nmo, nmo);
-  TK4T.resize(nel, nmo);
-
-  const int parameters_size(m_act_rot_inds.size());
-  const int parameter_start_index(0);
-
-  const size_t num_unique_up_dets(detValues_up.size());
-  const size_t num_unique_dn_dets(detValues_dn.size());
-
-  const T* restrict cptr = Coeff.data();
-  const size_t nc        = Coeff.size();
-  const size_t* restrict upC(C2node_up.data());
-  const size_t* restrict dnC(C2node_dn.data());
-
-  T* T_(Table.data());
-
-  const T* restrict A(M_up.data());
-  const T* restrict Ainv(Minv_up.data());
-  // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF
-  // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR
-  // THIS CASE
-  //  The T matrix should be calculated and stored for use
-  //  T = A^{-1} \widetilde A
-  // REMINDER: that the ValueMatrix "matrix" stores data in a row major order
-  // and that BLAS commands assume column major
-  BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), A, nmo, Ainv, nel, RealType(0.0), T_, nmo);
-
-  // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1}
-  // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow})
-  RealType const0(0.0), const1(0.0), const2(0.0);
-  for (size_t i = 0; i < nc; ++i)
-  {
-    const RealType c  = cptr[i];
-    const size_t up   = upC[i];
-    const size_t down = dnC[i];
-
-    const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]);
-  }
-
-  std::fill(pK4.begin(), pK4.end(), 0.0);
-
-  // Now we are going to loop through all unique determinants.
-  // The few lines above are for the reference matrix contribution.
-  // Although I start the loop below from index 0, the loop only performs
-  // actions when the index is >= 1 the detData object contains all the
-  // information about the P^T and Q matrices (projection matrices) needed in
-  // the table method
-  const int* restrict data_it = detData_up.data();
-  for (int index = 0, datum = 0; index < num_unique_up_dets; index++)
-  {
-    const int k = data_it[datum];
-
-    if (k == 0)
-    {
-      datum += 3 * k + 1;
+    ValueMatrix Table;
+    ValueMatrix Y5, Y6, Y7;
+    ValueMatrix pK4, K4T, TK4T;
+
+    Table.resize(nel, nmo);
+
+    Bbar.resize(nel, nmo);
+
+    pK4.resize(nmo, nel);
+    K4T.resize(nmo, nmo);
+    TK4T.resize(nel, nmo);
+
+    const int parameters_size(m_act_rot_inds.size());
+    const int parameter_start_index(0);
+
+    const size_t num_unique_up_dets(detValues_up.size());
+    const size_t num_unique_dn_dets(detValues_dn.size());
+
+    const T* restrict cptr = Coeff.data();
+    const size_t nc = Coeff.size();
+    const size_t* restrict upC(C2node_up.data());
+    const size_t* restrict dnC(C2node_dn.data());
+
+    T* T_(Table.data());
+
+    const T* restrict A(M_up.data());
+    const T* restrict Ainv(Minv_up.data());
+    // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF
+    // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR
+    // THIS CASE
+    //  The T matrix should be calculated and stored for use
+    //  T = A^{-1} \widetilde A
+    // REMINDER: that the ValueMatrix "matrix" stores data in a row major order
+    // and that BLAS commands assume column major
+    BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), A, nmo, Ainv, nel,
+        RealType(0.0), T_, nmo);
+
+    // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1}
+    // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow})
+    RealType const0(0.0), const1(0.0), const2(0.0);
+    for (size_t i = 0; i < nc; ++i) {
+        const RealType c = cptr[i];
+        const size_t up = upC[i];
+        const size_t down = dnC[i];
+
+        const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]);
     }
 
-    else
-    {
-      // Number of rows and cols of P^T
-      const int prows = k;
-      const int pcols = nel;
-      // Number of rows and cols of Q
-      const int qrows = nmo;
-      const int qcols = k;
-
-      Y5.resize(nel, k);
-      Y6.resize(k, k);
-
-      // Any matrix multiplication of P^T or Q is simply a projection
-      // Explicit matrix multiplication can be avoided; instead column or
-      // row copying can be done BlAS::copy(size of col/row being copied,
-      //            Matrix pointer + place to begin copying,
-      //            storage spacing (number of elements btw next row/col
-      //            element), Pointer to resultant matrix + place to begin
-      //            pasting, storage spacing of resultant matrix)
-      // For example the next 4 lines is the matrix multiplication of T*Q
-      // = Y5
-      std::fill(Y5.begin(), Y5.end(), 0.0);
-      for (int i = 0; i < k; i++)
-      {
-        BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo, Y5.data() + i, k);
-      }
-
-      std::fill(Y6.begin(), Y6.end(), 0.0);
-      for (int i = 0; i < k; i++)
-      {
-        BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, (Y6.data() + i * k), 1);
-      }
-
-      Vector<T> WS;
-      Vector<IndexType> Piv;
-      WS.resize(k);
-      Piv.resize(k);
-      std::complex<RealType> logdet = 0.0;
-      InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet);
-
-      Y7.resize(k, nel);
-
-      std::fill(Y7.begin(), Y7.end(), 0.0);
-      for (int i = 0; i < k; i++)
-      {
-        BLAS::copy(k, Y6.data() + i, k, Y7.data() + (data_it[datum + 1 + i]), nel);
-      }
-
-      for (int p = 0; p < lookup_tbl[index].size(); p++)
-      {
-        // el_p is the element position that contains information about
-        // the CI coefficient, and det up/dn values associated with the
-        // current unique determinant
-        const int el_p(lookup_tbl[index][p]);
-        const RealType c  = cptr[el_p];
-        const size_t up   = upC[el_p];
-        const size_t down = dnC[el_p];
-
-        const RealType alpha_4(c * detValues_dn[down] * detValues_up[up] * (1 / psiCurrent));
-
-        for (int i = 0; i < k; i++)
-        {
-          BLAS::axpy(nel, alpha_4, Y7.data() + i * nel, 1, pK4.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+    std::fill(pK4.begin(), pK4.end(), 0.0);
+
+    // Now we are going to loop through all unique determinants.
+    // The few lines above are for the reference matrix contribution.
+    // Although I start the loop below from index 0, the loop only performs
+    // actions when the index is >= 1 the detData object contains all the
+    // information about the P^T and Q matrices (projection matrices) needed in
+    // the table method
+    const int* restrict data_it = detData_up.data();
+    for (int index = 0, datum = 0; index < num_unique_up_dets; index++) {
+        const int k = data_it[datum];
+
+        if (k == 0) {
+            datum += 3 * k + 1;
+        }
+
+        else {
+            // Number of rows and cols of P^T
+            const int prows = k;
+            const int pcols = nel;
+            // Number of rows and cols of Q
+            const int qrows = nmo;
+            const int qcols = k;
+
+            Y5.resize(nel, k);
+            Y6.resize(k, k);
+
+            // Any matrix multiplication of P^T or Q is simply a projection
+            // Explicit matrix multiplication can be avoided; instead column or
+            // row copying can be done BlAS::copy(size of col/row being copied,
+            //            Matrix pointer + place to begin copying,
+            //            storage spacing (number of elements btw next row/col
+            //            element), Pointer to resultant matrix + place to begin
+            //            pasting, storage spacing of resultant matrix)
+            // For example the next 4 lines is the matrix multiplication of T*Q
+            // = Y5
+            std::fill(Y5.begin(), Y5.end(), 0.0);
+            for (int i = 0; i < k; i++) {
+                BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo,
+                    Y5.data() + i, k);
+            }
+
+            std::fill(Y6.begin(), Y6.end(), 0.0);
+            for (int i = 0; i < k; i++) {
+                BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1,
+                    (Y6.data() + i * k), 1);
+            }
+
+            Vector<T> WS;
+            Vector<IndexType> Piv;
+            WS.resize(k);
+            Piv.resize(k);
+            std::complex<RealType> logdet = 0.0;
+            InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet);
+
+            Y7.resize(k, nel);
+
+            std::fill(Y7.begin(), Y7.end(), 0.0);
+            for (int i = 0; i < k; i++) {
+                BLAS::copy(k, Y6.data() + i, k,
+                    Y7.data() + (data_it[datum + 1 + i]), nel);
+            }
+
+            for (int p = 0; p < lookup_tbl[index].size(); p++) {
+                // el_p is the element position that contains information about
+                // the CI coefficient, and det up/dn values associated with the
+                // current unique determinant
+                const int el_p(lookup_tbl[index][p]);
+                const RealType c = cptr[el_p];
+                const size_t up = upC[el_p];
+                const size_t down = dnC[el_p];
+
+                const RealType alpha_4(c * detValues_dn[down] *
+                    detValues_up[up] * (1 / psiCurrent));
+
+                for (int i = 0; i < k; i++) {
+                    BLAS::axpy(nel, alpha_4, Y7.data() + i * nel, 1,
+                        pK4.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+                }
+            }
+            datum += 3 * k + 1;
         }
-      }
-      datum += 3 * k + 1;
     }
-  }
-
-  BLAS::gemm('N', 'N', nmo, nmo, nel, RealType(1.0), T_, nmo, pK4.data(), nel, RealType(0.0), K4T.data(), nmo);
-  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K4T.data(), nmo, T_, nmo, RealType(0.0), TK4T.data(), nmo);
-
-  for (int mu = 0, k = parameter_start_index; k < (parameter_start_index + parameters_size); k++, mu++)
-  {
-    int kk = this->myVars.where(k);
-    if (kk >= 0)
-    {
-      const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second);
-      if (i <= nel - 1 && j > nel - 1)
-      {
-        dlogpsi[kk] +=
-            T(detValues_up[0] * (Table(i, j)) * const0 * (1 / psiCurrent) + (K4T(i, j) - K4T(j, i) - TK4T(i, j)));
-      }
-      else if (i <= nel - 1 && j <= nel - 1)
-      {
-        dlogpsi[kk] += T(detValues_up[0] * (Table(i, j) - Table(j, i)) * const0 * (1 / psiCurrent) +
-                         (K4T(i, j) - TK4T(i, j) - K4T(j, i) + TK4T(j, i)));
-      }
-      else
-      {
-        dlogpsi[kk] += T((K4T(i, j) - K4T(j, i)));
-      }
+
+    BLAS::gemm('N', 'N', nmo, nmo, nel, RealType(1.0), T_, nmo, pK4.data(), nel,
+        RealType(0.0), K4T.data(), nmo);
+    BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K4T.data(), nmo, T_, nmo,
+        RealType(0.0), TK4T.data(), nmo);
+
+    for (int mu = 0, k = parameter_start_index;
+         k < (parameter_start_index + parameters_size); k++, mu++) {
+        int kk = this->myVars.where(k);
+        if (kk >= 0) {
+            const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second);
+            if (i <= nel - 1 && j > nel - 1) {
+                dlogpsi[kk] += T(detValues_up[0] * (Table(i, j)) * const0 *
+                        (1 / psiCurrent) +
+                    (K4T(i, j) - K4T(j, i) - TK4T(i, j)));
+            }
+            else if (i <= nel - 1 && j <= nel - 1) {
+                dlogpsi[kk] += T(detValues_up[0] * (Table(i, j) - Table(j, i)) *
+                        const0 * (1 / psiCurrent) +
+                    (K4T(i, j) - TK4T(i, j) - K4T(j, i) + TK4T(j, i)));
+            }
+            else {
+                dlogpsi[kk] += T((K4T(i, j) - K4T(j, i)));
+            }
+        }
     }
-  }
 }
 
-template<typename T>
-std::unique_ptr<SPOSetT<T>> RotatedSPOsT<T>::makeClone() const
+template <typename T>
+std::unique_ptr<SPOSetT<T>>
+RotatedSPOsT<T>::makeClone() const
 {
-  auto myclone = std::make_unique<RotatedSPOsT>(SPOSetT<T>::getName(), std::unique_ptr<SPOSetT<T>>(Phi->makeClone()));
-
-  myclone->params          = this->params;
-  myclone->params_supplied = this->params_supplied;
-  myclone->m_act_rot_inds  = this->m_act_rot_inds;
-  myclone->m_full_rot_inds = this->m_full_rot_inds;
-  myclone->myVars          = this->myVars;
-  myclone->myVarsFull      = this->myVarsFull;
-  myclone->history_params_ = this->history_params_;
-  myclone->use_global_rot_ = this->use_global_rot_;
-  return myclone;
+    auto myclone = std::make_unique<RotatedSPOsT>(
+        SPOSetT<T>::getName(), std::unique_ptr<SPOSetT<T>>(Phi->makeClone()));
+
+    myclone->params = this->params;
+    myclone->params_supplied = this->params_supplied;
+    myclone->m_act_rot_inds = this->m_act_rot_inds;
+    myclone->m_full_rot_inds = this->m_full_rot_inds;
+    myclone->myVars = this->myVars;
+    myclone->myVarsFull = this->myVarsFull;
+    myclone->history_params_ = this->history_params_;
+    myclone->use_global_rot_ = this->use_global_rot_;
+    return myclone;
 }
 
 // Class concrete types from ValueType
diff --git a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp
index b98952f7794..dd5781b365a 100644
--- a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp
+++ b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp
@@ -26,11 +26,10 @@
 #include "QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h"
 #include "QMCWaveFunctions/SPOSetScannerT.h"
 #if OHMMS_DIM == 3
+#include "QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h"
 #include "QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h"
-
 #if defined(QMC_COMPLEX)
 #include "QMCWaveFunctions/EinsplineSpinorSetBuilder.h"
-#include "QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h"
 #endif
 
 #if defined(HAVE_EINSPLINE)
@@ -45,6 +44,29 @@
 
 namespace qmcplusplus
 {
+template <typename T>
+struct LCAOSpinorBuilderMaker
+{
+    template <typename... TArgs>
+    std::unique_ptr<LCAOSpinorBuilderT<T>>
+    operator()(TArgs&&...) const
+    {
+        throw std::runtime_error(
+            "lcao spinors not compatible with non-complex value types");
+    }
+};
+
+template <typename T>
+struct LCAOSpinorBuilderMaker<std::complex<T>>
+{
+    template <typename... TArgs>
+    std::unique_ptr<LCAOSpinorBuilderT<std::complex<T>>>
+    operator()(TArgs&&... args) const
+    {
+        return std::make_unique<LCAOSpinorBuilderT<std::complex<T>>>(
+            std::forward<TArgs>(args)...);
+    }
+};
 
 template <typename T>
 const SPOSetT<T>*
@@ -152,14 +174,15 @@ SPOSetBuilderFactoryT<T>::createSPOSetBuilder(xmlNodePtr rootNode)
             PRE.error("Missing basisset/@source.", true);
         else
             ions = pit->second.get();
-        if (targetPtcl.isSpinor())
-#ifdef QMC_COMPLEX
-            bb = std::make_unique<LCAOSpinorBuilderT<T>>(
-                targetPtcl, *ions, myComm, rootNode);
-#else
-            PRE.error("Use of lcao spinors requires QMC_COMPLEX=1.  Rebuild "
-                      "with this option");
-#endif
+        if (targetPtcl.isSpinor()) {
+            try {
+                bb = LCAOSpinorBuilderMaker<T>{}(
+                    targetPtcl, *ions, myComm, rootNode);
+            }
+            catch (const std::exception& e) {
+                PRE.error(e.what());
+            }
+        }
         else
             bb = std::make_unique<LCAOrbitalBuilderT<T>>(
                 targetPtcl, *ions, myComm, rootNode);
@@ -253,11 +276,11 @@ SPOSetBuilderFactoryT<T>::addSPOSet(std::unique_ptr<SPOSetT<T>> spo)
 template <typename T>
 std::string SPOSetBuilderFactoryT<T>::basisset_tag = "basisset";
 
-#ifdef QMC_COMPLEX
+// #ifdef QMC_COMPLEX
 template class SPOSetBuilderFactoryT<std::complex<double>>;
 template class SPOSetBuilderFactoryT<std::complex<float>>;
-#else
+// #else
 template class SPOSetBuilderFactoryT<double>;
 template class SPOSetBuilderFactoryT<float>;
-#endif
+// #endif
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/SPOSetScannerT.h b/src/QMCWaveFunctions/SPOSetScannerT.h
index 9a3bb418a15..49885e54b44 100644
--- a/src/QMCWaveFunctions/SPOSetScannerT.h
+++ b/src/QMCWaveFunctions/SPOSetScannerT.h
@@ -1,6 +1,6 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
@@ -9,207 +9,275 @@
 // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
 //////////////////////////////////////////////////////////////////////////////////////
 
-
 #ifndef QMCPLUSPLUS_SPOSET_SCANNERT_H
 #define QMCPLUSPLUS_SPOSET_SCANNERT_H
 
+#include "OhmmsData/AttributeSet.h"
 #include "Particle/ParticleSet.h"
 #include "QMCWaveFunctions/OrbitalSetTraits.h"
 #include "QMCWaveFunctions/SPOSetT.h"
-#include "OhmmsData/AttributeSet.h"
 
 namespace qmcplusplus
 {
+template <typename T>
+struct OutputReportMakerBase
+{
+    using ValueVector = typename SPOSetT<T>::ValueVector;
+    using GradVector = typename SPOSetT<T>::GradVector;
+
+    const ValueVector& SPO_v_avg;
+    const ValueVector& SPO_l_avg;
+    const GradVector& SPO_g_avg;
+    int nknots;
+};
+
+template <typename T>
+struct OutputReportMaker : OutputReportMakerBase<T>
+{
+    using RealType = typename SPOSetT<T>::RealType;
+
+    void
+    operator()(std::ofstream& output_report) const
+    {
+        output_report
+            << "#   Report: Orb   Value_avg   Gradients_avg   Laplacian_avg"
+            << std::endl;
+        for (int iorb = 0; iorb < this->SPO_v_avg.size(); iorb++) {
+            auto one_over_nknots = static_cast<RealType>(1.0 / this->nknots);
+            output_report << "\t" << iorb << "    " << std::scientific
+                          << this->SPO_v_avg[iorb] * one_over_nknots << "   "
+                          << this->SPO_g_avg[iorb][0] * one_over_nknots << "   "
+                          << this->SPO_g_avg[iorb][1] * one_over_nknots << "   "
+                          << this->SPO_g_avg[iorb][2] * one_over_nknots << "   "
+                          << this->SPO_l_avg[iorb] * one_over_nknots
+                          << std::fixed << std::endl;
+        }
+    }
+};
+
+template <typename T>
+struct OutputReportMaker<std::complex<T>> :
+    OutputReportMakerBase<std::complex<T>>
+{
+    using RealType = typename SPOSetT<T>::RealType;
+
+    void
+    operator()(std::ofstream& output_report) const
+    {
+        output_report
+            << "#   Report: Orb   Value_avg I/R  Gradients_avg Laplacian_avg"
+            << std::endl;
+        for (int iorb = 0; iorb < this->SPO_v_avg.size(); iorb++) {
+            auto one_over_nknots = static_cast<RealType>(1.0 / this->nknots);
+            output_report << "\t" << iorb << "    " << std::scientific
+                          << this->SPO_v_avg[iorb] * one_over_nknots << "   "
+                          << this->SPO_v_avg[iorb].imag() /
+                    this->SPO_v_avg[iorb].real()
+                          << "   " << this->SPO_g_avg[iorb][0] * one_over_nknots
+                          << "   " << this->SPO_g_avg[iorb][1] * one_over_nknots
+                          << "   " << this->SPO_g_avg[iorb][2] * one_over_nknots
+                          << "   " << this->SPO_l_avg[iorb] * one_over_nknots
+                          << std::fixed << std::endl;
+        }
+    }
+};
+
 /** a scanner for all the SPO sets.
-   */
+ */
 template <typename T>
 class SPOSetScannerT
 {
 public:
-  using PtclPool    = std::map<std::string, const std::unique_ptr<ParticleSet>>;
-  using SPOSetMap   = typename SPOSetT<T>::SPOMap;
-  using RealType    = typename SPOSetT<T>::RealType;
-  using ValueVector = typename SPOSetT<T>::ValueVector;
-  using GradVector  = typename SPOSetT<T>::GradVector;
-  using HessVector  = typename SPOSetT<T>::HessVector;
-
-  RealType myfabs(RealType s) { return std::fabs(s); }
-  template<typename U>
-  std::complex<U> myfabs(std::complex<U>& s)
-  {
-    return std::complex<U>(myfabs(s.real()), myfabs(s.imag()));
-  }
-  template<typename U>
-  TinyVector<U, OHMMS_DIM> myfabs(TinyVector<U, OHMMS_DIM>& s)
-  {
-    return TinyVector<U, OHMMS_DIM>(myfabs(s[0]), myfabs(s[1]), myfabs(s[2]));
-  }
-
-  const SPOSetMap& sposets;
-  ParticleSet& target;
-  const PtclPool& ptcl_pool_;
-  ParticleSet* ions;
-
-  // construction/destruction
-  SPOSetScannerT(const SPOSetMap& sposets_in, ParticleSet& targetPtcl, const PtclPool& psets)
-      : sposets(sposets_in), target(targetPtcl), ptcl_pool_(psets), ions(0){};
-  //~SPOSetScannerT(){};
-
-  // processing scanning
-  void put(xmlNodePtr cur)
-  {
-    app_log() << "Entering the SPO set scanner!" << std::endl;
-    // check in the source particle set and search for it in the pool.
-    std::string sourcePtcl("ion0");
-    OhmmsAttributeSet aAttrib;
-    aAttrib.add(sourcePtcl, "source");
-    aAttrib.put(cur);
-    auto pit(ptcl_pool_.find(sourcePtcl));
-    if (pit == ptcl_pool_.end())
-      app_log() << "Source particle set not found. Can not be used as reference point." << std::endl;
-    else
-      ions = pit->second.get();
-
-    // scanning the SPO sets
-    xmlNodePtr cur_save = cur;
-    for (const auto& [name, sposet] : sposets)
+    using PtclPool = std::map<std::string, const std::unique_ptr<ParticleSet>>;
+    using SPOSetMap = typename SPOSetT<T>::SPOMap;
+    using RealType = typename SPOSetT<T>::RealType;
+    using ValueVector = typename SPOSetT<T>::ValueVector;
+    using GradVector = typename SPOSetT<T>::GradVector;
+    using HessVector = typename SPOSetT<T>::HessVector;
+
+    RealType
+    myfabs(RealType s)
+    {
+        return std::fabs(s);
+    }
+    template <typename U>
+    std::complex<U>
+    myfabs(std::complex<U>& s)
+    {
+        return std::complex<U>(myfabs(s.real()), myfabs(s.imag()));
+    }
+    template <typename U>
+    TinyVector<U, OHMMS_DIM>
+    myfabs(TinyVector<U, OHMMS_DIM>& s)
+    {
+        return TinyVector<U, OHMMS_DIM>(
+            myfabs(s[0]), myfabs(s[1]), myfabs(s[2]));
+    }
+
+    const SPOSetMap& sposets;
+    ParticleSet& target;
+    const PtclPool& ptcl_pool_;
+    ParticleSet* ions;
+
+    // construction/destruction
+    SPOSetScannerT(const SPOSetMap& sposets_in, ParticleSet& targetPtcl,
+        const PtclPool& psets) :
+        sposets(sposets_in),
+        target(targetPtcl),
+        ptcl_pool_(psets),
+        ions(0){};
+    //~SPOSetScannerT(){};
+
+    // processing scanning
+    void
+    put(xmlNodePtr cur)
     {
-      app_log() << "  Processing SPO " << sposet->getName() << std::endl;
-      // scanning the paths
-      cur = cur_save->children;
-      while (cur != NULL)
-      {
-        std::string trace_name("no name");
+        app_log() << "Entering the SPO set scanner!" << std::endl;
+        // check in the source particle set and search for it in the pool.
+        std::string sourcePtcl("ion0");
         OhmmsAttributeSet aAttrib;
-        aAttrib.add(trace_name, "name");
+        aAttrib.add(sourcePtcl, "source");
         aAttrib.put(cur);
-        std::string cname(getNodeName(cur));
-        std::string prefix(sposet->getName() + "_" + cname + "_" + trace_name);
-        if (cname == "path")
-        {
-          app_log() << "    Scanning a " << cname << " called " << trace_name << " and writing to "
-                    << prefix + "_v/g/l/report.dat" << std::endl;
-          auto spo = sposet->makeClone();
-          scan_path(cur, *spo, prefix);
-        }
+        auto pit(ptcl_pool_.find(sourcePtcl));
+        if (pit == ptcl_pool_.end())
+            app_log() << "Source particle set not found. Can not be used as "
+                         "reference point."
+                      << std::endl;
         else
-        {
-          if (cname != "text" && cname != "comment")
-            app_log() << "    Unknown type of scanning " << cname << std::endl;
+            ions = pit->second.get();
+
+        // scanning the SPO sets
+        xmlNodePtr cur_save = cur;
+        for (const auto& [name, sposet] : sposets) {
+            app_log() << "  Processing SPO " << sposet->getName() << std::endl;
+            // scanning the paths
+            cur = cur_save->children;
+            while (cur != NULL) {
+                std::string trace_name("no name");
+                OhmmsAttributeSet aAttrib;
+                aAttrib.add(trace_name, "name");
+                aAttrib.put(cur);
+                std::string cname(getNodeName(cur));
+                std::string prefix(
+                    sposet->getName() + "_" + cname + "_" + trace_name);
+                if (cname == "path") {
+                    app_log() << "    Scanning a " << cname << " called "
+                              << trace_name << " and writing to "
+                              << prefix + "_v/g/l/report.dat" << std::endl;
+                    auto spo = sposet->makeClone();
+                    scan_path(cur, *spo, prefix);
+                }
+                else {
+                    if (cname != "text" && cname != "comment")
+                        app_log() << "    Unknown type of scanning " << cname
+                                  << std::endl;
+                }
+                cur = cur->next;
+            }
         }
-        cur = cur->next;
-      }
-    }
-    app_log() << "Exiting the SPO set scanner!" << std::endl << std::endl;
-  }
-
-  // scanning a path
-  void scan_path(xmlNodePtr cur, SPOSetT<T>& sposet, std::string prefix)
-  {
-    std::string file_name;
-    file_name = prefix + "_v.dat";
-    std::ofstream output_v(file_name.c_str());
-    file_name = prefix + "_g.dat";
-    std::ofstream output_g(file_name.c_str());
-    file_name = prefix + "_l.dat";
-    std::ofstream output_l(file_name.c_str());
-    file_name = prefix + "_report.dat";
-    std::ofstream output_report(file_name.c_str());
-
-    int nknots(2);
-    int from_atom(-1);
-    int to_atom(-1);
-    TinyVector<double, OHMMS_DIM> from_pos(0.0, 0.0, 0.0);
-    TinyVector<double, OHMMS_DIM> to_pos(0.0, 0.0, 0.0);
-
-    OhmmsAttributeSet aAttrib;
-    aAttrib.add(nknots, "nknots");
-    aAttrib.add(from_atom, "from_atom");
-    aAttrib.add(to_atom, "to_atom");
-    aAttrib.add(from_pos, "from_pos");
-    aAttrib.add(to_pos, "to_pos");
-    aAttrib.put(cur);
-
-    // sanity check
-    if (nknots < 2)
-      nknots = 2;
-    // check out the reference atom coordinates
-    if (ions)
-    {
-      if (from_atom >= 0 && from_atom < ions->R.size())
-        from_pos = ions->R[from_atom];
-      if (to_atom >= 0 && to_atom < ions->R.size())
-        to_pos = ions->R[to_atom];
+        app_log() << "Exiting the SPO set scanner!" << std::endl << std::endl;
     }
 
-    // prepare a fake particle set
-    ValueVector SPO_v, SPO_l, SPO_v_avg, SPO_l_avg;
-    GradVector SPO_g, SPO_g_avg;
-    int OrbitalSize(sposet.size());
-    SPO_v.resize(OrbitalSize);
-    SPO_g.resize(OrbitalSize);
-    SPO_l.resize(OrbitalSize);
-    SPO_v_avg.resize(OrbitalSize);
-    SPO_g_avg.resize(OrbitalSize);
-    SPO_l_avg.resize(OrbitalSize);
-    SPO_v_avg      = 0.0;
-    SPO_g_avg      = 0.0;
-    SPO_l_avg      = 0.0;
-    double Delta   = 1.0 / (nknots - 1);
-    int elec_count = target.R.size();
-    auto R_saved   = target.R;
-    ParticleSet::SingleParticlePos zero_pos(0.0, 0.0, 0.0);
-    for (int icount = 0, ind = 0; icount < nknots; icount++, ind++)
+    // scanning a path
+    void
+    scan_path(xmlNodePtr cur, SPOSetT<T>& sposet, std::string prefix)
     {
-      if (ind == elec_count)
-        ind = 0;
-      target.R[ind][0] = (to_pos[0] - from_pos[0]) * Delta * icount + from_pos[0];
-      target.R[ind][1] = (to_pos[1] - from_pos[1]) * Delta * icount + from_pos[1];
-      target.R[ind][2] = (to_pos[2] - from_pos[2]) * Delta * icount + from_pos[2];
-      target.makeMove(ind, zero_pos);
-      sposet.evaluateVGL(target, ind, SPO_v, SPO_g, SPO_l);
-      std::ostringstream o;
-      o << "x_y_z  " << std::fixed << std::setprecision(7) << target.R[ind][0] << " " << target.R[ind][1] << " "
-        << target.R[ind][2];
-      output_v << o.str() << " : " << std::scientific << std::setprecision(12);
-      output_g << o.str() << " : " << std::scientific << std::setprecision(12);
-      output_l << o.str() << " : " << std::scientific << std::setprecision(12);
-      for (int iorb = 0; iorb < OrbitalSize; iorb++)
-      {
-        SPO_v_avg[iorb] += myfabs(SPO_v[iorb]);
-        SPO_g_avg[iorb] += myfabs(SPO_g[iorb]);
-        SPO_l_avg[iorb] += myfabs(SPO_l[iorb]);
-        output_v << SPO_v[iorb] << "  ";
-        output_g << SPO_g[iorb][0] << "  " << SPO_g[iorb][1] << "  " << SPO_g[iorb][2] << "  ";
-        output_l << SPO_l[iorb] << "  ";
-      }
-      output_v << std::endl;
-      output_g << std::endl;
-      output_l << std::endl;
+        std::string file_name;
+        file_name = prefix + "_v.dat";
+        std::ofstream output_v(file_name.c_str());
+        file_name = prefix + "_g.dat";
+        std::ofstream output_g(file_name.c_str());
+        file_name = prefix + "_l.dat";
+        std::ofstream output_l(file_name.c_str());
+        file_name = prefix + "_report.dat";
+        std::ofstream output_report(file_name.c_str());
+
+        int nknots(2);
+        int from_atom(-1);
+        int to_atom(-1);
+        TinyVector<double, OHMMS_DIM> from_pos(0.0, 0.0, 0.0);
+        TinyVector<double, OHMMS_DIM> to_pos(0.0, 0.0, 0.0);
+
+        OhmmsAttributeSet aAttrib;
+        aAttrib.add(nknots, "nknots");
+        aAttrib.add(from_atom, "from_atom");
+        aAttrib.add(to_atom, "to_atom");
+        aAttrib.add(from_pos, "from_pos");
+        aAttrib.add(to_pos, "to_pos");
+        aAttrib.put(cur);
+
+        // sanity check
+        if (nknots < 2)
+            nknots = 2;
+        // check out the reference atom coordinates
+        if (ions) {
+            if (from_atom >= 0 && from_atom < ions->R.size())
+                from_pos = ions->R[from_atom];
+            if (to_atom >= 0 && to_atom < ions->R.size())
+                to_pos = ions->R[to_atom];
+        }
+
+        // prepare a fake particle set
+        ValueVector SPO_v, SPO_l, SPO_v_avg, SPO_l_avg;
+        GradVector SPO_g, SPO_g_avg;
+        int OrbitalSize(sposet.size());
+        SPO_v.resize(OrbitalSize);
+        SPO_g.resize(OrbitalSize);
+        SPO_l.resize(OrbitalSize);
+        SPO_v_avg.resize(OrbitalSize);
+        SPO_g_avg.resize(OrbitalSize);
+        SPO_l_avg.resize(OrbitalSize);
+        SPO_v_avg = 0.0;
+        SPO_g_avg = 0.0;
+        SPO_l_avg = 0.0;
+        double Delta = 1.0 / (nknots - 1);
+        int elec_count = target.R.size();
+        auto R_saved = target.R;
+        ParticleSet::SingleParticlePos zero_pos(0.0, 0.0, 0.0);
+        for (int icount = 0, ind = 0; icount < nknots; icount++, ind++) {
+            if (ind == elec_count)
+                ind = 0;
+            target.R[ind][0] =
+                (to_pos[0] - from_pos[0]) * Delta * icount + from_pos[0];
+            target.R[ind][1] =
+                (to_pos[1] - from_pos[1]) * Delta * icount + from_pos[1];
+            target.R[ind][2] =
+                (to_pos[2] - from_pos[2]) * Delta * icount + from_pos[2];
+            target.makeMove(ind, zero_pos);
+            sposet.evaluateVGL(target, ind, SPO_v, SPO_g, SPO_l);
+            std::ostringstream o;
+            o << "x_y_z  " << std::fixed << std::setprecision(7)
+              << target.R[ind][0] << " " << target.R[ind][1] << " "
+              << target.R[ind][2];
+            output_v << o.str() << " : " << std::scientific
+                     << std::setprecision(12);
+            output_g << o.str() << " : " << std::scientific
+                     << std::setprecision(12);
+            output_l << o.str() << " : " << std::scientific
+                     << std::setprecision(12);
+            for (int iorb = 0; iorb < OrbitalSize; iorb++) {
+                SPO_v_avg[iorb] += myfabs(SPO_v[iorb]);
+                SPO_g_avg[iorb] += myfabs(SPO_g[iorb]);
+                SPO_l_avg[iorb] += myfabs(SPO_l[iorb]);
+                output_v << SPO_v[iorb] << "  ";
+                output_g << SPO_g[iorb][0] << "  " << SPO_g[iorb][1] << "  "
+                         << SPO_g[iorb][2] << "  ";
+                output_l << SPO_l[iorb] << "  ";
+            }
+            output_v << std::endl;
+            output_g << std::endl;
+            output_l << std::endl;
+        }
+        // restore the whole target.
+        target.R = R_saved;
+        target.update();
+        OutputReportMaker<T>{SPO_v_avg, SPO_l_avg, SPO_g_avg, nknots}(
+            output_report);
+        output_v.close();
+        output_g.close();
+        output_l.close();
+        output_report.close();
     }
-    // restore the whole target.
-    target.R = R_saved;
-    target.update();
-#ifdef QMC_COMPLEX
-    output_report << "#   Report: Orb   Value_avg I/R  Gradients_avg  Laplacian_avg" << std::endl;
-#else
-    output_report << "#   Report: Orb   Value_avg   Gradients_avg   Laplacian_avg" << std::endl;
-#endif
-    for (int iorb = 0; iorb < OrbitalSize; iorb++)
-      output_report << "\t" << iorb << "    " << std::scientific
-                    << SPO_v_avg[iorb] * static_cast<RealType>(1.0 / nknots) << "   "
-#ifdef QMC_COMPLEX
-                    << SPO_v_avg[iorb].imag() / SPO_v_avg[iorb].real() << "   "
-#endif
-                    << SPO_g_avg[iorb][0] * static_cast<RealType>(1.0 / nknots) << "   "
-                    << SPO_g_avg[iorb][1] * static_cast<RealType>(1.0 / nknots) << "   "
-                    << SPO_g_avg[iorb][2] * static_cast<RealType>(1.0 / nknots) << "   "
-                    << SPO_l_avg[iorb] * static_cast<RealType>(1.0 / nknots) << std::fixed << std::endl;
-    output_v.close();
-    output_g.close();
-    output_l.close();
-    output_report.close();
-  }
 };
 } // namespace qmcplusplus